131d157ae1ac2cd9c787dc3c1d28e64c682803844Jia Liu//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//
348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//                     The LLVM Compiler Infrastructure
448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//
548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng// This file is distributed under the University of Illinois Open Source
648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng// License. See LICENSE.TXT for details.
748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//
848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//===----------------------------------------------------------------------===//
948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//
// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
// multiply and add / sub instructions) when special VMLx hazards are detected.
1248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//
1348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//===----------------------------------------------------------------------===//
1448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
1548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "ARM.h"
1648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "ARMBaseInstrInfo.h"
1784c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson#include "ARMSubtarget.h"
18d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/ADT/SmallPtrSet.h"
19d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/ADT/Statistic.h"
20d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/CodeGen/MachineFunctionPass.h"
2148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/CodeGen/MachineInstr.h"
2248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/CodeGen/MachineInstrBuilder.h"
2348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/CodeGen/MachineRegisterInfo.h"
2448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/Support/CommandLine.h"
2548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/Support/Debug.h"
2648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/Support/raw_ostream.h"
27d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Target/TargetRegisterInfo.h"
2848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengusing namespace llvm;
2948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
30dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "mlx-expansion"
31dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
3248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengstatic cl::opt<bool>
3348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
3448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengstatic cl::opt<unsigned>
3548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengExpandLimit("expand-limit", cl::init(~0U), cl::Hidden);
3648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
3748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengSTATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded");
3848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
3948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengnamespace {
4048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  struct MLxExpansion : public MachineFunctionPass {
4148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    static char ID;
4248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MLxExpansion() : MachineFunctionPass(ID) {}
4348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
4436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    bool runOnMachineFunction(MachineFunction &Fn) override;
4548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
4636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    const char *getPassName() const override {
4748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      return "ARM MLA / MLS expansion pass";
4848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    }
4948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
5048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  private:
5148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    const ARMBaseInstrInfo *TII;
5248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    const TargetRegisterInfo *TRI;
5348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MachineRegisterInfo *MRI;
5448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
55616471d4bfe4717fa86259ff4534703357b3b723Silviu Baranga    bool isLikeA9;
56eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    bool isSwift;
5748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    unsigned MIIdx;
5848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MachineInstr* LastMIs[4];
5984c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson    SmallPtrSet<MachineInstr*, 4> IgnoreStall;
6048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
6148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    void clearStack();
6248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    void pushStack(MachineInstr *MI);
6348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MachineInstr *getAccDefMI(MachineInstr *MI) const;
6448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    unsigned getDefReg(MachineInstr *MI) const;
65eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    bool hasLoopHazard(MachineInstr *MI) const;
6648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
6784c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson    bool FindMLxHazard(MachineInstr *MI);
6848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
6948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng                                unsigned MulOpc, unsigned AddSubOpc,
7048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng                                bool NegAcc, bool HasLane);
7148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    bool ExpandFPMLxInstructions(MachineBasicBlock &MBB);
7248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  };
7348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  char MLxExpansion::ID = 0;
7448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
7548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
7648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengvoid MLxExpansion::clearStack() {
77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  std::fill(LastMIs, LastMIs + 4, nullptr);
7848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MIIdx = 0;
7948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
8048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
8148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengvoid MLxExpansion::pushStack(MachineInstr *MI) {
8248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  LastMIs[MIIdx] = MI;
8348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  if (++MIIdx == 4)
8448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MIIdx = 0;
8548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
8648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
8748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengMachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
8848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  // Look past COPY and INSERT_SUBREG instructions to find the
8948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  // real definition MI. This is important for _sfp instructions.
9048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned Reg = MI->getOperand(1).getReg();
9148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  if (TargetRegisterInfo::isPhysicalRegister(Reg))
92dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return nullptr;
9348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
9448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MachineBasicBlock *MBB = MI->getParent();
9548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MachineInstr *DefMI = MRI->getVRegDef(Reg);
9648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  while (true) {
9748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    if (DefMI->getParent() != MBB)
9848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      break;
9948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    if (DefMI->isCopyLike()) {
10048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      Reg = DefMI->getOperand(1).getReg();
10148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
10248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        DefMI = MRI->getVRegDef(Reg);
10348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        continue;
10448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      }
10548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    } else if (DefMI->isInsertSubreg()) {
10648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      Reg = DefMI->getOperand(2).getReg();
10748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
10848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        DefMI = MRI->getVRegDef(Reg);
10948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        continue;
11048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      }
11148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    }
11248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    break;
11348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  }
11448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  return DefMI;
11548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
11648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
11748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengunsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
11848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned Reg = MI->getOperand(0).getReg();
11948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
12048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      !MRI->hasOneNonDBGUse(Reg))
12148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    return Reg;
12248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
12348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MachineBasicBlock *MBB = MI->getParent();
12436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg);
12548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  if (UseMI->getParent() != MBB)
12648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    return Reg;
12748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
12848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
12948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    Reg = UseMI->getOperand(0).getReg();
13048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
13148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        !MRI->hasOneNonDBGUse(Reg))
13248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      return Reg;
13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    UseMI = &*MRI->use_instr_nodbg_begin(Reg);
13448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    if (UseMI->getParent() != MBB)
13548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      return Reg;
13648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  }
13748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
13848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  return Reg;
13948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
14048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
141eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson/// hasLoopHazard - Check whether an MLx instruction is chained to itself across
142eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson/// a single-MBB loop.
143eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilsonbool MLxExpansion::hasLoopHazard(MachineInstr *MI) const {
144eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  unsigned Reg = MI->getOperand(1).getReg();
145eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  if (TargetRegisterInfo::isPhysicalRegister(Reg))
146eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    return false;
147eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson
148eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  MachineBasicBlock *MBB = MI->getParent();
149eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  MachineInstr *DefMI = MRI->getVRegDef(Reg);
150eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  while (true) {
151eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilsonouter_continue:
152eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    if (DefMI->getParent() != MBB)
153eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      break;
154eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson
155eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    if (DefMI->isPHI()) {
156eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
157eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson        if (DefMI->getOperand(i + 1).getMBB() == MBB) {
158eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson          unsigned SrcReg = DefMI->getOperand(i).getReg();
159eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson          if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
160eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson            DefMI = MRI->getVRegDef(SrcReg);
161eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson            goto outer_continue;
162eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson          }
163eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson        }
164eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      }
165eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    } else if (DefMI->isCopyLike()) {
166eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      Reg = DefMI->getOperand(1).getReg();
167eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
168eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson        DefMI = MRI->getVRegDef(Reg);
169eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson        continue;
170eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      }
171eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    } else if (DefMI->isInsertSubreg()) {
172eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      Reg = DefMI->getOperand(2).getReg();
173eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
174eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson        DefMI = MRI->getVRegDef(Reg);
175eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson        continue;
176eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson      }
177eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    }
178eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson
179eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    break;
180eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  }
181eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson
182eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  return DefMI == MI;
183eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson}
184eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson
18548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengbool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
18648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  // FIXME: Detect integer instructions properly.
187e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng  const MCInstrDesc &MCID = MI->getDesc();
188e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng  unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
1895a96b3dad2f634c9081c8b2b6c2575441dc5a2bdEvan Cheng  if (MI->mayStore())
19048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    return false;
191e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng  unsigned Opcode = MCID.getOpcode();
1926557bce3ec8d5a82b2ea299a18cb51677b299633Evan Cheng  if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
1936557bce3ec8d5a82b2ea299a18cb51677b299633Evan Cheng    return false;
1946557bce3ec8d5a82b2ea299a18cb51677b299633Evan Cheng  if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
1956557bce3ec8d5a82b2ea299a18cb51677b299633Evan Cheng    return MI->readsRegister(Reg, TRI);
19648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  return false;
19748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
19848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
199eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilsonstatic bool isFpMulInstruction(unsigned Opcode) {
200eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  switch (Opcode) {
201eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  case ARM::VMULS:
202eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  case ARM::VMULfd:
203eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  case ARM::VMULfq:
204eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  case ARM::VMULD:
205eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  case ARM::VMULslfd:
206eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  case ARM::VMULslfq:
207eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    return true;
208eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  default:
209eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    return false;
210eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  }
211eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson}
21248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
21384c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilsonbool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
21448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  if (NumExpand >= ExpandLimit)
21548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    return false;
21648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
21748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  if (ForceExapnd)
21848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    return true;
21948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
22048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MachineInstr *DefMI = getAccDefMI(MI);
22184c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson  if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
22248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    // r0 = vmla
22348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    // r3 = vmla r0, r1, r2
22448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    // takes 16 - 17 cycles
22548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    //
22648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    // r0 = vmla
22748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    // r4 = vmul r1, r2
22848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    // r3 = vadd r0, r4
22948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
23084c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson    IgnoreStall.insert(DefMI);
23148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    return true;
23284c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson  }
23384c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson
234eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  // On Swift, we mostly care about hazards from multiplication instructions
235eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  // writing the accumulator and the pipelining of loop iterations by out-of-
236eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  // order execution.
237eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  if (isSwift)
238eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson    return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI);
239eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson
24084c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson  if (IgnoreStall.count(MI))
24184c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson    return false;
24248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
24348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the
24448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
24548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  // preserves the in-order retirement of the instructions.
24648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  // Look at the next few instructions, if *most* of them can cause hazards,
24748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  // then the scheduler can't *fix* this, we'd better break up the VMLA.
248616471d4bfe4717fa86259ff4534703357b3b723Silviu Baranga  unsigned Limit1 = isLikeA9 ? 1 : 4;
249616471d4bfe4717fa86259ff4534703357b3b723Silviu Baranga  unsigned Limit2 = isLikeA9 ? 1 : 4;
25048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  for (unsigned i = 1; i <= 4; ++i) {
25148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    int Idx = ((int)MIIdx - i + 4) % 4;
25248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MachineInstr *NextMI = LastMIs[Idx];
25348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    if (!NextMI)
25448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      continue;
25548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
25684c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson    if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
25784c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson      if (i <= Limit1)
25884c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson        return true;
25984c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson    }
26048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
26148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    // Look for VMLx RAW hazard.
26284c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson    if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
26348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      return true;
26448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  }
26548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
26648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  return false;
26748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
26848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
26948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair
27048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng/// of MUL + ADD / SUB instructions.
27148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengvoid
27248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengMLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
27348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng                                     unsigned MulOpc, unsigned AddSubOpc,
27448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng                                     bool NegAcc, bool HasLane) {
27548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned DstReg = MI->getOperand(0).getReg();
27648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  bool DstDead = MI->getOperand(0).isDead();
27748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned AccReg = MI->getOperand(1).getReg();
27848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned Src1Reg = MI->getOperand(2).getReg();
27948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned Src2Reg = MI->getOperand(3).getReg();
28048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  bool Src1Kill = MI->getOperand(2).isKill();
28148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  bool Src2Kill = MI->getOperand(3).isKill();
28248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0;
28348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned NextOp = HasLane ? 5 : 4;
28448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
28548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned PredReg = MI->getOperand(++NextOp).getReg();
28648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
287e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng  const MCInstrDesc &MCID1 = TII->get(MulOpc);
288e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng  const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
289397fc4874efe9c17e737d4c5c50bd19dc3bf27f5Jakob Stoklund Olesen  const MachineFunction &MF = *MI->getParent()->getParent();
290397fc4874efe9c17e737d4c5c50bd19dc3bf27f5Jakob Stoklund Olesen  unsigned TmpReg = MRI->createVirtualRegister(
291397fc4874efe9c17e737d4c5c50bd19dc3bf27f5Jakob Stoklund Olesen                      TII->getRegClass(MCID1, 0, TRI, MF));
29248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
293ddfd1377d2e4154d44dc3ad217735adc15af2e3fEvan Cheng  MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
29448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    .addReg(Src1Reg, getKillRegState(Src1Kill))
29548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    .addReg(Src2Reg, getKillRegState(Src2Kill));
29648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  if (HasLane)
29748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MIB.addImm(LaneImm);
29848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MIB.addImm(Pred).addReg(PredReg);
29948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
300ddfd1377d2e4154d44dc3ad217735adc15af2e3fEvan Cheng  MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
30148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
30248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
30348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  if (NegAcc) {
30448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    bool AccKill = MRI->hasOneNonDBGUse(AccReg);
30548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MIB.addReg(TmpReg, getKillRegState(true))
30648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng       .addReg(AccReg, getKillRegState(AccKill));
30748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  } else {
30848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true));
30948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  }
31048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MIB.addImm(Pred).addReg(PredReg);
31148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
31248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  DEBUG({
31348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      dbgs() << "Expanding: " << *MI;
31448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      dbgs() << "  to:\n";
31548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      MachineBasicBlock::iterator MII = MI;
31636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      MII = std::prev(MII);
31748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      MachineInstr &MI2 = *MII;
31836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      MII = std::prev(MII);
31948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      MachineInstr &MI1 = *MII;
32048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      dbgs() << "    " << MI1;
32148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      dbgs() << "    " << MI2;
32248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng   });
32348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
32448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MI->eraseFromParent();
32548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  ++NumExpand;
32648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
32748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
32848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengbool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
32948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  bool Changed = false;
33048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
33148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  clearStack();
33284c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson  IgnoreStall.clear();
33348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
33448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  unsigned Skip = 0;
33548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
33648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  while (MII != E) {
33748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    MachineInstr *MI = &*MII;
33848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
33936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) {
34048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      ++MII;
34148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      continue;
34248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    }
34348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
344e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng    const MCInstrDesc &MCID = MI->getDesc();
3455a96b3dad2f634c9081c8b2b6c2575441dc5a2bdEvan Cheng    if (MI->isBarrier()) {
34648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      clearStack();
34748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      Skip = 0;
34848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      ++MII;
34948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      continue;
35048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    }
35148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
352e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng    unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
35348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    if (Domain == ARMII::DomainGeneral) {
35448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      if (++Skip == 2)
35548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        // Assume dual issues of non-VFP / NEON instructions.
356dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        pushStack(nullptr);
35748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    } else {
35848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      Skip = 0;
35948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
36048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      unsigned MulOpc, AddSubOpc;
36148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      bool NegAcc, HasLane;
362e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng      if (!TII->isFpMLxInstruction(MCID.getOpcode(),
36348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng                                   MulOpc, AddSubOpc, NegAcc, HasLane) ||
36448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng          !FindMLxHazard(MI))
36548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        pushStack(MI);
36648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      else {
36748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
36848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        E = MBB.rend(); // May have changed if MI was the 1st instruction.
36948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        Changed = true;
37048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng        continue;
37148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng      }
37248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    }
37348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
37448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    ++MII;
37548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  }
37648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
37748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  return Changed;
37848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
37948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
38048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengbool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
38148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
38248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  TRI = Fn.getTarget().getRegisterInfo();
38348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  MRI = &Fn.getRegInfo();
38484c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson  const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
385eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  isLikeA9 = STI->isLikeA9() || STI->isSwift();
386eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson  isSwift = STI->isSwift();
38748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
38848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  bool Modified = false;
38936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (MachineBasicBlock &MBB : Fn)
39048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng    Modified |= ExpandFPMLxInstructions(MBB);
39148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
39248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  return Modified;
39348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
39448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng
39548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengFunctionPass *llvm::createMLxExpansionPass() {
39648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng  return new MLxExpansion();
39748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng}
398