//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
// multiply and add / sub instructions) when special VMLx hazards are detected.
1248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng// 1348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng//===----------------------------------------------------------------------===// 1448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 1548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "ARM.h" 1648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "ARMBaseInstrInfo.h" 1784c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson#include "ARMSubtarget.h" 18d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/ADT/SmallPtrSet.h" 19d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/ADT/Statistic.h" 20d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/CodeGen/MachineFunctionPass.h" 2148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/CodeGen/MachineInstr.h" 2248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/CodeGen/MachineInstrBuilder.h" 2348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/CodeGen/MachineRegisterInfo.h" 2448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/Support/CommandLine.h" 2548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/Support/Debug.h" 2648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng#include "llvm/Support/raw_ostream.h" 27d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Target/TargetRegisterInfo.h" 2848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengusing namespace llvm; 2948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 30dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "mlx-expansion" 31dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 3248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengstatic cl::opt<bool> 3348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden); 3448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengstatic cl::opt<unsigned> 3548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan 
ChengExpandLimit("expand-limit", cl::init(~0U), cl::Hidden); 3648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 3748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengSTATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded"); 3848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 3948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengnamespace { 4048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng struct MLxExpansion : public MachineFunctionPass { 4148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng static char ID; 4248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MLxExpansion() : MachineFunctionPass(ID) {} 4348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 4436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool runOnMachineFunction(MachineFunction &Fn) override; 4548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 4636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const char *getPassName() const override { 4748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return "ARM MLA / MLS expansion pass"; 4848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 4948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 5048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng private: 5148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng const ARMBaseInstrInfo *TII; 5248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng const TargetRegisterInfo *TRI; 5348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineRegisterInfo *MRI; 5448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 55616471d4bfe4717fa86259ff4534703357b3b723Silviu Baranga bool isLikeA9; 56eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson bool isSwift; 5748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned MIIdx; 5848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineInstr* LastMIs[4]; 5984c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson SmallPtrSet<MachineInstr*, 4> IgnoreStall; 6048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 6148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng void 
clearStack(); 6248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng void pushStack(MachineInstr *MI); 6348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineInstr *getAccDefMI(MachineInstr *MI) const; 6448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned getDefReg(MachineInstr *MI) const; 65eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson bool hasLoopHazard(MachineInstr *MI) const; 6648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; 6784c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson bool FindMLxHazard(MachineInstr *MI); 6848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, 6948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned MulOpc, unsigned AddSubOpc, 7048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool NegAcc, bool HasLane); 7148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool ExpandFPMLxInstructions(MachineBasicBlock &MBB); 7248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng }; 7348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng char MLxExpansion::ID = 0; 7448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 7548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 7648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengvoid MLxExpansion::clearStack() { 77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines std::fill(LastMIs, LastMIs + 4, nullptr); 7848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MIIdx = 0; 7948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 8048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 8148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengvoid MLxExpansion::pushStack(MachineInstr *MI) { 8248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng LastMIs[MIIdx] = MI; 8348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (++MIIdx == 4) 8448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MIIdx = 0; 8548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 
8648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 8748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengMachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { 8848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // Look past COPY and INSERT_SUBREG instructions to find the 8948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // real definition MI. This is important for _sfp instructions. 9048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned Reg = MI->getOperand(1).getReg(); 9148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (TargetRegisterInfo::isPhysicalRegister(Reg)) 92dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 9348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 9448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineBasicBlock *MBB = MI->getParent(); 9548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineInstr *DefMI = MRI->getVRegDef(Reg); 9648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng while (true) { 9748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (DefMI->getParent() != MBB) 9848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng break; 9948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (DefMI->isCopyLike()) { 10048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng Reg = DefMI->getOperand(1).getReg(); 10148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (TargetRegisterInfo::isVirtualRegister(Reg)) { 10248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng DefMI = MRI->getVRegDef(Reg); 10348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng continue; 10448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 10548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } else if (DefMI->isInsertSubreg()) { 10648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng Reg = DefMI->getOperand(2).getReg(); 10748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (TargetRegisterInfo::isVirtualRegister(Reg)) { 10848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng DefMI = MRI->getVRegDef(Reg); 
10948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng continue; 11048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 11148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 11248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng break; 11348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 11448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return DefMI; 11548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 11648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 11748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengunsigned MLxExpansion::getDefReg(MachineInstr *MI) const { 11848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned Reg = MI->getOperand(0).getReg(); 11948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (TargetRegisterInfo::isPhysicalRegister(Reg) || 12048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng !MRI->hasOneNonDBGUse(Reg)) 12148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return Reg; 12248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 12348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineBasicBlock *MBB = MI->getParent(); 12436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *UseMI = &*MRI->use_instr_nodbg_begin(Reg); 12548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (UseMI->getParent() != MBB) 12648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return Reg; 12748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 12848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng while (UseMI->isCopy() || UseMI->isInsertSubreg()) { 12948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng Reg = UseMI->getOperand(0).getReg(); 13048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (TargetRegisterInfo::isPhysicalRegister(Reg) || 13148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng !MRI->hasOneNonDBGUse(Reg)) 13248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return Reg; 13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines UseMI = &*MRI->use_instr_nodbg_begin(Reg); 13448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan 
Cheng if (UseMI->getParent() != MBB) 13548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return Reg; 13648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 13748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 13848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return Reg; 13948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 14048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 141eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson/// hasLoopHazard - Check whether an MLx instruction is chained to itself across 142eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson/// a single-MBB loop. 143eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilsonbool MLxExpansion::hasLoopHazard(MachineInstr *MI) const { 144eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson unsigned Reg = MI->getOperand(1).getReg(); 145eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson if (TargetRegisterInfo::isPhysicalRegister(Reg)) 146eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson return false; 147eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson 148eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson MachineBasicBlock *MBB = MI->getParent(); 149eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson MachineInstr *DefMI = MRI->getVRegDef(Reg); 150eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson while (true) { 151eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilsonouter_continue: 152eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson if (DefMI->getParent() != MBB) 153eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson break; 154eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson 155eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson if (DefMI->isPHI()) { 156eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { 157eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson if (DefMI->getOperand(i + 1).getMBB() == MBB) { 158eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson unsigned SrcReg = DefMI->getOperand(i).getReg(); 
159eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { 160eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson DefMI = MRI->getVRegDef(SrcReg); 161eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson goto outer_continue; 162eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } 163eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } 164eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } 165eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } else if (DefMI->isCopyLike()) { 166eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson Reg = DefMI->getOperand(1).getReg(); 167eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson if (TargetRegisterInfo::isVirtualRegister(Reg)) { 168eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson DefMI = MRI->getVRegDef(Reg); 169eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson continue; 170eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } 171eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } else if (DefMI->isInsertSubreg()) { 172eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson Reg = DefMI->getOperand(2).getReg(); 173eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson if (TargetRegisterInfo::isVirtualRegister(Reg)) { 174eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson DefMI = MRI->getVRegDef(Reg); 175eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson continue; 176eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } 177eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } 178eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson 179eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson break; 180eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } 181eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson 182eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson return DefMI == MI; 183eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson} 184eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson 18548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengbool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) 
const { 18648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // FIXME: Detect integer instructions properly. 187e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng const MCInstrDesc &MCID = MI->getDesc(); 188e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng unsigned Domain = MCID.TSFlags & ARMII::DomainMask; 1895a96b3dad2f634c9081c8b2b6c2575441dc5a2bdEvan Cheng if (MI->mayStore()) 19048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return false; 191e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng unsigned Opcode = MCID.getOpcode(); 1926557bce3ec8d5a82b2ea299a18cb51677b299633Evan Cheng if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 1936557bce3ec8d5a82b2ea299a18cb51677b299633Evan Cheng return false; 1946557bce3ec8d5a82b2ea299a18cb51677b299633Evan Cheng if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) 1956557bce3ec8d5a82b2ea299a18cb51677b299633Evan Cheng return MI->readsRegister(Reg, TRI); 19648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return false; 19748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 19848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 199eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilsonstatic bool isFpMulInstruction(unsigned Opcode) { 200eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson switch (Opcode) { 201eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson case ARM::VMULS: 202eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson case ARM::VMULfd: 203eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson case ARM::VMULfq: 204eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson case ARM::VMULD: 205eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson case ARM::VMULslfd: 206eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson case ARM::VMULslfq: 207eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson return true; 208eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson default: 209eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson return false; 210eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson } 
211eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson} 21248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 21384c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilsonbool MLxExpansion::FindMLxHazard(MachineInstr *MI) { 21448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (NumExpand >= ExpandLimit) 21548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return false; 21648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 21748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (ForceExapnd) 21848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return true; 21948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 22048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineInstr *DefMI = getAccDefMI(MI); 22184c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson if (TII->isFpMLxInstruction(DefMI->getOpcode())) { 22248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // r0 = vmla 22348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // r3 = vmla r0, r1, r2 22448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // takes 16 - 17 cycles 22548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // 22648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // r0 = vmla 22748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // r4 = vmul r1, r2 22848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // r3 = vadd r0, r4 22948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // takes about 14 - 15 cycles even with vmul stalling for 4 cycles. 
23084c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson IgnoreStall.insert(DefMI); 23148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return true; 23284c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson } 23384c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson 234eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson // On Swift, we mostly care about hazards from multiplication instructions 235eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson // writing the accumulator and the pipelining of loop iterations by out-of- 236eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson // order execution. 237eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson if (isSwift) 238eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson return isFpMulInstruction(DefMI->getOpcode()) || hasLoopHazard(MI); 239eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson 24084c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson if (IgnoreStall.count(MI)) 24184c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson return false; 24248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 24348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the 24448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall 24548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // preserves the in-order retirement of the instructions. 24648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // Look at the next few instructions, if *most* of them can cause hazards, 24748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // then the scheduler can't *fix* this, we'd better break up the VMLA. 248616471d4bfe4717fa86259ff4534703357b3b723Silviu Baranga unsigned Limit1 = isLikeA9 ? 1 : 4; 249616471d4bfe4717fa86259ff4534703357b3b723Silviu Baranga unsigned Limit2 = isLikeA9 ? 
1 : 4; 25048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng for (unsigned i = 1; i <= 4; ++i) { 25148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng int Idx = ((int)MIIdx - i + 4) % 4; 25248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineInstr *NextMI = LastMIs[Idx]; 25348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (!NextMI) 25448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng continue; 25548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 25684c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson if (TII->canCauseFpMLxStall(NextMI->getOpcode())) { 25784c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson if (i <= Limit1) 25884c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson return true; 25984c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson } 26048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 26148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // Look for VMLx RAW hazard. 26284c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI)) 26348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return true; 26448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 26548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 26648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return false; 26748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 26848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 26948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng/// ExpandFPMLxInstructions - Expand a MLA / MLS instruction into a pair 27048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng/// of MUL + ADD / SUB instructions. 
27148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengvoid 27248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengMLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, 27348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned MulOpc, unsigned AddSubOpc, 27448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool NegAcc, bool HasLane) { 27548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned DstReg = MI->getOperand(0).getReg(); 27648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool DstDead = MI->getOperand(0).isDead(); 27748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned AccReg = MI->getOperand(1).getReg(); 27848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned Src1Reg = MI->getOperand(2).getReg(); 27948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned Src2Reg = MI->getOperand(3).getReg(); 28048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool Src1Kill = MI->getOperand(2).isKill(); 28148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool Src2Kill = MI->getOperand(3).isKill(); 28248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0; 28348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned NextOp = HasLane ? 
5 : 4; 28448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); 28548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned PredReg = MI->getOperand(++NextOp).getReg(); 28648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 287e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng const MCInstrDesc &MCID1 = TII->get(MulOpc); 288e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng const MCInstrDesc &MCID2 = TII->get(AddSubOpc); 289397fc4874efe9c17e737d4c5c50bd19dc3bf27f5Jakob Stoklund Olesen const MachineFunction &MF = *MI->getParent()->getParent(); 290397fc4874efe9c17e737d4c5c50bd19dc3bf27f5Jakob Stoklund Olesen unsigned TmpReg = MRI->createVirtualRegister( 291397fc4874efe9c17e737d4c5c50bd19dc3bf27f5Jakob Stoklund Olesen TII->getRegClass(MCID1, 0, TRI, MF)); 29248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 293ddfd1377d2e4154d44dc3ad217735adc15af2e3fEvan Cheng MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) 29448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng .addReg(Src1Reg, getKillRegState(Src1Kill)) 29548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng .addReg(Src2Reg, getKillRegState(Src2Kill)); 29648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (HasLane) 29748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MIB.addImm(LaneImm); 29848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MIB.addImm(Pred).addReg(PredReg); 29948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 300ddfd1377d2e4154d44dc3ad217735adc15af2e3fEvan Cheng MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2) 30148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); 30248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 30348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (NegAcc) { 30448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool AccKill = MRI->hasOneNonDBGUse(AccReg); 30548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan 
Cheng MIB.addReg(TmpReg, getKillRegState(true)) 30648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng .addReg(AccReg, getKillRegState(AccKill)); 30748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } else { 30848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true)); 30948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 31048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MIB.addImm(Pred).addReg(PredReg); 31148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 31248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng DEBUG({ 31348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng dbgs() << "Expanding: " << *MI; 31448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng dbgs() << " to:\n"; 31548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineBasicBlock::iterator MII = MI; 31636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MII = std::prev(MII); 31748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineInstr &MI2 = *MII; 31836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MII = std::prev(MII); 31948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineInstr &MI1 = *MII; 32048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng dbgs() << " " << MI1; 32148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng dbgs() << " " << MI2; 32248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng }); 32348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 32448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MI->eraseFromParent(); 32548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng ++NumExpand; 32648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 32748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 32848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengbool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { 32948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool Changed = false; 33048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 33148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng clearStack(); 
33284c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson IgnoreStall.clear(); 33348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 33448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned Skip = 0; 33548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); 33648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng while (MII != E) { 33748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MachineInstr *MI = &*MII; 33848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 33936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) { 34048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng ++MII; 34148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng continue; 34248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 34348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 344e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng const MCInstrDesc &MCID = MI->getDesc(); 3455a96b3dad2f634c9081c8b2b6c2575441dc5a2bdEvan Cheng if (MI->isBarrier()) { 34648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng clearStack(); 34748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng Skip = 0; 34848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng ++MII; 34948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng continue; 35048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 35148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 352e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng unsigned Domain = MCID.TSFlags & ARMII::DomainMask; 35348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (Domain == ARMII::DomainGeneral) { 35448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng if (++Skip == 2) 35548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng // Assume dual issues of non-VFP / NEON instructions. 
356dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines pushStack(nullptr); 35748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } else { 35848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng Skip = 0; 35948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 36048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng unsigned MulOpc, AddSubOpc; 36148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool NegAcc, HasLane; 362e837dead3c8dc3445ef6a0e2322179c57e264a13Evan Cheng if (!TII->isFpMLxInstruction(MCID.getOpcode(), 36348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MulOpc, AddSubOpc, NegAcc, HasLane) || 36448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng !FindMLxHazard(MI)) 36548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng pushStack(MI); 36648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng else { 36748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane); 36848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng E = MBB.rend(); // May have changed if MI was the 1st instruction. 
36948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng Changed = true; 37048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng continue; 37148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 37248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 37348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 37448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng ++MII; 37548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng } 37648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 37748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return Changed; 37848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 37948575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 38048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Chengbool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { 38148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); 38248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng TRI = Fn.getTarget().getRegisterInfo(); 38348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng MRI = &Fn.getRegInfo(); 38484c5eed15baa3710d7fb8522c7a28c8e0b732c2bBob Wilson const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); 385eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson isLikeA9 = STI->isLikeA9() || STI->isSwift(); 386eb1641d54a7eda7717304bc4d55d059208d8ebedBob Wilson isSwift = STI->isSwift(); 38748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 38848575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng bool Modified = false; 38936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MachineBasicBlock &MBB : Fn) 39048575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng Modified |= ExpandFPMLxInstructions(MBB); 39148575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 39248575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return Modified; 39348575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 39448575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng 39548575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan ChengFunctionPass 
*llvm::createMLxExpansionPass() { 39648575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng return new MLxExpansion(); 39748575f6ea7d5cd21ab29ca370f58fcf9ca31400bEvan Cheng} 398