1//===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass identifies floating point stores that should not be combined into 11// store pairs. Later we may do the same for floating point loads. 12// ===---------------------------------------------------------------------===// 13 14#include "AArch64InstrInfo.h" 15#include "llvm/CodeGen/MachineFunction.h" 16#include "llvm/CodeGen/MachineFunctionPass.h" 17#include "llvm/CodeGen/MachineInstr.h" 18#include "llvm/CodeGen/MachineTraceMetrics.h" 19#include "llvm/CodeGen/TargetSchedule.h" 20#include "llvm/Support/Debug.h" 21#include "llvm/Support/raw_ostream.h" 22#include "llvm/Target/TargetInstrInfo.h" 23 24using namespace llvm; 25 26#define DEBUG_TYPE "aarch64-stp-suppress" 27 28namespace { 29class AArch64StorePairSuppress : public MachineFunctionPass { 30 const AArch64InstrInfo *TII; 31 const TargetRegisterInfo *TRI; 32 const MachineRegisterInfo *MRI; 33 MachineFunction *MF; 34 TargetSchedModel SchedModel; 35 MachineTraceMetrics *Traces; 36 MachineTraceMetrics::Ensemble *MinInstr; 37 38public: 39 static char ID; 40 AArch64StorePairSuppress() : MachineFunctionPass(ID) {} 41 42 virtual const char *getPassName() const override { 43 return "AArch64 Store Pair Suppression"; 44 } 45 46 bool runOnMachineFunction(MachineFunction &F) override; 47 48private: 49 bool shouldAddSTPToBlock(const MachineBasicBlock *BB); 50 51 bool isNarrowFPStore(const MachineInstr &MI); 52 53 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 54 AU.setPreservesCFG(); 55 AU.addRequired<MachineTraceMetrics>(); 56 AU.addPreserved<MachineTraceMetrics>(); 57 MachineFunctionPass::getAnalysisUsage(AU); 58 } 59}; 60char AArch64StorePairSuppress::ID = 0; 61} // anonymous 62 63FunctionPass *llvm::createAArch64StorePairSuppressPass() { 64 return new AArch64StorePairSuppress(); 65} 66 67/// Return true if an STP can be added to this block without increasing the 68/// critical resource height. STP is good to form in Ld/St limited blocks and 69/// bad to form in float-point limited blocks. This is true independent of the 70/// critical path. If the critical path is longer than the resource height, the 71/// extra vector ops can limit physreg renaming. Otherwise, it could simply 72/// oversaturate the vector units. 73bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { 74 if (!MinInstr) 75 MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); 76 77 MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); 78 unsigned ResLength = BBTrace.getResourceLength(); 79 80 // Get the machine model's scheduling class for STPQi. 81 // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. 82 unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass(); 83 const MCSchedClassDesc *SCDesc = 84 SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); 85 86 // If a subtarget does not define resources for STPQi, bail here. 87 if (SCDesc->isValid() && !SCDesc->isVariant()) { 88 unsigned ResLenWithSTP = BBTrace.getResourceLength( 89 ArrayRef<const MachineBasicBlock *>(), SCDesc); 90 if (ResLenWithSTP > ResLength) { 91 DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() 92 << " resources " << ResLength << " -> " << ResLenWithSTP 93 << "\n"); 94 return false; 95 } 96 } 97 return true; 98} 99 100/// Return true if this is a floating-point store smaller than the V reg. On 101/// cyclone, these require a vector shuffle before storing a pair. 102/// Ideally we would call getMatchingPairOpcode() and have the machine model 103/// tell us if it's profitable with no cpu knowledge here. 104/// 105/// FIXME: We plan to develop a decent Target abstraction for simple loads and 106/// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer. 107bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { 108 switch (MI.getOpcode()) { 109 default: 110 return false; 111 case AArch64::STRSui: 112 case AArch64::STRDui: 113 case AArch64::STURSi: 114 case AArch64::STURDi: 115 return true; 116 } 117} 118 119bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { 120 MF = &mf; 121 TII = static_cast<const AArch64InstrInfo *>(MF->getTarget().getInstrInfo()); 122 TRI = MF->getTarget().getRegisterInfo(); 123 MRI = &MF->getRegInfo(); 124 const TargetSubtargetInfo &ST = 125 MF->getTarget().getSubtarget<TargetSubtargetInfo>(); 126 SchedModel.init(*ST.getSchedModel(), &ST, TII); 127 128 Traces = &getAnalysis<MachineTraceMetrics>(); 129 MinInstr = nullptr; 130 131 DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n'); 132 133 if (!SchedModel.hasInstrSchedModel()) { 134 DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); 135 return false; 136 } 137 138 // Check for a sequence of stores to the same base address. We don't need to 139 // precisely determine whether a store pair can be formed. But we do want to 140 // filter out most situations where we can't form store pairs to avoid 141 // computing trace metrics in those cases. 142 for (auto &MBB : *MF) { 143 bool SuppressSTP = false; 144 unsigned PrevBaseReg = 0; 145 for (auto &MI : MBB) { 146 if (!isNarrowFPStore(MI)) 147 continue; 148 unsigned BaseReg; 149 unsigned Offset; 150 if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) { 151 if (PrevBaseReg == BaseReg) { 152 // If this block can take STPs, skip ahead to the next block. 153 if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) 154 break; 155 // Otherwise, continue unpairing the stores in this block. 156 DEBUG(dbgs() << "Unpairing store " << MI << "\n"); 157 SuppressSTP = true; 158 TII->suppressLdStPair(&MI); 159 } 160 PrevBaseReg = BaseReg; 161 } else 162 PrevBaseReg = 0; 163 } 164 } 165 // This pass just sets some internal MachineMemOperand flags. It can't really 166 // invalidate anything. 167 return false; 168} 169