1//===-------- PPCLoopDataPrefetch.cpp - Loop Data Prefetching Pass --------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a Loop Data Prefetching Pass. 11// 12//===----------------------------------------------------------------------===// 13 14#define DEBUG_TYPE "ppc-loop-data-prefetch" 15#include "PPC.h" 16#include "llvm/Transforms/Scalar.h" 17#include "llvm/ADT/DepthFirstIterator.h" 18#include "llvm/ADT/Statistic.h" 19#include "llvm/Analysis/AssumptionCache.h" 20#include "llvm/Analysis/CodeMetrics.h" 21#include "llvm/Analysis/InstructionSimplify.h" 22#include "llvm/Analysis/LoopInfo.h" 23#include "llvm/Analysis/ScalarEvolution.h" 24#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25#include "llvm/Analysis/ScalarEvolutionExpander.h" 26#include "llvm/Analysis/ScalarEvolutionExpressions.h" 27#include "llvm/Analysis/TargetTransformInfo.h" 28#include "llvm/Analysis/ValueTracking.h" 29#include "llvm/IR/CFG.h" 30#include "llvm/IR/Dominators.h" 31#include "llvm/IR/Function.h" 32#include "llvm/IR/IntrinsicInst.h" 33#include "llvm/IR/Module.h" 34#include "llvm/Support/CommandLine.h" 35#include "llvm/Support/Debug.h" 36#include "llvm/Transforms/Utils/BasicBlockUtils.h" 37#include "llvm/Transforms/Utils/Local.h" 38#include "llvm/Transforms/Utils/ValueMapper.h" 39using namespace llvm; 40 41// By default, we limit this to creating 16 PHIs (which is a little over half 42// of the allocatable register set). 43static cl::opt<bool> 44PrefetchWrites("ppc-loop-prefetch-writes", cl::Hidden, cl::init(false), 45 cl::desc("Prefetch write addresses")); 46 47// This seems like a reasonable default for the BG/Q (this pass is enabled, by 48// default, only on the BG/Q). 49static cl::opt<unsigned> 50PrefDist("ppc-loop-prefetch-distance", cl::Hidden, cl::init(300), 51 cl::desc("The loop prefetch distance")); 52 53static cl::opt<unsigned> 54CacheLineSize("ppc-loop-prefetch-cache-line", cl::Hidden, cl::init(64), 55 cl::desc("The loop prefetch cache line size")); 56 57namespace llvm { 58 void initializePPCLoopDataPrefetchPass(PassRegistry&); 59} 60 61namespace { 62 63 class PPCLoopDataPrefetch : public FunctionPass { 64 public: 65 static char ID; // Pass ID, replacement for typeid 66 PPCLoopDataPrefetch() : FunctionPass(ID) { 67 initializePPCLoopDataPrefetchPass(*PassRegistry::getPassRegistry()); 68 } 69 70 void getAnalysisUsage(AnalysisUsage &AU) const override { 71 AU.addRequired<AssumptionCacheTracker>(); 72 AU.addPreserved<DominatorTreeWrapperPass>(); 73 AU.addRequired<LoopInfoWrapperPass>(); 74 AU.addPreserved<LoopInfoWrapperPass>(); 75 AU.addRequired<ScalarEvolutionWrapperPass>(); 76 // FIXME: For some reason, preserving SE here breaks LSR (even if 77 // this pass changes nothing). 78 // AU.addPreserved<ScalarEvolutionWrapperPass>(); 79 AU.addRequired<TargetTransformInfoWrapperPass>(); 80 } 81 82 bool runOnFunction(Function &F) override; 83 bool runOnLoop(Loop *L); 84 85 private: 86 AssumptionCache *AC; 87 LoopInfo *LI; 88 ScalarEvolution *SE; 89 const TargetTransformInfo *TTI; 90 const DataLayout *DL; 91 }; 92} 93 94char PPCLoopDataPrefetch::ID = 0; 95INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", 96 "PPC Loop Data Prefetch", false, false) 97INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 98INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 99INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 100INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 101INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", 102 "PPC Loop Data Prefetch", false, false) 103 104FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPrefetch(); } 105 106bool PPCLoopDataPrefetch::runOnFunction(Function &F) { 107 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 108 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 109 DL = &F.getParent()->getDataLayout(); 110 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 111 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 112 113 bool MadeChange = false; 114 115 for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) 116 for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) 117 MadeChange |= runOnLoop(*L); 118 119 return MadeChange; 120} 121 122bool PPCLoopDataPrefetch::runOnLoop(Loop *L) { 123 bool MadeChange = false; 124 125 // Only prefetch in the inner-most loop 126 if (!L->empty()) 127 return MadeChange; 128 129 SmallPtrSet<const Value *, 32> EphValues; 130 CodeMetrics::collectEphemeralValues(L, AC, EphValues); 131 132 // Calculate the number of iterations ahead to prefetch 133 CodeMetrics Metrics; 134 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 135 I != IE; ++I) { 136 137 // If the loop already has prefetches, then assume that the user knows 138 // what he or she is doing and don't add any more. 139 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 140 J != JE; ++J) 141 if (CallInst *CI = dyn_cast<CallInst>(J)) 142 if (Function *F = CI->getCalledFunction()) 143 if (F->getIntrinsicID() == Intrinsic::prefetch) 144 return MadeChange; 145 146 Metrics.analyzeBasicBlock(*I, *TTI, EphValues); 147 } 148 unsigned LoopSize = Metrics.NumInsts; 149 if (!LoopSize) 150 LoopSize = 1; 151 152 unsigned ItersAhead = PrefDist/LoopSize; 153 if (!ItersAhead) 154 ItersAhead = 1; 155 156 SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads; 157 for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); 158 I != IE; ++I) { 159 for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end(); 160 J != JE; ++J) { 161 Value *PtrValue; 162 Instruction *MemI; 163 164 if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) { 165 MemI = LMemI; 166 PtrValue = LMemI->getPointerOperand(); 167 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) { 168 if (!PrefetchWrites) continue; 169 MemI = SMemI; 170 PtrValue = SMemI->getPointerOperand(); 171 } else continue; 172 173 unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace(); 174 if (PtrAddrSpace) 175 continue; 176 177 if (L->isLoopInvariant(PtrValue)) 178 continue; 179 180 const SCEV *LSCEV = SE->getSCEV(PtrValue); 181 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); 182 if (!LSCEVAddRec) 183 continue; 184 185 // We don't want to double prefetch individual cache lines. If this load 186 // is known to be within one cache line of some other load that has 187 // already been prefetched, then don't prefetch this one as well. 188 bool DupPref = false; 189 for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 190 16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end(); 191 K != KE; ++K) { 192 const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second); 193 if (const SCEVConstant *ConstPtrDiff = 194 dyn_cast<SCEVConstant>(PtrDiff)) { 195 int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue()); 196 if (PD < (int64_t) CacheLineSize) { 197 DupPref = true; 198 break; 199 } 200 } 201 } 202 if (DupPref) 203 continue; 204 205 const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr( 206 SE->getConstant(LSCEVAddRec->getType(), ItersAhead), 207 LSCEVAddRec->getStepRecurrence(*SE))); 208 if (!isSafeToExpand(NextLSCEV, *SE)) 209 continue; 210 211 PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec)); 212 213 Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace); 214 SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr"); 215 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI); 216 217 IRBuilder<> Builder(MemI); 218 Module *M = (*I)->getParent()->getParent(); 219 Type *I32 = Type::getInt32Ty((*I)->getContext()); 220 Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch); 221 Builder.CreateCall( 222 PrefetchFunc, 223 {PrefPtrValue, 224 ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1), 225 ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)}); 226 227 MadeChange = true; 228 } 229 } 230 231 return MadeChange; 232} 233 234