LoopIdiomRecognize.cpp revision 95ae676bc89b4cb9166576b74f1220ab5b0ff0ad
1e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===// 2e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 3e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// The LLVM Compiler Infrastructure 4e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 5e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This file is distributed under the University of Illinois Open Source 6e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// License. See LICENSE.TXT for details. 7e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 8e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===// 9e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 10e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This pass implements an idiom recognizer that transforms simple loops into a 11e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// non-loop form. In cases that this kicks in, it can be a significant 12e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// performance win. 13e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 14e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===// 15e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 16e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#define DEBUG_TYPE "loop-idiom" 17e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Transforms/Scalar.h" 182e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner#include "llvm/Analysis/AliasAnalysis.h" 19e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Analysis/LoopPass.h" 20e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Analysis/ScalarEvolutionExpressions.h" 21a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner#include "llvm/Analysis/ScalarEvolutionExpander.h" 2222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner#include "llvm/Analysis/ValueTracking.h" 2322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner#include "llvm/Target/TargetData.h" 249f39188def2b4102817945522fac1209083efa06Chris Lattner#include "llvm/Transforms/Utils/Local.h" 25e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Support/Debug.h" 26a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner#include "llvm/Support/IRBuilder.h" 27e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Support/raw_ostream.h" 28e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerusing namespace llvm; 29e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 30e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// TODO: Recognize "N" size array multiplies: replace with call to blas or 31e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// something. 32e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 33e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnernamespace { 34e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner class LoopIdiomRecognize : public LoopPass { 3522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner Loop *CurLoop; 3622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner const TargetData *TD; 3722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner ScalarEvolution *SE; 38e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner public: 39e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner static char ID; 40e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner explicit LoopIdiomRecognize() : LoopPass(ID) { 41e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); 42e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner } 43e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 44e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner bool runOnLoop(Loop *L, LPPassManager &LPM); 45e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 4622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner bool processLoopStore(StoreInst *SI, const SCEV *BECount); 47e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 48a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner bool processLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize, 49a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Value *SplatValue, 50a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner const SCEVAddRecExpr *Ev, 51a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner const SCEV *BECount); 52a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 53e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// This transformation requires natural loop information & requires that 54e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// loop preheaders be inserted into the CFG. 55e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// 56e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner virtual void getAnalysisUsage(AnalysisUsage &AU) const { 57e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequired<LoopInfo>(); 58e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreserved<LoopInfo>(); 59e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequiredID(LoopSimplifyID); 60e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreservedID(LoopSimplifyID); 61e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequiredID(LCSSAID); 62e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreservedID(LCSSAID); 632e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner AU.addRequired<AliasAnalysis>(); 642e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner AU.addPreserved<AliasAnalysis>(); 65e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequired<ScalarEvolution>(); 66e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreserved<ScalarEvolution>(); 67e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreserved<DominatorTree>(); 68e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner } 69e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner }; 70e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 71e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 72e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerchar LoopIdiomRecognize::ID = 0; 73e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", 74e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner false, false) 75e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopInfo) 76e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 77e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LCSSA) 78e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 792e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris LattnerINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 80e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", 81e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner false, false) 82e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 83e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerPass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } 84e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 859f39188def2b4102817945522fac1209083efa06Chris Lattner/// DeleteDeadInstruction - Delete this instruction. Before we do, go through 869f39188def2b4102817945522fac1209083efa06Chris Lattner/// and zero out all the operands of this instruction. If any of them become 879f39188def2b4102817945522fac1209083efa06Chris Lattner/// dead, delete them and the computation tree that feeds them. 889f39188def2b4102817945522fac1209083efa06Chris Lattner/// 899f39188def2b4102817945522fac1209083efa06Chris Lattnerstatic void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { 909f39188def2b4102817945522fac1209083efa06Chris Lattner SmallVector<Instruction*, 32> NowDeadInsts; 919f39188def2b4102817945522fac1209083efa06Chris Lattner 929f39188def2b4102817945522fac1209083efa06Chris Lattner NowDeadInsts.push_back(I); 939f39188def2b4102817945522fac1209083efa06Chris Lattner 949f39188def2b4102817945522fac1209083efa06Chris Lattner // Before we touch this instruction, remove it from SE! 959f39188def2b4102817945522fac1209083efa06Chris Lattner do { 969f39188def2b4102817945522fac1209083efa06Chris Lattner Instruction *DeadInst = NowDeadInsts.pop_back_val(); 979f39188def2b4102817945522fac1209083efa06Chris Lattner 989f39188def2b4102817945522fac1209083efa06Chris Lattner // This instruction is dead, zap it, in stages. Start by removing it from 999f39188def2b4102817945522fac1209083efa06Chris Lattner // SCEV. 1009f39188def2b4102817945522fac1209083efa06Chris Lattner SE.forgetValue(DeadInst); 1019f39188def2b4102817945522fac1209083efa06Chris Lattner 1029f39188def2b4102817945522fac1209083efa06Chris Lattner for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 1039f39188def2b4102817945522fac1209083efa06Chris Lattner Value *Op = DeadInst->getOperand(op); 1049f39188def2b4102817945522fac1209083efa06Chris Lattner DeadInst->setOperand(op, 0); 1059f39188def2b4102817945522fac1209083efa06Chris Lattner 1069f39188def2b4102817945522fac1209083efa06Chris Lattner // If this operand just became dead, add it to the NowDeadInsts list. 1079f39188def2b4102817945522fac1209083efa06Chris Lattner if (!Op->use_empty()) continue; 1089f39188def2b4102817945522fac1209083efa06Chris Lattner 1099f39188def2b4102817945522fac1209083efa06Chris Lattner if (Instruction *OpI = dyn_cast<Instruction>(Op)) 1109f39188def2b4102817945522fac1209083efa06Chris Lattner if (isInstructionTriviallyDead(OpI)) 1119f39188def2b4102817945522fac1209083efa06Chris Lattner NowDeadInsts.push_back(OpI); 1129f39188def2b4102817945522fac1209083efa06Chris Lattner } 1139f39188def2b4102817945522fac1209083efa06Chris Lattner 1149f39188def2b4102817945522fac1209083efa06Chris Lattner DeadInst->eraseFromParent(); 1159f39188def2b4102817945522fac1209083efa06Chris Lattner 1169f39188def2b4102817945522fac1209083efa06Chris Lattner } while (!NowDeadInsts.empty()); 1179f39188def2b4102817945522fac1209083efa06Chris Lattner} 1189f39188def2b4102817945522fac1209083efa06Chris Lattner 119e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerbool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { 12022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner CurLoop = L; 12122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 122e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner // We only look at trivial single basic block loops. 123e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner // TODO: eventually support more complex loops, scanning the header. 124e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner if (L->getBlocks().size() != 1) 125e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner return false; 126e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 12722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // The trip count of the loop must be analyzable. 12822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner SE = &getAnalysis<ScalarEvolution>(); 12922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner if (!SE->hasLoopInvariantBackedgeTakenCount(L)) 13022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 13122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner const SCEV *BECount = SE->getBackedgeTakenCount(L); 13222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner if (isa<SCEVCouldNotCompute>(BECount)) return false; 13322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 13422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // We require target data for now. 13522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner TD = getAnalysisIfAvailable<TargetData>(); 13622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner if (TD == 0) return false; 13722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 138e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner BasicBlock *BB = L->getHeader(); 13922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner DEBUG(dbgs() << "loop-idiom Scanning: F[" << BB->getParent()->getName() 140e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner << "] Loop %" << BB->getName() << "\n"); 141e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 14222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner bool MadeChange = false; 14322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { 14422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // Look for store instructions, which may be memsets. 145a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner StoreInst *SI = dyn_cast<StoreInst>(I++); 146a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner if (SI == 0 || SI->isVolatile()) continue; 147a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 1482e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner WeakVH InstPtr(SI); 1492e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner if (!processLoopStore(SI, BECount)) continue; 1502e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner 1512e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner MadeChange = true; 1522e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner 1532e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner // If processing the store invalidated our iterator, start over from the 1542e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner // head of the loop. 1552e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner if (InstPtr == 0) 1562e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner I = BB->begin(); 15722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner } 15822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 15922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return MadeChange; 160e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 161e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 162e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner/// scanBlock - Look over a block to see if we can promote anything out of it. 16322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattnerbool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { 16422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner Value *StoredVal = SI->getValueOperand(); 165a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Value *StorePtr = SI->getPointerOperand(); 166e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 16795ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner // Reject stores that are so large that they overflow an unsigned. 16822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); 16995ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) 17022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 17122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 17222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // See if the pointer expression is an AddRec like {base,+,1} on the current 17322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // loop, which indicates a strided store. If we have something else, it's a 17422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // random store we can't handle. 175a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); 17622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine()) 17722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 17822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 17922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // Check to see if the stride matches the size of the store. If so, then we 18022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // know that every byte is touched in the loop. 18122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner unsigned StoreSize = (unsigned)SizeInBits >> 3; 18222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); 18322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) 18422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 185e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 18622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // If the stored value is a byte-wise value (like i32 -1), then it may be 18722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // turned into a memset of i8 -1, assuming that all the consequtive bytes 18822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // are stored. A store of i32 0x01020304 can never be turned into a memset. 189a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner if (Value *SplatValue = isBytewiseValue(StoredVal)) 190a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner return processLoopStoreOfSplatValue(SI, StoreSize, SplatValue, Ev, BECount); 191a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 192a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // Handle the memcpy case here. 193a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner errs() << "Found strided store: " << *Ev << "\n"; 19422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 19522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 196e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner return false; 197e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 198e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 199a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner/// processLoopStoreOfSplatValue - We see a strided store of a memsetable value. 200a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner/// If we can transform this into a memset in the loop preheader, do so. 201a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattnerbool LoopIdiomRecognize:: 202a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris LattnerprocessLoopStoreOfSplatValue(StoreInst *SI, unsigned StoreSize, 203a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Value *SplatValue, 204a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner const SCEVAddRecExpr *Ev, const SCEV *BECount) { 205a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // Okay, we have a strided store "p[i]" of a splattable value. We can turn 206a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // this into a memset in the loop preheader now if we want. However, this 207a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // would be unsafe to do if there is anything else in the loop that may read 208a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // or write to the aliased location. Check for an alias. 209a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 2102e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner // FIXME: Need to get a base pointer that is valid. 2112e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner // if (LoopCanModRefLocation(SI->getPointerOperand()) 2122e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner 2132e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner 214a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // FIXME: TODO safety check. 215a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 216a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // Okay, everything looks good, insert the memset. 217a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner BasicBlock *Preheader = CurLoop->getLoopPreheader(); 218a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 219a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner IRBuilder<> Builder(Preheader->getTerminator()); 220a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 221a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // The trip count of the loop and the base pointer of the addrec SCEV is 222a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // guaranteed to be loop invariant, which means that it should dominate the 223a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // header. Just insert code for it in the preheader. 224a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner SCEVExpander Expander(*SE); 225a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 226a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner unsigned AddrSpace = SI->getPointerAddressSpace(); 227a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Value *BasePtr = 228a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), 229a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Preheader->getTerminator()); 230a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 231a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // The # stored bytes is (BECount+1)*Size. Expand the trip count out to 232a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // pointer size if it isn't already. 233a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner const Type *IntPtr = TD->getIntPtrType(SI->getContext()); 234a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner unsigned BESize = SE->getTypeSizeInBits(BECount->getType()); 235a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner if (BESize < TD->getPointerSizeInBits()) 236a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner BECount = SE->getZeroExtendExpr(BECount, IntPtr); 237a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner else if (BESize > TD->getPointerSizeInBits()) 238a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner BECount = SE->getTruncateExpr(BECount, IntPtr); 239a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 240a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), 241a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner true, true /*nooverflow*/); 242a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner if (StoreSize != 1) 243a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), 244a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner true, true /*nooverflow*/); 245a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 246a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Value *NumBytes = 247a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); 248a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 249a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Value *NewCall = 250a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, SI->getAlignment()); 251a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 252a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" 253a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner << " from store to: " << *Ev << " at: " << *SI << "\n"); 2547922d34b6073712cb62f7d4d74810509c19a84dfDuncan Sands (void)NewCall; 255a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 2569f39188def2b4102817945522fac1209083efa06Chris Lattner // Okay, the memset has been formed. Zap the original store and anything that 2579f39188def2b4102817945522fac1209083efa06Chris Lattner // feeds into it. 2589f39188def2b4102817945522fac1209083efa06Chris Lattner DeleteDeadInstruction(SI, *SE); 259a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner return true; 260a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner} 261a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 262