LoopIdiomRecognize.cpp revision d1b8ef97c47d347f2a2261a0d6de4872f248321f
1e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===// 2e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 3e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// The LLVM Compiler Infrastructure 4e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 5e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This file is distributed under the University of Illinois Open Source 6e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// License. See LICENSE.TXT for details. 7e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 8e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===// 9e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 10e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This pass implements an idiom recognizer that transforms simple loops into a 11e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// non-loop form. In cases that this kicks in, it can be a significant 12e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// performance win. 13e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 14e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===// 15bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// 16bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// TODO List: 17bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// 18bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future loop memory idioms to recognize: 19a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth// memcmp, memmove, strlen, etc. 20bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future floating point idioms to recognize in -ffast-math mode: 21bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// fpowi 22bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future integer operation idioms to recognize: 23bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// ctpop, ctlz, cttz 24bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// 25bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Beware that isel's default lowering for ctpop is highly inefficient for 26bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// i64 and larger types when i64 is legal and the value has few bits set. It 27bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// would be good to enhance isel to emit a loop for ctpop in this case. 28bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// 29bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// We should enhance the memset/memcpy recognition to handle multiple stores in 30bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// the loop. This would handle things like: 31bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// void foo(_Complex float *P) 32bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// for (i) { __real__(*P) = 0; __imag__(*P) = 0; } 3391139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner// 34408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// We should enhance this to handle negative strides through memory. 35408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// Alternatively (and perhaps better) we could rely on an earlier pass to force 36408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// forward iteration through memory, which is generally better for cache 37408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// behavior. Negative strides *do* happen for memset/memcpy loops. 38408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// 39d957c717918c412402157b85fc51b4c6d2631381Chris Lattner// This could recognize common matrix multiplies and dot product idioms and 4091139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner// replace them with calls to BLAS (if linked in??). 4191139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner// 42bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//===----------------------------------------------------------------------===// 43e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 44e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#define DEBUG_TYPE "loop-idiom" 45e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Transforms/Scalar.h" 4606cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/ADT/Statistic.h" 472e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner#include "llvm/Analysis/AliasAnalysis.h" 48e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Analysis/LoopPass.h" 49a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner#include "llvm/Analysis/ScalarEvolutionExpander.h" 5006cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Analysis/ScalarEvolutionExpressions.h" 51be04929f7fd76a921540e9901f24563e51dc1219Chandler Carruth#include "llvm/Analysis/TargetTransformInfo.h" 5222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner#include "llvm/Analysis/ValueTracking.h" 530b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h" 540b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IRBuilder.h" 550b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h" 560b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Module.h" 5706cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Support/Debug.h" 5806cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Support/raw_ostream.h" 59c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner#include "llvm/Target/TargetLibraryInfo.h" 609f39188def2b4102817945522fac1209083efa06Chris Lattner#include "llvm/Transforms/Utils/Local.h" 61e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerusing namespace llvm; 62e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 63a864748284f4bab974ffb13b840a611c1f9bc7acChandler CarruthSTATISTIC(NumMemSet, "Number of memset's formed from loop stores"); 64a864748284f4bab974ffb13b840a611c1f9bc7acChandler CarruthSTATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); 65e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 66e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnernamespace { 675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang class LoopIdiomRecognize; 695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// This class defines some utility functions for loop idiom recognization. 715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang class LIRUtil { 725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang public: 735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Return true iff the block contains nothing but an uncondition branch 745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// (aka goto instruction). 755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang static bool isAlmostEmpty(BasicBlock *); 765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang static BranchInst *getBranch(BasicBlock *BB) { 785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return dyn_cast<BranchInst>(BB->getTerminator()); 795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Return the condition of the branch terminating the given basic block. 825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang static Value *getBrCondtion(BasicBlock *); 835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Derive the precondition block (i.e the block that guards the loop 855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// preheader) from the given preheader. 865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang static BasicBlock *getPrecondBb(BasicBlock *PreHead); 875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang }; 885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// This class is to recoginize idioms of population-count conducted in 905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// a noncountable loop. Currently it only recognizes this pattern: 915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// \code 925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// while(x) {cnt++; ...; x &= x - 1; ...} 935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// \endcode 945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang class NclPopcountRecognize { 955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LoopIdiomRecognize &LIR; 965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Loop *CurLoop; 975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *PreCondBB; 985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang typedef IRBuilder<> IRBuilderTy; 1005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang public: 1025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR); 1035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool recognize(); 1045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang private: 1065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Take a glimpse of the loop to see if we need to go ahead recoginizing 1075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// the idiom. 1085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool preliminaryScreen(); 1095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Check if the given conditional branch is based on the comparison 1115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// beween a variable and zero, and if the variable is non-zero, the 1125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// control yeilds to the loop entry. If the branch matches the behavior, 1135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// the variable involved in the comparion is returned. This function will 1145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// be called to see if the precondition and postcondition of the loop 1155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// are in desirable form. 1165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const; 1175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Return true iff the idiom is detected in the loop. and 1) \p CntInst 1195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// is set to the instruction counting the pupulation bit. 2) \p CntPhi 1205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// is set to the corresponding phi node. 3) \p Var is set to the value 1215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// whose population bits are being counted. 1225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool detectIdiom 1235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const; 1245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Insert ctpop intrinsic function and some obviously dead instructions. 1265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var); 1275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Create llvm.ctpop.* intrinsic function. 1295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL); 1305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang }; 1315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 132e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner class LoopIdiomRecognize : public LoopPass { 13322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner Loop *CurLoop; 1343574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow const DataLayout *TD; 13562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner DominatorTree *DT; 13622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner ScalarEvolution *SE; 137c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner TargetLibraryInfo *TLI; 1389980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth const TargetTransformInfo *TTI; 139e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner public: 140e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner static char ID; 141e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner explicit LoopIdiomRecognize() : LoopPass(ID) { 142e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); 1439980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0; 144e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner } 145e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 146e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner bool runOnLoop(Loop *L, LPPassManager &LPM); 14762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, 14862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner SmallVectorImpl<BasicBlock*> &ExitBlocks); 149e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 15022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner bool processLoopStore(StoreInst *SI, const SCEV *BECount); 151e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); 152d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1533a393728a62122d7009d8e2cbe52a221874e576aChris Lattner bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, 1543a393728a62122d7009d8e2cbe52a221874e576aChris Lattner unsigned StoreAlignment, 1553a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Value *SplatValue, Instruction *TheStore, 1563a393728a62122d7009d8e2cbe52a221874e576aChris Lattner const SCEVAddRecExpr *Ev, 1573a393728a62122d7009d8e2cbe52a221874e576aChris Lattner const SCEV *BECount); 158e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, 159e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *StoreEv, 160e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *LoadEv, 161e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEV *BECount); 162d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 163e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// This transformation requires natural loop information & requires that 164e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// loop preheaders be inserted into the CFG. 165e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// 166e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner virtual void getAnalysisUsage(AnalysisUsage &AU) const { 167e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequired<LoopInfo>(); 168e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreserved<LoopInfo>(); 169e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequiredID(LoopSimplifyID); 170e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreservedID(LoopSimplifyID); 171e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequiredID(LCSSAID); 172e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreservedID(LCSSAID); 1732e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner AU.addRequired<AliasAnalysis>(); 1742e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner AU.addPreserved<AliasAnalysis>(); 175e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequired<ScalarEvolution>(); 176e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreserved<ScalarEvolution>(); 177e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreserved<DominatorTree>(); 17862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner AU.addRequired<DominatorTree>(); 179c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner AU.addRequired<TargetLibraryInfo>(); 180e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner } 1815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang const DataLayout *getDataLayout() { 1835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return TD ? TD : TD=getAnalysisIfAvailable<DataLayout>(); 1845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 1855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang DominatorTree *getDominatorTree() { 1875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return DT ? DT : (DT=&getAnalysis<DominatorTree>()); 1885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 1895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ScalarEvolution *getScalarEvolution() { 1915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return SE ? SE : (SE = &getAnalysis<ScalarEvolution>()); 1925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 1935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TargetLibraryInfo *getTargetLibraryInfo() { 1955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>()); 1965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 1975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1989980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth const TargetTransformInfo *getTargetTransformInfo() { 1999980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth if (!TTI) 2009980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth TTI = getAnalysisIfAvailable<TargetTransformInfo>(); 2019980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth return TTI; 2025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 2035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 2045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Loop *getLoop() const { return CurLoop; } 2055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 2065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang private: 2075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool runOnNoncountableLoop(); 2085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool runOnCountableLoop(); 209e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner }; 210e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 211e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 212e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerchar LoopIdiomRecognize::ID = 0; 213e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", 214e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner false, false) 215e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopInfo) 21662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris LattnerINITIALIZE_PASS_DEPENDENCY(DominatorTree) 217e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 218e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LCSSA) 219e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 220c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris LattnerINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 2212e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris LattnerINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 222e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", 223e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner false, false) 224e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 225e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerPass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } 226e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 2274f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner/// deleteDeadInstruction - Delete this instruction. Before we do, go through 2289f39188def2b4102817945522fac1209083efa06Chris Lattner/// and zero out all the operands of this instruction. If any of them become 2299f39188def2b4102817945522fac1209083efa06Chris Lattner/// dead, delete them and the computation tree that feeds them. 2309f39188def2b4102817945522fac1209083efa06Chris Lattner/// 2318e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramerstatic void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, 2328e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer const TargetLibraryInfo *TLI) { 2339f39188def2b4102817945522fac1209083efa06Chris Lattner SmallVector<Instruction*, 32> NowDeadInsts; 234d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2359f39188def2b4102817945522fac1209083efa06Chris Lattner NowDeadInsts.push_back(I); 236d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2379f39188def2b4102817945522fac1209083efa06Chris Lattner // Before we touch this instruction, remove it from SE! 2389f39188def2b4102817945522fac1209083efa06Chris Lattner do { 2399f39188def2b4102817945522fac1209083efa06Chris Lattner Instruction *DeadInst = NowDeadInsts.pop_back_val(); 240d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2419f39188def2b4102817945522fac1209083efa06Chris Lattner // This instruction is dead, zap it, in stages. Start by removing it from 2429f39188def2b4102817945522fac1209083efa06Chris Lattner // SCEV. 2439f39188def2b4102817945522fac1209083efa06Chris Lattner SE.forgetValue(DeadInst); 244d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2459f39188def2b4102817945522fac1209083efa06Chris Lattner for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 2469f39188def2b4102817945522fac1209083efa06Chris Lattner Value *Op = DeadInst->getOperand(op); 2479f39188def2b4102817945522fac1209083efa06Chris Lattner DeadInst->setOperand(op, 0); 248d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2499f39188def2b4102817945522fac1209083efa06Chris Lattner // If this operand just became dead, add it to the NowDeadInsts list. 2509f39188def2b4102817945522fac1209083efa06Chris Lattner if (!Op->use_empty()) continue; 251d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2529f39188def2b4102817945522fac1209083efa06Chris Lattner if (Instruction *OpI = dyn_cast<Instruction>(Op)) 2538e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer if (isInstructionTriviallyDead(OpI, TLI)) 2549f39188def2b4102817945522fac1209083efa06Chris Lattner NowDeadInsts.push_back(OpI); 2559f39188def2b4102817945522fac1209083efa06Chris Lattner } 256d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2579f39188def2b4102817945522fac1209083efa06Chris Lattner DeadInst->eraseFromParent(); 258d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2599f39188def2b4102817945522fac1209083efa06Chris Lattner } while (!NowDeadInsts.empty()); 2609f39188def2b4102817945522fac1209083efa06Chris Lattner} 2619f39188def2b4102817945522fac1209083efa06Chris Lattner 262a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// deleteIfDeadInstruction - If the specified value is a dead instruction, 263a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// delete it and any recursively used instructions. 264a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruthstatic void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, 265a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth const TargetLibraryInfo *TLI) { 266a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (Instruction *I = dyn_cast<Instruction>(V)) 267a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (isInstructionTriviallyDead(I, TLI)) 268a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteDeadInstruction(I, SE, TLI); 269a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth} 270a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 2715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 2725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 2735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// Implementation of LIRUtil 2745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 2755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 27684fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang 2775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// This fucntion will return true iff the given block contains nothing but goto. 2785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// A typical usage of this function is to check if the preheader fucntion is 2795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// "almost" empty such that generated intrinsic function can be moved across 2805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// preheader and to be placed at the end of the preconditiona block without 2815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// concerning of breaking data dependence. 2825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LIRUtil::isAlmostEmpty(BasicBlock *BB) { 2835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (BranchInst *Br = getBranch(BB)) { 2845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return Br->isUnconditional() && BB->size() == 1; 2855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 2865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 2875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 2885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 2895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangValue *LIRUtil::getBrCondtion(BasicBlock *BB) { 2905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *Br = getBranch(BB); 2915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return Br ? Br->getCondition() : 0; 2925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 2935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 2945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangBasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { 2955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (BasicBlock *BB = PreHead->getSinglePredecessor()) { 2965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *Br = getBranch(BB); 2975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return Br && Br->isConditional() ? BB : 0; 2985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 2995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return 0; 3005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 3015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 3035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 3045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// Implementation of NclPopcountRecognize 3055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 3065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 3075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangNclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): 3095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) { 3105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 3115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::preliminaryScreen() { 3139980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth const TargetTransformInfo *TTI = LIR.getTargetTransformInfo(); 314d1b8ef97c47d347f2a2261a0d6de4872f248321fChandler Carruth if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) 31584fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang return false; 31684fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang 3175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Counting population are usually conducted by few arithmetic instrutions. 3185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Such instructions can be easilly "absorbed" by vacant slots in a 3195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // non-compact loop. Therefore, recognizing popcount idiom only makes sense 3205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // in a compact loop. 3215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Give up if the loop has multiple blocks or multiple backedges. 3235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) 32422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 32584fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang 3265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *LoopBody = *(CurLoop->block_begin()); 3275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (LoopBody->size() >= 20) { 3285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // The loop is too big, bail out. 3295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 3305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 3315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // It should have a preheader containing nothing but a goto instruction. 3335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *PreHead = CurLoop->getLoopPreheader(); 3345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead)) 3355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 3365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // It should have a precondition block where the generated popcount instrinsic 3385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // function will be inserted. 3395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PreCondBB = LIRUtil::getPrecondBb(PreHead); 3405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!PreCondBB) 34184fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang return false; 3425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return true; 3445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 3455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangValue *NclPopcountRecognize::matchCondition (BranchInst *Br, 3475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *LoopEntry) const { 3485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Br || !Br->isConditional()) 3495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return 0; 3505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition()); 3525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Cond) 3535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return 0; 3545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1)); 3565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!CmpZero || !CmpZero->isZero()) 3575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return 0; 3585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst::Predicate Pred = Cond->getPredicate(); 3605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || 3615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) 3625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return Cond->getOperand(0); 3635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return 0; 3655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 3665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, 3685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *&CntPhi, 3695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *&Var) const { 3705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Following code tries to detect this idiom: 3715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 3725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // if (x0 != 0) 3735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // goto loop-exit // the precondition of the loop 3745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // cnt0 = init-val; 3755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // do { 3765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // x1 = phi (x0, x2); 3775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // cnt1 = phi(cnt0, cnt2); 3785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 3795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // cnt2 = cnt1 + 1; 3805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // ... 3815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // x2 = x1 & (x1 - 1); 3825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // ... 3835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // } while(x != 0); 3845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 3855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // loop-exit: 3865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 3875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 1: Check to see if the look-back branch match this pattern: 3895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // "if (a!=0) goto loop-entry". 3905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *LoopEntry; 3915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *DefX2, *CountInst; 3925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *VarX1, *VarX0; 3935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *PhiX, *CountPhi; 3945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang DefX2 = CountInst = 0; 3965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang VarX1 = VarX0 = 0; 3975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PhiX = CountPhi = 0; 3985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LoopEntry = *(CurLoop->block_begin()); 3995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 1: Check if the loop-back branch is in desirable form. 4015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 4025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry)) 4035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang DefX2 = dyn_cast<Instruction>(T); 4045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang else 4055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" 4095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 4105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (DefX2->getOpcode() != Instruction::And) 4115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BinaryOperator *SubOneOp; 4145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0)))) 4165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang VarX1 = DefX2->getOperand(1); 4175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang else { 4185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang VarX1 = DefX2->getOperand(0); 4195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1)); 4205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!SubOneOp) 4225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *SubInst = cast<Instruction>(SubOneOp); 4255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1)); 4265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Dec || 4275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || 4285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) { 4295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 3: Check the recurrence of variable X 4345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 4355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PhiX = dyn_cast<PHINode>(VarX1); 4365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!PhiX || 4375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { 4385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 4435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 4445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CountInst = NULL; 4455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), 4465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang IterE = LoopEntry->end(); Iter != IterE; Iter++) { 4475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *Inst = Iter; 4485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (Inst->getOpcode() != Instruction::Add) 4495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang continue; 4505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1)); 4525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Inc || !Inc->isOne()) 4535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang continue; 4545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0)); 4565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Phi || Phi->getParent() != LoopEntry) 4575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang continue; 4585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Check if the result of the instruction is live of the loop. 4605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool LiveOutLoop = false; 4615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); 4625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang I != E; I++) { 4635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if ((cast<Instruction>(*I))->getParent() != LoopEntry) { 4645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LiveOutLoop = true; break; 4655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (LiveOutLoop) { 4695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CountInst = Inst; 4705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CountPhi = Phi; 4715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang break; 4725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!CountInst) 4765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 5: check if the precondition is in this form: 4805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;" 4815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 4825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); 4835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader()); 4845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1)) 4855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CntInst = CountInst; 4885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CntPhi = CountPhi; 4895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Var = T; 4905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return true; 4935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 4945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangvoid NclPopcountRecognize::transform(Instruction *CntInst, 4965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *CntPhi, Value *Var) { 4975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ScalarEvolution *SE = LIR.getScalarEvolution(); 4995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo(); 5005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *PreHead = CurLoop->getLoopPreheader(); 5015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); 5025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang const DebugLoc DL = CntInst->getDebugLoc(); 5035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Assuming before transformation, the loop is following: 5055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // if (x) // the precondition 5065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // do { cnt++; x &= x - 1; } while(x); 5075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Step 1: Insert the ctpop instruction at the end of the precondition block 5095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang IRBuilderTy Builder(PreCondBr); 5105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *PopCnt, *PopCntZext, *NewCount, *TripCnt; 5115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 5125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PopCnt = createPopcntIntrinsic(Builder, Var, DL); 5135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang NewCount = PopCntZext = 5145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType())); 5155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (NewCount != PopCnt) 5175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (cast<Instruction>(NewCount))->setDebugLoc(DL); 5185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // TripCnt is exactly the number of iterations the loop has 5205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TripCnt = NewCount; 5215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // If the popoulation counter's initial value is not zero, insert Add Inst. 5235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead); 5245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal); 5255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!InitConst || !InitConst->isZero()) { 5265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang NewCount = Builder.CreateAdd(NewCount, CntInitVal); 5275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (cast<Instruction>(NewCount))->setDebugLoc(DL); 5285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 5295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 5305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to 5325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic 5335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // function would be partial dead code, and downstream passes will drag 5345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // it back from the precondition block to the preheader. 5355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 5365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition()); 5375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Opnd0 = PopCntZext; 5395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0); 5405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (PreCond->getOperand(0) != Var) 5415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang std::swap(Opnd0, Opnd1); 5425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst *NewPreCond = 5445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); 5455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PreCond->replaceAllUsesWith(NewPreCond); 5465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang deleteDeadInstruction(PreCond, *SE, TLI); 5485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 5495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Step 3: Note that the population count is exactly the trip count of the 5515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // loop in question, which enble us to to convert the loop from noncountable 5525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // loop into a countable one. The benefit is twofold: 5535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 5545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // - If the loop only counts population, the entire loop become dead after 5555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // the transformation. It is lots easier to prove a countable loop dead 5565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // than to prove a noncountable one. (In some C dialects, a infite loop 5575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // isn't dead even if it computes nothing useful. In general, DCE needs 5585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // to prove a noncountable loop finite before safely delete it.) 5595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 5605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // - If the loop also performs something else, it remains alive. 5615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Since it is transformed to countable form, it can be aggressively 5625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // optimized by some optimizations which are in general not applicable 5635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // to a noncountable loop. 5645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 5655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // After this step, this loop (conceptually) would look like following: 5665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // newcnt = __builtin_ctpop(x); 5675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // t = newcnt; 5685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // if (x) 5695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // do { cnt++; x &= x-1; t--) } while (t > 0); 5705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *Body = *(CurLoop->block_begin()); 5715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 5725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *LbBr = LIRUtil::getBranch(Body); 5735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); 5745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Type *Ty = TripCnt->getType(); 5755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin()); 5775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Builder.SetInsertPoint(LbCond); 5795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Opnd1 = cast<Value>(TcPhi); 5805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1)); 5815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *TcDec = 5825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true)); 5835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TcPhi->addIncoming(TripCnt, PreHead); 5855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TcPhi->addIncoming(TcDec, Body); 5865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ? 5885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CmpInst::ICMP_UGT : CmpInst::ICMP_SLE; 5895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LbCond->setPredicate(Pred); 5905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LbCond->setOperand(0, TcDec); 5915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0))); 5925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 5935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Step 4: All the references to the original population counter outside 5955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // the loop are replaced with the NewCount -- the value returned from 5965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // __builtin_ctpop(). 5975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 5985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang SmallVector<Value *, 4> CntUses; 5995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end(); 6005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang I != E; I++) { 6015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (cast<Instruction>(*I)->getParent() != Body) 6025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CntUses.push_back(*I); 6035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 6045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) { 6055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount); 6065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 6075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 6085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 5: Forget the "non-computable" trip-count SCEV associated with the 6105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // loop. The loop would otherwise not be deleted even if it becomes empty. 6115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang SE->forgetLoop(CurLoop); 6125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 6135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangCallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, 6155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Val, DebugLoc DL) { 6165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Ops[] = { Val }; 6175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Type *Tys[] = { Val->getType() }; 6185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Module *M = (*(CurLoop->block_begin()))->getParent()->getParent(); 6205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); 6215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CallInst *CI = IRBuilder.CreateCall(Func, Ops); 6225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CI->setDebugLoc(DL); 6235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return CI; 6255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 6265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang/// recognize - detect population count idiom in a non-countable loop. If 6285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang/// detected, transform the relevant code to popcount intrinsic function 6295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang/// call, and return true; otherwise, return false. 6305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::recognize() { 6315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6329980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth if (!LIR.getTargetTransformInfo()) 6335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 6345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LIR.getScalarEvolution(); 6365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!preliminaryScreen()) 6385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 6395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *CntInst; 6415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *CntPhi; 6425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Val; 6435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!detectIdiom(CntInst, CntPhi, Val)) 6445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 6455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang transform(CntInst, CntPhi, Val); 6475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return true; 6485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 6495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 6515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 6525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// Implementation of LoopIdiomRecognize 6535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 6545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 6555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnCountableLoop() { 6575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop); 65822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner if (isa<SCEVCouldNotCompute>(BECount)) return false; 659d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 6608e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner // If this loop executes exactly one time, then it should be peeled, not 6618e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner // optimized by this pass. 6628e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) 6638e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner if (BECst->getValue()->getValue() == 0) 6648e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner return false; 665d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 66622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // We require target data for now. 6675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!getDataLayout()) 6685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 6695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 670cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang // set DT 671cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang (void)getDominatorTree(); 67262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner 67362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner LoopInfo &LI = getAnalysis<LoopInfo>(); 674c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner TLI = &getAnalysis<TargetLibraryInfo>(); 675d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 676cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang // set TLI 677cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang (void)getTargetLibraryInfo(); 6785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 67962c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner SmallVector<BasicBlock*, 8> ExitBlocks; 68062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner CurLoop->getUniqueExitBlocks(ExitBlocks); 68162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner 68263f9c3c49ac7011e6366fbec42716f3f70f1beeeChris Lattner DEBUG(dbgs() << "loop-idiom Scanning: F[" 6835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang << CurLoop->getHeader()->getParent()->getName() 6845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); 685d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 68662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner bool MadeChange = false; 68762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // Scan all the blocks in the loop that are not in subloops. 6885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang for (Loop::block_iterator BI = CurLoop->block_begin(), 6895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang E = CurLoop->block_end(); BI != E; ++BI) { 69062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // Ignore blocks in subloops. 69162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner if (LI.getLoopFor(*BI) != CurLoop) 69262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner continue; 693d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 69462c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); 69562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner } 69662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner return MadeChange; 69762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner} 698e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 6995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnNoncountableLoop() { 7005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang NclPopcountRecognize Popcount(*this); 7015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (Popcount.recognize()) 7025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return true; 7035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 7055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 7065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { 7085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CurLoop = L; 7095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // If the loop could not be converted to canonical form, it must have an 7115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // indirectbr in it, just give up. 7125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!L->getLoopPreheader()) 7135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 7145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Disable loop idiom recognition if the function's name is a common idiom. 7165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang StringRef Name = L->getHeader()->getParent()->getName(); 7175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (Name == "memset" || Name == "memcpy") 7185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 7195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang SE = &getAnalysis<ScalarEvolution>(); 7215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (SE->hasLoopInvariantBackedgeTakenCount(L)) 7225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return runOnCountableLoop(); 7235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return runOnNoncountableLoop(); 7245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 7255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 72662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// runOnLoopBlock - Process the specified block, which lives in a counted loop 72762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// with the specified backedge count. This block is known to be in the current 72862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// loop and not in any subloops. 72962c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattnerbool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, 73062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner SmallVectorImpl<BasicBlock*> &ExitBlocks) { 73162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // We can only promote stores in this block if they are unconditionally 73262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // executed in the loop. For a block to be unconditionally executed, it has 73362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // to dominate all the exit blocks of the loop. Verify this now. 73462c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) 73562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner if (!DT->dominates(BB, ExitBlocks[i])) 73662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner return false; 737d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 73822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner bool MadeChange = false; 73922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { 740b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner Instruction *Inst = I++; 741b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner // Look for store instructions, which may be optimized to memset/memcpy. 742b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 743b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner WeakVH InstPtr(I); 744b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner if (!processLoopStore(SI, BECount)) continue; 745b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner MadeChange = true; 746d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 747b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner // If processing the store invalidated our iterator, start over from the 748e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // top of the block. 749b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner if (InstPtr == 0) 750b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner I = BB->begin(); 751b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner continue; 752b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner } 753d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 754e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // Look for memset instructions, which may be optimized to a larger memset. 755e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { 756e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner WeakVH InstPtr(I); 757e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (!processLoopMemSet(MSI, BECount)) continue; 758e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner MadeChange = true; 759d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 760e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // If processing the memset invalidated our iterator, start over from the 761e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // top of the block. 762e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (InstPtr == 0) 763e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner I = BB->begin(); 764e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner continue; 765e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner } 76622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner } 767d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 76822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return MadeChange; 769e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 770e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 77162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner 772e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner/// processLoopStore - See if this store can be promoted to a memset or memcpy. 77322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattnerbool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { 7742bc3d52b9ab422ee9f7e42a1a4e3b818e623a5f7Eli Friedman if (!SI->isSimple()) return false; 775e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner 77622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner Value *StoredVal = SI->getValueOperand(); 777a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Value *StorePtr = SI->getPointerOperand(); 778d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 77995ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner // Reject stores that are so large that they overflow an unsigned. 78022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); 78195ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) 78222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 783d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 78422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // See if the pointer expression is an AddRec like {base,+,1} on the current 78522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // loop, which indicates a strided store. If we have something else, it's a 78622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // random store we can't handle. 787e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *StoreEv = 788e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); 789e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) 79022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 79122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 79222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // Check to see if the stride matches the size of the store. If so, then we 79322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // know that every byte is touched in the loop. 794d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick unsigned StoreSize = (unsigned)SizeInBits >> 3; 795e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); 796d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 797408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { 798408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner // TODO: Could also handle negative stride here someday, that will require 799408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner // the validity check in mayLoopAccessLocation to be updated though. 800408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner // Enable this to print exact negative strides. 8010e68cee62f251c45df92c71ca536142bc7d82631Chris Lattner if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) { 802408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; 803408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner dbgs() << "BB: " << *SI->getParent(); 804408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner } 805d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 80622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 807408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner } 8083a393728a62122d7009d8e2cbe52a221874e576aChris Lattner 8093a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // See if we can optimize just this store in isolation. 8103a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), 8113a393728a62122d7009d8e2cbe52a221874e576aChris Lattner StoredVal, SI, StoreEv, BECount)) 8123a393728a62122d7009d8e2cbe52a221874e576aChris Lattner return true; 813a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 814e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // If the stored value is a strided load in the same loop with the same stride 815e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // this this may be transformable into a memcpy. This kicks in for stuff like 816e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // for (i) A[i] = B[i]; 817e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 818e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *LoadEv = 819e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0))); 820e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && 8212bc3d52b9ab422ee9f7e42a1a4e3b818e623a5f7Eli Friedman StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple()) 822e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) 823e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner return true; 824e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner } 8254ce31fb57412b16cd61337ba41c60112dfbecfb6Chris Lattner //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n"; 82622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 827e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner return false; 828e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 829e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 830e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner/// processLoopMemSet - See if this memset can be promoted to a large memset. 831e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattnerbool LoopIdiomRecognize:: 832e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris LattnerprocessLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { 833e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // We can only handle non-volatile memsets with a constant size. 834e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false; 835e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner 836c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner // If we're not allowed to hack on memset, we fail. 837c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner if (!TLI->has(LibFunc::memset)) 838c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner return false; 839d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 840e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner Value *Pointer = MSI->getDest(); 841d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 842e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // See if the pointer expression is an AddRec like {base,+,1} on the current 843e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // loop, which indicates a strided store. If we have something else, it's a 844e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // random store we can't handle. 845e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer)); 846e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine()) 847e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner return false; 848e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner 849e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // Reject memsets that are so large that they overflow an unsigned. 850e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); 851e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if ((SizeInBytes >> 32) != 0) 852e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner return false; 853d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 854e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // Check to see if the stride matches the size of the memset. If so, then we 855e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // know that every byte is touched in the loop. 856e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); 857d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 858e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // TODO: Could also handle negative stride here someday, that will require the 859e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // validity check in mayLoopAccessLocation to be updated though. 860e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (Stride == 0 || MSI->getLength() != Stride->getValue()) 861e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner return false; 862d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 8633a393728a62122d7009d8e2cbe52a221874e576aChris Lattner return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, 8643a393728a62122d7009d8e2cbe52a221874e576aChris Lattner MSI->getAlignment(), MSI->getValue(), 8653a393728a62122d7009d8e2cbe52a221874e576aChris Lattner MSI, Ev, BECount); 866e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner} 867e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner 868a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 869a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// mayLoopAccessLocation - Return true if the specified loop might access the 870a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// specified pointer location, which is a loop-strided access. The 'Access' 871a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// argument specifies what the verboten forms of access are (read or write). 872a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruthstatic bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, 873a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Loop *L, const SCEV *BECount, 874a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth unsigned StoreSize, AliasAnalysis &AA, 875a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Instruction *IgnoredStore) { 876a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // Get the location that may be stored across the loop. Since the access is 877a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // strided positively through memory, we say that the modified location starts 878a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // at the pointer and has infinite size. 879a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth uint64_t AccessSize = AliasAnalysis::UnknownSize; 880a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 881a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // If the loop iterates a fixed number of times, we can refine the access size 882a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // to be exactly the size of the memset, which is (BECount+1)*StoreSize 883a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) 884a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; 885a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 886a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // TODO: For this to be really effective, we have to dive into the pointer 887a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // operand in the store. Store to &A[i] of 100 will always return may alias 888a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // with store of &A[100], we need to StoreLoc to be "A" with size of 100, 889a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // which will then no-alias a store to &A[100]. 890a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth AliasAnalysis::Location StoreLoc(Ptr, AccessSize); 891a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 892a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; 893a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth ++BI) 894a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) 895a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (&*I != IgnoredStore && 896a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth (AA.getModRefInfo(I, StoreLoc) & Access)) 897a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return true; 898a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 899a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return false; 900a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth} 901a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 9023a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// getMemSetPatternValue - If a strided store of the specified value is safe to 9033a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should 9043a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// be passed in. Otherwise, return null. 9053a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// 9063a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// Note that we don't ever attempt to use memset_pattern8 or 4, because these 9073a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// just replicate their input array and then pass on to memset_pattern16. 9083574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmowstatic Constant *getMemSetPatternValue(Value *V, const DataLayout &TD) { 9093a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // If the value isn't a constant, we can't promote it to being in a constant 9103a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // array. We could theoretically do a store to an alloca or something, but 9113a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // that doesn't seem worthwhile. 9123a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Constant *C = dyn_cast<Constant>(V); 9133a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (C == 0) return 0; 914d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 9153a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Only handle simple values that are a power of two bytes in size. 9163a393728a62122d7009d8e2cbe52a221874e576aChris Lattner uint64_t Size = TD.getTypeSizeInBits(V->getType()); 9173a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (Size == 0 || (Size & 7) || (Size & (Size-1))) 9183a393728a62122d7009d8e2cbe52a221874e576aChris Lattner return 0; 919d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 92080e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner // Don't care enough about darwin/ppc to implement this. 92180e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner if (TD.isBigEndian()) 92280e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner return 0; 9233a393728a62122d7009d8e2cbe52a221874e576aChris Lattner 9243a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Convert to size in bytes. 9253a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Size /= 8; 926c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner 9273a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see 92880e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner // if the top and bottom are the same (e.g. for vectors and large integers). 9293a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (Size > 16) return 0; 930d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 93180e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner // If the constant is exactly 16 bytes, just use it. 93280e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner if (Size == 16) return C; 93380e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner 93480e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner // Otherwise, we'll use an array of the constants. 93580e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner unsigned ArraySize = 16/Size; 93680e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner ArrayType *AT = ArrayType::get(V->getType(), ArraySize); 93780e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C)); 9383a393728a62122d7009d8e2cbe52a221874e576aChris Lattner} 9393a393728a62122d7009d8e2cbe52a221874e576aChris Lattner 9403a393728a62122d7009d8e2cbe52a221874e576aChris Lattner 9413a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// processLoopStridedStore - We see a strided store of some value. If we can 9423a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// transform this into a memset or memset_pattern in the loop preheader, do so. 9433a393728a62122d7009d8e2cbe52a221874e576aChris Lattnerbool LoopIdiomRecognize:: 9443a393728a62122d7009d8e2cbe52a221874e576aChris LattnerprocessLoopStridedStore(Value *DestPtr, unsigned StoreSize, 9453a393728a62122d7009d8e2cbe52a221874e576aChris Lattner unsigned StoreAlignment, Value *StoredVal, 9463a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Instruction *TheStore, const SCEVAddRecExpr *Ev, 9473a393728a62122d7009d8e2cbe52a221874e576aChris Lattner const SCEV *BECount) { 948d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 9493a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // If the stored value is a byte-wise value (like i32 -1), then it may be 9503a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // turned into a memset of i8 -1, assuming that all the consecutive bytes 9513a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // are stored. A store of i32 0x01020304 can never be turned into a memset, 9523a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // but it can be turned into memset_pattern if the target supports it. 9533a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Value *SplatValue = isBytewiseValue(StoredVal); 9543a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Constant *PatternValue = 0; 955d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 9563a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // If we're allowed to form a memset, and the stored value would be acceptable 9573a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // for memset, use it. 9583a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (SplatValue && TLI->has(LibFunc::memset) && 9593a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Verify that the stored value is loop invariant. If not, we can't 9603a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // promote the memset. 9613a393728a62122d7009d8e2cbe52a221874e576aChris Lattner CurLoop->isLoopInvariant(SplatValue)) { 9623a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Keep and use SplatValue. 9633a393728a62122d7009d8e2cbe52a221874e576aChris Lattner PatternValue = 0; 9643a393728a62122d7009d8e2cbe52a221874e576aChris Lattner } else if (TLI->has(LibFunc::memset_pattern16) && 9653a393728a62122d7009d8e2cbe52a221874e576aChris Lattner (PatternValue = getMemSetPatternValue(StoredVal, *TD))) { 9663a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // It looks like we can use PatternValue! 9673a393728a62122d7009d8e2cbe52a221874e576aChris Lattner SplatValue = 0; 9683a393728a62122d7009d8e2cbe52a221874e576aChris Lattner } else { 9693a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Otherwise, this isn't an idiom we can transform. For example, we can't 9705ac7c7da3ebfd1aa95226aeefb8cfa3ba34fd72aEli Friedman // do anything with a 3-byte store. 971bafa117e8f2e3532f391227ebc9d4513b17edbadChris Lattner return false; 9723a393728a62122d7009d8e2cbe52a221874e576aChris Lattner } 973d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 974a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // The trip count of the loop and the base pointer of the addrec SCEV is 975a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // guaranteed to be loop invariant, which means that it should dominate the 9764f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // header. This allows us to insert code for it in the preheader. 9774f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner BasicBlock *Preheader = CurLoop->getLoopPreheader(); 9784f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner IRBuilder<> Builder(Preheader->getTerminator()); 9795e7645be4c9dd2193add44d30b5fef8036d7a3ceAndrew Trick SCEVExpander Expander(*SE, "loop-idiom"); 980a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick 9814f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // Okay, we have a strided store "p[i]" of a splattable value. We can turn 9823740e798bc850cd5d40185959801606433b0221fBenjamin Kramer // this into a memset in the loop preheader now if we want. However, this 9833740e798bc850cd5d40185959801606433b0221fBenjamin Kramer // would be unsafe to do if there is anything else in the loop that may read 984ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth // or write to the aliased location. Check for any overlap by generating the 985ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth // base pointer and checking the region. 986ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); 987d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick Value *BasePtr = 988a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), 989a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Preheader->getTerminator()); 990d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 9914f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner 992a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, 993a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth CurLoop, BECount, 994a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ 995a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Expander.clear(); 996a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // If we generated new code for the base pointer, clean up. 997a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteIfDeadInstruction(BasePtr, *SE, TLI); 998a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return false; 999a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth } 1000a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 10014f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // Okay, everything looks good, insert the memset. 10024f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner 1003a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // The # stored bytes is (BECount+1)*Size. Expand the trip count out to 1004a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // pointer size if it isn't already. 1005ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); 10067c90b90f4e9b0c421f0e45d7de03f6edce113a90Chris Lattner BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); 1007d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1008a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), 10093228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick SCEV::FlagNUW); 1010a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner if (StoreSize != 1) 1011a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), 10123228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick SCEV::FlagNUW); 1013d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1014d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick Value *NumBytes = 1015a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); 1016d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1017cd77a50e638be5e7153ebe2a8ba875de7df48beaDevang Patel CallInst *NewCall; 10183a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (SplatValue) 10193a393728a62122d7009d8e2cbe52a221874e576aChris Lattner NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); 10203a393728a62122d7009d8e2cbe52a221874e576aChris Lattner else { 10213a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Module *M = TheStore->getParent()->getParent()->getParent(); 10223a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Value *MSP = M->getOrInsertFunction("memset_pattern16", 10233a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Builder.getVoidTy(), 1024d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick Builder.getInt8PtrTy(), 10253a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Builder.getInt8PtrTy(), IntPtr, 10263a393728a62122d7009d8e2cbe52a221874e576aChris Lattner (void*)0); 1027d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 10283a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Otherwise we should form a memset_pattern16. PatternValue is known to be 10293a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // an constant array of 16-bytes. Plop the value into a mergable global. 10303a393728a62122d7009d8e2cbe52a221874e576aChris Lattner GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, 10313a393728a62122d7009d8e2cbe52a221874e576aChris Lattner GlobalValue::InternalLinkage, 10323a393728a62122d7009d8e2cbe52a221874e576aChris Lattner PatternValue, ".memset_pattern"); 10333a393728a62122d7009d8e2cbe52a221874e576aChris Lattner GV->setUnnamedAddr(true); // Ok to merge these. 10343a393728a62122d7009d8e2cbe52a221874e576aChris Lattner GV->setAlignment(16); 103580e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); 10363a393728a62122d7009d8e2cbe52a221874e576aChris Lattner NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); 10373a393728a62122d7009d8e2cbe52a221874e576aChris Lattner } 1038d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1039a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" 1040e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner << " from store to: " << *Ev << " at: " << *TheStore << "\n"); 1041cd77a50e638be5e7153ebe2a8ba875de7df48beaDevang Patel NewCall->setDebugLoc(TheStore->getDebugLoc()); 1042d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 10439f39188def2b4102817945522fac1209083efa06Chris Lattner // Okay, the memset has been formed. Zap the original store and anything that 10449f39188def2b4102817945522fac1209083efa06Chris Lattner // feeds into it. 10458e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer deleteDeadInstruction(TheStore, *SE, TLI); 10464ce31fb57412b16cd61337ba41c60112dfbecfb6Chris Lattner ++NumMemSet; 1047a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner return true; 1048a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner} 1049a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 1050e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner/// processLoopStoreOfLoopLoad - We see a strided store whose value is a 1051e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner/// same-strided load. 1052e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattnerbool LoopIdiomRecognize:: 1053e2c43920919c6fe376613d1d8331897dc1ba3d57Chris LattnerprocessLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, 1054e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *StoreEv, 1055e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *LoadEv, 1056e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEV *BECount) { 1057c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner // If we're not allowed to form memcpy, we fail. 1058a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (!TLI->has(LibFunc::memcpy)) 1059c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner return false; 1060d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1061e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); 1062d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 10634f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // The trip count of the loop and the base pointer of the addrec SCEV is 10644f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // guaranteed to be loop invariant, which means that it should dominate the 10654f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // header. This allows us to insert code for it in the preheader. 10664f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner BasicBlock *Preheader = CurLoop->getLoopPreheader(); 10674f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner IRBuilder<> Builder(Preheader->getTerminator()); 10685e7645be4c9dd2193add44d30b5fef8036d7a3ceAndrew Trick SCEVExpander Expander(*SE, "loop-idiom"); 1069a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick 1070e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // Okay, we have a strided store "p[i]" of a loaded value. We can turn 1071a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // this into a memcpy in the loop preheader now if we want. However, this 1072a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // would be unsafe to do if there is anything else in the loop that may read 1073a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // or write the memory region we're storing to. This includes the load that 1074a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // feeds the stores. Check for an alias by generating the base address and 1075a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // checking everything. 10764f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner Value *StoreBasePtr = 10774f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner Expander.expandCodeFor(StoreEv->getStart(), 10784f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner Builder.getInt8PtrTy(SI->getPointerAddressSpace()), 10794f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner Preheader->getTerminator()); 1080a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 1081a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, 1082a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth CurLoop, BECount, StoreSize, 1083a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth getAnalysis<AliasAnalysis>(), SI)) { 1084a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Expander.clear(); 1085a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // If we generated new code for the base pointer, clean up. 1086a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); 1087a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return false; 1088a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth } 1089a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 1090a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // For a memcpy, we have to make sure that the input array is not being 1091a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // mutated by the loop. 1092d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick Value *LoadBasePtr = 1093e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Expander.expandCodeFor(LoadEv->getStart(), 1094e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Builder.getInt8PtrTy(LI->getPointerAddressSpace()), 1095e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Preheader->getTerminator()); 10964f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner 1097a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, 1098a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth StoreSize, getAnalysis<AliasAnalysis>(), SI)) { 1099a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Expander.clear(); 1100a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // If we generated new code for the base pointer, clean up. 1101a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteIfDeadInstruction(LoadBasePtr, *SE, TLI); 1102a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); 1103a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return false; 1104a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth } 1105a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 11064f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // Okay, everything is safe, we can transform this! 1107a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick 1108d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1109e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // The # stored bytes is (BECount+1)*Size. Expand the trip count out to 1110e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // pointer size if it isn't already. 1111ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth Type *IntPtr = TD->getIntPtrType(SI->getContext()); 11127c90b90f4e9b0c421f0e45d7de03f6edce113a90Chris Lattner BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); 1113d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1114e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), 11153228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick SCEV::FlagNUW); 1116e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (StoreSize != 1) 1117e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), 11183228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick SCEV::FlagNUW); 1119d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1120e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Value *NumBytes = 1121e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); 1122d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1123a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth CallInst *NewCall = 1124a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, 1125a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth std::min(SI->getAlignment(), LI->getAlignment())); 1126af35841f2e346f9259606886698c188280406cdbDevang Patel NewCall->setDebugLoc(SI->getDebugLoc()); 1127d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1128a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" 1129e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" 1130e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); 1131a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick 1132d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1133e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // Okay, the memset has been formed. Zap the original store and anything that 1134e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // feeds into it. 11358e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer deleteDeadInstruction(SI, *SE, TLI); 1136a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth ++NumMemCpy; 1137e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner return true; 1138e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner} 1139