1e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===// 2e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 3e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// The LLVM Compiler Infrastructure 4e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 5e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This file is distributed under the University of Illinois Open Source 6e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// License. See LICENSE.TXT for details. 7e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 8e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===// 9e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 10e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This pass implements an idiom recognizer that transforms simple loops into a 11e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// non-loop form. In cases that this kicks in, it can be a significant 12e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// performance win. 13e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// 14e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===// 15bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// 16bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// TODO List: 17bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// 18bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future loop memory idioms to recognize: 19a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth// memcmp, memmove, strlen, etc. 20bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future floating point idioms to recognize in -ffast-math mode: 21bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// fpowi 22bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future integer operation idioms to recognize: 23bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// ctpop, ctlz, cttz 24bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// 25bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Beware that isel's default lowering for ctpop is highly inefficient for 26bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// i64 and larger types when i64 is legal and the value has few bits set. It 27bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// would be good to enhance isel to emit a loop for ctpop in this case. 28bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// 29bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// We should enhance the memset/memcpy recognition to handle multiple stores in 30bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// the loop. This would handle things like: 31bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// void foo(_Complex float *P) 32bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// for (i) { __real__(*P) = 0; __imag__(*P) = 0; } 3391139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner// 34408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// We should enhance this to handle negative strides through memory. 35408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// Alternatively (and perhaps better) we could rely on an earlier pass to force 36408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// forward iteration through memory, which is generally better for cache 37408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// behavior. Negative strides *do* happen for memset/memcpy loops. 38408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// 39d957c717918c412402157b85fc51b4c6d2631381Chris Lattner// This could recognize common matrix multiplies and dot product idioms and 4091139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner// replace them with calls to BLAS (if linked in??). 4191139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner// 42bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//===----------------------------------------------------------------------===// 43e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 44e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Transforms/Scalar.h" 4506cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/ADT/Statistic.h" 462e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner#include "llvm/Analysis/AliasAnalysis.h" 47e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Analysis/LoopPass.h" 48a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner#include "llvm/Analysis/ScalarEvolutionExpander.h" 4906cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Analysis/ScalarEvolutionExpressions.h" 50be04929f7fd76a921540e9901f24563e51dc1219Chandler Carruth#include "llvm/Analysis/TargetTransformInfo.h" 5122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner#include "llvm/Analysis/ValueTracking.h" 520b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h" 5336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Dominators.h" 540b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IRBuilder.h" 550b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h" 560b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Module.h" 5706cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Support/Debug.h" 5806cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Support/raw_ostream.h" 59c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner#include "llvm/Target/TargetLibraryInfo.h" 609f39188def2b4102817945522fac1209083efa06Chris Lattner#include "llvm/Transforms/Utils/Local.h" 61e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerusing namespace llvm; 62e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 63dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "loop-idiom" 64dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 65a864748284f4bab974ffb13b840a611c1f9bc7acChandler CarruthSTATISTIC(NumMemSet, "Number of memset's formed from loop stores"); 66a864748284f4bab974ffb13b840a611c1f9bc7acChandler CarruthSTATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); 67e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 68e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnernamespace { 695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang class LoopIdiomRecognize; 715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// This class defines some utility functions for loop idiom recognization. 735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang class LIRUtil { 745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang public: 755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Return true iff the block contains nothing but an uncondition branch 765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// (aka goto instruction). 775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang static bool isAlmostEmpty(BasicBlock *); 785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang static BranchInst *getBranch(BasicBlock *BB) { 805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return dyn_cast<BranchInst>(BB->getTerminator()); 815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 831f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault /// Derive the precondition block (i.e the block that guards the loop 845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// preheader) from the given preheader. 855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang static BasicBlock *getPrecondBb(BasicBlock *PreHead); 865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang }; 875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// This class is to recoginize idioms of population-count conducted in 895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// a noncountable loop. Currently it only recognizes this pattern: 905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// \code 915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// while(x) {cnt++; ...; x &= x - 1; ...} 925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// \endcode 935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang class NclPopcountRecognize { 945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LoopIdiomRecognize &LIR; 955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Loop *CurLoop; 965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *PreCondBB; 975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang typedef IRBuilder<> IRBuilderTy; 995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang public: 1015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR); 1025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool recognize(); 1035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang private: 1055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Take a glimpse of the loop to see if we need to go ahead recoginizing 1065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// the idiom. 1075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool preliminaryScreen(); 1085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Check if the given conditional branch is based on the comparison 11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// between a variable and zero, and if the variable is non-zero, the 11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// control yields to the loop entry. If the branch matches the behavior, 1125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// the variable involved in the comparion is returned. This function will 1131f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault /// be called to see if the precondition and postcondition of the loop 1145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// are in desirable form. 115cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const; 1165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Return true iff the idiom is detected in the loop. and 1) \p CntInst 118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines /// is set to the instruction counting the population bit. 2) \p CntPhi 1195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// is set to the corresponding phi node. 3) \p Var is set to the value 1205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// whose population bits are being counted. 1215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool detectIdiom 1225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const; 1235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Insert ctpop intrinsic function and some obviously dead instructions. 125cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var); 1265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang /// Create llvm.ctpop.* intrinsic function. 1285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL); 1295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang }; 1305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 131e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner class LoopIdiomRecognize : public LoopPass { 13222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner Loop *CurLoop; 13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const DataLayout *DL; 13462c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner DominatorTree *DT; 13522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner ScalarEvolution *SE; 136c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner TargetLibraryInfo *TLI; 1379980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth const TargetTransformInfo *TTI; 138e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner public: 139e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner static char ID; 140e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner explicit LoopIdiomRecognize() : LoopPass(ID) { 141e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); 142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr; 143e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner } 144e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 14536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool runOnLoop(Loop *L, LPPassManager &LPM) override; 14662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, 14762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner SmallVectorImpl<BasicBlock*> &ExitBlocks); 148e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 14922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner bool processLoopStore(StoreInst *SI, const SCEV *BECount); 150e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); 151d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1523a393728a62122d7009d8e2cbe52a221874e576aChris Lattner bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, 1533a393728a62122d7009d8e2cbe52a221874e576aChris Lattner unsigned StoreAlignment, 1543a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Value *SplatValue, Instruction *TheStore, 1553a393728a62122d7009d8e2cbe52a221874e576aChris Lattner const SCEVAddRecExpr *Ev, 1563a393728a62122d7009d8e2cbe52a221874e576aChris Lattner const SCEV *BECount); 157e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, 158e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *StoreEv, 159e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *LoadEv, 160e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEV *BECount); 161d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 162e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// This transformation requires natural loop information & requires that 163e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// loop preheaders be inserted into the CFG. 164e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner /// 16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override { 166e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequired<LoopInfo>(); 167e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreserved<LoopInfo>(); 168e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequiredID(LoopSimplifyID); 169e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreservedID(LoopSimplifyID); 170e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequiredID(LCSSAID); 171e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreservedID(LCSSAID); 1722e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner AU.addRequired<AliasAnalysis>(); 1732e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner AU.addPreserved<AliasAnalysis>(); 174e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addRequired<ScalarEvolution>(); 175e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner AU.addPreserved<ScalarEvolution>(); 17636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AU.addPreserved<DominatorTreeWrapperPass>(); 17736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AU.addRequired<DominatorTreeWrapperPass>(); 178c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner AU.addRequired<TargetLibraryInfo>(); 179d12aae6c0535bd2713207ef84b7098f905a89b78Chandler Carruth AU.addRequired<TargetTransformInfo>(); 180e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner } 1815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang const DataLayout *getDataLayout() { 18336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (DL) 18436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return DL; 18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); 186dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DL = DLP ? &DLP->getDataLayout() : nullptr; 18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return DL; 1885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 1895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang DominatorTree *getDominatorTree() { 19136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return DT ? DT 19236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree()); 1935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 1945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ScalarEvolution *getScalarEvolution() { 1965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return SE ? SE : (SE = &getAnalysis<ScalarEvolution>()); 1975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 1985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 1995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TargetLibraryInfo *getTargetLibraryInfo() { 2005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>()); 2015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 2025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 2039980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth const TargetTransformInfo *getTargetTransformInfo() { 204d12aae6c0535bd2713207ef84b7098f905a89b78Chandler Carruth return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>()); 2055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 2065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 2075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Loop *getLoop() const { return CurLoop; } 2085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 2095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang private: 2105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool runOnNoncountableLoop(); 2115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool runOnCountableLoop(); 212e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner }; 213e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 214e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 215e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerchar LoopIdiomRecognize::ID = 0; 216e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", 217e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner false, false) 218e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopInfo) 21936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesINITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 220e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 221e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LCSSA) 222e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 223c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris LattnerINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 2242e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris LattnerINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 225d12aae6c0535bd2713207ef84b7098f905a89b78Chandler CarruthINITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 226e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", 227e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner false, false) 228e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 229e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerPass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } 230e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 2314f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner/// deleteDeadInstruction - Delete this instruction. Before we do, go through 2329f39188def2b4102817945522fac1209083efa06Chris Lattner/// and zero out all the operands of this instruction. If any of them become 2339f39188def2b4102817945522fac1209083efa06Chris Lattner/// dead, delete them and the computation tree that feeds them. 2349f39188def2b4102817945522fac1209083efa06Chris Lattner/// 2358e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramerstatic void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, 2368e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer const TargetLibraryInfo *TLI) { 2379f39188def2b4102817945522fac1209083efa06Chris Lattner SmallVector<Instruction*, 32> NowDeadInsts; 238d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2399f39188def2b4102817945522fac1209083efa06Chris Lattner NowDeadInsts.push_back(I); 240d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2419f39188def2b4102817945522fac1209083efa06Chris Lattner // Before we touch this instruction, remove it from SE! 2429f39188def2b4102817945522fac1209083efa06Chris Lattner do { 2439f39188def2b4102817945522fac1209083efa06Chris Lattner Instruction *DeadInst = NowDeadInsts.pop_back_val(); 244d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2459f39188def2b4102817945522fac1209083efa06Chris Lattner // This instruction is dead, zap it, in stages. Start by removing it from 2469f39188def2b4102817945522fac1209083efa06Chris Lattner // SCEV. 2479f39188def2b4102817945522fac1209083efa06Chris Lattner SE.forgetValue(DeadInst); 248d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2499f39188def2b4102817945522fac1209083efa06Chris Lattner for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { 2509f39188def2b4102817945522fac1209083efa06Chris Lattner Value *Op = DeadInst->getOperand(op); 251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DeadInst->setOperand(op, nullptr); 252d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2539f39188def2b4102817945522fac1209083efa06Chris Lattner // If this operand just became dead, add it to the NowDeadInsts list. 2549f39188def2b4102817945522fac1209083efa06Chris Lattner if (!Op->use_empty()) continue; 255d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2569f39188def2b4102817945522fac1209083efa06Chris Lattner if (Instruction *OpI = dyn_cast<Instruction>(Op)) 2578e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer if (isInstructionTriviallyDead(OpI, TLI)) 2589f39188def2b4102817945522fac1209083efa06Chris Lattner NowDeadInsts.push_back(OpI); 2599f39188def2b4102817945522fac1209083efa06Chris Lattner } 260d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2619f39188def2b4102817945522fac1209083efa06Chris Lattner DeadInst->eraseFromParent(); 262d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 2639f39188def2b4102817945522fac1209083efa06Chris Lattner } while (!NowDeadInsts.empty()); 2649f39188def2b4102817945522fac1209083efa06Chris Lattner} 2659f39188def2b4102817945522fac1209083efa06Chris Lattner 266a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// deleteIfDeadInstruction - If the specified value is a dead instruction, 267a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// delete it and any recursively used instructions. 268a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruthstatic void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, 269a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth const TargetLibraryInfo *TLI) { 270a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (Instruction *I = dyn_cast<Instruction>(V)) 271a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (isInstructionTriviallyDead(I, TLI)) 272a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteDeadInstruction(I, SE, TLI); 273a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth} 274a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 2755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 2765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 2775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// Implementation of LIRUtil 2785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 2795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 28084fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang 2811f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// This function will return true iff the given block contains nothing but goto. 2821f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// A typical usage of this function is to check if the preheader function is 2831f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// "almost" empty such that generated intrinsic functions can be moved across 2841f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// the preheader and be placed at the end of the precondition block without 2851f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// the concern of breaking data dependence. 2865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LIRUtil::isAlmostEmpty(BasicBlock *BB) { 2875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (BranchInst *Br = getBranch(BB)) { 2885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return Br->isUnconditional() && BB->size() == 1; 2895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 2905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 2915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 2925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 2935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangBasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { 2945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (BasicBlock *BB = PreHead->getSinglePredecessor()) { 2955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *Br = getBranch(BB); 296dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return Br && Br->isConditional() ? BB : nullptr; 2975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 298dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 2995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 3005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 3025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 3035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// Implementation of NclPopcountRecognize 3045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 3055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 3065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangNclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): 308dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) { 3095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 3105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::preliminaryScreen() { 3129980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth const TargetTransformInfo *TTI = LIR.getTargetTransformInfo(); 313d1b8ef97c47d347f2a2261a0d6de4872f248321fChandler Carruth if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) 31484fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang return false; 31584fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang 3163f4f420ab7acb10221ba971543a7eed5489fb626Robert Wilhelm // Counting population are usually conducted by few arithmetic instructions. 3175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Such instructions can be easilly "absorbed" by vacant slots in a 3185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // non-compact loop. Therefore, recognizing popcount idiom only makes sense 3195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // in a compact loop. 3205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Give up if the loop has multiple blocks or multiple backedges. 3225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) 32322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 32484fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang 3255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *LoopBody = *(CurLoop->block_begin()); 3265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (LoopBody->size() >= 20) { 3275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // The loop is too big, bail out. 3285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 3295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 3305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // It should have a preheader containing nothing but a goto instruction. 3325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *PreHead = CurLoop->getLoopPreheader(); 3335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead)) 3345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 3355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // It should have a precondition block where the generated popcount instrinsic 3375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // function will be inserted. 3385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PreCondBB = LIRUtil::getPrecondBb(PreHead); 3395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!PreCondBB) 34084fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang return false; 3411f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault 3425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return true; 3435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 3445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 345dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesValue *NclPopcountRecognize::matchCondition(BranchInst *Br, 346dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BasicBlock *LoopEntry) const { 3475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Br || !Br->isConditional()) 348dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 3495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition()); 3515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Cond) 352dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 3535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1)); 3555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!CmpZero || !CmpZero->isZero()) 356dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 3575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst::Predicate Pred = Cond->getPredicate(); 3595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || 3605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) 3615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return Cond->getOperand(0); 3625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 363dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 3645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 3655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, 3675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *&CntPhi, 3685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *&Var) const { 3695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Following code tries to detect this idiom: 3705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 3715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // if (x0 != 0) 3725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // goto loop-exit // the precondition of the loop 3735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // cnt0 = init-val; 3745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // do { 3755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // x1 = phi (x0, x2); 3765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // cnt1 = phi(cnt0, cnt2); 3775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 3785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // cnt2 = cnt1 + 1; 3795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // ... 3805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // x2 = x1 & (x1 - 1); 3815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // ... 3825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // } while(x != 0); 3835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 3845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // loop-exit: 3855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 3865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 1: Check to see if the look-back branch match this pattern: 3885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // "if (a!=0) goto loop-entry". 3895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *LoopEntry; 3905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *DefX2, *CountInst; 3915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *VarX1, *VarX0; 3925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *PhiX, *CountPhi; 3935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 394dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DefX2 = CountInst = nullptr; 395dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines VarX1 = VarX0 = nullptr; 396dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines PhiX = CountPhi = nullptr; 3975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LoopEntry = *(CurLoop->block_begin()); 3985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 3995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 1: Check if the loop-back branch is in desirable form. 4005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 4015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry)) 4025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang DefX2 = dyn_cast<Instruction>(T); 4035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang else 4045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" 4085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 409253449db20f86b655852a397245ba16ff262452fShuxin Yang if (!DefX2 || DefX2->getOpcode() != Instruction::And) 4105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BinaryOperator *SubOneOp; 4135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0)))) 4155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang VarX1 = DefX2->getOperand(1); 4165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang else { 4175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang VarX1 = DefX2->getOperand(0); 4185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1)); 4195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!SubOneOp) 4215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *SubInst = cast<Instruction>(SubOneOp); 4245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1)); 4255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Dec || 4265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || 4275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) { 4285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 3: Check the recurrence of variable X 4335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 4345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PhiX = dyn_cast<PHINode>(VarX1); 4355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!PhiX || 4365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { 4375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 4425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 443dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CountInst = nullptr; 4445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), 4455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang IterE = LoopEntry->end(); Iter != IterE; Iter++) { 4465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *Inst = Iter; 4475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (Inst->getOpcode() != Instruction::Add) 4485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang continue; 4495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1)); 4515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Inc || !Inc->isOne()) 4525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang continue; 4535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0)); 4555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!Phi || Phi->getParent() != LoopEntry) 4565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang continue; 4575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Check if the result of the instruction is live of the loop. 4595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang bool LiveOutLoop = false; 46036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (User *U : Inst->users()) { 46136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if ((cast<Instruction>(U))->getParent() != LoopEntry) { 4625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LiveOutLoop = true; break; 4635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (LiveOutLoop) { 4675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CountInst = Inst; 4685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CountPhi = Phi; 4695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang break; 4705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!CountInst) 4745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 5: check if the precondition is in this form: 4785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;" 4795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 4805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); 4815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader()); 4825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1)) 4835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 4845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CntInst = CountInst; 4865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CntPhi = CountPhi; 4875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Var = T; 4885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 4895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return true; 4915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 4925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangvoid NclPopcountRecognize::transform(Instruction *CntInst, 4945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *CntPhi, Value *Var) { 4955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 4965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ScalarEvolution *SE = LIR.getScalarEvolution(); 4975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo(); 4985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *PreHead = CurLoop->getLoopPreheader(); 4995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); 5005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang const DebugLoc DL = CntInst->getDebugLoc(); 5015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Assuming before transformation, the loop is following: 5035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // if (x) // the precondition 5045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // do { cnt++; x &= x - 1; } while(x); 5051f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault 5065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Step 1: Insert the ctpop instruction at the end of the precondition block 5075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang IRBuilderTy Builder(PreCondBr); 5085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *PopCnt, *PopCntZext, *NewCount, *TripCnt; 5095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 5105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PopCnt = createPopcntIntrinsic(Builder, Var, DL); 5115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang NewCount = PopCntZext = 5125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType())); 5135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (NewCount != PopCnt) 5155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (cast<Instruction>(NewCount))->setDebugLoc(DL); 5165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // TripCnt is exactly the number of iterations the loop has 5185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TripCnt = NewCount; 5195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 52036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the population counter's initial value is not zero, insert Add Inst. 5215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead); 5225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal); 5235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!InitConst || !InitConst->isZero()) { 5245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang NewCount = Builder.CreateAdd(NewCount, CntInitVal); 5255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (cast<Instruction>(NewCount))->setDebugLoc(DL); 5265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 5275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 5285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to 5305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic 5315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // function would be partial dead code, and downstream passes will drag 5325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // it back from the precondition block to the preheader. 5335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 5345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition()); 5355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Opnd0 = PopCntZext; 5375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0); 5385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (PreCond->getOperand(0) != Var) 5395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang std::swap(Opnd0, Opnd1); 5405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst *NewPreCond = 5425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); 5435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PreCond->replaceAllUsesWith(NewPreCond); 5445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang deleteDeadInstruction(PreCond, *SE, TLI); 5465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 5475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Step 3: Note that the population count is exactly the trip count of the 5495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // loop in question, which enble us to to convert the loop from noncountable 5505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // loop into a countable one. The benefit is twofold: 5515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 5525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // - If the loop only counts population, the entire loop become dead after 5535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // the transformation. It is lots easier to prove a countable loop dead 5545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // than to prove a noncountable one. (In some C dialects, a infite loop 5555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // isn't dead even if it computes nothing useful. In general, DCE needs 5565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // to prove a noncountable loop finite before safely delete it.) 5575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 5585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // - If the loop also performs something else, it remains alive. 5595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Since it is transformed to countable form, it can be aggressively 5605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // optimized by some optimizations which are in general not applicable 5615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // to a noncountable loop. 5625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // 5635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // After this step, this loop (conceptually) would look like following: 5645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // newcnt = __builtin_ctpop(x); 5655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // t = newcnt; 5665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // if (x) 5675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // do { cnt++; x &= x-1; t--) } while (t > 0); 5685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BasicBlock *Body = *(CurLoop->block_begin()); 5695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 5705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang BranchInst *LbBr = LIRUtil::getBranch(Body); 5715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition()); 5725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Type *Ty = TripCnt->getType(); 5735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin()); 5755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Builder.SetInsertPoint(LbCond); 5775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Opnd1 = cast<Value>(TcPhi); 5785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1)); 5795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *TcDec = 5805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true)); 5815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TcPhi->addIncoming(TripCnt, PreHead); 5835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang TcPhi->addIncoming(TcDec, Body); 5845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ? 5865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CmpInst::ICMP_UGT : CmpInst::ICMP_SLE; 5875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LbCond->setPredicate(Pred); 5885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LbCond->setOperand(0, TcDec); 5895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0))); 5905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 5915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 5925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Step 4: All the references to the original population counter outside 5935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // the loop are replaced with the NewCount -- the value returned from 5945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // __builtin_ctpop(). 5955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang { 5965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang SmallVector<Value *, 4> CntUses; 59736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (User *U : CntInst->users()) 59836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (cast<Instruction>(U)->getParent() != Body) 59936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines CntUses.push_back(U); 6005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) { 6015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount); 6025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 6035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang } 6045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // step 5: Forget the "non-computable" trip-count SCEV associated with the 6065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // loop. The loop would otherwise not be deleted even if it becomes empty. 6075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang SE->forgetLoop(CurLoop); 6085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 6095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6101f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt ArsenaultCallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, 6115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Val, DebugLoc DL) { 6125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Ops[] = { Val }; 6135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Type *Tys[] = { Val->getType() }; 6145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Module *M = (*(CurLoop->block_begin()))->getParent()->getParent(); 6165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); 6175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CallInst *CI = IRBuilder.CreateCall(Func, Ops); 6185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CI->setDebugLoc(DL); 6195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return CI; 6215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 6225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang/// recognize - detect population count idiom in a non-countable loop. If 6245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang/// detected, transform the relevant code to popcount intrinsic function 6255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang/// call, and return true; otherwise, return false. 6265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::recognize() { 6275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6289980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth if (!LIR.getTargetTransformInfo()) 6295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 6305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang LIR.getScalarEvolution(); 6325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!preliminaryScreen()) 6345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 6355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Instruction *CntInst; 6375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang PHINode *CntPhi; 6385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang Value *Val; 6395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!detectIdiom(CntInst, CntPhi, Val)) 6405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 6415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang transform(CntInst, CntPhi, Val); 6435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return true; 6445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 6455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 6475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 6485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// Implementation of LoopIdiomRecognize 6495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// 6505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===// 6515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnCountableLoop() { 6535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop); 65422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner if (isa<SCEVCouldNotCompute>(BECount)) return false; 655d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 6568e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner // If this loop executes exactly one time, then it should be peeled, not 6578e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner // optimized by this pass. 6588e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) 6598e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner if (BECst->getValue()->getValue() == 0) 6608e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner return false; 661d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 66222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // We require target data for now. 6635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!getDataLayout()) 6645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 6655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 6661f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault // set DT 667cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang (void)getDominatorTree(); 66862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner 66962c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner LoopInfo &LI = getAnalysis<LoopInfo>(); 670c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner TLI = &getAnalysis<TargetLibraryInfo>(); 671d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 6721f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault // set TLI 673cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang (void)getTargetLibraryInfo(); 6745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 67562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner SmallVector<BasicBlock*, 8> ExitBlocks; 67662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner CurLoop->getUniqueExitBlocks(ExitBlocks); 67762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner 67863f9c3c49ac7011e6366fbec42716f3f70f1beeeChris Lattner DEBUG(dbgs() << "loop-idiom Scanning: F[" 6795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang << CurLoop->getHeader()->getParent()->getName() 6805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); 681d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 68262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner bool MadeChange = false; 68362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // Scan all the blocks in the loop that are not in subloops. 6845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang for (Loop::block_iterator BI = CurLoop->block_begin(), 6855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang E = CurLoop->block_end(); BI != E; ++BI) { 68662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // Ignore blocks in subloops. 68762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner if (LI.getLoopFor(*BI) != CurLoop) 68862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner continue; 689d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 69062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); 69162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner } 69262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner return MadeChange; 69362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner} 694e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 6955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnNoncountableLoop() { 6965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang NclPopcountRecognize Popcount(*this); 6975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (Popcount.recognize()) 6985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return true; 6995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 7015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 7025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { 70436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (skipOptnoneFunction(L)) 70536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 70636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 7075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang CurLoop = L; 7085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // If the loop could not be converted to canonical form, it must have an 7105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // indirectbr in it, just give up. 7115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (!L->getLoopPreheader()) 7125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 7135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang // Disable loop idiom recognition if the function's name is a common idiom. 7155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang StringRef Name = L->getHeader()->getParent()->getName(); 7165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (Name == "memset" || Name == "memcpy") 7175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return false; 7185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 7195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang SE = &getAnalysis<ScalarEvolution>(); 7205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang if (SE->hasLoopInvariantBackedgeTakenCount(L)) 7215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return runOnCountableLoop(); 7225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang return runOnNoncountableLoop(); 7235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang} 7245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang 72562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// runOnLoopBlock - Process the specified block, which lives in a counted loop 72662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// with the specified backedge count. This block is known to be in the current 72762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// loop and not in any subloops. 72862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattnerbool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, 72962c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner SmallVectorImpl<BasicBlock*> &ExitBlocks) { 73062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // We can only promote stores in this block if they are unconditionally 73162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // executed in the loop. For a block to be unconditionally executed, it has 73262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner // to dominate all the exit blocks of the loop. Verify this now. 73362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) 73462c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner if (!DT->dominates(BB, ExitBlocks[i])) 73562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner return false; 736d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 73722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner bool MadeChange = false; 73822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { 739b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner Instruction *Inst = I++; 740b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner // Look for store instructions, which may be optimized to memset/memcpy. 741b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 742b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner WeakVH InstPtr(I); 743b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner if (!processLoopStore(SI, BECount)) continue; 744b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner MadeChange = true; 745d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 746b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner // If processing the store invalidated our iterator, start over from the 747e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // top of the block. 748dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!InstPtr) 749b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner I = BB->begin(); 750b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner continue; 751b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner } 752d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 753e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // Look for memset instructions, which may be optimized to a larger memset. 754e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { 755e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner WeakVH InstPtr(I); 756e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (!processLoopMemSet(MSI, BECount)) continue; 757e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner MadeChange = true; 758d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 759e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // If processing the memset invalidated our iterator, start over from the 760e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // top of the block. 761dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!InstPtr) 762e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner I = BB->begin(); 763e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner continue; 764e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner } 76522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner } 766d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 76722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return MadeChange; 768e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 769e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 77062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner 771e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner/// processLoopStore - See if this store can be promoted to a memset or memcpy. 77222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattnerbool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { 7732bc3d52b9ab422ee9f7e42a1a4e3b818e623a5f7Eli Friedman if (!SI->isSimple()) return false; 774e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner 77522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner Value *StoredVal = SI->getValueOperand(); 776a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Value *StorePtr = SI->getPointerOperand(); 777d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 77895ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner // Reject stores that are so large that they overflow an unsigned. 77936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); 78095ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) 78122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 782d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 78322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // See if the pointer expression is an AddRec like {base,+,1} on the current 78422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // loop, which indicates a strided store. If we have something else, it's a 78522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // random store we can't handle. 786e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *StoreEv = 787e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); 788dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) 78922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 79022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 79122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // Check to see if the stride matches the size of the store. If so, then we 79222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner // know that every byte is touched in the loop. 793d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick unsigned StoreSize = (unsigned)SizeInBits >> 3; 794e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); 795d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 796dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!Stride || StoreSize != Stride->getValue()->getValue()) { 797408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner // TODO: Could also handle negative stride here someday, that will require 798408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner // the validity check in mayLoopAccessLocation to be updated though. 799408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner // Enable this to print exact negative strides. 8000e68cee62f251c45df92c71ca536142bc7d82631Chris Lattner if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) { 801408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; 802408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner dbgs() << "BB: " << *SI->getParent(); 803408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner } 804d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 80522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner return false; 806408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner } 8073a393728a62122d7009d8e2cbe52a221874e576aChris Lattner 8083a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // See if we can optimize just this store in isolation. 8093a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), 8103a393728a62122d7009d8e2cbe52a221874e576aChris Lattner StoredVal, SI, StoreEv, BECount)) 8113a393728a62122d7009d8e2cbe52a221874e576aChris Lattner return true; 812a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 813e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // If the stored value is a strided load in the same loop with the same stride 814e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // this this may be transformable into a memcpy. This kicks in for stuff like 815e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // for (i) A[i] = B[i]; 816e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 817e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *LoadEv = 818e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0))); 819e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && 8202bc3d52b9ab422ee9f7e42a1a4e3b818e623a5f7Eli Friedman StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple()) 821e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) 822e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner return true; 823e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner } 8244ce31fb57412b16cd61337ba41c60112dfbecfb6Chris Lattner //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n"; 82522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner 826e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner return false; 827e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner} 828e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner 829e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner/// processLoopMemSet - See if this memset can be promoted to a large memset. 830e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattnerbool LoopIdiomRecognize:: 831e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris LattnerprocessLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { 832e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // We can only handle non-volatile memsets with a constant size. 833e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false; 834e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner 835c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner // If we're not allowed to hack on memset, we fail. 836c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner if (!TLI->has(LibFunc::memset)) 837c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner return false; 838d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 839e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner Value *Pointer = MSI->getDest(); 840d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 841e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // See if the pointer expression is an AddRec like {base,+,1} on the current 842e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // loop, which indicates a strided store. If we have something else, it's a 843e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // random store we can't handle. 844e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer)); 845dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine()) 846e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner return false; 847e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner 848e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // Reject memsets that are so large that they overflow an unsigned. 849e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); 850e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner if ((SizeInBytes >> 32) != 0) 851e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner return false; 852d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 853e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // Check to see if the stride matches the size of the memset. If so, then we 854e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // know that every byte is touched in the loop. 855e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); 856d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 857e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // TODO: Could also handle negative stride here someday, that will require the 858e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner // validity check in mayLoopAccessLocation to be updated though. 859dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!Stride || MSI->getLength() != Stride->getValue()) 860e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner return false; 861d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 8623a393728a62122d7009d8e2cbe52a221874e576aChris Lattner return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, 8633a393728a62122d7009d8e2cbe52a221874e576aChris Lattner MSI->getAlignment(), MSI->getValue(), 8643a393728a62122d7009d8e2cbe52a221874e576aChris Lattner MSI, Ev, BECount); 865e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner} 866e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner 867a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 868a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// mayLoopAccessLocation - Return true if the specified loop might access the 869a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// specified pointer location, which is a loop-strided access. The 'Access' 870a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// argument specifies what the verboten forms of access are (read or write). 871a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruthstatic bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, 872a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Loop *L, const SCEV *BECount, 873a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth unsigned StoreSize, AliasAnalysis &AA, 874a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Instruction *IgnoredStore) { 875a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // Get the location that may be stored across the loop. Since the access is 876a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // strided positively through memory, we say that the modified location starts 877a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // at the pointer and has infinite size. 878a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth uint64_t AccessSize = AliasAnalysis::UnknownSize; 879a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 880a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // If the loop iterates a fixed number of times, we can refine the access size 881a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // to be exactly the size of the memset, which is (BECount+1)*StoreSize 882a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) 883a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; 884a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 885a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // TODO: For this to be really effective, we have to dive into the pointer 886a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // operand in the store. Store to &A[i] of 100 will always return may alias 887a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // with store of &A[100], we need to StoreLoc to be "A" with size of 100, 888a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // which will then no-alias a store to &A[100]. 889a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth AliasAnalysis::Location StoreLoc(Ptr, AccessSize); 890a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 891a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; 892a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth ++BI) 893a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) 894a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (&*I != IgnoredStore && 895a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth (AA.getModRefInfo(I, StoreLoc) & Access)) 896a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return true; 897a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 898a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return false; 899a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth} 900a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 9013a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// getMemSetPatternValue - If a strided store of the specified value is safe to 9023a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should 9033a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// be passed in. Otherwise, return null. 9043a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// 9053a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// Note that we don't ever attempt to use memset_pattern8 or 4, because these 9063a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// just replicate their input array and then pass on to memset_pattern16. 90736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) { 9083a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // If the value isn't a constant, we can't promote it to being in a constant 9093a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // array. We could theoretically do a store to an alloca or something, but 9103a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // that doesn't seem worthwhile. 9113a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Constant *C = dyn_cast<Constant>(V); 912dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!C) return nullptr; 913d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 9143a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Only handle simple values that are a power of two bytes in size. 91536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines uint64_t Size = DL.getTypeSizeInBits(V->getType()); 9163a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (Size == 0 || (Size & 7) || (Size & (Size-1))) 917dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 918d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 91980e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner // Don't care enough about darwin/ppc to implement this. 92036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (DL.isBigEndian()) 921dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 9223a393728a62122d7009d8e2cbe52a221874e576aChris Lattner 9233a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Convert to size in bytes. 9243a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Size /= 8; 925c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner 9263a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see 92780e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner // if the top and bottom are the same (e.g. for vectors and large integers). 928dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Size > 16) return nullptr; 929d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 93080e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner // If the constant is exactly 16 bytes, just use it. 93180e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner if (Size == 16) return C; 93280e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner 93380e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner // Otherwise, we'll use an array of the constants. 93480e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner unsigned ArraySize = 16/Size; 93580e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner ArrayType *AT = ArrayType::get(V->getType(), ArraySize); 93680e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C)); 9373a393728a62122d7009d8e2cbe52a221874e576aChris Lattner} 9383a393728a62122d7009d8e2cbe52a221874e576aChris Lattner 9393a393728a62122d7009d8e2cbe52a221874e576aChris Lattner 9403a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// processLoopStridedStore - We see a strided store of some value. If we can 9413a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// transform this into a memset or memset_pattern in the loop preheader, do so. 9423a393728a62122d7009d8e2cbe52a221874e576aChris Lattnerbool LoopIdiomRecognize:: 9433a393728a62122d7009d8e2cbe52a221874e576aChris LattnerprocessLoopStridedStore(Value *DestPtr, unsigned StoreSize, 9443a393728a62122d7009d8e2cbe52a221874e576aChris Lattner unsigned StoreAlignment, Value *StoredVal, 9453a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Instruction *TheStore, const SCEVAddRecExpr *Ev, 9463a393728a62122d7009d8e2cbe52a221874e576aChris Lattner const SCEV *BECount) { 947d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 9483a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // If the stored value is a byte-wise value (like i32 -1), then it may be 9493a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // turned into a memset of i8 -1, assuming that all the consecutive bytes 9503a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // are stored. A store of i32 0x01020304 can never be turned into a memset, 9513a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // but it can be turned into memset_pattern if the target supports it. 9523a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Value *SplatValue = isBytewiseValue(StoredVal); 953dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Constant *PatternValue = nullptr; 954d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 95511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); 95611250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault 9573a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // If we're allowed to form a memset, and the stored value would be acceptable 9583a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // for memset, use it. 9593a393728a62122d7009d8e2cbe52a221874e576aChris Lattner if (SplatValue && TLI->has(LibFunc::memset) && 9603a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Verify that the stored value is loop invariant. If not, we can't 9613a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // promote the memset. 9623a393728a62122d7009d8e2cbe52a221874e576aChris Lattner CurLoop->isLoopInvariant(SplatValue)) { 9633a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Keep and use SplatValue. 964dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines PatternValue = nullptr; 96511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault } else if (DestAS == 0 && 96611250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault TLI->has(LibFunc::memset_pattern16) && 96736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (PatternValue = getMemSetPatternValue(StoredVal, *DL))) { 96811250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault // Don't create memset_pattern16s with address spaces. 9693a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // It looks like we can use PatternValue! 970dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SplatValue = nullptr; 9713a393728a62122d7009d8e2cbe52a221874e576aChris Lattner } else { 9723a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Otherwise, this isn't an idiom we can transform. For example, we can't 9735ac7c7da3ebfd1aa95226aeefb8cfa3ba34fd72aEli Friedman // do anything with a 3-byte store. 974bafa117e8f2e3532f391227ebc9d4513b17edbadChris Lattner return false; 9753a393728a62122d7009d8e2cbe52a221874e576aChris Lattner } 976d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 977a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // The trip count of the loop and the base pointer of the addrec SCEV is 978a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // guaranteed to be loop invariant, which means that it should dominate the 9794f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // header. This allows us to insert code for it in the preheader. 9804f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner BasicBlock *Preheader = CurLoop->getLoopPreheader(); 9814f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner IRBuilder<> Builder(Preheader->getTerminator()); 9825e7645be4c9dd2193add44d30b5fef8036d7a3ceAndrew Trick SCEVExpander Expander(*SE, "loop-idiom"); 983a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick 98411250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); 98511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault 9864f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // Okay, we have a strided store "p[i]" of a splattable value. We can turn 9873740e798bc850cd5d40185959801606433b0221fBenjamin Kramer // this into a memset in the loop preheader now if we want. However, this 9883740e798bc850cd5d40185959801606433b0221fBenjamin Kramer // would be unsafe to do if there is anything else in the loop that may read 989ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth // or write to the aliased location. Check for any overlap by generating the 990ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth // base pointer and checking the region. 991d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick Value *BasePtr = 99211250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy, 993a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Preheader->getTerminator()); 994d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 995a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, 996a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth CurLoop, BECount, 997f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) { 998a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Expander.clear(); 999a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // If we generated new code for the base pointer, clean up. 1000a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteIfDeadInstruction(BasePtr, *SE, TLI); 1001a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return false; 1002a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth } 1003a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 10044f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // Okay, everything looks good, insert the memset. 10054f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner 1006a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // The # stored bytes is (BECount+1)*Size. Expand the trip count out to 1007a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner // pointer size if it isn't already. 100836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Type *IntPtr = Builder.getIntPtrTy(DL, DestAS); 10097c90b90f4e9b0c421f0e45d7de03f6edce113a90Chris Lattner BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); 1010d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1011a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), 10123228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick SCEV::FlagNUW); 1013f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault if (StoreSize != 1) { 1014a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), 10153228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick SCEV::FlagNUW); 1016f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault } 1017d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1018d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick Value *NumBytes = 1019a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); 1020d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1021cd77a50e638be5e7153ebe2a8ba875de7df48beaDevang Patel CallInst *NewCall; 1022f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault if (SplatValue) { 1023f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault NewCall = Builder.CreateMemSet(BasePtr, 1024f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault SplatValue, 1025f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault NumBytes, 1026f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault StoreAlignment); 1027f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault } else { 102811250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault // Everything is emitted in default address space 102911250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault Type *Int8PtrTy = DestInt8PtrTy; 103011250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault 10313a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Module *M = TheStore->getParent()->getParent()->getParent(); 10323a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Value *MSP = M->getOrInsertFunction("memset_pattern16", 10333a393728a62122d7009d8e2cbe52a221874e576aChris Lattner Builder.getVoidTy(), 103411250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault Int8PtrTy, 103511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault Int8PtrTy, 103611250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault IntPtr, 1037dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines (void*)nullptr); 1038d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 10393a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // Otherwise we should form a memset_pattern16. PatternValue is known to be 10403a393728a62122d7009d8e2cbe52a221874e576aChris Lattner // an constant array of 16-bytes. Plop the value into a mergable global. 10413a393728a62122d7009d8e2cbe52a221874e576aChris Lattner GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, 10423a393728a62122d7009d8e2cbe52a221874e576aChris Lattner GlobalValue::InternalLinkage, 10433a393728a62122d7009d8e2cbe52a221874e576aChris Lattner PatternValue, ".memset_pattern"); 10443a393728a62122d7009d8e2cbe52a221874e576aChris Lattner GV->setUnnamedAddr(true); // Ok to merge these. 10453a393728a62122d7009d8e2cbe52a221874e576aChris Lattner GV->setAlignment(16); 104611250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); 10473a393728a62122d7009d8e2cbe52a221874e576aChris Lattner NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); 10483a393728a62122d7009d8e2cbe52a221874e576aChris Lattner } 1049d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1050a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" 1051e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner << " from store to: " << *Ev << " at: " << *TheStore << "\n"); 1052cd77a50e638be5e7153ebe2a8ba875de7df48beaDevang Patel NewCall->setDebugLoc(TheStore->getDebugLoc()); 1053d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 10549f39188def2b4102817945522fac1209083efa06Chris Lattner // Okay, the memset has been formed. Zap the original store and anything that 10559f39188def2b4102817945522fac1209083efa06Chris Lattner // feeds into it. 10568e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer deleteDeadInstruction(TheStore, *SE, TLI); 10574ce31fb57412b16cd61337ba41c60112dfbecfb6Chris Lattner ++NumMemSet; 1058a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner return true; 1059a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner} 1060a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner 1061e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner/// processLoopStoreOfLoopLoad - We see a strided store whose value is a 1062e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner/// same-strided load. 1063e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattnerbool LoopIdiomRecognize:: 1064e2c43920919c6fe376613d1d8331897dc1ba3d57Chris LattnerprocessLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, 1065e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *StoreEv, 1066e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEVAddRecExpr *LoadEv, 1067e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner const SCEV *BECount) { 1068c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner // If we're not allowed to form memcpy, we fail. 1069a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (!TLI->has(LibFunc::memcpy)) 1070c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner return false; 1071d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1072e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); 1073d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 10744f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // The trip count of the loop and the base pointer of the addrec SCEV is 10754f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // guaranteed to be loop invariant, which means that it should dominate the 10764f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // header. This allows us to insert code for it in the preheader. 10774f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner BasicBlock *Preheader = CurLoop->getLoopPreheader(); 10784f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner IRBuilder<> Builder(Preheader->getTerminator()); 10795e7645be4c9dd2193add44d30b5fef8036d7a3ceAndrew Trick SCEVExpander Expander(*SE, "loop-idiom"); 1080a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick 1081e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // Okay, we have a strided store "p[i]" of a loaded value. We can turn 1082a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // this into a memcpy in the loop preheader now if we want. However, this 1083a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // would be unsafe to do if there is anything else in the loop that may read 1084a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // or write the memory region we're storing to. This includes the load that 1085a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // feeds the stores. Check for an alias by generating the base address and 1086a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // checking everything. 10874f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner Value *StoreBasePtr = 10884f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner Expander.expandCodeFor(StoreEv->getStart(), 10894f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner Builder.getInt8PtrTy(SI->getPointerAddressSpace()), 10904f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner Preheader->getTerminator()); 1091a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 1092a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, 1093a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth CurLoop, BECount, StoreSize, 1094a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth getAnalysis<AliasAnalysis>(), SI)) { 1095a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Expander.clear(); 1096a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // If we generated new code for the base pointer, clean up. 1097a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); 1098a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return false; 1099a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth } 1100a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 1101a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // For a memcpy, we have to make sure that the input array is not being 1102a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // mutated by the loop. 1103d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick Value *LoadBasePtr = 1104e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Expander.expandCodeFor(LoadEv->getStart(), 1105e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Builder.getInt8PtrTy(LI->getPointerAddressSpace()), 1106e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Preheader->getTerminator()); 11074f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner 1108a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, 1109a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth StoreSize, getAnalysis<AliasAnalysis>(), SI)) { 1110a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Expander.clear(); 1111a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth // If we generated new code for the base pointer, clean up. 1112a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteIfDeadInstruction(LoadBasePtr, *SE, TLI); 1113a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth deleteIfDeadInstruction(StoreBasePtr, *SE, TLI); 1114a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth return false; 1115a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth } 1116a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth 11174f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner // Okay, everything is safe, we can transform this! 1118a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick 1119d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1120e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // The # stored bytes is (BECount+1)*Size. Expand the trip count out to 1121e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // pointer size if it isn't already. 112236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace()); 112311250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); 1124d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 112511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1), 11263228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick SCEV::FlagNUW); 1127e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner if (StoreSize != 1) 112811250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), 11293228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick SCEV::FlagNUW); 1130d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1131e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner Value *NumBytes = 113211250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); 1133d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1134a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth CallInst *NewCall = 1135a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, 1136a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth std::min(SI->getAlignment(), LI->getAlignment())); 1137af35841f2e346f9259606886698c188280406cdbDevang Patel NewCall->setDebugLoc(SI->getDebugLoc()); 1138d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1139a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" 1140e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" 1141e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); 1142a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick 1143d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick 1144e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // Okay, the memset has been formed. Zap the original store and anything that 1145e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner // feeds into it. 11468e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer deleteDeadInstruction(SI, *SE, TLI); 1147a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth ++NumMemCpy; 1148e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner return true; 1149e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner} 1150