LoopIdiomRecognize.cpp revision d1b8ef97c47d347f2a2261a0d6de4872f248321f
1e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
2e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
3e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//                     The LLVM Compiler Infrastructure
4e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
5e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This file is distributed under the University of Illinois Open Source
6e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// License. See LICENSE.TXT for details.
7e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
8e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===//
9e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
10e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This pass implements an idiom recognizer that transforms simple loops into a
11e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// non-loop form.  In cases that this kicks in, it can be a significant
12e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// performance win.
13e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
14e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===//
15bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//
16bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// TODO List:
17bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//
18bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future loop memory idioms to recognize:
19a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth//   memcmp, memmove, strlen, etc.
20bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future floating point idioms to recognize in -ffast-math mode:
21bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//   fpowi
22bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future integer operation idioms to recognize:
23bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//   ctpop, ctlz, cttz
24bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//
25bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Beware that isel's default lowering for ctpop is highly inefficient for
26bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// i64 and larger types when i64 is legal and the value has few bits set.  It
27bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// would be good to enhance isel to emit a loop for ctpop in this case.
28bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//
29bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// We should enhance the memset/memcpy recognition to handle multiple stores in
30bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// the loop.  This would handle things like:
31bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//   void foo(_Complex float *P)
32bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
3391139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner//
34408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// We should enhance this to handle negative strides through memory.
35408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// Alternatively (and perhaps better) we could rely on an earlier pass to force
36408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// forward iteration through memory, which is generally better for cache
37408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// behavior.  Negative strides *do* happen for memset/memcpy loops.
38408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner//
39d957c717918c412402157b85fc51b4c6d2631381Chris Lattner// This could recognize common matrix multiplies and dot product idioms and
4091139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner// replace them with calls to BLAS (if linked in??).
4191139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner//
42bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//===----------------------------------------------------------------------===//
43e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
44e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#define DEBUG_TYPE "loop-idiom"
45e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Transforms/Scalar.h"
4606cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/ADT/Statistic.h"
472e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner#include "llvm/Analysis/AliasAnalysis.h"
48e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Analysis/LoopPass.h"
49a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner#include "llvm/Analysis/ScalarEvolutionExpander.h"
5006cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Analysis/ScalarEvolutionExpressions.h"
51be04929f7fd76a921540e9901f24563e51dc1219Chandler Carruth#include "llvm/Analysis/TargetTransformInfo.h"
5222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner#include "llvm/Analysis/ValueTracking.h"
530b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h"
540b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IRBuilder.h"
550b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h"
560b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Module.h"
5706cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Support/Debug.h"
5806cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Support/raw_ostream.h"
59c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner#include "llvm/Target/TargetLibraryInfo.h"
609f39188def2b4102817945522fac1209083efa06Chris Lattner#include "llvm/Transforms/Utils/Local.h"
61e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerusing namespace llvm;
62e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
63a864748284f4bab974ffb13b840a611c1f9bc7acChandler CarruthSTATISTIC(NumMemSet, "Number of memset's formed from loop stores");
64a864748284f4bab974ffb13b840a611c1f9bc7acChandler CarruthSTATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
65e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
66e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnernamespace {
675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  class LoopIdiomRecognize;
695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// This class defines some utility functions for loop idiom recognization.
715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  class LIRUtil {
725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  public:
735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Return true iff the block contains nothing but an uncondition branch
745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// (aka goto instruction).
755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    static bool isAlmostEmpty(BasicBlock *);
765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    static BranchInst *getBranch(BasicBlock *BB) {
785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return dyn_cast<BranchInst>(BB->getTerminator());
795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Return the condition of the branch terminating the given basic block.
825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    static Value *getBrCondtion(BasicBlock *);
835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Derive the precondition block (i.e the block that guards the loop
855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// preheader) from the given preheader.
865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    static BasicBlock *getPrecondBb(BasicBlock *PreHead);
875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  };
885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// This class is to recoginize idioms of population-count conducted in
905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// a noncountable loop. Currently it only recognizes this pattern:
915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// \code
925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ///   while(x) {cnt++; ...; x &= x - 1; ...}
935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// \endcode
945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  class NclPopcountRecognize {
955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    LoopIdiomRecognize &LIR;
965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Loop *CurLoop;
975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BasicBlock *PreCondBB;
985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    typedef IRBuilder<> IRBuilderTy;
1005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  public:
1025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
1035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool recognize();
1045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  private:
1065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Take a glimpse of the loop to see if we need to go ahead recoginizing
1075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// the idiom.
1085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool preliminaryScreen();
1095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Check if the given conditional branch is based on the comparison
1115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// beween a variable and zero, and if the variable is non-zero, the
1125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// control yeilds to the loop entry. If the branch matches the behavior,
1135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// the variable involved in the comparion is returned. This function will
1145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// be called to see if the precondition and postcondition of the loop
1155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// are in desirable form.
1165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
1175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Return true iff the idiom is detected in the loop. and 1) \p CntInst
1195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// is set to the instruction counting the pupulation bit. 2) \p CntPhi
1205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// is set to the corresponding phi node. 3) \p Var is set to the value
1215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// whose population bits are being counted.
1225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool detectIdiom
1235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
1245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Insert ctpop intrinsic function and some obviously dead instructions.
1265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var);
1275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Create llvm.ctpop.* intrinsic function.
1295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
1305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  };
1315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
132e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner  class LoopIdiomRecognize : public LoopPass {
13322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    Loop *CurLoop;
1343574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmow    const DataLayout *TD;
13562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    DominatorTree *DT;
13622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    ScalarEvolution *SE;
137c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner    TargetLibraryInfo *TLI;
1389980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth    const TargetTransformInfo *TTI;
139e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner  public:
140e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    static char ID;
141e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    explicit LoopIdiomRecognize() : LoopPass(ID) {
142e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
1439980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth      TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
144e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    }
145e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
146e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    bool runOnLoop(Loop *L, LPPassManager &LPM);
14762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
14862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner                        SmallVectorImpl<BasicBlock*> &ExitBlocks);
149e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
15022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    bool processLoopStore(StoreInst *SI, const SCEV *BECount);
151e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
152d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1533a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
1543a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 unsigned StoreAlignment,
1553a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 Value *SplatValue, Instruction *TheStore,
1563a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 const SCEVAddRecExpr *Ev,
1573a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 const SCEV *BECount);
158e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
159e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                                    const SCEVAddRecExpr *StoreEv,
160e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                                    const SCEVAddRecExpr *LoadEv,
161e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                                    const SCEV *BECount);
162d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
163e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    /// This transformation requires natural loop information & requires that
164e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    /// loop preheaders be inserted into the CFG.
165e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    ///
166e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
167e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addRequired<LoopInfo>();
168e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreserved<LoopInfo>();
169e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addRequiredID(LoopSimplifyID);
170e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreservedID(LoopSimplifyID);
171e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addRequiredID(LCSSAID);
172e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreservedID(LCSSAID);
1732e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner      AU.addRequired<AliasAnalysis>();
1742e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner      AU.addPreserved<AliasAnalysis>();
175e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addRequired<ScalarEvolution>();
176e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreserved<ScalarEvolution>();
177e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreserved<DominatorTree>();
17862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner      AU.addRequired<DominatorTree>();
179c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner      AU.addRequired<TargetLibraryInfo>();
180e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    }
1815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    const DataLayout *getDataLayout() {
1835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return TD ? TD : TD=getAnalysisIfAvailable<DataLayout>();
1845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
1855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    DominatorTree *getDominatorTree() {
1875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return DT ? DT : (DT=&getAnalysis<DominatorTree>());
1885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
1895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ScalarEvolution *getScalarEvolution() {
1915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
1925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
1935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    TargetLibraryInfo *getTargetLibraryInfo() {
1955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
1965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
1975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1989980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth    const TargetTransformInfo *getTargetTransformInfo() {
1999980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth      if (!TTI)
2009980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth        TTI = getAnalysisIfAvailable<TargetTransformInfo>();
2019980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth      return TTI;
2025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
2035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
2045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Loop *getLoop() const { return CurLoop; }
2055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
2065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  private:
2075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool runOnNoncountableLoop();
2085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool runOnCountableLoop();
209e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner  };
210e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner}
211e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
212e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerchar LoopIdiomRecognize::ID = 0;
213e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
214e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner                      false, false)
215e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopInfo)
21662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris LattnerINITIALIZE_PASS_DEPENDENCY(DominatorTree)
217e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopSimplify)
218e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LCSSA)
219e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
220c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris LattnerINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
2212e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris LattnerINITIALIZE_AG_DEPENDENCY(AliasAnalysis)
222e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
223e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner                    false, false)
224e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
225e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerPass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
226e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
2274f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner/// deleteDeadInstruction - Delete this instruction.  Before we do, go through
2289f39188def2b4102817945522fac1209083efa06Chris Lattner/// and zero out all the operands of this instruction.  If any of them become
2299f39188def2b4102817945522fac1209083efa06Chris Lattner/// dead, delete them and the computation tree that feeds them.
2309f39188def2b4102817945522fac1209083efa06Chris Lattner///
2318e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramerstatic void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
2328e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer                                  const TargetLibraryInfo *TLI) {
2339f39188def2b4102817945522fac1209083efa06Chris Lattner  SmallVector<Instruction*, 32> NowDeadInsts;
234d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2359f39188def2b4102817945522fac1209083efa06Chris Lattner  NowDeadInsts.push_back(I);
236d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2379f39188def2b4102817945522fac1209083efa06Chris Lattner  // Before we touch this instruction, remove it from SE!
2389f39188def2b4102817945522fac1209083efa06Chris Lattner  do {
2399f39188def2b4102817945522fac1209083efa06Chris Lattner    Instruction *DeadInst = NowDeadInsts.pop_back_val();
240d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2419f39188def2b4102817945522fac1209083efa06Chris Lattner    // This instruction is dead, zap it, in stages.  Start by removing it from
2429f39188def2b4102817945522fac1209083efa06Chris Lattner    // SCEV.
2439f39188def2b4102817945522fac1209083efa06Chris Lattner    SE.forgetValue(DeadInst);
244d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2459f39188def2b4102817945522fac1209083efa06Chris Lattner    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
2469f39188def2b4102817945522fac1209083efa06Chris Lattner      Value *Op = DeadInst->getOperand(op);
2479f39188def2b4102817945522fac1209083efa06Chris Lattner      DeadInst->setOperand(op, 0);
248d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2499f39188def2b4102817945522fac1209083efa06Chris Lattner      // If this operand just became dead, add it to the NowDeadInsts list.
2509f39188def2b4102817945522fac1209083efa06Chris Lattner      if (!Op->use_empty()) continue;
251d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2529f39188def2b4102817945522fac1209083efa06Chris Lattner      if (Instruction *OpI = dyn_cast<Instruction>(Op))
2538e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer        if (isInstructionTriviallyDead(OpI, TLI))
2549f39188def2b4102817945522fac1209083efa06Chris Lattner          NowDeadInsts.push_back(OpI);
2559f39188def2b4102817945522fac1209083efa06Chris Lattner    }
256d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2579f39188def2b4102817945522fac1209083efa06Chris Lattner    DeadInst->eraseFromParent();
258d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2599f39188def2b4102817945522fac1209083efa06Chris Lattner  } while (!NowDeadInsts.empty());
2609f39188def2b4102817945522fac1209083efa06Chris Lattner}
2619f39188def2b4102817945522fac1209083efa06Chris Lattner
262a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// deleteIfDeadInstruction - If the specified value is a dead instruction,
263a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// delete it and any recursively used instructions.
264a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruthstatic void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
265a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                                    const TargetLibraryInfo *TLI) {
266a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (Instruction *I = dyn_cast<Instruction>(V))
267a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    if (isInstructionTriviallyDead(I, TLI))
268a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth      deleteDeadInstruction(I, SE, TLI);
269a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth}
270a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
2715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
2725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
2735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//          Implementation of LIRUtil
2745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
2755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
27684fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang
2775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// This fucntion will return true iff the given block contains nothing but goto.
2785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// A typical usage of this function is to check if the preheader fucntion is
2795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// "almost" empty such that generated intrinsic function can be moved across
2805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// preheader and to be placed at the end of the preconditiona block without
2815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang// concerning of breaking data dependence.
2825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
2835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (BranchInst *Br = getBranch(BB)) {
2845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return Br->isUnconditional() && BB->size() == 1;
2855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
2865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return false;
2875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
2885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
2895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangValue *LIRUtil::getBrCondtion(BasicBlock *BB) {
2905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BranchInst *Br = getBranch(BB);
2915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return Br ? Br->getCondition() : 0;
2925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
2935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
2945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangBasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
2955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
2965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BranchInst *Br = getBranch(BB);
2975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return Br && Br->isConditional() ? BB : 0;
2985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
2995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return 0;
3005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
3015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
3035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
3045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//          Implementation of NclPopcountRecognize
3055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
3065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
3075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangNclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
3095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
3105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
3115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::preliminaryScreen() {
3139980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth  const TargetTransformInfo *TTI = LIR.getTargetTransformInfo();
314d1b8ef97c47d347f2a2261a0d6de4872f248321fChandler Carruth  if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
31584fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang    return false;
31684fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang
3175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Counting population are usually conducted by few arithmetic instrutions.
3185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Such instructions can be easilly "absorbed" by vacant slots in a
3195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
3205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // in a compact loop.
3215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Give up if the loop has multiple blocks or multiple backedges.
3235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
32422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    return false;
32584fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang
3265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *LoopBody = *(CurLoop->block_begin());
3275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (LoopBody->size() >= 20) {
3285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    // The loop is too big, bail out.
3295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
3305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
3315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // It should have a preheader containing nothing but a goto instruction.
3335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *PreHead = CurLoop->getLoopPreheader();
3345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
3355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
3365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // It should have a precondition block where the generated popcount instrinsic
3385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // function will be inserted.
3395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  PreCondBB = LIRUtil::getPrecondBb(PreHead);
3405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!PreCondBB)
34184fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang    return false;
3425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return true;
3445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
3455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangValue *NclPopcountRecognize::matchCondition (BranchInst *Br,
3475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                             BasicBlock *LoopEntry) const {
3485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!Br || !Br->isConditional())
3495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return 0;
3505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
3525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!Cond)
3535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return 0;
3545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
3565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!CmpZero || !CmpZero->isZero())
3575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return 0;
3585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ICmpInst::Predicate Pred = Cond->getPredicate();
3605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
3615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
3625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return Cond->getOperand(0);
3635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return 0;
3655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
3665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
3685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                       PHINode *&CntPhi,
3695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                       Value *&Var) const {
3705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Following code tries to detect this idiom:
3715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
3725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    if (x0 != 0)
3735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //      goto loop-exit // the precondition of the loop
3745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    cnt0 = init-val;
3755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    do {
3765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //       x1 = phi (x0, x2);
3775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //       cnt1 = phi(cnt0, cnt2);
3785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
3795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //       cnt2 = cnt1 + 1;
3805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //        ...
3815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //       x2 = x1 & (x1 - 1);
3825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //        ...
3835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    } while(x != 0);
3845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
3855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // loop-exit:
3865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
3875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 1: Check to see if the look-back branch match this pattern:
3895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    "if (a!=0) goto loop-entry".
3905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *LoopEntry;
3915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Instruction *DefX2, *CountInst;
3925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *VarX1, *VarX0;
3935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  PHINode *PhiX, *CountPhi;
3945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  DefX2 = CountInst = 0;
3965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  VarX1 = VarX0 = 0;
3975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  PhiX = CountPhi = 0;
3985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  LoopEntry = *(CurLoop->block_begin());
3995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 1: Check if the loop-back branch is in desirable form.
4015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
4025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
4035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      DefX2 = dyn_cast<Instruction>(T);
4045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    else
4055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
4095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
4105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (DefX2->getOpcode() != Instruction::And)
4115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BinaryOperator *SubOneOp;
4145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
4165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      VarX1 = DefX2->getOperand(1);
4175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    else {
4185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      VarX1 = DefX2->getOperand(0);
4195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
4205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
4215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!SubOneOp)
4225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Instruction *SubInst = cast<Instruction>(SubOneOp);
4255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
4265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!Dec ||
4275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
4285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang          (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
4295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
4315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 3: Check the recurrence of variable X
4345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
4355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    PhiX = dyn_cast<PHINode>(VarX1);
4365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!PhiX ||
4375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
4385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
4405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
4435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
4445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CountInst = NULL;
4455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
4465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang           IterE = LoopEntry->end(); Iter != IterE; Iter++) {
4475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      Instruction *Inst = Iter;
4485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (Inst->getOpcode() != Instruction::Add)
4495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        continue;
4505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
4525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (!Inc || !Inc->isOne())
4535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        continue;
4545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
4565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (!Phi || Phi->getParent() != LoopEntry)
4575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        continue;
4585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      // Check if the result of the instruction is live of the loop.
4605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      bool LiveOutLoop = false;
4615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
4625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang             I != E;  I++) {
4635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        if ((cast<Instruction>(*I))->getParent() != LoopEntry) {
4645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang          LiveOutLoop = true; break;
4655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        }
4665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      }
4675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (LiveOutLoop) {
4695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        CountInst = Inst;
4705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        CountPhi = Phi;
4715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        break;
4725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      }
4735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
4745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!CountInst)
4765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 5: check if the precondition is in this form:
4805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
4815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
4825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
4835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
4845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
4855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CntInst = CountInst;
4885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CntPhi = CountPhi;
4895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Var = T;
4905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return true;
4935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
4945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangvoid NclPopcountRecognize::transform(Instruction *CntInst,
4965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                     PHINode *CntPhi, Value *Var) {
4975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ScalarEvolution *SE = LIR.getScalarEvolution();
4995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
5005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *PreHead = CurLoop->getLoopPreheader();
5015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
5025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  const DebugLoc DL = CntInst->getDebugLoc();
5035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Assuming before transformation, the loop is following:
5055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  if (x) // the precondition
5065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //     do { cnt++; x &= x - 1; } while(x);
5075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Step 1: Insert the ctpop instruction at the end of the precondition block
5095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  IRBuilderTy Builder(PreCondBr);
5105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
5115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
5125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    PopCnt = createPopcntIntrinsic(Builder, Var, DL);
5135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    NewCount = PopCntZext =
5145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
5155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (NewCount != PopCnt)
5175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (cast<Instruction>(NewCount))->setDebugLoc(DL);
5185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    // TripCnt is exactly the number of iterations the loop has
5205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    TripCnt = NewCount;
5215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    // If the popoulation counter's initial value is not zero, insert Add Inst.
5235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
5245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
5255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!InitConst || !InitConst->isZero()) {
5265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      NewCount = Builder.CreateAdd(NewCount, CntInitVal);
5275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (cast<Instruction>(NewCount))->setDebugLoc(DL);
5285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
5295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
5305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to
5325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic
5335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   function would be partial dead code, and downstream passes will drag
5345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   it back from the precondition block to the preheader.
5355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
5365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
5375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *Opnd0 = PopCntZext;
5395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
5405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (PreCond->getOperand(0) != Var)
5415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      std::swap(Opnd0, Opnd1);
5425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ICmpInst *NewPreCond =
5445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
5455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    PreCond->replaceAllUsesWith(NewPreCond);
5465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    deleteDeadInstruction(PreCond, *SE, TLI);
5485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
5495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Step 3: Note that the population count is exactly the trip count of the
5515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // loop in question, which enble us to to convert the loop from noncountable
5525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // loop into a countable one. The benefit is twofold:
5535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
5545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  - If the loop only counts population, the entire loop become dead after
5555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    the transformation. It is lots easier to prove a countable loop dead
5565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    than to prove a noncountable one. (In some C dialects, a infite loop
5575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    isn't dead even if it computes nothing useful. In general, DCE needs
5585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    to prove a noncountable loop finite before safely delete it.)
5595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
5605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  - If the loop also performs something else, it remains alive.
5615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    Since it is transformed to countable form, it can be aggressively
5625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    optimized by some optimizations which are in general not applicable
5635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    to a noncountable loop.
5645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
5655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // After this step, this loop (conceptually) would look like following:
5665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   newcnt = __builtin_ctpop(x);
5675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   t = newcnt;
5685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   if (x)
5695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //     do { cnt++; x &= x-1; t--) } while (t > 0);
5705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *Body = *(CurLoop->block_begin());
5715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
5725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BranchInst *LbBr = LIRUtil::getBranch(Body);
5735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
5745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Type *Ty = TripCnt->getType();
5755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
5775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Builder.SetInsertPoint(LbCond);
5795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *Opnd1 = cast<Value>(TcPhi);
5805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
5815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Instruction *TcDec =
5825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
5835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    TcPhi->addIncoming(TripCnt, PreHead);
5855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    TcPhi->addIncoming(TcDec, Body);
5865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
5885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
5895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    LbCond->setPredicate(Pred);
5905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    LbCond->setOperand(0, TcDec);
5915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
5925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
5935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Step 4: All the references to the original population counter outside
5955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  the loop are replaced with the NewCount -- the value returned from
5965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  __builtin_ctpop().
5975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
5985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    SmallVector<Value *, 4> CntUses;
5995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end();
6005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang         I != E; I++) {
6015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (cast<Instruction>(*I)->getParent() != Body)
6025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        CntUses.push_back(*I);
6035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
6045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
6055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
6065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
6075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
6085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 5: Forget the "non-computable" trip-count SCEV associated with the
6105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   loop. The loop would otherwise not be deleted even if it becomes empty.
6115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  SE->forgetLoop(CurLoop);
6125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
6135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangCallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
6155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                                      Value *Val, DebugLoc DL) {
6165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *Ops[] = { Val };
6175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Type *Tys[] = { Val->getType() };
6185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
6205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
6215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
6225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  CI->setDebugLoc(DL);
6235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return CI;
6255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
6265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang/// recognize - detect population count idiom in a non-countable loop. If
6285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang///   detected, transform the relevant code to popcount intrinsic function
6295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang///   call, and return true; otherwise, return false.
6305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::recognize() {
6315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6329980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth  if (!LIR.getTargetTransformInfo())
6335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
6345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  LIR.getScalarEvolution();
6365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!preliminaryScreen())
6385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
6395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Instruction *CntInst;
6415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  PHINode *CntPhi;
6425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *Val;
6435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!detectIdiom(CntInst, CntPhi, Val))
6445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
6455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  transform(CntInst, CntPhi, Val);
6475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return true;
6485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
6495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
6515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
6525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//          Implementation of LoopIdiomRecognize
6535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
6545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
6555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnCountableLoop() {
6575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
65822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  if (isa<SCEVCouldNotCompute>(BECount)) return false;
659d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
6608e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner  // If this loop executes exactly one time, then it should be peeled, not
6618e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner  // optimized by this pass.
6628e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
6638e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner    if (BECst->getValue()->getValue() == 0)
6648e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner      return false;
665d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
66622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // We require target data for now.
6675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!getDataLayout())
6685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
6695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
670cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang  // set DT
671cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang  (void)getDominatorTree();
67262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner
67362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  LoopInfo &LI = getAnalysis<LoopInfo>();
674c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner  TLI = &getAnalysis<TargetLibraryInfo>();
675d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
676cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang  // set TLI
677cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang  (void)getTargetLibraryInfo();
6785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
67962c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  SmallVector<BasicBlock*, 8> ExitBlocks;
68062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  CurLoop->getUniqueExitBlocks(ExitBlocks);
68162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner
68263f9c3c49ac7011e6366fbec42716f3f70f1beeeChris Lattner  DEBUG(dbgs() << "loop-idiom Scanning: F["
6835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang               << CurLoop->getHeader()->getParent()->getName()
6845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang               << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
685d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
68662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  bool MadeChange = false;
68762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  // Scan all the blocks in the loop that are not in subloops.
6885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  for (Loop::block_iterator BI = CurLoop->block_begin(),
6895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang         E = CurLoop->block_end(); BI != E; ++BI) {
69062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    // Ignore blocks in subloops.
69162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    if (LI.getLoopFor(*BI) != CurLoop)
69262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner      continue;
693d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
69462c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
69562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  }
69662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  return MadeChange;
69762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner}
698e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
6995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnNoncountableLoop() {
7005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  NclPopcountRecognize Popcount(*this);
7015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (Popcount.recognize())
7025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return true;
7035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return false;
7055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
7065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
7085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  CurLoop = L;
7095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // If the loop could not be converted to canonical form, it must have an
7115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // indirectbr in it, just give up.
7125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!L->getLoopPreheader())
7135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
7145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Disable loop idiom recognition if the function's name is a common idiom.
7165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  StringRef Name = L->getHeader()->getParent()->getName();
7175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (Name == "memset" || Name == "memcpy")
7185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
7195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  SE = &getAnalysis<ScalarEvolution>();
7215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (SE->hasLoopInvariantBackedgeTakenCount(L))
7225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return runOnCountableLoop();
7235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return runOnNoncountableLoop();
7245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
7255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
72662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// runOnLoopBlock - Process the specified block, which lives in a counted loop
72762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// with the specified backedge count.  This block is known to be in the current
72862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// loop and not in any subloops.
72962c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattnerbool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
73062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner                                     SmallVectorImpl<BasicBlock*> &ExitBlocks) {
73162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  // We can only promote stores in this block if they are unconditionally
73262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  // executed in the loop.  For a block to be unconditionally executed, it has
73362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  // to dominate all the exit blocks of the loop.  Verify this now.
73462c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
73562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    if (!DT->dominates(BB, ExitBlocks[i]))
73662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner      return false;
737d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
73822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  bool MadeChange = false;
73922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
740b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner    Instruction *Inst = I++;
741b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner    // Look for store instructions, which may be optimized to memset/memcpy.
742b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))  {
743b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      WeakVH InstPtr(I);
744b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      if (!processLoopStore(SI, BECount)) continue;
745b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      MadeChange = true;
746d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
747b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      // If processing the store invalidated our iterator, start over from the
748e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      // top of the block.
749b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      if (InstPtr == 0)
750b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner        I = BB->begin();
751b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      continue;
752b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner    }
753d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
754e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    // Look for memset instructions, which may be optimized to a larger memset.
755e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst))  {
756e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      WeakVH InstPtr(I);
757e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      if (!processLoopMemSet(MSI, BECount)) continue;
758e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      MadeChange = true;
759d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
760e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      // If processing the memset invalidated our iterator, start over from the
761e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      // top of the block.
762e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      if (InstPtr == 0)
763e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner        I = BB->begin();
764e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      continue;
765e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    }
76622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  }
767d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
76822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  return MadeChange;
769e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner}
770e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
77162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner
772e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner/// processLoopStore - See if this store can be promoted to a memset or memcpy.
77322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattnerbool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
7742bc3d52b9ab422ee9f7e42a1a4e3b818e623a5f7Eli Friedman  if (!SI->isSimple()) return false;
775e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner
77622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  Value *StoredVal = SI->getValueOperand();
777a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  Value *StorePtr = SI->getPointerOperand();
778d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
77995ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner  // Reject stores that are so large that they overflow an unsigned.
78022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
78195ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
78222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    return false;
783d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
78422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // See if the pointer expression is an AddRec like {base,+,1} on the current
78522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // loop, which indicates a strided store.  If we have something else, it's a
78622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // random store we can't handle.
787e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  const SCEVAddRecExpr *StoreEv =
788e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
789e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
79022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    return false;
79122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner
79222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // Check to see if the stride matches the size of the store.  If so, then we
79322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // know that every byte is touched in the loop.
794d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick  unsigned StoreSize = (unsigned)SizeInBits >> 3;
795e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
796d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
797408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner  if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
798408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner    // TODO: Could also handle negative stride here someday, that will require
799408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner    // the validity check in mayLoopAccessLocation to be updated though.
800408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner    // Enable this to print exact negative strides.
8010e68cee62f251c45df92c71ca536142bc7d82631Chris Lattner    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
802408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
803408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner      dbgs() << "BB: " << *SI->getParent();
804408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner    }
805d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
80622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    return false;
807408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner  }
8083a393728a62122d7009d8e2cbe52a221874e576aChris Lattner
8093a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // See if we can optimize just this store in isolation.
8103a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
8113a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                              StoredVal, SI, StoreEv, BECount))
8123a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    return true;
813a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner
814e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // If the stored value is a strided load in the same loop with the same stride
815e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // this this may be transformable into a memcpy.  This kicks in for stuff like
816e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  //   for (i) A[i] = B[i];
817e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
818e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    const SCEVAddRecExpr *LoadEv =
819e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
820e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
8212bc3d52b9ab422ee9f7e42a1a4e3b818e623a5f7Eli Friedman        StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
822e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
823e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner        return true;
824e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  }
8254ce31fb57412b16cd61337ba41c60112dfbecfb6Chris Lattner  //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
82622920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner
827e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner  return false;
828e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner}
829e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
830e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner/// processLoopMemSet - See if this memset can be promoted to a large memset.
831e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattnerbool LoopIdiomRecognize::
832e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris LattnerprocessLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
833e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // We can only handle non-volatile memsets with a constant size.
834e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
835e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner
836c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner  // If we're not allowed to hack on memset, we fail.
837c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner  if (!TLI->has(LibFunc::memset))
838c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner    return false;
839d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
840e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  Value *Pointer = MSI->getDest();
841d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
842e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // See if the pointer expression is an AddRec like {base,+,1} on the current
843e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // loop, which indicates a strided store.  If we have something else, it's a
844e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // random store we can't handle.
845e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
846e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
847e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    return false;
848e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner
849e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // Reject memsets that are so large that they overflow an unsigned.
850e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
851e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  if ((SizeInBytes >> 32) != 0)
852e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    return false;
853d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
854e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // Check to see if the stride matches the size of the memset.  If so, then we
855e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // know that every byte is touched in the loop.
856e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
857d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
858e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // TODO: Could also handle negative stride here someday, that will require the
859e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // validity check in mayLoopAccessLocation to be updated though.
860e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  if (Stride == 0 || MSI->getLength() != Stride->getValue())
861e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    return false;
862d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
8633a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
8643a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 MSI->getAlignment(), MSI->getValue(),
8653a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 MSI, Ev, BECount);
866e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner}
867e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner
868a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
869a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// mayLoopAccessLocation - Return true if the specified loop might access the
870a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// specified pointer location, which is a loop-strided access.  The 'Access'
871a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// argument specifies what the verboten forms of access are (read or write).
872a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruthstatic bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
873a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                                  Loop *L, const SCEV *BECount,
874a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                                  unsigned StoreSize, AliasAnalysis &AA,
875a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                                  Instruction *IgnoredStore) {
876a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // Get the location that may be stored across the loop.  Since the access is
877a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // strided positively through memory, we say that the modified location starts
878a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // at the pointer and has infinite size.
879a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  uint64_t AccessSize = AliasAnalysis::UnknownSize;
880a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
881a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // If the loop iterates a fixed number of times, we can refine the access size
882a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
883a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
884a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
885a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
886a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // TODO: For this to be really effective, we have to dive into the pointer
887a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // operand in the store.  Store to &A[i] of 100 will always return may alias
888a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
889a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // which will then no-alias a store to &A[100].
890a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
891a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
892a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
893a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth       ++BI)
894a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
895a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth      if (&*I != IgnoredStore &&
896a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth          (AA.getModRefInfo(I, StoreLoc) & Access))
897a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth        return true;
898a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
899a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  return false;
900a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth}
901a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
9023a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// getMemSetPatternValue - If a strided store of the specified value is safe to
9033a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
9043a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// be passed in.  Otherwise, return null.
9053a393728a62122d7009d8e2cbe52a221874e576aChris Lattner///
9063a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
9073a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// just replicate their input array and then pass on to memset_pattern16.
9083574eca1b02600bac4e625297f4ecf745f4c4f32Micah Villmowstatic Constant *getMemSetPatternValue(Value *V, const DataLayout &TD) {
9093a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // If the value isn't a constant, we can't promote it to being in a constant
9103a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // array.  We could theoretically do a store to an alloca or something, but
9113a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // that doesn't seem worthwhile.
9123a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  Constant *C = dyn_cast<Constant>(V);
9133a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (C == 0) return 0;
914d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
9153a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // Only handle simple values that are a power of two bytes in size.
9163a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  uint64_t Size = TD.getTypeSizeInBits(V->getType());
9173a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (Size == 0 || (Size & 7) || (Size & (Size-1)))
9183a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    return 0;
919d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
92080e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  // Don't care enough about darwin/ppc to implement this.
92180e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  if (TD.isBigEndian())
92280e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner    return 0;
9233a393728a62122d7009d8e2cbe52a221874e576aChris Lattner
9243a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // Convert to size in bytes.
9253a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  Size /= 8;
926c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner
9273a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
92880e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  // if the top and bottom are the same (e.g. for vectors and large integers).
9293a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (Size > 16) return 0;
930d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
93180e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  // If the constant is exactly 16 bytes, just use it.
93280e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  if (Size == 16) return C;
93380e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner
93480e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  // Otherwise, we'll use an array of the constants.
93580e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  unsigned ArraySize = 16/Size;
93680e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
93780e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
9383a393728a62122d7009d8e2cbe52a221874e576aChris Lattner}
9393a393728a62122d7009d8e2cbe52a221874e576aChris Lattner
9403a393728a62122d7009d8e2cbe52a221874e576aChris Lattner
9413a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// processLoopStridedStore - We see a strided store of some value.  If we can
9423a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// transform this into a memset or memset_pattern in the loop preheader, do so.
9433a393728a62122d7009d8e2cbe52a221874e576aChris Lattnerbool LoopIdiomRecognize::
9443a393728a62122d7009d8e2cbe52a221874e576aChris LattnerprocessLoopStridedStore(Value *DestPtr, unsigned StoreSize,
9453a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                        unsigned StoreAlignment, Value *StoredVal,
9463a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                        Instruction *TheStore, const SCEVAddRecExpr *Ev,
9473a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                        const SCEV *BECount) {
948d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
9493a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // If the stored value is a byte-wise value (like i32 -1), then it may be
9503a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // turned into a memset of i8 -1, assuming that all the consecutive bytes
9513a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
9523a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // but it can be turned into memset_pattern if the target supports it.
9533a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  Value *SplatValue = isBytewiseValue(StoredVal);
9543a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  Constant *PatternValue = 0;
955d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
9563a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // If we're allowed to form a memset, and the stored value would be acceptable
9573a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // for memset, use it.
9583a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (SplatValue && TLI->has(LibFunc::memset) &&
9593a393728a62122d7009d8e2cbe52a221874e576aChris Lattner      // Verify that the stored value is loop invariant.  If not, we can't
9603a393728a62122d7009d8e2cbe52a221874e576aChris Lattner      // promote the memset.
9613a393728a62122d7009d8e2cbe52a221874e576aChris Lattner      CurLoop->isLoopInvariant(SplatValue)) {
9623a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // Keep and use SplatValue.
9633a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    PatternValue = 0;
9643a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  } else if (TLI->has(LibFunc::memset_pattern16) &&
9653a393728a62122d7009d8e2cbe52a221874e576aChris Lattner             (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
9663a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // It looks like we can use PatternValue!
9673a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    SplatValue = 0;
9683a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  } else {
9693a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // Otherwise, this isn't an idiom we can transform.  For example, we can't
9705ac7c7da3ebfd1aa95226aeefb8cfa3ba34fd72aEli Friedman    // do anything with a 3-byte store.
971bafa117e8f2e3532f391227ebc9d4513b17edbadChris Lattner    return false;
9723a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  }
973d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
974a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  // The trip count of the loop and the base pointer of the addrec SCEV is
975a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  // guaranteed to be loop invariant, which means that it should dominate the
9764f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // header.  This allows us to insert code for it in the preheader.
9774f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  BasicBlock *Preheader = CurLoop->getLoopPreheader();
9784f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  IRBuilder<> Builder(Preheader->getTerminator());
9795e7645be4c9dd2193add44d30b5fef8036d7a3ceAndrew Trick  SCEVExpander Expander(*SE, "loop-idiom");
980a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick
9814f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
9823740e798bc850cd5d40185959801606433b0221fBenjamin Kramer  // this into a memset in the loop preheader now if we want.  However, this
9833740e798bc850cd5d40185959801606433b0221fBenjamin Kramer  // would be unsafe to do if there is anything else in the loop that may read
984ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth  // or write to the aliased location.  Check for any overlap by generating the
985ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth  // base pointer and checking the region.
986ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
987d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick  Value *BasePtr =
988a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner    Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
989a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner                           Preheader->getTerminator());
990d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
9914f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner
992a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
993a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            CurLoop, BECount,
994a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
995a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    Expander.clear();
996a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    // If we generated new code for the base pointer, clean up.
997a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    deleteIfDeadInstruction(BasePtr, *SE, TLI);
998a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    return false;
999a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  }
1000a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
10014f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // Okay, everything looks good, insert the memset.
10024f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner
1003a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
1004a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  // pointer size if it isn't already.
1005ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth  Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
10067c90b90f4e9b0c421f0e45d7de03f6edce113a90Chris Lattner  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
1007d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1008a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
10093228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick                                         SCEV::FlagNUW);
1010a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  if (StoreSize != 1)
1011a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
10123228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick                               SCEV::FlagNUW);
1013d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1014d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick  Value *NumBytes =
1015a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
1016d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1017cd77a50e638be5e7153ebe2a8ba875de7df48beaDevang Patel  CallInst *NewCall;
10183a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (SplatValue)
10193a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
10203a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  else {
10213a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    Module *M = TheStore->getParent()->getParent()->getParent();
10223a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    Value *MSP = M->getOrInsertFunction("memset_pattern16",
10233a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                        Builder.getVoidTy(),
1024d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick                                        Builder.getInt8PtrTy(),
10253a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                        Builder.getInt8PtrTy(), IntPtr,
10263a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                        (void*)0);
1027d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
10283a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // Otherwise we should form a memset_pattern16.  PatternValue is known to be
10293a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // an constant array of 16-bytes.  Plop the value into a mergable global.
10303a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
10313a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                            GlobalValue::InternalLinkage,
10323a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                            PatternValue, ".memset_pattern");
10333a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    GV->setUnnamedAddr(true); // Ok to merge these.
10343a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    GV->setAlignment(16);
103580e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner    Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
10363a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
10373a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  }
1038d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1039a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
1040e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner               << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
1041cd77a50e638be5e7153ebe2a8ba875de7df48beaDevang Patel  NewCall->setDebugLoc(TheStore->getDebugLoc());
1042d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
10439f39188def2b4102817945522fac1209083efa06Chris Lattner  // Okay, the memset has been formed.  Zap the original store and anything that
10449f39188def2b4102817945522fac1209083efa06Chris Lattner  // feeds into it.
10458e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer  deleteDeadInstruction(TheStore, *SE, TLI);
10464ce31fb57412b16cd61337ba41c60112dfbecfb6Chris Lattner  ++NumMemSet;
1047a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  return true;
1048a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner}
1049a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner
1050e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
1051e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner/// same-strided load.
1052e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattnerbool LoopIdiomRecognize::
1053e2c43920919c6fe376613d1d8331897dc1ba3d57Chris LattnerprocessLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
1054e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           const SCEVAddRecExpr *StoreEv,
1055e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           const SCEVAddRecExpr *LoadEv,
1056e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           const SCEV *BECount) {
1057c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner  // If we're not allowed to form memcpy, we fail.
1058a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (!TLI->has(LibFunc::memcpy))
1059c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner    return false;
1060d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1061e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1062d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
10634f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // The trip count of the loop and the base pointer of the addrec SCEV is
10644f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // guaranteed to be loop invariant, which means that it should dominate the
10654f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // header.  This allows us to insert code for it in the preheader.
10664f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  BasicBlock *Preheader = CurLoop->getLoopPreheader();
10674f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  IRBuilder<> Builder(Preheader->getTerminator());
10685e7645be4c9dd2193add44d30b5fef8036d7a3ceAndrew Trick  SCEVExpander Expander(*SE, "loop-idiom");
1069a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick
1070e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
1071a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // this into a memcpy in the loop preheader now if we want.  However, this
1072a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // would be unsafe to do if there is anything else in the loop that may read
1073a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // or write the memory region we're storing to.  This includes the load that
1074a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // feeds the stores.  Check for an alias by generating the base address and
1075a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // checking everything.
10764f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  Value *StoreBasePtr =
10774f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner    Expander.expandCodeFor(StoreEv->getStart(),
10784f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
10794f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner                           Preheader->getTerminator());
1080a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
1081a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
1082a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            CurLoop, BECount, StoreSize,
1083a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            getAnalysis<AliasAnalysis>(), SI)) {
1084a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    Expander.clear();
1085a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    // If we generated new code for the base pointer, clean up.
1086a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
1087a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    return false;
1088a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  }
1089a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
1090a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // For a memcpy, we have to make sure that the input array is not being
1091a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // mutated by the loop.
1092d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick  Value *LoadBasePtr =
1093e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    Expander.expandCodeFor(LoadEv->getStart(),
1094e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
1095e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           Preheader->getTerminator());
10964f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner
1097a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
1098a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
1099a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    Expander.clear();
1100a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    // If we generated new code for the base pointer, clean up.
1101a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
1102a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
1103a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    return false;
1104a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  }
1105a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
11064f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // Okay, everything is safe, we can transform this!
1107a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick
1108d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1109e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
1110e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // pointer size if it isn't already.
1111ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth  Type *IntPtr = TD->getIntPtrType(SI->getContext());
11127c90b90f4e9b0c421f0e45d7de03f6edce113a90Chris Lattner  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
1113d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1114e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
11153228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick                                         SCEV::FlagNUW);
1116e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  if (StoreSize != 1)
1117e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
11183228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick                               SCEV::FlagNUW);
1119d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1120e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  Value *NumBytes =
1121e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
1122d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1123a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  CallInst *NewCall =
1124a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
1125a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                         std::min(SI->getAlignment(), LI->getAlignment()));
1126af35841f2e346f9259606886698c188280406cdbDevang Patel  NewCall->setDebugLoc(SI->getDebugLoc());
1127d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1128a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
1129e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
1130e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
1131a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick
1132d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1133e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // Okay, the memset has been formed.  Zap the original store and anything that
1134e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // feeds into it.
11358e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer  deleteDeadInstruction(SI, *SE, TLI);
1136a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  ++NumMemCpy;
1137e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  return true;
1138e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner}
1139