1e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
2e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
3e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//                     The LLVM Compiler Infrastructure
4e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
5e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This file is distributed under the University of Illinois Open Source
6e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// License. See LICENSE.TXT for details.
7e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
8e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===//
9e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
10e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// This pass implements an idiom recognizer that transforms simple loops into a
11e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// non-loop form.  In cases that this kicks in, it can be a significant
12e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner// performance win.
13e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//
14e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner//===----------------------------------------------------------------------===//
15bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//
16bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// TODO List:
17bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//
18bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future loop memory idioms to recognize:
19a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth//   memcmp, memmove, strlen, etc.
20bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future floating point idioms to recognize in -ffast-math mode:
21bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//   fpowi
22bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Future integer operation idioms to recognize:
23bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//   ctpop, ctlz, cttz
24bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//
25bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// Beware that isel's default lowering for ctpop is highly inefficient for
26bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// i64 and larger types when i64 is legal and the value has few bits set.  It
27bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// would be good to enhance isel to emit a loop for ctpop in this case.
28bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//
29bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// We should enhance the memset/memcpy recognition to handle multiple stores in
30bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner// the loop.  This would handle things like:
31bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//   void foo(_Complex float *P)
32bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
3391139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner//
34408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// We should enhance this to handle negative strides through memory.
35408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// Alternatively (and perhaps better) we could rely on an earlier pass to force
36408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// forward iteration through memory, which is generally better for cache
37408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner// behavior.  Negative strides *do* happen for memset/memcpy loops.
38408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner//
39d957c717918c412402157b85fc51b4c6d2631381Chris Lattner// This could recognize common matrix multiplies and dot product idioms and
4091139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner// replace them with calls to BLAS (if linked in??).
4191139ccd995149dd0d5e4ab3604d9239e1f90a54Chris Lattner//
42bdce5720ad878259544e6a4e3621dac32cf2d134Chris Lattner//===----------------------------------------------------------------------===//
43e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
44e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Transforms/Scalar.h"
4506cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/ADT/Statistic.h"
462e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner#include "llvm/Analysis/AliasAnalysis.h"
47e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner#include "llvm/Analysis/LoopPass.h"
48a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner#include "llvm/Analysis/ScalarEvolutionExpander.h"
4906cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Analysis/ScalarEvolutionExpressions.h"
50be04929f7fd76a921540e9901f24563e51dc1219Chandler Carruth#include "llvm/Analysis/TargetTransformInfo.h"
5122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner#include "llvm/Analysis/ValueTracking.h"
520b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h"
5336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Dominators.h"
540b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IRBuilder.h"
550b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/IntrinsicInst.h"
560b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/Module.h"
5706cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Support/Debug.h"
5806cb8ed00696eb14d1b831921452e50ec0568ea2Chandler Carruth#include "llvm/Support/raw_ostream.h"
59c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner#include "llvm/Target/TargetLibraryInfo.h"
609f39188def2b4102817945522fac1209083efa06Chris Lattner#include "llvm/Transforms/Utils/Local.h"
61e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerusing namespace llvm;
62e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
63dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "loop-idiom"
64dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
65a864748284f4bab974ffb13b840a611c1f9bc7acChandler CarruthSTATISTIC(NumMemSet, "Number of memset's formed from loop stores");
66a864748284f4bab974ffb13b840a611c1f9bc7acChandler CarruthSTATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
67e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
68e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnernamespace {
695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  class LoopIdiomRecognize;
715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// This class defines some utility functions for loop idiom recognization.
735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  class LIRUtil {
745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  public:
755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Return true iff the block contains nothing but an uncondition branch
765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// (aka goto instruction).
775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    static bool isAlmostEmpty(BasicBlock *);
785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    static BranchInst *getBranch(BasicBlock *BB) {
805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return dyn_cast<BranchInst>(BB->getTerminator());
815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
831f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault    /// Derive the precondition block (i.e the block that guards the loop
845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// preheader) from the given preheader.
855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    static BasicBlock *getPrecondBb(BasicBlock *PreHead);
865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  };
875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// This class is to recoginize idioms of population-count conducted in
895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// a noncountable loop. Currently it only recognizes this pattern:
905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// \code
915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ///   while(x) {cnt++; ...; x &= x - 1; ...}
925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  /// \endcode
935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  class NclPopcountRecognize {
945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    LoopIdiomRecognize &LIR;
955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Loop *CurLoop;
965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BasicBlock *PreCondBB;
975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    typedef IRBuilder<> IRBuilderTy;
995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  public:
1015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
1025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool recognize();
1035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  private:
1055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Take a glimpse of the loop to see if we need to go ahead recoginizing
1065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// the idiom.
1075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool preliminaryScreen();
1085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Check if the given conditional branch is based on the comparison
11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    /// between a variable and zero, and if the variable is non-zero, the
11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    /// control yields to the loop entry. If the branch matches the behavior,
1125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// the variable involved in the comparion is returned. This function will
1131f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault    /// be called to see if the precondition and postcondition of the loop
1145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// are in desirable form.
115cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const;
1165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Return true iff the idiom is detected in the loop. and 1) \p CntInst
118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    /// is set to the instruction counting the population bit. 2) \p CntPhi
1195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// is set to the corresponding phi node. 3) \p Var is set to the value
1205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// whose population bits are being counted.
1215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool detectIdiom
1225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
1235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Insert ctpop intrinsic function and some obviously dead instructions.
125cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var);
1265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    /// Create llvm.ctpop.* intrinsic function.
1285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
1295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  };
1305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
131e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner  class LoopIdiomRecognize : public LoopPass {
13222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    Loop *CurLoop;
13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    const DataLayout *DL;
13462c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    DominatorTree *DT;
13522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    ScalarEvolution *SE;
136c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner    TargetLibraryInfo *TLI;
1379980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth    const TargetTransformInfo *TTI;
138e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner  public:
139e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    static char ID;
140e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    explicit LoopIdiomRecognize() : LoopPass(ID) {
141e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr;
143e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    }
144e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
14536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
14662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
14762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner                        SmallVectorImpl<BasicBlock*> &ExitBlocks);
148e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
14922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    bool processLoopStore(StoreInst *SI, const SCEV *BECount);
150e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
151d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1523a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
1533a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 unsigned StoreAlignment,
1543a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 Value *SplatValue, Instruction *TheStore,
1553a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 const SCEVAddRecExpr *Ev,
1563a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 const SCEV *BECount);
157e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
158e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                                    const SCEVAddRecExpr *StoreEv,
159e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                                    const SCEVAddRecExpr *LoadEv,
160e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                                    const SCEV *BECount);
161d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
162e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    /// This transformation requires natural loop information & requires that
163e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    /// loop preheaders be inserted into the CFG.
164e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    ///
16536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    void getAnalysisUsage(AnalysisUsage &AU) const override {
166e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addRequired<LoopInfo>();
167e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreserved<LoopInfo>();
168e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addRequiredID(LoopSimplifyID);
169e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreservedID(LoopSimplifyID);
170e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addRequiredID(LCSSAID);
171e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreservedID(LCSSAID);
1722e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner      AU.addRequired<AliasAnalysis>();
1732e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris Lattner      AU.addPreserved<AliasAnalysis>();
174e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addRequired<ScalarEvolution>();
175e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner      AU.addPreserved<ScalarEvolution>();
17636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      AU.addPreserved<DominatorTreeWrapperPass>();
17736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      AU.addRequired<DominatorTreeWrapperPass>();
178c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner      AU.addRequired<TargetLibraryInfo>();
179d12aae6c0535bd2713207ef84b7098f905a89b78Chandler Carruth      AU.addRequired<TargetTransformInfo>();
180e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner    }
1815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    const DataLayout *getDataLayout() {
18336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (DL)
18436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        return DL;
18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
186dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      DL = DLP ? &DLP->getDataLayout() : nullptr;
18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return DL;
1885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
1895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    DominatorTree *getDominatorTree() {
19136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return DT ? DT
19236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                : (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree());
1935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
1945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ScalarEvolution *getScalarEvolution() {
1965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
1975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
1985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
1995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    TargetLibraryInfo *getTargetLibraryInfo() {
2005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
2015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
2025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
2039980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth    const TargetTransformInfo *getTargetTransformInfo() {
204d12aae6c0535bd2713207ef84b7098f905a89b78Chandler Carruth      return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>());
2055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
2065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
2075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Loop *getLoop() const { return CurLoop; }
2085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
2095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  private:
2105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool runOnNoncountableLoop();
2115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    bool runOnCountableLoop();
212e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner  };
213e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner}
214e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
215e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattnerchar LoopIdiomRecognize::ID = 0;
216e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
217e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner                      false, false)
218e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopInfo)
21936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesINITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
220e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LoopSimplify)
221e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(LCSSA)
222e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
223c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris LattnerINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
2242e12f1ac6ebd49e5c41b7e9dcd802f3df40a1bc7Chris LattnerINITIALIZE_AG_DEPENDENCY(AliasAnalysis)
225d12aae6c0535bd2713207ef84b7098f905a89b78Chandler CarruthINITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
226e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerINITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
227e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner                    false, false)
228e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
229e6bb649ec63059d795a31198c3c569c137e7ad59Chris LattnerPass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
230e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
2314f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner/// deleteDeadInstruction - Delete this instruction.  Before we do, go through
2329f39188def2b4102817945522fac1209083efa06Chris Lattner/// and zero out all the operands of this instruction.  If any of them become
2339f39188def2b4102817945522fac1209083efa06Chris Lattner/// dead, delete them and the computation tree that feeds them.
2349f39188def2b4102817945522fac1209083efa06Chris Lattner///
2358e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramerstatic void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
2368e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer                                  const TargetLibraryInfo *TLI) {
2379f39188def2b4102817945522fac1209083efa06Chris Lattner  SmallVector<Instruction*, 32> NowDeadInsts;
238d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2399f39188def2b4102817945522fac1209083efa06Chris Lattner  NowDeadInsts.push_back(I);
240d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2419f39188def2b4102817945522fac1209083efa06Chris Lattner  // Before we touch this instruction, remove it from SE!
2429f39188def2b4102817945522fac1209083efa06Chris Lattner  do {
2439f39188def2b4102817945522fac1209083efa06Chris Lattner    Instruction *DeadInst = NowDeadInsts.pop_back_val();
244d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2459f39188def2b4102817945522fac1209083efa06Chris Lattner    // This instruction is dead, zap it, in stages.  Start by removing it from
2469f39188def2b4102817945522fac1209083efa06Chris Lattner    // SCEV.
2479f39188def2b4102817945522fac1209083efa06Chris Lattner    SE.forgetValue(DeadInst);
248d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2499f39188def2b4102817945522fac1209083efa06Chris Lattner    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
2509f39188def2b4102817945522fac1209083efa06Chris Lattner      Value *Op = DeadInst->getOperand(op);
251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      DeadInst->setOperand(op, nullptr);
252d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2539f39188def2b4102817945522fac1209083efa06Chris Lattner      // If this operand just became dead, add it to the NowDeadInsts list.
2549f39188def2b4102817945522fac1209083efa06Chris Lattner      if (!Op->use_empty()) continue;
255d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2569f39188def2b4102817945522fac1209083efa06Chris Lattner      if (Instruction *OpI = dyn_cast<Instruction>(Op))
2578e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer        if (isInstructionTriviallyDead(OpI, TLI))
2589f39188def2b4102817945522fac1209083efa06Chris Lattner          NowDeadInsts.push_back(OpI);
2599f39188def2b4102817945522fac1209083efa06Chris Lattner    }
260d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2619f39188def2b4102817945522fac1209083efa06Chris Lattner    DeadInst->eraseFromParent();
262d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
2639f39188def2b4102817945522fac1209083efa06Chris Lattner  } while (!NowDeadInsts.empty());
2649f39188def2b4102817945522fac1209083efa06Chris Lattner}
2659f39188def2b4102817945522fac1209083efa06Chris Lattner
266a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// deleteIfDeadInstruction - If the specified value is a dead instruction,
267a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// delete it and any recursively used instructions.
268a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruthstatic void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
269a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                                    const TargetLibraryInfo *TLI) {
270a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (Instruction *I = dyn_cast<Instruction>(V))
271a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    if (isInstructionTriviallyDead(I, TLI))
272a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth      deleteDeadInstruction(I, SE, TLI);
273a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth}
274a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
2755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
2765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
2775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//          Implementation of LIRUtil
2785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
2795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
28084fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang
2811f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// This function will return true iff the given block contains nothing but goto.
2821f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// A typical usage of this function is to check if the preheader function is
2831f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// "almost" empty such that generated intrinsic functions can be moved across
2841f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// the preheader and be placed at the end of the precondition block without
2851f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault// the concern of breaking data dependence.
2865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
2875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (BranchInst *Br = getBranch(BB)) {
2885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return Br->isUnconditional() && BB->size() == 1;
2895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
2905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return false;
2915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
2925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
2935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangBasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
2945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
2955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BranchInst *Br = getBranch(BB);
296dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return Br && Br->isConditional() ? BB : nullptr;
2975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
298dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr;
2995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
3005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
3025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
3035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//          Implementation of NclPopcountRecognize
3045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
3055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
3065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin YangNclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
308dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) {
3095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
3105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::preliminaryScreen() {
3129980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth  const TargetTransformInfo *TTI = LIR.getTargetTransformInfo();
313d1b8ef97c47d347f2a2261a0d6de4872f248321fChandler Carruth  if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware)
31484fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang    return false;
31584fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang
3163f4f420ab7acb10221ba971543a7eed5489fb626Robert Wilhelm  // Counting population are usually conducted by few arithmetic instructions.
3175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Such instructions can be easilly "absorbed" by vacant slots in a
3185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
3195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // in a compact loop.
3205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Give up if the loop has multiple blocks or multiple backedges.
3225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
32322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    return false;
32484fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang
3255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *LoopBody = *(CurLoop->block_begin());
3265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (LoopBody->size() >= 20) {
3275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    // The loop is too big, bail out.
3285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
3295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
3305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // It should have a preheader containing nothing but a goto instruction.
3325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *PreHead = CurLoop->getLoopPreheader();
3335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
3345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
3355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // It should have a precondition block where the generated popcount instrinsic
3375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // function will be inserted.
3385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  PreCondBB = LIRUtil::getPrecondBb(PreHead);
3395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!PreCondBB)
34084fca61ca5fba5c33a799d9133750b6832ddef7eShuxin Yang    return false;
3411f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault
3425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return true;
3435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
3445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
345dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesValue *NclPopcountRecognize::matchCondition(BranchInst *Br,
346dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                            BasicBlock *LoopEntry) const {
3475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!Br || !Br->isConditional())
348dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return nullptr;
3495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
3515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!Cond)
352dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return nullptr;
3535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
3555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!CmpZero || !CmpZero->isZero())
356dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return nullptr;
3575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ICmpInst::Predicate Pred = Cond->getPredicate();
3595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
3605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
3615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return Cond->getOperand(0);
3625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
363dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr;
3645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
3655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
3675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                       PHINode *&CntPhi,
3685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                       Value *&Var) const {
3695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Following code tries to detect this idiom:
3705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
3715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    if (x0 != 0)
3725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //      goto loop-exit // the precondition of the loop
3735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    cnt0 = init-val;
3745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    do {
3755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //       x1 = phi (x0, x2);
3765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //       cnt1 = phi(cnt0, cnt2);
3775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
3785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //       cnt2 = cnt1 + 1;
3795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //        ...
3805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //       x2 = x1 & (x1 - 1);
3815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //        ...
3825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    } while(x != 0);
3835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
3845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // loop-exit:
3855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
3865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 1: Check to see if the look-back branch match this pattern:
3885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    "if (a!=0) goto loop-entry".
3895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *LoopEntry;
3905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Instruction *DefX2, *CountInst;
3915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *VarX1, *VarX0;
3925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  PHINode *PhiX, *CountPhi;
3935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
394dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  DefX2 = CountInst = nullptr;
395dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  VarX1 = VarX0 = nullptr;
396dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  PhiX = CountPhi = nullptr;
3975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  LoopEntry = *(CurLoop->block_begin());
3985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
3995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 1: Check if the loop-back branch is in desirable form.
4005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
4015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
4025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      DefX2 = dyn_cast<Instruction>(T);
4035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    else
4045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
4085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
409253449db20f86b655852a397245ba16ff262452fShuxin Yang    if (!DefX2 || DefX2->getOpcode() != Instruction::And)
4105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BinaryOperator *SubOneOp;
4135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
4155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      VarX1 = DefX2->getOperand(1);
4165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    else {
4175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      VarX1 = DefX2->getOperand(0);
4185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
4195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
4205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!SubOneOp)
4215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Instruction *SubInst = cast<Instruction>(SubOneOp);
4245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
4255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!Dec ||
4265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
4275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang          (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
4285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
4305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 3: Check the recurrence of variable X
4335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
4345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    PhiX = dyn_cast<PHINode>(VarX1);
4355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!PhiX ||
4365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
4375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
4395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
4425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
443dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    CountInst = nullptr;
4445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
4455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang           IterE = LoopEntry->end(); Iter != IterE; Iter++) {
4465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      Instruction *Inst = Iter;
4475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (Inst->getOpcode() != Instruction::Add)
4485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        continue;
4495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
4515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (!Inc || !Inc->isOne())
4525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        continue;
4535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
4555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (!Phi || Phi->getParent() != LoopEntry)
4565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        continue;
4575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      // Check if the result of the instruction is live of the loop.
4595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      bool LiveOutLoop = false;
46036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (User *U : Inst->users()) {
46136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if ((cast<Instruction>(U))->getParent() != LoopEntry) {
4625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang          LiveOutLoop = true; break;
4635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        }
4645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      }
4655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      if (LiveOutLoop) {
4675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        CountInst = Inst;
4685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        CountPhi = Phi;
4695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang        break;
4705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      }
4715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
4725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!CountInst)
4745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 5: check if the precondition is in this form:
4785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
4795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
4805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
4815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
4825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
4835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      return false;
4845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CntInst = CountInst;
4865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CntPhi = CountPhi;
4875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Var = T;
4885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
4895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return true;
4915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
4925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangvoid NclPopcountRecognize::transform(Instruction *CntInst,
4945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                     PHINode *CntPhi, Value *Var) {
4955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
4965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  ScalarEvolution *SE = LIR.getScalarEvolution();
4975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
4985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *PreHead = CurLoop->getLoopPreheader();
4995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
5005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  const DebugLoc DL = CntInst->getDebugLoc();
5015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Assuming before transformation, the loop is following:
5035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  if (x) // the precondition
5045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //     do { cnt++; x &= x - 1; } while(x);
5051f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault
5065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Step 1: Insert the ctpop instruction at the end of the precondition block
5075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  IRBuilderTy Builder(PreCondBr);
5085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
5095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
5105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    PopCnt = createPopcntIntrinsic(Builder, Var, DL);
5115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    NewCount = PopCntZext =
5125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
5135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (NewCount != PopCnt)
5155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (cast<Instruction>(NewCount))->setDebugLoc(DL);
5165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    // TripCnt is exactly the number of iterations the loop has
5185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    TripCnt = NewCount;
5195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
52036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // If the population counter's initial value is not zero, insert Add Inst.
5215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
5225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
5235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (!InitConst || !InitConst->isZero()) {
5245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      NewCount = Builder.CreateAdd(NewCount, CntInitVal);
5255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (cast<Instruction>(NewCount))->setDebugLoc(DL);
5265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
5275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
5285518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to
5305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic
5315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   function would be partial dead code, and downstream passes will drag
5325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   it back from the precondition block to the preheader.
5335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
5345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
5355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *Opnd0 = PopCntZext;
5375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
5385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    if (PreCond->getOperand(0) != Var)
5395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      std::swap(Opnd0, Opnd1);
5405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ICmpInst *NewPreCond =
5425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
5435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    PreCond->replaceAllUsesWith(NewPreCond);
5445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    deleteDeadInstruction(PreCond, *SE, TLI);
5465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
5475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Step 3: Note that the population count is exactly the trip count of the
5495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // loop in question, which enble us to to convert the loop from noncountable
5505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // loop into a countable one. The benefit is twofold:
5515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
5525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  - If the loop only counts population, the entire loop become dead after
5535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    the transformation. It is lots easier to prove a countable loop dead
5545518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    than to prove a noncountable one. (In some C dialects, a infite loop
5555518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    isn't dead even if it computes nothing useful. In general, DCE needs
5565518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    to prove a noncountable loop finite before safely delete it.)
5575518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
5585518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  - If the loop also performs something else, it remains alive.
5595518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    Since it is transformed to countable form, it can be aggressively
5605518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    optimized by some optimizations which are in general not applicable
5615518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //    to a noncountable loop.
5625518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //
5635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // After this step, this loop (conceptually) would look like following:
5645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   newcnt = __builtin_ctpop(x);
5655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   t = newcnt;
5665518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   if (x)
5675518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //     do { cnt++; x &= x-1; t--) } while (t > 0);
5685518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  BasicBlock *Body = *(CurLoop->block_begin());
5695518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
5705518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    BranchInst *LbBr = LIRUtil::getBranch(Body);
5715518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
5725518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Type *Ty = TripCnt->getType();
5735518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
5755518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5765518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Builder.SetInsertPoint(LbCond);
5775518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *Opnd1 = cast<Value>(TcPhi);
5785518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
5795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    Instruction *TcDec =
5805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
5815518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5825518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    TcPhi->addIncoming(TripCnt, PreHead);
5835518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    TcPhi->addIncoming(TcDec, Body);
5845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
5865518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
5875518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    LbCond->setPredicate(Pred);
5885518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    LbCond->setOperand(0, TcDec);
5895518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
5905518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
5915518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
5925518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Step 4: All the references to the original population counter outside
5935518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  the loop are replaced with the NewCount -- the value returned from
5945518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //  __builtin_ctpop().
5955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  {
5965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    SmallVector<Value *, 4> CntUses;
59736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    for (User *U : CntInst->users())
59836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (cast<Instruction>(U)->getParent() != Body)
59936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        CntUses.push_back(U);
6005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
6015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang      (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
6025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    }
6035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  }
6045518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6055518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // step 5: Forget the "non-computable" trip-count SCEV associated with the
6065518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  //   loop. The loop would otherwise not be deleted even if it becomes empty.
6075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  SE->forgetLoop(CurLoop);
6085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
6095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6101f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt ArsenaultCallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder,
6115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang                                                      Value *Val, DebugLoc DL) {
6125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *Ops[] = { Val };
6135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Type *Tys[] = { Val->getType() };
6145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
6165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
6175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
6185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  CI->setDebugLoc(DL);
6195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return CI;
6215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
6225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang/// recognize - detect population count idiom in a non-countable loop. If
6245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang///   detected, transform the relevant code to popcount intrinsic function
6255518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang///   call, and return true; otherwise, return false.
6265518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool NclPopcountRecognize::recognize() {
6275518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6289980b8a0bd4c5dfb67909a2de7720076881e6525Chandler Carruth  if (!LIR.getTargetTransformInfo())
6295518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
6305518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6315518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  LIR.getScalarEvolution();
6325518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6335518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!preliminaryScreen())
6345518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
6355518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6365518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Instruction *CntInst;
6375518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  PHINode *CntPhi;
6385518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  Value *Val;
6395518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!detectIdiom(CntInst, CntPhi, Val))
6405518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
6415518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6425518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  transform(CntInst, CntPhi, Val);
6435518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return true;
6445518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
6455518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6465518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
6475518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
6485518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//          Implementation of LoopIdiomRecognize
6495518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//
6505518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang//===----------------------------------------------------------------------===//
6515518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6525518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnCountableLoop() {
6535518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
65422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  if (isa<SCEVCouldNotCompute>(BECount)) return false;
655d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
6568e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner  // If this loop executes exactly one time, then it should be peeled, not
6578e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner  // optimized by this pass.
6588e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
6598e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner    if (BECst->getValue()->getValue() == 0)
6608e08e73f0e12c9e669f2d7d0a5c9213df3046c01Chris Lattner      return false;
661d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
66222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // We require target data for now.
6635518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!getDataLayout())
6645518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
6655518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
6661f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault  // set DT
667cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang  (void)getDominatorTree();
66862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner
66962c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  LoopInfo &LI = getAnalysis<LoopInfo>();
670c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner  TLI = &getAnalysis<TargetLibraryInfo>();
671d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
6721f4492e0b0d3b2d58a0243f7b3d1a45ba0261075Matt Arsenault  // set TLI
673cbf5373603991b7196f096ddf2b5962ed7708b7eShuxin Yang  (void)getTargetLibraryInfo();
6745518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
67562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  SmallVector<BasicBlock*, 8> ExitBlocks;
67662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  CurLoop->getUniqueExitBlocks(ExitBlocks);
67762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner
67863f9c3c49ac7011e6366fbec42716f3f70f1beeeChris Lattner  DEBUG(dbgs() << "loop-idiom Scanning: F["
6795518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang               << CurLoop->getHeader()->getParent()->getName()
6805518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang               << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
681d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
68262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  bool MadeChange = false;
68362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  // Scan all the blocks in the loop that are not in subloops.
6845518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  for (Loop::block_iterator BI = CurLoop->block_begin(),
6855518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang         E = CurLoop->block_end(); BI != E; ++BI) {
68662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    // Ignore blocks in subloops.
68762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    if (LI.getLoopFor(*BI) != CurLoop)
68862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner      continue;
689d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
69062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
69162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  }
69262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  return MadeChange;
69362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner}
694e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
6955518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnNoncountableLoop() {
6965518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  NclPopcountRecognize Popcount(*this);
6975518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (Popcount.recognize())
6985518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return true;
6995518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7005518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return false;
7015518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
7025518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7035518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yangbool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
70436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (skipOptnoneFunction(L))
70536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return false;
70636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
7075518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  CurLoop = L;
7085518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7095518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // If the loop could not be converted to canonical form, it must have an
7105518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // indirectbr in it, just give up.
7115518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (!L->getLoopPreheader())
7125518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
7135518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7145518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  // Disable loop idiom recognition if the function's name is a common idiom.
7155518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  StringRef Name = L->getHeader()->getParent()->getName();
7165518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (Name == "memset" || Name == "memcpy")
7175518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return false;
7185518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
7195518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  SE = &getAnalysis<ScalarEvolution>();
7205518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  if (SE->hasLoopInvariantBackedgeTakenCount(L))
7215518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang    return runOnCountableLoop();
7225518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang  return runOnNoncountableLoop();
7235518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang}
7245518a1355b8b09bf92419b65ea4e4854734b0ebcShuxin Yang
72562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// runOnLoopBlock - Process the specified block, which lives in a counted loop
72662c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// with the specified backedge count.  This block is known to be in the current
72762c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner/// loop and not in any subloops.
72862c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattnerbool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
72962c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner                                     SmallVectorImpl<BasicBlock*> &ExitBlocks) {
73062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  // We can only promote stores in this block if they are unconditionally
73162c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  // executed in the loop.  For a block to be unconditionally executed, it has
73262c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  // to dominate all the exit blocks of the loop.  Verify this now.
73362c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
73462c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner    if (!DT->dominates(BB, ExitBlocks[i]))
73562c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner      return false;
736d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
73722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  bool MadeChange = false;
73822920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
739b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner    Instruction *Inst = I++;
740b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner    // Look for store instructions, which may be optimized to memset/memcpy.
741b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))  {
742b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      WeakVH InstPtr(I);
743b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      if (!processLoopStore(SI, BECount)) continue;
744b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      MadeChange = true;
745d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
746b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      // If processing the store invalidated our iterator, start over from the
747e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      // top of the block.
748dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!InstPtr)
749b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner        I = BB->begin();
750b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner      continue;
751b7e9ef0ed1e246bd64d97a768555c8334c0d86e9Chris Lattner    }
752d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
753e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    // Look for memset instructions, which may be optimized to a larger memset.
754e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst))  {
755e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      WeakVH InstPtr(I);
756e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      if (!processLoopMemSet(MSI, BECount)) continue;
757e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      MadeChange = true;
758d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
759e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      // If processing the memset invalidated our iterator, start over from the
760e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      // top of the block.
761dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!InstPtr)
762e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner        I = BB->begin();
763e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner      continue;
764e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    }
76522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  }
766d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
76722920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  return MadeChange;
768e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner}
769e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
77062c50fdf69064c0d086aedfadb6ceb7c303bbcb9Chris Lattner
771e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner/// processLoopStore - See if this store can be promoted to a memset or memcpy.
77222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattnerbool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
7732bc3d52b9ab422ee9f7e42a1a4e3b818e623a5f7Eli Friedman  if (!SI->isSimple()) return false;
774e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner
77522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  Value *StoredVal = SI->getValueOperand();
776a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  Value *StorePtr = SI->getPointerOperand();
777d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
77895ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner  // Reject stores that are so large that they overflow an unsigned.
77936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
78095ae676bc89b4cb9166576b74f1220ab5b0ff0adChris Lattner  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
78122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    return false;
782d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
78322920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // See if the pointer expression is an AddRec like {base,+,1} on the current
78422920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // loop, which indicates a strided store.  If we have something else, it's a
78522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // random store we can't handle.
786e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  const SCEVAddRecExpr *StoreEv =
787e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
788dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
78922920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    return false;
79022920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner
79122920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // Check to see if the stride matches the size of the store.  If so, then we
79222920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner  // know that every byte is touched in the loop.
793d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick  unsigned StoreSize = (unsigned)SizeInBits >> 3;
794e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
795d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
796dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!Stride || StoreSize != Stride->getValue()->getValue()) {
797408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner    // TODO: Could also handle negative stride here someday, that will require
798408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner    // the validity check in mayLoopAccessLocation to be updated though.
799408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner    // Enable this to print exact negative strides.
8000e68cee62f251c45df92c71ca536142bc7d82631Chris Lattner    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
801408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
802408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner      dbgs() << "BB: " << *SI->getParent();
803408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner    }
804d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
80522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner    return false;
806408b534e431cc317f1116cd78df96da2af7cafd9Chris Lattner  }
8073a393728a62122d7009d8e2cbe52a221874e576aChris Lattner
8083a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // See if we can optimize just this store in isolation.
8093a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
8103a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                              StoredVal, SI, StoreEv, BECount))
8113a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    return true;
812a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner
813e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // If the stored value is a strided load in the same loop with the same stride
814e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // this this may be transformable into a memcpy.  This kicks in for stuff like
815e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  //   for (i) A[i] = B[i];
816e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
817e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    const SCEVAddRecExpr *LoadEv =
818e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
819e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
8202bc3d52b9ab422ee9f7e42a1a4e3b818e623a5f7Eli Friedman        StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
821e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
822e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner        return true;
823e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  }
8244ce31fb57412b16cd61337ba41c60112dfbecfb6Chris Lattner  //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";
82522920b59864cac3b62d158fd4111cfda3c09c67aChris Lattner
826e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner  return false;
827e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner}
828e6bb649ec63059d795a31198c3c569c137e7ad59Chris Lattner
829e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner/// processLoopMemSet - See if this memset can be promoted to a large memset.
830e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattnerbool LoopIdiomRecognize::
831e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris LattnerprocessLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
832e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // We can only handle non-volatile memsets with a constant size.
833e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;
834e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner
835c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner  // If we're not allowed to hack on memset, we fail.
836c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner  if (!TLI->has(LibFunc::memset))
837c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner    return false;
838d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
839e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  Value *Pointer = MSI->getDest();
840d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
841e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // See if the pointer expression is an AddRec like {base,+,1} on the current
842e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // loop, which indicates a strided store.  If we have something else, it's a
843e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // random store we can't handle.
844e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
845dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
846e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    return false;
847e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner
848e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // Reject memsets that are so large that they overflow an unsigned.
849e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
850e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  if ((SizeInBytes >> 32) != 0)
851e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    return false;
852d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
853e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // Check to see if the stride matches the size of the memset.  If so, then we
854e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // know that every byte is touched in the loop.
855e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
856d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
857e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // TODO: Could also handle negative stride here someday, that will require the
858e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner  // validity check in mayLoopAccessLocation to be updated though.
859dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!Stride || MSI->getLength() != Stride->getValue())
860e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner    return false;
861d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
8623a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
8633a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 MSI->getAlignment(), MSI->getValue(),
8643a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                 MSI, Ev, BECount);
865e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner}
866e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner
867a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
868a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// mayLoopAccessLocation - Return true if the specified loop might access the
869a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// specified pointer location, which is a loop-strided access.  The 'Access'
870a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth/// argument specifies what the verboten forms of access are (read or write).
871a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruthstatic bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
872a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                                  Loop *L, const SCEV *BECount,
873a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                                  unsigned StoreSize, AliasAnalysis &AA,
874a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                                  Instruction *IgnoredStore) {
875a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // Get the location that may be stored across the loop.  Since the access is
876a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // strided positively through memory, we say that the modified location starts
877a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // at the pointer and has infinite size.
878a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  uint64_t AccessSize = AliasAnalysis::UnknownSize;
879a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
880a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // If the loop iterates a fixed number of times, we can refine the access size
881a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // to be exactly the size of the memset, which is (BECount+1)*StoreSize
882a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
883a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;
884a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
885a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // TODO: For this to be really effective, we have to dive into the pointer
886a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // operand in the store.  Store to &A[i] of 100 will always return may alias
887a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
888a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // which will then no-alias a store to &A[100].
889a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);
890a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
891a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
892a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth       ++BI)
893a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
894a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth      if (&*I != IgnoredStore &&
895a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth          (AA.getModRefInfo(I, StoreLoc) & Access))
896a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth        return true;
897a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
898a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  return false;
899a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth}
900a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
9013a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// getMemSetPatternValue - If a strided store of the specified value is safe to
9023a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
9033a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// be passed in.  Otherwise, return null.
9043a393728a62122d7009d8e2cbe52a221874e576aChris Lattner///
9053a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
9063a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// just replicate their input array and then pass on to memset_pattern16.
90736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
9083a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // If the value isn't a constant, we can't promote it to being in a constant
9093a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // array.  We could theoretically do a store to an alloca or something, but
9103a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // that doesn't seem worthwhile.
9113a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  Constant *C = dyn_cast<Constant>(V);
912dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!C) return nullptr;
913d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
9143a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // Only handle simple values that are a power of two bytes in size.
91536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  uint64_t Size = DL.getTypeSizeInBits(V->getType());
9163a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (Size == 0 || (Size & 7) || (Size & (Size-1)))
917dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return nullptr;
918d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
91980e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  // Don't care enough about darwin/ppc to implement this.
92036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (DL.isBigEndian())
921dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return nullptr;
9223a393728a62122d7009d8e2cbe52a221874e576aChris Lattner
9233a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // Convert to size in bytes.
9243a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  Size /= 8;
925c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner
9263a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
92780e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  // if the top and bottom are the same (e.g. for vectors and large integers).
928dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (Size > 16) return nullptr;
929d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
93080e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  // If the constant is exactly 16 bytes, just use it.
93180e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  if (Size == 16) return C;
93280e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner
93380e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  // Otherwise, we'll use an array of the constants.
93480e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  unsigned ArraySize = 16/Size;
93580e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
93680e8b506b8134d63dc3cb6211cccc34be4b19d40Chris Lattner  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
9373a393728a62122d7009d8e2cbe52a221874e576aChris Lattner}
9383a393728a62122d7009d8e2cbe52a221874e576aChris Lattner
9393a393728a62122d7009d8e2cbe52a221874e576aChris Lattner
9403a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// processLoopStridedStore - We see a strided store of some value.  If we can
9413a393728a62122d7009d8e2cbe52a221874e576aChris Lattner/// transform this into a memset or memset_pattern in the loop preheader, do so.
9423a393728a62122d7009d8e2cbe52a221874e576aChris Lattnerbool LoopIdiomRecognize::
9433a393728a62122d7009d8e2cbe52a221874e576aChris LattnerprocessLoopStridedStore(Value *DestPtr, unsigned StoreSize,
9443a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                        unsigned StoreAlignment, Value *StoredVal,
9453a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                        Instruction *TheStore, const SCEVAddRecExpr *Ev,
9463a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                        const SCEV *BECount) {
947d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
9483a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // If the stored value is a byte-wise value (like i32 -1), then it may be
9493a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // turned into a memset of i8 -1, assuming that all the consecutive bytes
9503a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
9513a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // but it can be turned into memset_pattern if the target supports it.
9523a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  Value *SplatValue = isBytewiseValue(StoredVal);
953dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  Constant *PatternValue = nullptr;
954d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
95511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault  unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
95611250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault
9573a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // If we're allowed to form a memset, and the stored value would be acceptable
9583a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  // for memset, use it.
9593a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  if (SplatValue && TLI->has(LibFunc::memset) &&
9603a393728a62122d7009d8e2cbe52a221874e576aChris Lattner      // Verify that the stored value is loop invariant.  If not, we can't
9613a393728a62122d7009d8e2cbe52a221874e576aChris Lattner      // promote the memset.
9623a393728a62122d7009d8e2cbe52a221874e576aChris Lattner      CurLoop->isLoopInvariant(SplatValue)) {
9633a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // Keep and use SplatValue.
964dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    PatternValue = nullptr;
96511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault  } else if (DestAS == 0 &&
96611250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault             TLI->has(LibFunc::memset_pattern16) &&
96736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines             (PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
96811250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault    // Don't create memset_pattern16s with address spaces.
9693a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // It looks like we can use PatternValue!
970dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    SplatValue = nullptr;
9713a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  } else {
9723a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // Otherwise, this isn't an idiom we can transform.  For example, we can't
9735ac7c7da3ebfd1aa95226aeefb8cfa3ba34fd72aEli Friedman    // do anything with a 3-byte store.
974bafa117e8f2e3532f391227ebc9d4513b17edbadChris Lattner    return false;
9753a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  }
976d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
977a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  // The trip count of the loop and the base pointer of the addrec SCEV is
978a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  // guaranteed to be loop invariant, which means that it should dominate the
9794f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // header.  This allows us to insert code for it in the preheader.
9804f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  BasicBlock *Preheader = CurLoop->getLoopPreheader();
9814f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  IRBuilder<> Builder(Preheader->getTerminator());
9825e7645be4c9dd2193add44d30b5fef8036d7a3ceAndrew Trick  SCEVExpander Expander(*SE, "loop-idiom");
983a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick
98411250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault  Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
98511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault
9864f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
9873740e798bc850cd5d40185959801606433b0221fBenjamin Kramer  // this into a memset in the loop preheader now if we want.  However, this
9883740e798bc850cd5d40185959801606433b0221fBenjamin Kramer  // would be unsafe to do if there is anything else in the loop that may read
989ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth  // or write to the aliased location.  Check for any overlap by generating the
990ece6c6bb6329748b92403c06ac87f45c43485911Chandler Carruth  // base pointer and checking the region.
991d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick  Value *BasePtr =
99211250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault    Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
993a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner                           Preheader->getTerminator());
994d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
995a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
996a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            CurLoop, BECount,
997f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) {
998a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    Expander.clear();
999a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    // If we generated new code for the base pointer, clean up.
1000a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    deleteIfDeadInstruction(BasePtr, *SE, TLI);
1001a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    return false;
1002a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  }
1003a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
10044f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // Okay, everything looks good, insert the memset.
10054f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner
1006a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
1007a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  // pointer size if it isn't already.
100836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Type *IntPtr = Builder.getIntPtrTy(DL, DestAS);
10097c90b90f4e9b0c421f0e45d7de03f6edce113a90Chris Lattner  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
1010d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1011a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
10123228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick                                         SCEV::FlagNUW);
1013f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault  if (StoreSize != 1) {
1014a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
10153228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick                               SCEV::FlagNUW);
1016f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault  }
1017d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1018d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick  Value *NumBytes =
1019a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
1020d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1021cd77a50e638be5e7153ebe2a8ba875de7df48beaDevang Patel  CallInst *NewCall;
1022f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault  if (SplatValue) {
1023f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault    NewCall = Builder.CreateMemSet(BasePtr,
1024f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault                                   SplatValue,
1025f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault                                   NumBytes,
1026f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault                                   StoreAlignment);
1027f834dce7c7d13af85be5bc8b789c1d7793db8a58Matt Arsenault  } else {
102811250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault    // Everything is emitted in default address space
102911250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault    Type *Int8PtrTy = DestInt8PtrTy;
103011250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault
10313a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    Module *M = TheStore->getParent()->getParent()->getParent();
10323a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    Value *MSP = M->getOrInsertFunction("memset_pattern16",
10333a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                        Builder.getVoidTy(),
103411250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault                                        Int8PtrTy,
103511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault                                        Int8PtrTy,
103611250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault                                        IntPtr,
1037dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                        (void*)nullptr);
1038d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
10393a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // Otherwise we should form a memset_pattern16.  PatternValue is known to be
10403a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    // an constant array of 16-bytes.  Plop the value into a mergable global.
10413a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
10423a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                            GlobalValue::InternalLinkage,
10433a393728a62122d7009d8e2cbe52a221874e576aChris Lattner                                            PatternValue, ".memset_pattern");
10443a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    GV->setUnnamedAddr(true); // Ok to merge these.
10453a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    GV->setAlignment(16);
104611250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault    Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
10473a393728a62122d7009d8e2cbe52a221874e576aChris Lattner    NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
10483a393728a62122d7009d8e2cbe52a221874e576aChris Lattner  }
1049d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1050a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
1051e41d3c015ce5cafde305d9a6d9baaae1c41bf46aChris Lattner               << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
1052cd77a50e638be5e7153ebe2a8ba875de7df48beaDevang Patel  NewCall->setDebugLoc(TheStore->getDebugLoc());
1053d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
10549f39188def2b4102817945522fac1209083efa06Chris Lattner  // Okay, the memset has been formed.  Zap the original store and anything that
10559f39188def2b4102817945522fac1209083efa06Chris Lattner  // feeds into it.
10568e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer  deleteDeadInstruction(TheStore, *SE, TLI);
10574ce31fb57412b16cd61337ba41c60112dfbecfb6Chris Lattner  ++NumMemSet;
1058a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner  return true;
1059a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner}
1060a92ff91a967c63a2395a34c9e8331a7d50d6ab10Chris Lattner
1061e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
1062e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner/// same-strided load.
1063e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattnerbool LoopIdiomRecognize::
1064e2c43920919c6fe376613d1d8331897dc1ba3d57Chris LattnerprocessLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
1065e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           const SCEVAddRecExpr *StoreEv,
1066e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           const SCEVAddRecExpr *LoadEv,
1067e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           const SCEV *BECount) {
1068c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner  // If we're not allowed to form memcpy, we fail.
1069a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (!TLI->has(LibFunc::memcpy))
1070c19175c9d8d3c2663b682a7fe34bb22da370d5c1Chris Lattner    return false;
1071d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1072e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
1073d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
10744f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // The trip count of the loop and the base pointer of the addrec SCEV is
10754f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // guaranteed to be loop invariant, which means that it should dominate the
10764f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // header.  This allows us to insert code for it in the preheader.
10774f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  BasicBlock *Preheader = CurLoop->getLoopPreheader();
10784f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  IRBuilder<> Builder(Preheader->getTerminator());
10795e7645be4c9dd2193add44d30b5fef8036d7a3ceAndrew Trick  SCEVExpander Expander(*SE, "loop-idiom");
1080a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick
1081e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
1082a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // this into a memcpy in the loop preheader now if we want.  However, this
1083a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // would be unsafe to do if there is anything else in the loop that may read
1084a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // or write the memory region we're storing to.  This includes the load that
1085a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // feeds the stores.  Check for an alias by generating the base address and
1086a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // checking everything.
10874f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  Value *StoreBasePtr =
10884f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner    Expander.expandCodeFor(StoreEv->getStart(),
10894f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
10904f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner                           Preheader->getTerminator());
1091a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
1092a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
1093a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            CurLoop, BECount, StoreSize,
1094a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            getAnalysis<AliasAnalysis>(), SI)) {
1095a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    Expander.clear();
1096a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    // If we generated new code for the base pointer, clean up.
1097a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
1098a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    return false;
1099a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  }
1100a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
1101a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // For a memcpy, we have to make sure that the input array is not being
1102a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  // mutated by the loop.
1103d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick  Value *LoadBasePtr =
1104e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner    Expander.expandCodeFor(LoadEv->getStart(),
1105e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
1106e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner                           Preheader->getTerminator());
11074f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner
1108a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
1109a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
1110a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    Expander.clear();
1111a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    // If we generated new code for the base pointer, clean up.
1112a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
1113a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
1114a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    return false;
1115a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  }
1116a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth
11174f81b5419295cfc26a1349d6c23a55c6d2a683e1Chris Lattner  // Okay, everything is safe, we can transform this!
1118a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick
1119d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1120e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
1121e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // pointer size if it isn't already.
112236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace());
112311250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
1124d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
112511250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
11263228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick                                         SCEV::FlagNUW);
1127e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  if (StoreSize != 1)
112811250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
11293228cc259b5ca00e46af36da369a451f5736cbf4Andrew Trick                               SCEV::FlagNUW);
1130d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1131e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  Value *NumBytes =
113211250c1194830aa4cec72788dcd04f06cfe33f50Matt Arsenault    Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
1133d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1134a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  CallInst *NewCall =
1135a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
1136a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth                         std::min(SI->getAlignment(), LI->getAlignment()));
1137af35841f2e346f9259606886698c188280406cdbDevang Patel  NewCall->setDebugLoc(SI->getDebugLoc());
1138d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1139a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
1140e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
1141e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");
1142a5d950f673c29710d0e9e2afefe74b7003362a06Andrew Trick
1143d99b39e43b6e8263c80425c99d4e924f08e86e43Andrew Trick
1144e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // Okay, the memset has been formed.  Zap the original store and anything that
1145e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  // feeds into it.
11468e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6Benjamin Kramer  deleteDeadInstruction(SI, *SE, TLI);
1147a864748284f4bab974ffb13b840a611c1f9bc7acChandler Carruth  ++NumMemCpy;
1148e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner  return true;
1149e2c43920919c6fe376613d1d8331897dc1ba3d57Chris Lattner}
1150