CodeGenPrepare.cpp revision 6ccb5ef1b504e71b63219437f5bcf4856207949b
1b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//
3b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//                     The LLVM Compiler Infrastructure
4b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//
5b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman// This file is distributed under the University of Illinois Open Source
6b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman// License. See LICENSE.TXT for details.
7b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//
8b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//===----------------------------------------------------------------------===//
9b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//
10b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman// This pass munges the code in the input function to better prepare it for
11b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman// SelectionDAG-based code generation. This works around limitations in it's
12b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman// basic-block-at-a-time approach. It should eventually be removed.
13b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//
14b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman//===----------------------------------------------------------------------===//
15b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman
16b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#define DEBUG_TYPE "codegenprepare"
17b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Transforms/Scalar.h"
18b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Constants.h"
19b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/DerivedTypes.h"
20b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Function.h"
21b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/InlineAsm.h"
22b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Instructions.h"
2310df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman#include "llvm/IntrinsicInst.h"
24b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Pass.h"
2595267a1e671efc3c14e916b6978bbb15973b4cdcOwen Anderson#include "llvm/Analysis/Dominators.h"
26b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Analysis/InstructionSimplify.h"
27bb466331e7e50d03497ce40ee344870236fd9c32Dan Gohman#include "llvm/Analysis/ProfileInfo.h"
2883785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng#include "llvm/Target/TargetData.h"
29b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Target/TargetLowering.h"
3083785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng#include "llvm/Transforms/Utils/AddrModeMatcher.h"
3122bb31103de3337f0bb74c7bee16d1817d4dca14Dan Gohman#include "llvm/Transforms/Utils/BasicBlockUtils.h"
32b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Transforms/Utils/Local.h"
33b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Transforms/Utils/BuildLibCalls.h"
3440610241d00e219341ff4b7106c5baff08ad407bDan Gohman#include "llvm/ADT/DenseMap.h"
3540610241d00e219341ff4b7106c5baff08ad407bDan Gohman#include "llvm/ADT/SmallSet.h"
3640610241d00e219341ff4b7106c5baff08ad407bDan Gohman#include "llvm/ADT/Statistic.h"
37b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Assembly/Writer.h"
3822bb31103de3337f0bb74c7bee16d1817d4dca14Dan Gohman#include "llvm/Support/CallSite.h"
39b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman#include "llvm/Support/CommandLine.h"
40104e4ce1629ea84736691bd1ee7867bdf90e8a2eDan Gohman#include "llvm/Support/Debug.h"
413df24e667f04a7003342b534310919abc9c87418Dan Gohman#include "llvm/Support/GetElementPtrTypeIterator.h"
423df24e667f04a7003342b534310919abc9c87418Dan Gohman#include "llvm/Support/PatternMatch.h"
43bb466331e7e50d03497ce40ee344870236fd9c32Dan Gohman#include "llvm/Support/raw_ostream.h"
44bb466331e7e50d03497ce40ee344870236fd9c32Dan Gohman#include "llvm/Support/IRBuilder.h"
4522bb31103de3337f0bb74c7bee16d1817d4dca14Dan Gohman#include "llvm/Support/ValueHandle.h"
4683785c80968165b30fcdd111ceb2c28d38bcff86Evan Chengusing namespace llvm;
47bb466331e7e50d03497ce40ee344870236fd9c32Dan Gohmanusing namespace llvm::PatternMatch;
4822bb31103de3337f0bb74c7bee16d1817d4dca14Dan Gohman
49b0cf29c5cfff797284b3660dc233e135feb65d9aDan GohmanSTATISTIC(NumBlocksElim, "Number of blocks eliminated");
50b0cf29c5cfff797284b3660dc233e135feb65d9aDan GohmanSTATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
513df24e667f04a7003342b534310919abc9c87418Dan GohmanSTATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
523df24e667f04a7003342b534310919abc9c87418Dan GohmanSTATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
533df24e667f04a7003342b534310919abc9c87418Dan Gohman                      "sunken Cmps");
543df24e667f04a7003342b534310919abc9c87418Dan GohmanSTATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
553df24e667f04a7003342b534310919abc9c87418Dan Gohman                       "of sunken Casts");
563df24e667f04a7003342b534310919abc9c87418Dan GohmanSTATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
573df24e667f04a7003342b534310919abc9c87418Dan Gohman                          "computations were sunk");
583df24e667f04a7003342b534310919abc9c87418Dan GohmanSTATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
593df24e667f04a7003342b534310919abc9c87418Dan GohmanSTATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
603df24e667f04a7003342b534310919abc9c87418Dan Gohman
613df24e667f04a7003342b534310919abc9c87418Dan Gohmanstatic cl::opt<bool>
623df24e667f04a7003342b534310919abc9c87418Dan GohmanCriticalEdgeSplit("cgp-critical-edge-splitting",
63b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman                  cl::desc("Split critical edges during codegen prepare"),
6440b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman                  cl::init(false), cl::Hidden);
6540b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman
6640b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohmannamespace {
6740b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman  class CodeGenPrepare : public FunctionPass {
6840b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman    /// TLI - Keep a pointer of a TargetLowering to consult for determining
6940b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman    /// transformation profitability.
7040b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman    const TargetLowering *TLI;
7199b218218c0ca3ebfdd568ddfeafa07842e9d69dDan Gohman    DominatorTree *DT;
7299b218218c0ca3ebfdd568ddfeafa07842e9d69dDan Gohman    ProfileInfo *PFI;
7399b218218c0ca3ebfdd568ddfeafa07842e9d69dDan Gohman
7499b218218c0ca3ebfdd568ddfeafa07842e9d69dDan Gohman    /// CurInstIterator - As we scan instructions optimizing them, this is the
7599b218218c0ca3ebfdd568ddfeafa07842e9d69dDan Gohman    /// next instruction to optimize.  Xforms that can invalidate this should
7699b218218c0ca3ebfdd568ddfeafa07842e9d69dDan Gohman    /// update it.
773df24e667f04a7003342b534310919abc9c87418Dan Gohman    BasicBlock::iterator CurInstIterator;
783df24e667f04a7003342b534310919abc9c87418Dan Gohman
793df24e667f04a7003342b534310919abc9c87418Dan Gohman    /// BackEdges - Keep a set of all the loop back edges.
803df24e667f04a7003342b534310919abc9c87418Dan Gohman    ///
813df24e667f04a7003342b534310919abc9c87418Dan Gohman    SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
8299b218218c0ca3ebfdd568ddfeafa07842e9d69dDan Gohman
8359fbc80f6b3b5c71dfb84149f589625f7ed510e3Evan Cheng    // Keeps track of non-local addresses that have been sunk into a block. This
8459fbc80f6b3b5c71dfb84149f589625f7ed510e3Evan Cheng    // allows us to avoid inserting duplicate code for blocks with multiple
8559fbc80f6b3b5c71dfb84149f589625f7ed510e3Evan Cheng    // load/stores of the same address.
8659fbc80f6b3b5c71dfb84149f589625f7ed510e3Evan Cheng    DenseMap<Value*, Value*> SunkAddrs;
8759fbc80f6b3b5c71dfb84149f589625f7ed510e3Evan Cheng
88cc8430f742b0f1e567292c8a776e94fc1c930b2aDan Gohman  public:
89cc8430f742b0f1e567292c8a776e94fc1c930b2aDan Gohman    static char ID; // Pass identification, replacement for typeid
90b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    explicit CodeGenPrepare(const TargetLowering *tli = 0)
913df24e667f04a7003342b534310919abc9c87418Dan Gohman      : FunctionPass(ID), TLI(tli) {
923df24e667f04a7003342b534310919abc9c87418Dan Gohman        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
933df24e667f04a7003342b534310919abc9c87418Dan Gohman      }
94e285a74f7cf9dd3ccf4fe758576cf83301f8a43eDan Gohman    bool runOnFunction(Function &F);
95bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman
96bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
97bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman      AU.addPreserved<DominatorTree>();
98b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman      AU.addPreserved<ProfileInfo>();
990f84e4e31009eecf2dfcbe6113b65d0919f30254Owen Anderson    }
100b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman
101bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    virtual void releaseMemory() {
102bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman      BackEdges.clear();
103bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    }
104bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman
105bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman  private:
106b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    bool EliminateMostlyEmptyBlocks(Function &F);
1070f84e4e31009eecf2dfcbe6113b65d0919f30254Owen Anderson    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
108b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    void EliminateMostlyEmptyBlock(BasicBlock *BB);
109bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    bool OptimizeBlock(BasicBlock &BB);
110bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    bool OptimizeInst(Instruction *I);
111bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy);
112bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    bool OptimizeInlineAsmInst(CallInst *CS);
113bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    bool OptimizeCallInst(CallInst *CI);
114b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    bool MoveExtToFormExtLoad(Instruction *I);
1150f84e4e31009eecf2dfcbe6113b65d0919f30254Owen Anderson    bool OptimizeExtUses(Instruction *I);
116b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    void findLoopBackEdges(const Function &F);
117b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman  };
118b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman}
11983785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng
12083785c80968165b30fcdd111ceb2c28d38bcff86Evan Chengchar CodeGenPrepare::ID = 0;
12183785c80968165b30fcdd111ceb2c28d38bcff86Evan ChengINITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
12283785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng                "Optimize for code generation", false, false)
12383785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng
1240f84e4e31009eecf2dfcbe6113b65d0919f30254Owen AndersonFunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
12583785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng  return new CodeGenPrepare(TLI);
126d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman}
127d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman
12810df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman/// findLoopBackEdges - Do a DFS walk to find loop back edges.
12910df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman///
13010df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohmanvoid CodeGenPrepare::findLoopBackEdges(const Function &F) {
13110df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
13210df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman  FindFunctionBackedges(F, Edges);
13310df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman
13410df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman  BackEdges.insert(Edges.begin(), Edges.end());
13510df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman}
13610df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman
137d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman
138d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohmanbool CodeGenPrepare::runOnFunction(Function &F) {
139d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  bool EverMadeChange = false;
140d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman
141d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  DT = getAnalysisIfAvailable<DominatorTree>();
1420f84e4e31009eecf2dfcbe6113b65d0919f30254Owen Anderson  PFI = getAnalysisIfAvailable<ProfileInfo>();
143d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  // First pass, eliminate blocks that contain only PHI nodes and an
144d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  // unconditional branch.
14583785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng  EverMadeChange |= EliminateMostlyEmptyBlocks(F);
14683785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng
14783785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng  // Now find loop back edges, but only if they are being used to decide which
14883785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng  // critical edges to split.
14983785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng  if (CriticalEdgeSplit)
15083785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng    findLoopBackEdges(F);
15183785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng
15283785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng  bool MadeChange = true;
15383785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng  while (MadeChange) {
1546d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson    MadeChange = false;
15510df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
15610df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman      MadeChange |= OptimizeBlock(*BB);
15710df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman    EverMadeChange |= MadeChange;
15810df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman  }
15910df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman
16010df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman  SunkAddrs.clear();
16110df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman
16210df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman  return EverMadeChange;
16310df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman}
1646d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson
1656d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
1666d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson/// debug info directives, and an unconditional branch.  Passes before isel
1676d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
1680f84e4e31009eecf2dfcbe6113b65d0919f30254Owen Anderson/// isel.  Start by eliminating these blocks so we can split them the way we
1696d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson/// want them.
1706d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Andersonbool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
17183785c80968165b30fcdd111ceb2c28d38bcff86Evan Cheng  bool MadeChange = false;
17210df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman  // Note that this intentionally skips the entry block.
17310df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
17410df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman    BasicBlock *BB = I++;
17510df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman
17610df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman    // If this block doesn't end with an uncond branch, ignore it.
17710df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
17810df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman    if (!BI || !BI->isUnconditional())
17910df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman      continue;
180bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman
181bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    // If the instruction before the branch (skipping debug info) isn't a phi
182bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    // node, then other stuff is happening here.
183b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    BasicBlock::iterator BBI = BI;
184b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    if (BBI != BB->begin()) {
185bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman      --BBI;
186d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman      while (isa<DbgInfoIntrinsic>(BBI)) {
187bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman        if (BBI == BB->begin())
188bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman          break;
189b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman        --BBI;
190b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman      }
191b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
192bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman        continue;
193d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman    }
194bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman
195bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman    // Do not break infinite loops.
196b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    BasicBlock *DestBB = BI->getSuccessor(0);
197b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    if (DestBB == BB)
198b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman      continue;
199bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman
200d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman    if (!CanMergeBlocks(BB, DestBB))
201d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman      continue;
202d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman
203d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman    EliminateMostlyEmptyBlock(BB);
204d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman    MadeChange = true;
205d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  }
206d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  return MadeChange;
20710df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman}
20810df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman
20910df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
21010df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman/// single uncond branch between them, and BB contains no other non-phi
21110df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman/// instructions.
21210df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohmanbool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
21310df0fa73e396bbc93a8940e8b53827390c54d10Dan Gohman                                    const BasicBlock *DestBB) const {
214d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
215d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  // the successor.  If there are more complex condition (e.g. preheaders),
216d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  // don't mess around with them.
217d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  BasicBlock::const_iterator BBI = BB->begin();
218d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
219d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
2206d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson         UI != E; ++UI) {
2216d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson      const Instruction *User = cast<Instruction>(*UI);
2226d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson      if (User->getParent() != DestBB || !isa<PHINode>(User))
2236d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson        return false;
2246d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson      // If User is inside DestBB block and it is a PHINode then check
2256d0c25ec3a7ca822e68f73a4481eee43eb5c9485Owen Anderson      // incoming value. If incoming value is not from BB then this is
226d5fe57d2f980c6bd1a61450f99c254a76d0f1683Dan Gohman      // a complex condition (e.g. preheaders) we want to avoid here.
2278970f00deff00ffce1f35cf00883357e1582daa1Owen Anderson      if (User->getParent() == DestBB) {
2288970f00deff00ffce1f35cf00883357e1582daa1Owen Anderson        if (const PHINode *UPN = dyn_cast<PHINode>(User))
22940a468f24909792f000e3ccc1dda7a27b9c34b69Owen Anderson          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
2308970f00deff00ffce1f35cf00883357e1582daa1Owen Anderson            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
23195267a1e671efc3c14e916b6978bbb15973b4cdcOwen Anderson            if (Insn && Insn->getParent() == BB &&
232ea09f4f4691a0db65772b54fe8163a48c9dce01dEvan Cheng                Insn->getParent() != UPN->getIncomingBlock(I))
233c7f72de3b4ef21828ea4780f0693bf0acd04e1c5Dan Gohman              return false;
23495267a1e671efc3c14e916b6978bbb15973b4cdcOwen Anderson          }
2359c7216f984111eb8f1716741bc9039ed86ec4a9bOwen Anderson      }
2369c7216f984111eb8f1716741bc9039ed86ec4a9bOwen Anderson    }
2379c7216f984111eb8f1716741bc9039ed86ec4a9bOwen Anderson  }
2389c7216f984111eb8f1716741bc9039ed86ec4a9bOwen Anderson
23995267a1e671efc3c14e916b6978bbb15973b4cdcOwen Anderson  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
24095267a1e671efc3c14e916b6978bbb15973b4cdcOwen Anderson  // and DestBB may have conflicting incoming values for the block.  If so, we
241c7f72de3b4ef21828ea4780f0693bf0acd04e1c5Dan Gohman  // can't merge the block.
242ea09f4f4691a0db65772b54fe8163a48c9dce01dEvan Cheng  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
24340b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman  if (!DestBBPN) return true;  // no conflict.
244bdedd4477331b3b0d28d74658baf05f675f2d195Dan Gohman
24540b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman  // Collect the preds of BB.
246763d89343be210eb62a13318ca0cc9321ce46bfbDan Gohman  SmallPtrSet<const BasicBlock*, 16> BBPreds;
24740b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
248d0533c9998d3baf41848ba559a9b2f2c65296d14Owen Anderson    // It is faster to get preds from a PHI than with pred_iterator.
24940b189e4e257924d90aaf63bf2e12bc7bbca961aDan Gohman    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
250b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman      BBPreds.insert(BBPN->getIncomingBlock(i));
251b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman  } else {
252b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman    BBPreds.insert(pred_begin(BB), pred_end(BB));
253b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman  }
254b0cf29c5cfff797284b3660dc233e135feb65d9aDan Gohman
255  // Walk the preds of DestBB.
256  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
257    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
258    if (BBPreds.count(Pred)) {   // Common predecessor?
259      BBI = DestBB->begin();
260      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
261        const Value *V1 = PN->getIncomingValueForBlock(Pred);
262        const Value *V2 = PN->getIncomingValueForBlock(BB);
263
264        // If V2 is a phi node in BB, look up what the mapped value will be.
265        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
266          if (V2PN->getParent() == BB)
267            V2 = V2PN->getIncomingValueForBlock(Pred);
268
269        // If there is a conflict, bail out.
270        if (V1 != V2) return false;
271      }
272    }
273  }
274
275  return true;
276}
277
278
279/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
280/// an unconditional branch in it.
281void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
282  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
283  BasicBlock *DestBB = BI->getSuccessor(0);
284
285  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
286
287  // If the destination block has a single pred, then this is a trivial edge,
288  // just collapse it.
289  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
290    if (SinglePred != DestBB) {
291      // Remember if SinglePred was the entry block of the function.  If so, we
292      // will need to move BB back to the entry position.
293      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
294      MergeBasicBlockIntoOnlyPred(DestBB, this);
295
296      if (isEntry && BB != &BB->getParent()->getEntryBlock())
297        BB->moveBefore(&BB->getParent()->getEntryBlock());
298
299      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
300      return;
301    }
302  }
303
304  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
305  // to handle the new incoming edges it is about to have.
306  PHINode *PN;
307  for (BasicBlock::iterator BBI = DestBB->begin();
308       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
309    // Remove the incoming value for BB, and remember it.
310    Value *InVal = PN->removeIncomingValue(BB, false);
311
312    // Two options: either the InVal is a phi node defined in BB or it is some
313    // value that dominates BB.
314    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
315    if (InValPhi && InValPhi->getParent() == BB) {
316      // Add all of the input values of the input PHI as inputs of this phi.
317      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
318        PN->addIncoming(InValPhi->getIncomingValue(i),
319                        InValPhi->getIncomingBlock(i));
320    } else {
321      // Otherwise, add one instance of the dominating value for each edge that
322      // we will be adding.
323      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
324        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
325          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
326      } else {
327        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
328          PN->addIncoming(InVal, *PI);
329      }
330    }
331  }
332
333  // The PHIs are now updated, change everything that refers to BB to use
334  // DestBB and remove BB.
335  BB->replaceAllUsesWith(DestBB);
336  if (DT) {
337    BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
338    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
339    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
340    DT->changeImmediateDominator(DestBB, NewIDom);
341    DT->eraseNode(BB);
342  }
343  if (PFI) {
344    PFI->replaceAllUses(BB, DestBB);
345    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
346  }
347  BB->eraseFromParent();
348  ++NumBlocksElim;
349
350  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
351}
352
353/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
354/// lives in to see if there is a block that we can reuse as a critical edge
355/// from TIBB.
356static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
357  BasicBlock *Dest = DestPHI->getParent();
358
359  /// TIPHIValues - This array is lazily computed to determine the values of
360  /// PHIs in Dest that TI would provide.
361  SmallVector<Value*, 32> TIPHIValues;
362
363  /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
364  unsigned TIBBEntryNo = 0;
365
366  // Check to see if Dest has any blocks that can be used as a split edge for
367  // this terminator.
368  for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
369    BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
370    // To be usable, the pred has to end with an uncond branch to the dest.
371    BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
372    if (!PredBr || !PredBr->isUnconditional())
373      continue;
374    // Must be empty other than the branch and debug info.
375    BasicBlock::iterator I = Pred->begin();
376    while (isa<DbgInfoIntrinsic>(I))
377      I++;
378    if (&*I != PredBr)
379      continue;
380    // Cannot be the entry block; its label does not get emitted.
381    if (Pred == &Dest->getParent()->getEntryBlock())
382      continue;
383
384    // Finally, since we know that Dest has phi nodes in it, we have to make
385    // sure that jumping to Pred will have the same effect as going to Dest in
386    // terms of PHI values.
387    PHINode *PN;
388    unsigned PHINo = 0;
389    unsigned PredEntryNo = pi;
390
391    bool FoundMatch = true;
392    for (BasicBlock::iterator I = Dest->begin();
393         (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
394      if (PHINo == TIPHIValues.size()) {
395        if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
396          TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
397        TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
398      }
399
400      // If the PHI entry doesn't work, we can't use this pred.
401      if (PN->getIncomingBlock(PredEntryNo) != Pred)
402        PredEntryNo = PN->getBasicBlockIndex(Pred);
403
404      if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
405        FoundMatch = false;
406        break;
407      }
408    }
409
410    // If we found a workable predecessor, change TI to branch to Succ.
411    if (FoundMatch)
412      return Pred;
413  }
414  return 0;
415}
416
417
418/// SplitEdgeNicely - Split the critical edge from TI to its specified
419/// successor if it will improve codegen.  We only do this if the successor has
420/// phi nodes (otherwise critical edges are ok).  If there is already another
421/// predecessor of the succ that is empty (and thus has no phi nodes), use it
422/// instead of introducing a new block.
423static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
424                     SmallSet<std::pair<const BasicBlock*,
425                                        const BasicBlock*>, 8> &BackEdges,
426                             Pass *P) {
427  BasicBlock *TIBB = TI->getParent();
428  BasicBlock *Dest = TI->getSuccessor(SuccNum);
429  assert(isa<PHINode>(Dest->begin()) &&
430         "This should only be called if Dest has a PHI!");
431  PHINode *DestPHI = cast<PHINode>(Dest->begin());
432
433  // Do not split edges to EH landing pads.
434  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
435    if (Invoke->getSuccessor(1) == Dest)
436      return;
437
438  // As a hack, never split backedges of loops.  Even though the copy for any
439  // PHIs inserted on the backedge would be dead for exits from the loop, we
440  // assume that the cost of *splitting* the backedge would be too high.
441  if (BackEdges.count(std::make_pair(TIBB, Dest)))
442    return;
443
444  if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
445    ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
446    if (PFI)
447      PFI->splitEdge(TIBB, Dest, ReuseBB);
448    Dest->removePredecessor(TIBB);
449    TI->setSuccessor(SuccNum, ReuseBB);
450    return;
451  }
452
453  SplitCriticalEdge(TI, SuccNum, P, true);
454}
455
456
457/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
458/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
459/// sink it into user blocks to reduce the number of virtual
460/// registers that must be created and coalesced.
461///
462/// Return true if any changes are made.
463///
464static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
465  // If this is a noop copy,
466  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
467  EVT DstVT = TLI.getValueType(CI->getType());
468
469  // This is an fp<->int conversion?
470  if (SrcVT.isInteger() != DstVT.isInteger())
471    return false;
472
473  // If this is an extension, it will be a zero or sign extension, which
474  // isn't a noop.
475  if (SrcVT.bitsLT(DstVT)) return false;
476
477  // If these values will be promoted, find out what they will be promoted
478  // to.  This helps us consider truncates on PPC as noop copies when they
479  // are.
480  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
481    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
482  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
483    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
484
485  // If, after promotion, these are the same types, this is a noop copy.
486  if (SrcVT != DstVT)
487    return false;
488
489  BasicBlock *DefBB = CI->getParent();
490
491  /// InsertedCasts - Only insert a cast in each block once.
492  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
493
494  bool MadeChange = false;
495  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
496       UI != E; ) {
497    Use &TheUse = UI.getUse();
498    Instruction *User = cast<Instruction>(*UI);
499
500    // Figure out which BB this cast is used in.  For PHI's this is the
501    // appropriate predecessor block.
502    BasicBlock *UserBB = User->getParent();
503    if (PHINode *PN = dyn_cast<PHINode>(User)) {
504      UserBB = PN->getIncomingBlock(UI);
505    }
506
507    // Preincrement use iterator so we don't invalidate it.
508    ++UI;
509
510    // If this user is in the same block as the cast, don't change the cast.
511    if (UserBB == DefBB) continue;
512
513    // If we have already inserted a cast into this block, use it.
514    CastInst *&InsertedCast = InsertedCasts[UserBB];
515
516    if (!InsertedCast) {
517      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
518
519      InsertedCast =
520        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
521                         InsertPt);
522      MadeChange = true;
523    }
524
525    // Replace a use of the cast with a use of the new cast.
526    TheUse = InsertedCast;
527    ++NumCastUses;
528  }
529
530  // If we removed all uses, nuke the cast.
531  if (CI->use_empty()) {
532    CI->eraseFromParent();
533    MadeChange = true;
534  }
535
536  return MadeChange;
537}
538
539/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
540/// the number of virtual registers that must be created and coalesced.  This is
541/// a clear win except on targets with multiple condition code registers
542///  (PowerPC), where it might lose; some adjustment may be wanted there.
543///
544/// Return true if any changes are made.
545static bool OptimizeCmpExpression(CmpInst *CI) {
546  BasicBlock *DefBB = CI->getParent();
547
548  /// InsertedCmp - Only insert a cmp in each block once.
549  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
550
551  bool MadeChange = false;
552  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
553       UI != E; ) {
554    Use &TheUse = UI.getUse();
555    Instruction *User = cast<Instruction>(*UI);
556
557    // Preincrement use iterator so we don't invalidate it.
558    ++UI;
559
560    // Don't bother for PHI nodes.
561    if (isa<PHINode>(User))
562      continue;
563
564    // Figure out which BB this cmp is used in.
565    BasicBlock *UserBB = User->getParent();
566
567    // If this user is in the same block as the cmp, don't change the cmp.
568    if (UserBB == DefBB) continue;
569
570    // If we have already inserted a cmp into this block, use it.
571    CmpInst *&InsertedCmp = InsertedCmps[UserBB];
572
573    if (!InsertedCmp) {
574      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
575
576      InsertedCmp =
577        CmpInst::Create(CI->getOpcode(),
578                        CI->getPredicate(),  CI->getOperand(0),
579                        CI->getOperand(1), "", InsertPt);
580      MadeChange = true;
581    }
582
583    // Replace a use of the cmp with a use of the new cmp.
584    TheUse = InsertedCmp;
585    ++NumCmpUses;
586  }
587
588  // If we removed all uses, nuke the cmp.
589  if (CI->use_empty())
590    CI->eraseFromParent();
591
592  return MadeChange;
593}
594
595namespace {
596class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
597protected:
598  void replaceCall(Value *With) {
599    CI->replaceAllUsesWith(With);
600    CI->eraseFromParent();
601  }
602  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
603      if (ConstantInt *SizeCI =
604                             dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
605        return SizeCI->isAllOnesValue();
606    return false;
607  }
608};
609} // end anonymous namespace
610
611bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
612  BasicBlock *BB = CI->getParent();
613
614  // Lower inline assembly if we can.
615  // If we found an inline asm expession, and if the target knows how to
616  // lower it to normal LLVM code, do so now.
617  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
618    if (TLI->ExpandInlineAsm(CI)) {
619      // Avoid invalidating the iterator.
620      CurInstIterator = BB->begin();
621      // Avoid processing instructions out of order, which could cause
622      // reuse before a value is defined.
623      SunkAddrs.clear();
624      return true;
625    }
626    // Sink address computing for memory operands into the block.
627    if (OptimizeInlineAsmInst(CI))
628      return true;
629  }
630
631  // Lower all uses of llvm.objectsize.*
632  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
633  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
634    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
635    const Type *ReturnTy = CI->getType();
636    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
637
638    // Substituting this can cause recursive simplifications, which can
639    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
640    // happens.
641    WeakVH IterHandle(CurInstIterator);
642
643    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT);
644
645    // If the iterator instruction was recursively deleted, start over at the
646    // start of the block.
647    if (IterHandle != CurInstIterator)
648      CurInstIterator = BB->begin();
649    return true;
650  }
651
652  // From here on out we're working with named functions.
653  if (CI->getCalledFunction() == 0) return false;
654
655  // We'll need TargetData from here on out.
656  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
657  if (!TD) return false;
658
659  // Lower all default uses of _chk calls.  This is very similar
660  // to what InstCombineCalls does, but here we are only lowering calls
661  // that have the default "don't know" as the objectsize.  Anything else
662  // should be left alone.
663  CodeGenPrepareFortifiedLibCalls Simplifier;
664  return Simplifier.fold(CI, TD);
665}
666
667//===----------------------------------------------------------------------===//
668// Memory Optimization
669//===----------------------------------------------------------------------===//
670
671/// IsNonLocalValue - Return true if the specified values are defined in a
672/// different basic block than BB.
673static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
674  if (Instruction *I = dyn_cast<Instruction>(V))
675    return I->getParent() != BB;
676  return false;
677}
678
679/// OptimizeMemoryInst - Load and Store Instructions often have
680/// addressing modes that can do significant amounts of computation.  As such,
681/// instruction selection will try to get the load or store to do as much
682/// computation as possible for the program.  The problem is that isel can only
683/// see within a single block.  As such, we sink as much legal addressing mode
684/// stuff into the block as possible.
685///
686/// This method is used to optimize both load/store and inline asms with memory
687/// operands.
688bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
689                                        const Type *AccessTy) {
690  Value *Repl = Addr;
691
692  // Try to collapse single-value PHI nodes.  This is necessary to undo
693  // unprofitable PRE transformations.
694  SmallVector<Value*, 8> worklist;
695  SmallPtrSet<Value*, 16> Visited;
696  worklist.push_back(Addr);
697
698  // Use a worklist to iteratively look through PHI nodes, and ensure that
699  // the addressing mode obtained from the non-PHI roots of the graph
700  // are equivalent.
701  Value *Consensus = 0;
702  unsigned NumUses = 0;
703  SmallVector<Instruction*, 16> AddrModeInsts;
704  ExtAddrMode AddrMode;
705  while (!worklist.empty()) {
706    Value *V = worklist.back();
707    worklist.pop_back();
708
709    // Break use-def graph loops.
710    if (Visited.count(V)) {
711      Consensus = 0;
712      break;
713    }
714
715    Visited.insert(V);
716
717    // For a PHI node, push all of its incoming values.
718    if (PHINode *P = dyn_cast<PHINode>(V)) {
719      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
720        worklist.push_back(P->getIncomingValue(i));
721      continue;
722    }
723
724    // For non-PHIs, determine the addressing mode being computed.
725    SmallVector<Instruction*, 16> NewAddrModeInsts;
726    ExtAddrMode NewAddrMode =
727      AddressingModeMatcher::Match(V, AccessTy,MemoryInst,
728                                   NewAddrModeInsts, *TLI);
729
730    // Ensure that the obtained addressing mode is equivalent to that obtained
731    // for all other roots of the PHI traversal.  Also, when choosing one
732    // such root as representative, select the one with the most uses in order
733    // to keep the cost modeling heuristics in AddressingModeMatcher applicable.
734    if (!Consensus || NewAddrMode == AddrMode) {
735      if (V->getNumUses() > NumUses) {
736        Consensus = V;
737        NumUses = V->getNumUses();
738        AddrMode = NewAddrMode;
739        AddrModeInsts = NewAddrModeInsts;
740      }
741      continue;
742    }
743
744    Consensus = 0;
745    break;
746  }
747
748  // If the addressing mode couldn't be determined, or if multiple different
749  // ones were determined, bail out now.
750  if (!Consensus) return false;
751
752  // Check to see if any of the instructions supersumed by this addr mode are
753  // non-local to I's BB.
754  bool AnyNonLocal = false;
755  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
756    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
757      AnyNonLocal = true;
758      break;
759    }
760  }
761
762  // If all the instructions matched are already in this BB, don't do anything.
763  if (!AnyNonLocal) {
764    DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
765    return false;
766  }
767
768  // Insert this computation right after this user.  Since our caller is
769  // scanning from the top of the BB to the bottom, reuse of the expr are
770  // guaranteed to happen later.
771  BasicBlock::iterator InsertPt = MemoryInst;
772
773  // Now that we determined the addressing expression we want to use and know
774  // that we have to sink it into this block.  Check to see if we have already
775  // done this for some other load/store instr in this block.  If so, reuse the
776  // computation.
777  Value *&SunkAddr = SunkAddrs[Addr];
778  if (SunkAddr) {
779    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
780                 << *MemoryInst);
781    if (SunkAddr->getType() != Addr->getType())
782      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
783  } else {
784    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
785                 << *MemoryInst);
786    const Type *IntPtrTy =
787          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
788
789    Value *Result = 0;
790
791    // Start with the base register. Do this first so that subsequent address
792    // matching finds it last, which will prevent it from trying to match it
793    // as the scaled value in case it happens to be a mul. That would be
794    // problematic if we've sunk a different mul for the scale, because then
795    // we'd end up sinking both muls.
796    if (AddrMode.BaseReg) {
797      Value *V = AddrMode.BaseReg;
798      if (V->getType()->isPointerTy())
799        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
800      if (V->getType() != IntPtrTy)
801        V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
802                                        "sunkaddr", InsertPt);
803      Result = V;
804    }
805
806    // Add the scale value.
807    if (AddrMode.Scale) {
808      Value *V = AddrMode.ScaledReg;
809      if (V->getType() == IntPtrTy) {
810        // done.
811      } else if (V->getType()->isPointerTy()) {
812        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
813      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
814                 cast<IntegerType>(V->getType())->getBitWidth()) {
815        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
816      } else {
817        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
818      }
819      if (AddrMode.Scale != 1)
820        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
821                                                                AddrMode.Scale),
822                                      "sunkaddr", InsertPt);
823      if (Result)
824        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
825      else
826        Result = V;
827    }
828
829    // Add in the BaseGV if present.
830    if (AddrMode.BaseGV) {
831      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
832                                  InsertPt);
833      if (Result)
834        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
835      else
836        Result = V;
837    }
838
839    // Add in the Base Offset if present.
840    if (AddrMode.BaseOffs) {
841      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
842      if (Result)
843        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
844      else
845        Result = V;
846    }
847
848    if (Result == 0)
849      SunkAddr = Constant::getNullValue(Addr->getType());
850    else
851      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
852  }
853
854  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
855
856  if (Repl->use_empty()) {
857    RecursivelyDeleteTriviallyDeadInstructions(Repl);
858    // This address is now available for reassignment, so erase the table entry;
859    // we don't want to match some completely different instruction.
860    SunkAddrs[Addr] = 0;
861  }
862  ++NumMemoryInsts;
863  return true;
864}
865
866/// OptimizeInlineAsmInst - If there are any memory operands, use
867/// OptimizeMemoryInst to sink their address computing into the block when
868/// possible / profitable.
869bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
870  bool MadeChange = false;
871
872  TargetLowering::AsmOperandInfoVector
873    TargetConstraints = TLI->ParseConstraints(CS);
874  unsigned ArgNo = 0;
875  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
876    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
877
878    // Compute the constraint code and ConstraintType to use.
879    TLI->ComputeConstraintToUse(OpInfo, SDValue());
880
881    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
882        OpInfo.isIndirect) {
883      Value *OpVal = CS->getArgOperand(ArgNo++);
884      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
885    } else if (OpInfo.Type == InlineAsm::isInput)
886      ArgNo++;
887  }
888
889  return MadeChange;
890}
891
892/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
893/// basic block as the load, unless conditions are unfavorable. This allows
894/// SelectionDAG to fold the extend into the load.
895///
896bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
897  // Look for a load being extended.
898  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
899  if (!LI) return false;
900
901  // If they're already in the same block, there's nothing to do.
902  if (LI->getParent() == I->getParent())
903    return false;
904
905  // If the load has other users and the truncate is not free, this probably
906  // isn't worthwhile.
907  if (!LI->hasOneUse() &&
908      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
909              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
910      !TLI->isTruncateFree(I->getType(), LI->getType()))
911    return false;
912
913  // Check whether the target supports casts folded into loads.
914  unsigned LType;
915  if (isa<ZExtInst>(I))
916    LType = ISD::ZEXTLOAD;
917  else {
918    assert(isa<SExtInst>(I) && "Unexpected ext type!");
919    LType = ISD::SEXTLOAD;
920  }
921  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
922    return false;
923
924  // Move the extend into the same block as the load, so that SelectionDAG
925  // can fold it.
926  I->removeFromParent();
927  I->insertAfter(LI);
928  ++NumExtsMoved;
929  return true;
930}
931
932bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
933  BasicBlock *DefBB = I->getParent();
934
935  // If the result of a {s|z}ext and its source are both live out, rewrite all
936  // other uses of the source with result of extension.
937  Value *Src = I->getOperand(0);
938  if (Src->hasOneUse())
939    return false;
940
941  // Only do this xform if truncating is free.
942  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
943    return false;
944
945  // Only safe to perform the optimization if the source is also defined in
946  // this block.
947  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
948    return false;
949
950  bool DefIsLiveOut = false;
951  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
952       UI != E; ++UI) {
953    Instruction *User = cast<Instruction>(*UI);
954
955    // Figure out which BB this ext is used in.
956    BasicBlock *UserBB = User->getParent();
957    if (UserBB == DefBB) continue;
958    DefIsLiveOut = true;
959    break;
960  }
961  if (!DefIsLiveOut)
962    return false;
963
964  // Make sure non of the uses are PHI nodes.
965  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
966       UI != E; ++UI) {
967    Instruction *User = cast<Instruction>(*UI);
968    BasicBlock *UserBB = User->getParent();
969    if (UserBB == DefBB) continue;
970    // Be conservative. We don't want this xform to end up introducing
971    // reloads just before load / store instructions.
972    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
973      return false;
974  }
975
976  // InsertedTruncs - Only insert one trunc in each block once.
977  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
978
979  bool MadeChange = false;
980  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
981       UI != E; ++UI) {
982    Use &TheUse = UI.getUse();
983    Instruction *User = cast<Instruction>(*UI);
984
985    // Figure out which BB this ext is used in.
986    BasicBlock *UserBB = User->getParent();
987    if (UserBB == DefBB) continue;
988
989    // Both src and def are live in this block. Rewrite the use.
990    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
991
992    if (!InsertedTrunc) {
993      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
994
995      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
996    }
997
998    // Replace a use of the {s|z}ext source with a use of the result.
999    TheUse = InsertedTrunc;
1000    ++NumExtUses;
1001    MadeChange = true;
1002  }
1003
1004  return MadeChange;
1005}
1006
1007bool CodeGenPrepare::OptimizeInst(Instruction *I) {
1008  if (PHINode *P = dyn_cast<PHINode>(I)) {
1009    // It is possible for very late stage optimizations (such as SimplifyCFG)
1010    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
1011    // trivial PHI, go ahead and zap it here.
1012    if (Value *V = SimplifyInstruction(P)) {
1013      P->replaceAllUsesWith(V);
1014      P->eraseFromParent();
1015      ++NumPHIsElim;
1016      return true;
1017    }
1018    return false;
1019  }
1020
1021  if (CastInst *CI = dyn_cast<CastInst>(I)) {
1022    // If the source of the cast is a constant, then this should have
1023    // already been constant folded.  The only reason NOT to constant fold
1024    // it is if something (e.g. LSR) was careful to place the constant
1025    // evaluation in a block other than then one that uses it (e.g. to hoist
1026    // the address of globals out of a loop).  If this is the case, we don't
1027    // want to forward-subst the cast.
1028    if (isa<Constant>(CI->getOperand(0)))
1029      return false;
1030
1031    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
1032      return true;
1033
1034    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
1035      bool MadeChange = MoveExtToFormExtLoad(I);
1036      return MadeChange | OptimizeExtUses(I);
1037    }
1038    return false;
1039  }
1040
1041  if (CmpInst *CI = dyn_cast<CmpInst>(I))
1042    return OptimizeCmpExpression(CI);
1043
1044  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1045    if (TLI)
1046      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
1047    return false;
1048  }
1049
1050  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1051    if (TLI)
1052      return OptimizeMemoryInst(I, SI->getOperand(1),
1053                                SI->getOperand(0)->getType());
1054    return false;
1055  }
1056
1057  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
1058    if (GEPI->hasAllZeroIndices()) {
1059      /// The GEP operand must be a pointer, so must its result -> BitCast
1060      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
1061                                        GEPI->getName(), GEPI);
1062      GEPI->replaceAllUsesWith(NC);
1063      GEPI->eraseFromParent();
1064      ++NumGEPsElim;
1065      OptimizeInst(NC);
1066      return true;
1067    }
1068    return false;
1069  }
1070
1071  if (CallInst *CI = dyn_cast<CallInst>(I))
1072    return OptimizeCallInst(CI);
1073
1074  return false;
1075}
1076
1077// In this pass we look for GEP and cast instructions that are used
1078// across basic blocks and rewrite them to improve basic-block-at-a-time
1079// selection.
1080bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
1081  bool MadeChange = false;
1082
1083  // Split all critical edges where the dest block has a PHI.
1084  if (CriticalEdgeSplit) {
1085    TerminatorInst *BBTI = BB.getTerminator();
1086    if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
1087      for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
1088        BasicBlock *SuccBB = BBTI->getSuccessor(i);
1089        if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
1090          SplitEdgeNicely(BBTI, i, BackEdges, this);
1091      }
1092    }
1093  }
1094
1095  SunkAddrs.clear();
1096
1097  CurInstIterator = BB.begin();
1098  for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
1099    MadeChange |= OptimizeInst(CurInstIterator++);
1100
1101  return MadeChange;
1102}
1103