1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
12// basic-block-at-a-time approach. It should eventually be removed.
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "codegenprepare"
17#include "llvm/Transforms/Scalar.h"
18#include "llvm/Constants.h"
19#include "llvm/DerivedTypes.h"
20#include "llvm/Function.h"
21#include "llvm/IRBuilder.h"
22#include "llvm/InlineAsm.h"
23#include "llvm/Instructions.h"
24#include "llvm/IntrinsicInst.h"
25#include "llvm/Pass.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/SmallSet.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/Analysis/Dominators.h"
30#include "llvm/Analysis/InstructionSimplify.h"
31#include "llvm/Analysis/ProfileInfo.h"
32#include "llvm/Assembly/Writer.h"
33#include "llvm/Support/CallSite.h"
34#include "llvm/Support/CommandLine.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/GetElementPtrTypeIterator.h"
37#include "llvm/Support/PatternMatch.h"
38#include "llvm/Support/ValueHandle.h"
39#include "llvm/Support/raw_ostream.h"
40#include "llvm/Target/TargetData.h"
41#include "llvm/Target/TargetLibraryInfo.h"
42#include "llvm/Target/TargetLowering.h"
43#include "llvm/Transforms/Utils/AddrModeMatcher.h"
44#include "llvm/Transforms/Utils/BasicBlockUtils.h"
45#include "llvm/Transforms/Utils/BuildLibCalls.h"
46#include "llvm/Transforms/Utils/BypassSlowDivision.h"
47#include "llvm/Transforms/Utils/Local.h"
48using namespace llvm;
49using namespace llvm::PatternMatch;
50
51STATISTIC(NumBlocksElim, "Number of blocks eliminated");
52STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
53STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
54STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
55                      "sunken Cmps");
56STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
57                       "of sunken Casts");
58STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
59                          "computations were sunk");
60STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
61STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
62STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
63STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
64STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
65
66static cl::opt<bool> DisableBranchOpts(
67  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
68  cl::desc("Disable branch optimizations in CodeGenPrepare"));
69
70static cl::opt<bool> DisableSelectToBranch(
71  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
72  cl::desc("Disable select to branch conversion."));
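
// Note (illustrative, not part of the original source): because these are
// cl::opt flags they can be toggled from any tool that runs this pass, e.g.
// "llc -disable-cgp-branch-opts foo.ll" or
// "llc -disable-cgp-select2branch foo.ll".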
73
74namespace {
75  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
77    /// transformation profitability.
78    const TargetLowering *TLI;
79    const TargetLibraryInfo *TLInfo;
80    DominatorTree *DT;
81    ProfileInfo *PFI;
82
83    /// CurInstIterator - As we scan instructions optimizing them, this is the
84    /// next instruction to optimize.  Xforms that can invalidate this should
85    /// update it.
86    BasicBlock::iterator CurInstIterator;
87
88    /// Keeps track of non-local addresses that have been sunk into a block.
89    /// This allows us to avoid inserting duplicate code for blocks with
90    /// multiple load/stores of the same address.
91    DenseMap<Value*, Value*> SunkAddrs;
92
    /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
    /// need to be updated.
95    bool ModifiedDT;
96
97    /// OptSize - True if optimizing for size.
98    bool OptSize;
99
100  public:
101    static char ID; // Pass identification, replacement for typeid
102    explicit CodeGenPrepare(const TargetLowering *tli = 0)
103      : FunctionPass(ID), TLI(tli) {
104        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
105      }
106    bool runOnFunction(Function &F);
107
108    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
109      AU.addPreserved<DominatorTree>();
110      AU.addPreserved<ProfileInfo>();
111      AU.addRequired<TargetLibraryInfo>();
112    }
113
114  private:
115    bool EliminateFallThrough(Function &F);
116    bool EliminateMostlyEmptyBlocks(Function &F);
117    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
118    void EliminateMostlyEmptyBlock(BasicBlock *BB);
119    bool OptimizeBlock(BasicBlock &BB);
120    bool OptimizeInst(Instruction *I);
121    bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
122    bool OptimizeInlineAsmInst(CallInst *CS);
123    bool OptimizeCallInst(CallInst *CI);
124    bool MoveExtToFormExtLoad(Instruction *I);
125    bool OptimizeExtUses(Instruction *I);
126    bool OptimizeSelectInst(SelectInst *SI);
127    bool DupRetToEnableTailCallOpts(ReturnInst *RI);
128    bool PlaceDbgValues(Function &F);
129  };
130}
131
132char CodeGenPrepare::ID = 0;
133INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
134                "Optimize for code generation", false, false)
135INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
136INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
137                "Optimize for code generation", false, false)
138
139FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
140  return new CodeGenPrepare(TLI);
141}
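
// Illustrative sketch only (not part of the original file): one way to
// schedule this pass manually with the legacy pass manager, assuming `TM` is
// the TargetMachine being compiled for and `M` is the Module:
//
//   PassManager PM;
//   PM.add(new TargetLibraryInfo(Triple(M.getTargetTriple())));
//   PM.add(createCodeGenPreparePass(TM.getTargetLowering()));
//   PM.run(M);
//
// In practice the target's pass configuration adds this pass automatically,
// just before instruction selection.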
142
143bool CodeGenPrepare::runOnFunction(Function &F) {
144  bool EverMadeChange = false;
145
146  ModifiedDT = false;
147  TLInfo = &getAnalysis<TargetLibraryInfo>();
148  DT = getAnalysisIfAvailable<DominatorTree>();
149  PFI = getAnalysisIfAvailable<ProfileInfo>();
150  OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
151
152  /// This optimization identifies DIV instructions that can be
153  /// profitably bypassed and carried out with a shorter, faster divide.
154  if (TLI && TLI->isSlowDivBypassed()) {
    const DenseMap<Type*, Type*> &BypassTypeMap = TLI->getBypassSlowDivTypes();

    for (Function::iterator I = F.begin(); I != F.end(); ++I)
      EverMadeChange |= bypassSlowDivision(F, I, BypassTypeMap);
162  }
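
  // For example (illustrative only): with an i64 -> i32 entry in the bypass
  // map, bypassSlowDivision turns
  //
  //   %res = udiv i64 %a, %b
  //
  // into a run-time check on the high bits of %a and %b that selects between
  // a cheap 32-bit divide and the original 64-bit divide.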
163
164  // Eliminate blocks that contain only PHI nodes and an
165  // unconditional branch.
166  EverMadeChange |= EliminateMostlyEmptyBlocks(F);
167
  // If llvm.dbg.value is far away from the value, then isel may not be able
  // to handle it properly.  isel will drop llvm.dbg.value if it cannot find a
  // node corresponding to the value.
171  EverMadeChange |= PlaceDbgValues(F);
172
173  bool MadeChange = true;
174  while (MadeChange) {
175    MadeChange = false;
176    for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
177      BasicBlock *BB = I++;
178      MadeChange |= OptimizeBlock(*BB);
179    }
180    EverMadeChange |= MadeChange;
181  }
182
183  SunkAddrs.clear();
184
185  if (!DisableBranchOpts) {
186    MadeChange = false;
187    SmallPtrSet<BasicBlock*, 8> WorkList;
188    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
189      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
190      MadeChange |= ConstantFoldTerminator(BB, true);
191      if (!MadeChange) continue;
192
193      for (SmallVectorImpl<BasicBlock*>::iterator
194             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
195        if (pred_begin(*II) == pred_end(*II))
196          WorkList.insert(*II);
197    }
198
199    for (SmallPtrSet<BasicBlock*, 8>::iterator
200           I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
201      DeleteDeadBlock(*I);
202
203    // Merge pairs of basic blocks with unconditional branches, connected by
204    // a single edge.
205    if (EverMadeChange || MadeChange)
206      MadeChange |= EliminateFallThrough(F);
207
208    if (MadeChange)
209      ModifiedDT = true;
210    EverMadeChange |= MadeChange;
211  }
212
213  if (ModifiedDT && DT)
214    DT->DT->recalculate(F);
215
216  return EverMadeChange;
217}
218
219/// EliminateFallThrough - Merge basic blocks which are connected
220/// by a single edge, where one of the basic blocks has a single successor
221/// pointing to the other basic block, which has a single predecessor.
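///
/// For example (sketch only, block names invented):
///
///   bb0:
///     ...
///     br label %bb1     ; unconditional, and %bb1 has no other predecessors
///   bb1:
///     ...
///
/// is merged into a single block containing both instruction sequences.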
222bool CodeGenPrepare::EliminateFallThrough(Function &F) {
223  bool Changed = false;
224  // Scan all of the blocks in the function, except for the entry block.
225  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
226    BasicBlock *BB = I++;
227    // If the destination block has a single pred, then this is a trivial
228    // edge, just collapse it.
229    BasicBlock *SinglePred = BB->getSinglePredecessor();
230
231    if (!SinglePred || SinglePred == BB) continue;
232
233    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
234    if (Term && !Term->isConditional()) {
235      Changed = true;
236      DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
237      // Remember if SinglePred was the entry block of the function.
238      // If so, we will need to move BB back to the entry position.
239      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
240      MergeBasicBlockIntoOnlyPred(BB, this);
241
242      if (isEntry && BB != &BB->getParent()->getEntryBlock())
243        BB->moveBefore(&BB->getParent()->getEntryBlock());
244
245      // We have erased a block. Update the iterator.
246      I = BB;
247    }
248  }
249  return Changed;
250}
251
252/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
253/// debug info directives, and an unconditional branch.  Passes before isel
254/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
255/// isel.  Start by eliminating these blocks so we can split them the way we
256/// want them.
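///
/// For example (sketch only), a block of the form
///
///   bb:                                ; preds = %a, %b
///     %p = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
///
/// is removed, with the PHI nodes in %dest rewritten to take %x and %y
/// directly from %a and %b.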
257bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
258  bool MadeChange = false;
259  // Note that this intentionally skips the entry block.
260  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
261    BasicBlock *BB = I++;
262
263    // If this block doesn't end with an uncond branch, ignore it.
264    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
265    if (!BI || !BI->isUnconditional())
266      continue;
267
268    // If the instruction before the branch (skipping debug info) isn't a phi
269    // node, then other stuff is happening here.
270    BasicBlock::iterator BBI = BI;
271    if (BBI != BB->begin()) {
272      --BBI;
273      while (isa<DbgInfoIntrinsic>(BBI)) {
274        if (BBI == BB->begin())
275          break;
276        --BBI;
277      }
278      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
279        continue;
280    }
281
282    // Do not break infinite loops.
283    BasicBlock *DestBB = BI->getSuccessor(0);
284    if (DestBB == BB)
285      continue;
286
287    if (!CanMergeBlocks(BB, DestBB))
288      continue;
289
290    EliminateMostlyEmptyBlock(BB);
291    MadeChange = true;
292  }
293  return MadeChange;
294}
295
296/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
297/// single uncond branch between them, and BB contains no other non-phi
298/// instructions.
299bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
300                                    const BasicBlock *DestBB) const {
301  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
303  // don't mess around with them.
304  BasicBlock::const_iterator BBI = BB->begin();
305  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
306    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
307         UI != E; ++UI) {
308      const Instruction *User = cast<Instruction>(*UI);
309      if (User->getParent() != DestBB || !isa<PHINode>(User))
310        return false;
      // If User is inside DestBB and it is a PHINode, then check its incoming
      // value.  If the incoming value is not from BB, then this is a complex
      // condition (e.g. preheaders) we want to avoid here.
314      if (User->getParent() == DestBB) {
315        if (const PHINode *UPN = dyn_cast<PHINode>(User))
316          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
317            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
318            if (Insn && Insn->getParent() == BB &&
319                Insn->getParent() != UPN->getIncomingBlock(I))
320              return false;
321          }
322      }
323    }
324  }
325
326  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
327  // and DestBB may have conflicting incoming values for the block.  If so, we
328  // can't merge the block.
329  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
330  if (!DestBBPN) return true;  // no conflict.
331
332  // Collect the preds of BB.
333  SmallPtrSet<const BasicBlock*, 16> BBPreds;
334  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
335    // It is faster to get preds from a PHI than with pred_iterator.
336    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
337      BBPreds.insert(BBPN->getIncomingBlock(i));
338  } else {
339    BBPreds.insert(pred_begin(BB), pred_end(BB));
340  }
341
342  // Walk the preds of DestBB.
343  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
344    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
345    if (BBPreds.count(Pred)) {   // Common predecessor?
346      BBI = DestBB->begin();
347      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
348        const Value *V1 = PN->getIncomingValueForBlock(Pred);
349        const Value *V2 = PN->getIncomingValueForBlock(BB);
350
351        // If V2 is a phi node in BB, look up what the mapped value will be.
352        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
353          if (V2PN->getParent() == BB)
354            V2 = V2PN->getIncomingValueForBlock(Pred);
355
356        // If there is a conflict, bail out.
357        if (V1 != V2) return false;
358      }
359    }
360  }
361
362  return true;
363}
364
365
/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
368void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
369  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
370  BasicBlock *DestBB = BI->getSuccessor(0);
371
372  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
373
374  // If the destination block has a single pred, then this is a trivial edge,
375  // just collapse it.
376  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
377    if (SinglePred != DestBB) {
378      // Remember if SinglePred was the entry block of the function.  If so, we
379      // will need to move BB back to the entry position.
380      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
381      MergeBasicBlockIntoOnlyPred(DestBB, this);
382
383      if (isEntry && BB != &BB->getParent()->getEntryBlock())
384        BB->moveBefore(&BB->getParent()->getEntryBlock());
385
386      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
387      return;
388    }
389  }
390
391  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
392  // to handle the new incoming edges it is about to have.
393  PHINode *PN;
394  for (BasicBlock::iterator BBI = DestBB->begin();
395       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
396    // Remove the incoming value for BB, and remember it.
397    Value *InVal = PN->removeIncomingValue(BB, false);
398
399    // Two options: either the InVal is a phi node defined in BB or it is some
400    // value that dominates BB.
401    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
402    if (InValPhi && InValPhi->getParent() == BB) {
403      // Add all of the input values of the input PHI as inputs of this phi.
404      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
405        PN->addIncoming(InValPhi->getIncomingValue(i),
406                        InValPhi->getIncomingBlock(i));
407    } else {
408      // Otherwise, add one instance of the dominating value for each edge that
409      // we will be adding.
410      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
411        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
412          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
413      } else {
414        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
415          PN->addIncoming(InVal, *PI);
416      }
417    }
418  }
419
420  // The PHIs are now updated, change everything that refers to BB to use
421  // DestBB and remove BB.
422  BB->replaceAllUsesWith(DestBB);
423  if (DT && !ModifiedDT) {
424    BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
425    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
426    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
427    DT->changeImmediateDominator(DestBB, NewIDom);
428    DT->eraseNode(BB);
429  }
430  if (PFI) {
431    PFI->replaceAllUses(BB, DestBB);
432    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
433  }
434  BB->eraseFromParent();
435  ++NumBlocksElim;
436
437  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
438}
439
440/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
441/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
442/// sink it into user blocks to reduce the number of virtual
443/// registers that must be created and coalesced.
444///
445/// Return true if any changes are made.
446///
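/// For example (illustrative only), given
///
///   bb0:
///     %c = bitcast i8* %p to i32*
///     br label %bb1
///   bb1:
///     %v = load i32* %c
///
/// a copy of the bitcast is emitted in %bb1 next to the load, so instruction
/// selection sees the whole address computation inside one block.
///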
447static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // Determine whether this cast is a noop copy by comparing the source and
  // destination value types.
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());
451
  // If this is an fp<->int conversion, it is not a noop copy.
453  if (SrcVT.isInteger() != DstVT.isInteger())
454    return false;
455
456  // If this is an extension, it will be a zero or sign extension, which
457  // isn't a noop.
458  if (SrcVT.bitsLT(DstVT)) return false;
459
460  // If these values will be promoted, find out what they will be promoted
461  // to.  This helps us consider truncates on PPC as noop copies when they
462  // are.
463  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
464      TargetLowering::TypePromoteInteger)
465    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
466  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
467      TargetLowering::TypePromoteInteger)
468    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
469
470  // If, after promotion, these are the same types, this is a noop copy.
471  if (SrcVT != DstVT)
472    return false;
473
474  BasicBlock *DefBB = CI->getParent();
475
476  /// InsertedCasts - Only insert a cast in each block once.
477  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
478
479  bool MadeChange = false;
480  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
481       UI != E; ) {
482    Use &TheUse = UI.getUse();
483    Instruction *User = cast<Instruction>(*UI);
484
485    // Figure out which BB this cast is used in.  For PHI's this is the
486    // appropriate predecessor block.
487    BasicBlock *UserBB = User->getParent();
488    if (PHINode *PN = dyn_cast<PHINode>(User)) {
489      UserBB = PN->getIncomingBlock(UI);
490    }
491
492    // Preincrement use iterator so we don't invalidate it.
493    ++UI;
494
495    // If this user is in the same block as the cast, don't change the cast.
496    if (UserBB == DefBB) continue;
497
498    // If we have already inserted a cast into this block, use it.
499    CastInst *&InsertedCast = InsertedCasts[UserBB];
500
501    if (!InsertedCast) {
502      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
503      InsertedCast =
504        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
505                         InsertPt);
506      MadeChange = true;
507    }
508
509    // Replace a use of the cast with a use of the new cast.
510    TheUse = InsertedCast;
511    ++NumCastUses;
512  }
513
514  // If we removed all uses, nuke the cast.
515  if (CI->use_empty()) {
516    CI->eraseFromParent();
517    MadeChange = true;
518  }
519
520  return MadeChange;
521}
522
523/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
524/// the number of virtual registers that must be created and coalesced.  This is
525/// a clear win except on targets with multiple condition code registers
526///  (PowerPC), where it might lose; some adjustment may be wanted there.
527///
528/// Return true if any changes are made.
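///
/// Illustrative sketch: an icmp defined in one block but used (outside of
/// PHI nodes) in other blocks gets a duplicate icmp emitted in each user
/// block, so each block can form its own compare-and-branch during selection.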
529static bool OptimizeCmpExpression(CmpInst *CI) {
530  BasicBlock *DefBB = CI->getParent();
531
532  /// InsertedCmp - Only insert a cmp in each block once.
533  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
534
535  bool MadeChange = false;
536  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
537       UI != E; ) {
538    Use &TheUse = UI.getUse();
539    Instruction *User = cast<Instruction>(*UI);
540
541    // Preincrement use iterator so we don't invalidate it.
542    ++UI;
543
544    // Don't bother for PHI nodes.
545    if (isa<PHINode>(User))
546      continue;
547
548    // Figure out which BB this cmp is used in.
549    BasicBlock *UserBB = User->getParent();
550
551    // If this user is in the same block as the cmp, don't change the cmp.
552    if (UserBB == DefBB) continue;
553
554    // If we have already inserted a cmp into this block, use it.
555    CmpInst *&InsertedCmp = InsertedCmps[UserBB];
556
557    if (!InsertedCmp) {
558      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
559      InsertedCmp =
560        CmpInst::Create(CI->getOpcode(),
561                        CI->getPredicate(),  CI->getOperand(0),
562                        CI->getOperand(1), "", InsertPt);
563      MadeChange = true;
564    }
565
566    // Replace a use of the cmp with a use of the new cmp.
567    TheUse = InsertedCmp;
568    ++NumCmpUses;
569  }
570
571  // If we removed all uses, nuke the cmp.
572  if (CI->use_empty())
573    CI->eraseFromParent();
574
575  return MadeChange;
576}
577
578namespace {
579class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
580protected:
581  void replaceCall(Value *With) {
582    CI->replaceAllUsesWith(With);
583    CI->eraseFromParent();
584  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
          dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
591};
592} // end anonymous namespace
593
594bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
595  BasicBlock *BB = CI->getParent();
596
597  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
599  // lower it to normal LLVM code, do so now.
600  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
601    if (TLI->ExpandInlineAsm(CI)) {
602      // Avoid invalidating the iterator.
603      CurInstIterator = BB->begin();
604      // Avoid processing instructions out of order, which could cause
605      // reuse before a value is defined.
606      SunkAddrs.clear();
607      return true;
608    }
609    // Sink address computing for memory operands into the block.
610    if (OptimizeInlineAsmInst(CI))
611      return true;
612  }
613
614  // Lower all uses of llvm.objectsize.*
615  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
616  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
617    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
618    Type *ReturnTy = CI->getType();
619    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
620
621    // Substituting this can cause recursive simplifications, which can
622    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
623    // happens.
624    WeakVH IterHandle(CurInstIterator);
625
626    replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
627                                  TLInfo, ModifiedDT ? 0 : DT);
628
629    // If the iterator instruction was recursively deleted, start over at the
630    // start of the block.
631    if (IterHandle != CurInstIterator) {
632      CurInstIterator = BB->begin();
633      SunkAddrs.clear();
634    }
635    return true;
636  }
637
638  if (II && TLI) {
639    SmallVector<Value*, 2> PtrOps;
640    Type *AccessTy;
641    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
642      while (!PtrOps.empty())
643        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
644          return true;
645  }
646
647  // From here on out we're working with named functions.
648  if (CI->getCalledFunction() == 0) return false;
649
650  // We'll need TargetData from here on out.
651  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
652  if (!TD) return false;
653
654  // Lower all default uses of _chk calls.  This is very similar
655  // to what InstCombineCalls does, but here we are only lowering calls
656  // that have the default "don't know" as the objectsize.  Anything else
657  // should be left alone.
658  CodeGenPrepareFortifiedLibCalls Simplifier;
659  return Simplifier.fold(CI, TD, TLInfo);
660}
661
662/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
663/// instructions to the predecessor to enable tail call optimizations. The
664/// case it is currently looking for is:
665/// bb0:
666///   %tmp0 = tail call i32 @f0()
667///   br label %return
668/// bb1:
669///   %tmp1 = tail call i32 @f1()
670///   br label %return
671/// bb2:
672///   %tmp2 = tail call i32 @f2()
673///   br label %return
674/// return:
675///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
676///   ret i32 %retval
677///
678/// =>
679///
680/// bb0:
681///   %tmp0 = tail call i32 @f0()
682///   ret i32 %tmp0
683/// bb1:
684///   %tmp1 = tail call i32 @f1()
685///   ret i32 %tmp1
686/// bb2:
687///   %tmp2 = tail call i32 @f2()
688///   ret i32 %tmp2
689///
690bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
691  if (!TLI)
692    return false;
693
694  PHINode *PN = 0;
695  BitCastInst *BCI = 0;
696  Value *V = RI->getReturnValue();
697  if (V) {
698    BCI = dyn_cast<BitCastInst>(V);
699    if (BCI)
700      V = BCI->getOperand(0);
701
702    PN = dyn_cast<PHINode>(V);
703    if (!PN)
704      return false;
705  }
706
707  BasicBlock *BB = RI->getParent();
708  if (PN && PN->getParent() != BB)
709    return false;
710
711  // It's not safe to eliminate the sign / zero extension of the return value.
712  // See llvm::isInTailCallPosition().
713  const Function *F = BB->getParent();
714  Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
715  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
716    return false;
717
718  // Make sure there are no instructions between the PHI and return, or that the
719  // return is the first instruction in the block.
720  if (PN) {
721    BasicBlock::iterator BI = BB->begin();
722    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
723    if (&*BI == BCI)
724      // Also skip over the bitcast.
725      ++BI;
726    if (&*BI != RI)
727      return false;
728  } else {
729    BasicBlock::iterator BI = BB->begin();
730    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
731    if (&*BI != RI)
732      return false;
733  }
734
735  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
736  /// call.
737  SmallVector<CallInst*, 4> TailCalls;
738  if (PN) {
739    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
740      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
741      // Make sure the phi value is indeed produced by the tail call.
742      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
743          TLI->mayBeEmittedAsTailCall(CI))
744        TailCalls.push_back(CI);
745    }
746  } else {
747    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
748    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
749      if (!VisitedBBs.insert(*PI))
750        continue;
751
752      BasicBlock::InstListType &InstList = (*PI)->getInstList();
753      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
754      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
755      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
756      if (RI == RE)
757        continue;
758
759      CallInst *CI = dyn_cast<CallInst>(&*RI);
760      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
761        TailCalls.push_back(CI);
762    }
763  }
764
765  bool Changed = false;
766  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
767    CallInst *CI = TailCalls[i];
768    CallSite CS(CI);
769
770    // Conservatively require the attributes of the call to match those of the
771    // return. Ignore noalias because it doesn't affect the call sequence.
772    Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
773    if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
774      continue;
775
776    // Make sure the call instruction is followed by an unconditional branch to
777    // the return block.
778    BasicBlock *CallBB = CI->getParent();
779    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
780    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
781      continue;
782
783    // Duplicate the return into CallBB.
784    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
785    ModifiedDT = Changed = true;
786    ++NumRetsDup;
787  }
788
789  // If we eliminated all predecessors of the block, delete the block now.
790  if (Changed && pred_begin(BB) == pred_end(BB))
791    BB->eraseFromParent();
792
793  return Changed;
794}
795
796//===----------------------------------------------------------------------===//
797// Memory Optimization
798//===----------------------------------------------------------------------===//
799
800/// IsNonLocalValue - Return true if the specified values are defined in a
801/// different basic block than BB.
802static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
803  if (Instruction *I = dyn_cast<Instruction>(V))
804    return I->getParent() != BB;
805  return false;
806}
807
808/// OptimizeMemoryInst - Load and Store Instructions often have
809/// addressing modes that can do significant amounts of computation.  As such,
810/// instruction selection will try to get the load or store to do as much
811/// computation as possible for the program.  The problem is that isel can only
812/// see within a single block.  As such, we sink as much legal addressing mode
813/// stuff into the block as possible.
814///
815/// This method is used to optimize both load/store and inline asms with memory
816/// operands.
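///
/// Illustrative sketch (names invented): if the address of a load is computed
/// in a predecessor block,
///
///   bb0:
///     %addr = getelementptr inbounds %struct.S* %s, i64 0, i32 3
///     br label %bb1
///   bb1:
///     %v = load i32* %addr
///
/// the matched address computation is re-materialized in %bb1 (as the
/// ptrtoint/add/inttoptr "sunkaddr" instructions built below) so isel can
/// fold it into the load's addressing mode.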
817bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
818                                        Type *AccessTy) {
819  Value *Repl = Addr;
820
821  // Try to collapse single-value PHI nodes.  This is necessary to undo
822  // unprofitable PRE transformations.
823  SmallVector<Value*, 8> worklist;
824  SmallPtrSet<Value*, 16> Visited;
825  worklist.push_back(Addr);
826
827  // Use a worklist to iteratively look through PHI nodes, and ensure that
828  // the addressing mode obtained from the non-PHI roots of the graph
829  // are equivalent.
830  Value *Consensus = 0;
831  unsigned NumUsesConsensus = 0;
832  bool IsNumUsesConsensusValid = false;
833  SmallVector<Instruction*, 16> AddrModeInsts;
834  ExtAddrMode AddrMode;
835  while (!worklist.empty()) {
836    Value *V = worklist.back();
837    worklist.pop_back();
838
839    // Break use-def graph loops.
840    if (!Visited.insert(V)) {
841      Consensus = 0;
842      break;
843    }
844
845    // For a PHI node, push all of its incoming values.
846    if (PHINode *P = dyn_cast<PHINode>(V)) {
847      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
848        worklist.push_back(P->getIncomingValue(i));
849      continue;
850    }
851
852    // For non-PHIs, determine the addressing mode being computed.
853    SmallVector<Instruction*, 16> NewAddrModeInsts;
854    ExtAddrMode NewAddrMode =
855      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
856                                   NewAddrModeInsts, *TLI);
857
858    // This check is broken into two cases with very similar code to avoid using
859    // getNumUses() as much as possible. Some values have a lot of uses, so
860    // calling getNumUses() unconditionally caused a significant compile-time
861    // regression.
862    if (!Consensus) {
863      Consensus = V;
864      AddrMode = NewAddrMode;
865      AddrModeInsts = NewAddrModeInsts;
866      continue;
867    } else if (NewAddrMode == AddrMode) {
868      if (!IsNumUsesConsensusValid) {
869        NumUsesConsensus = Consensus->getNumUses();
870        IsNumUsesConsensusValid = true;
871      }
872
873      // Ensure that the obtained addressing mode is equivalent to that obtained
874      // for all other roots of the PHI traversal.  Also, when choosing one
875      // such root as representative, select the one with the most uses in order
876      // to keep the cost modeling heuristics in AddressingModeMatcher
877      // applicable.
878      unsigned NumUses = V->getNumUses();
879      if (NumUses > NumUsesConsensus) {
880        Consensus = V;
881        NumUsesConsensus = NumUses;
882        AddrModeInsts = NewAddrModeInsts;
883      }
884      continue;
885    }
886
887    Consensus = 0;
888    break;
889  }
890
891  // If the addressing mode couldn't be determined, or if multiple different
892  // ones were determined, bail out now.
893  if (!Consensus) return false;
894
  // Check to see if any of the instructions subsumed by this addr mode are
896  // non-local to I's BB.
897  bool AnyNonLocal = false;
898  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
899    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
900      AnyNonLocal = true;
901      break;
902    }
903  }
904
905  // If all the instructions matched are already in this BB, don't do anything.
906  if (!AnyNonLocal) {
907    DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
908    return false;
909  }
910
  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, reuses of the expr are
  // guaranteed to happen later.
914  IRBuilder<> Builder(MemoryInst);
915
  // Now that we have determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block.  If so,
  // reuse the computation.
920  Value *&SunkAddr = SunkAddrs[Addr];
921  if (SunkAddr) {
922    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
923                 << *MemoryInst);
924    if (SunkAddr->getType() != Addr->getType())
925      SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
926  } else {
927    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
928                 << *MemoryInst);
929    Type *IntPtrTy =
930          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
931
932    Value *Result = 0;
933
934    // Start with the base register. Do this first so that subsequent address
935    // matching finds it last, which will prevent it from trying to match it
936    // as the scaled value in case it happens to be a mul. That would be
937    // problematic if we've sunk a different mul for the scale, because then
938    // we'd end up sinking both muls.
939    if (AddrMode.BaseReg) {
940      Value *V = AddrMode.BaseReg;
941      if (V->getType()->isPointerTy())
942        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
943      if (V->getType() != IntPtrTy)
944        V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
945      Result = V;
946    }
947
948    // Add the scale value.
949    if (AddrMode.Scale) {
950      Value *V = AddrMode.ScaledReg;
951      if (V->getType() == IntPtrTy) {
952        // done.
953      } else if (V->getType()->isPointerTy()) {
954        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
955      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
956                 cast<IntegerType>(V->getType())->getBitWidth()) {
957        V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
958      } else {
959        V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
960      }
961      if (AddrMode.Scale != 1)
962        V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
963                              "sunkaddr");
964      if (Result)
965        Result = Builder.CreateAdd(Result, V, "sunkaddr");
966      else
967        Result = V;
968    }
969
970    // Add in the BaseGV if present.
971    if (AddrMode.BaseGV) {
972      Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
973      if (Result)
974        Result = Builder.CreateAdd(Result, V, "sunkaddr");
975      else
976        Result = V;
977    }
978
979    // Add in the Base Offset if present.
980    if (AddrMode.BaseOffs) {
981      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
982      if (Result)
983        Result = Builder.CreateAdd(Result, V, "sunkaddr");
984      else
985        Result = V;
986    }
987
988    if (Result == 0)
989      SunkAddr = Constant::getNullValue(Addr->getType());
990    else
991      SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
992  }
993
994  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
995
996  // If we have no uses, recursively delete the value and all dead instructions
997  // using it.
998  if (Repl->use_empty()) {
999    // This can cause recursive deletion, which can invalidate our iterator.
1000    // Use a WeakVH to hold onto it in case this happens.
1001    WeakVH IterHandle(CurInstIterator);
1002    BasicBlock *BB = CurInstIterator->getParent();
1003
1004    RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
1005
1006    if (IterHandle != CurInstIterator) {
1007      // If the iterator instruction was recursively deleted, start over at the
1008      // start of the block.
1009      CurInstIterator = BB->begin();
1010      SunkAddrs.clear();
1011    } else {
1012      // This address is now available for reassignment, so erase the table
1013      // entry; we don't want to match some completely different instruction.
1014      SunkAddrs[Addr] = 0;
1015    }
1016  }
1017  ++NumMemoryInsts;
1018  return true;
1019}
1020
1021/// OptimizeInlineAsmInst - If there are any memory operands, use
1022/// OptimizeMemoryInst to sink their address computing into the block when
1023/// possible / profitable.
1024bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
1025  bool MadeChange = false;
1026
1027  TargetLowering::AsmOperandInfoVector
1028    TargetConstraints = TLI->ParseConstraints(CS);
1029  unsigned ArgNo = 0;
1030  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
1031    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
1032
1033    // Compute the constraint code and ConstraintType to use.
1034    TLI->ComputeConstraintToUse(OpInfo, SDValue());
1035
1036    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
1037        OpInfo.isIndirect) {
1038      Value *OpVal = CS->getArgOperand(ArgNo++);
1039      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
1040    } else if (OpInfo.Type == InlineAsm::isInput)
1041      ArgNo++;
1042  }
1043
1044  return MadeChange;
1045}
1046
1047/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
1048/// basic block as the load, unless conditions are unfavorable. This allows
1049/// SelectionDAG to fold the extend into the load.
1050///
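/// For example (sketch only):
///
///   bb0:
///     %x = load i16* %p
///     br label %bb1
///   bb1:
///     %y = sext i16 %x to i32
///
/// becomes a sext placed immediately after the load in %bb0, which isel can
/// then match as a single sextload.
///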
1051bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
1052  // Look for a load being extended.
1053  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
1054  if (!LI) return false;
1055
1056  // If they're already in the same block, there's nothing to do.
1057  if (LI->getParent() == I->getParent())
1058    return false;
1059
1060  // If the load has other users and the truncate is not free, this probably
1061  // isn't worthwhile.
1062  if (!LI->hasOneUse() &&
1063      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
1064              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
1065      !TLI->isTruncateFree(I->getType(), LI->getType()))
1066    return false;
1067
1068  // Check whether the target supports casts folded into loads.
1069  unsigned LType;
1070  if (isa<ZExtInst>(I))
1071    LType = ISD::ZEXTLOAD;
1072  else {
1073    assert(isa<SExtInst>(I) && "Unexpected ext type!");
1074    LType = ISD::SEXTLOAD;
1075  }
1076  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
1077    return false;
1078
1079  // Move the extend into the same block as the load, so that SelectionDAG
1080  // can fold it.
1081  I->removeFromParent();
1082  I->insertAfter(LI);
1083  ++NumExtsMoved;
1084  return true;
1085}
1086
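/// OptimizeExtUses - Sketch (wording added for illustration): when a {s|z}ext
/// and its source are both live out of the defining block, out-of-block uses
/// of the narrow source are rewritten to use a truncate of the extended
/// value, e.g.
///
///   %w = sext i32 %x to i64     ; %x and %w both used in other blocks
///   ; elsewhere:  use(%x)  ==>  %t = trunc i64 %w to i32 ; use(%t)
///
/// so only the extended value needs to stay live across the block boundary.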
1087bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
1088  BasicBlock *DefBB = I->getParent();
1089
  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with the result of the extension.
1092  Value *Src = I->getOperand(0);
1093  if (Src->hasOneUse())
1094    return false;
1095
1096  // Only do this xform if truncating is free.
1097  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
1098    return false;
1099
1100  // Only safe to perform the optimization if the source is also defined in
1101  // this block.
1102  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
1103    return false;
1104
1105  bool DefIsLiveOut = false;
1106  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
1107       UI != E; ++UI) {
1108    Instruction *User = cast<Instruction>(*UI);
1109
1110    // Figure out which BB this ext is used in.
1111    BasicBlock *UserBB = User->getParent();
1112    if (UserBB == DefBB) continue;
1113    DefIsLiveOut = true;
1114    break;
1115  }
1116  if (!DefIsLiveOut)
1117    return false;
1118
  // Make sure none of the uses are PHI nodes.
1120  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
1121       UI != E; ++UI) {
1122    Instruction *User = cast<Instruction>(*UI);
1123    BasicBlock *UserBB = User->getParent();
1124    if (UserBB == DefBB) continue;
1125    // Be conservative. We don't want this xform to end up introducing
1126    // reloads just before load / store instructions.
1127    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
1128      return false;
1129  }
1130
  // InsertedTruncs - Only insert one trunc in each block.
1132  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
1133
1134  bool MadeChange = false;
1135  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
1136       UI != E; ++UI) {
1137    Use &TheUse = UI.getUse();
1138    Instruction *User = cast<Instruction>(*UI);
1139
1140    // Figure out which BB this ext is used in.
1141    BasicBlock *UserBB = User->getParent();
1142    if (UserBB == DefBB) continue;
1143
1144    // Both src and def are live in this block. Rewrite the use.
1145    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
1146
1147    if (!InsertedTrunc) {
1148      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1149      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
1150    }
1151
1152    // Replace a use of the {s|z}ext source with a use of the result.
1153    TheUse = InsertedTrunc;
1154    ++NumExtUses;
1155    MadeChange = true;
1156  }
1157
1158  return MadeChange;
1159}
1160
1161/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
1162/// turned into an explicit branch.
1163static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
1164  // FIXME: This should use the same heuristics as IfConversion to determine
1165  // whether a select is better represented as a branch.  This requires that
1166  // branch probability metadata is preserved for the select, which is not the
1167  // case currently.
1168
1169  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
1170
1171  // If the branch is predicted right, an out of order CPU can avoid blocking on
1172  // the compare.  Emit cmovs on compares with a memory operand as branches to
1173  // avoid stalls on the load from memory.  If the compare has more than one use
1174  // there's probably another cmov or setcc around so it's not worth emitting a
1175  // branch.
1176  if (!Cmp)
1177    return false;
1178
1179  Value *CmpOp0 = Cmp->getOperand(0);
1180  Value *CmpOp1 = Cmp->getOperand(1);
1181
1182  // We check that the memory operand has one use to avoid uses of the loaded
1183  // value directly after the compare, making branches unprofitable.
1184  return Cmp->hasOneUse() &&
1185         ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
1186          (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
1187}
1188
1189
1190/// If we have a SelectInst that will likely profit from branch prediction,
1191/// turn it into a branch.
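///
/// Sketch (block names other than select.mid/select.end are invented):
///
///   start:
///     %r = select i1 %c, i32 %a, i32 %b
///
/// becomes
///
///   start:
///     br i1 %c, label %select.end, label %select.mid
///   select.mid:
///     br label %select.end
///   select.end:
///     %r = phi i32 [ %a, %start ], [ %b, %select.mid ]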
1192bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
1193  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
1194
  // Can we convert the 'select' into control flow?
1196  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
1197    return false;
1198
1199  TargetLowering::SelectSupportKind SelectKind;
1200  if (VectorCond)
1201    SelectKind = TargetLowering::VectorMaskSelect;
1202  else if (SI->getType()->isVectorTy())
1203    SelectKind = TargetLowering::ScalarCondVectorVal;
1204  else
1205    SelectKind = TargetLowering::ScalarValSelect;
1206
  // Do we have efficient codegen support for this kind of 'select'?
1208  if (TLI->isSelectSupported(SelectKind)) {
1209    // We have efficient codegen support for the select instruction.
1210    // Check if it is profitable to keep this 'select'.
1211    if (!TLI->isPredictableSelectExpensive() ||
1212        !isFormingBranchFromSelectProfitable(SI))
1213      return false;
1214  }
1215
1216  ModifiedDT = true;
1217
1218  // First, we split the block containing the select into 2 blocks.
1219  BasicBlock *StartBlock = SI->getParent();
1220  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
1221  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
1222
1223  // Create a new block serving as the landing pad for the branch.
1224  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
1225                                             NextBlock->getParent(), NextBlock);
1226
1227  // Move the unconditional branch from the block with the select in it into our
1228  // landing pad block.
1229  StartBlock->getTerminator()->eraseFromParent();
1230  BranchInst::Create(NextBlock, SmallBlock);
1231
1232  // Insert the real conditional branch based on the original condition.
1233  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
1234
1235  // The select itself is replaced with a PHI Node.
1236  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
1237  PN->takeName(SI);
1238  PN->addIncoming(SI->getTrueValue(), StartBlock);
1239  PN->addIncoming(SI->getFalseValue(), SmallBlock);
1240  SI->replaceAllUsesWith(PN);
1241  SI->eraseFromParent();
1242
1243  // Instruct OptimizeBlock to skip to the next block.
1244  CurInstIterator = StartBlock->end();
1245  ++NumSelectsExpanded;
1246  return true;
1247}
1248
1249bool CodeGenPrepare::OptimizeInst(Instruction *I) {
1250  if (PHINode *P = dyn_cast<PHINode>(I)) {
1251    // It is possible for very late stage optimizations (such as SimplifyCFG)
1252    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
1253    // trivial PHI, go ahead and zap it here.
1254    if (Value *V = SimplifyInstruction(P)) {
1255      P->replaceAllUsesWith(V);
1256      P->eraseFromParent();
1257      ++NumPHIsElim;
1258      return true;
1259    }
1260    return false;
1261  }
1262
1263  if (CastInst *CI = dyn_cast<CastInst>(I)) {
1264    // If the source of the cast is a constant, then this should have
1265    // already been constant folded.  The only reason NOT to constant fold
1266    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
1268    // the address of globals out of a loop).  If this is the case, we don't
1269    // want to forward-subst the cast.
1270    if (isa<Constant>(CI->getOperand(0)))
1271      return false;
1272
1273    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
1274      return true;
1275
1276    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
1277      bool MadeChange = MoveExtToFormExtLoad(I);
1278      return MadeChange | OptimizeExtUses(I);
1279    }
1280    return false;
1281  }
1282
1283  if (CmpInst *CI = dyn_cast<CmpInst>(I))
1284    return OptimizeCmpExpression(CI);
1285
1286  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1287    if (TLI)
1288      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
1289    return false;
1290  }
1291
1292  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1293    if (TLI)
1294      return OptimizeMemoryInst(I, SI->getOperand(1),
1295                                SI->getOperand(0)->getType());
1296    return false;
1297  }
1298
1299  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
1300    if (GEPI->hasAllZeroIndices()) {
      // The GEP operand must be a pointer, so its result must be too; the
      // GEP is therefore equivalent to a bitcast.
1302      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
1303                                        GEPI->getName(), GEPI);
1304      GEPI->replaceAllUsesWith(NC);
1305      GEPI->eraseFromParent();
1306      ++NumGEPsElim;
1307      OptimizeInst(NC);
1308      return true;
1309    }
1310    return false;
1311  }
1312
1313  if (CallInst *CI = dyn_cast<CallInst>(I))
1314    return OptimizeCallInst(CI);
1315
1316  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
1317    return DupRetToEnableTailCallOpts(RI);
1318
1319  if (SelectInst *SI = dyn_cast<SelectInst>(I))
1320    return OptimizeSelectInst(SI);
1321
1322  return false;
1323}
1324
1325// In this pass we look for GEP and cast instructions that are used
1326// across basic blocks and rewrite them to improve basic-block-at-a-time
1327// selection.
1328bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
1329  SunkAddrs.clear();
1330  bool MadeChange = false;
1331
1332  CurInstIterator = BB.begin();
1333  for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; )
1334    MadeChange |= OptimizeInst(CurInstIterator++);
1335
1336  return MadeChange;
1337}
1338
// If llvm.dbg.value is far away from the value, then isel may not be able to
// handle it properly.  isel will drop llvm.dbg.value if it cannot find a node
// corresponding to the value.
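//
// For example (sketch only; metadata operands elided): a
//
//   call void @llvm.dbg.value(metadata !{i32 %v}, i64 0, metadata !...)
//
// that ends up far from the definition of %v is moved to sit right after the
// instruction defining %v (or after the first insertion point when %v is a
// PHI), so isel still sees the two together.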
1342bool CodeGenPrepare::PlaceDbgValues(Function &F) {
1343  bool MadeChange = false;
1344  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
1345    Instruction *PrevNonDbgInst = NULL;
1346    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
1347      Instruction *Insn = BI; ++BI;
1348      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
1349      if (!DVI) {
1350        PrevNonDbgInst = Insn;
1351        continue;
1352      }
1353
1354      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
1355      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
1356        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
1357        DVI->removeFromParent();
1358        if (isa<PHINode>(VI))
1359          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
1360        else
1361          DVI->insertAfter(VI);
1362        MadeChange = true;
1363        ++NumDbgValueMoved;
1364      }
1365    }
1366  }
1367  return MadeChange;
1368}
1369