CodeGenPrepare.cpp revision 040056fd11693ffc41ce9b777281c71705d0dc1f
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass munges the code in the input function to better prepare it for
11// SelectionDAG-based code generation. This works around limitations in it's
12// basic-block-at-a-time approach. It should eventually be removed.
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "codegenprepare"
17#include "llvm/Transforms/Scalar.h"
18#include "llvm/Constants.h"
19#include "llvm/DerivedTypes.h"
20#include "llvm/Function.h"
21#include "llvm/InlineAsm.h"
22#include "llvm/Instructions.h"
23#include "llvm/IntrinsicInst.h"
24#include "llvm/Pass.h"
25#include "llvm/Analysis/ProfileInfo.h"
26#include "llvm/Target/TargetData.h"
27#include "llvm/Target/TargetLowering.h"
28#include "llvm/Transforms/Utils/AddrModeMatcher.h"
29#include "llvm/Transforms/Utils/BasicBlockUtils.h"
30#include "llvm/Transforms/Utils/Local.h"
31#include "llvm/Transforms/Utils/BuildLibCalls.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/SmallSet.h"
34#include "llvm/Assembly/Writer.h"
35#include "llvm/Support/CallSite.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/GetElementPtrTypeIterator.h"
38#include "llvm/Support/PatternMatch.h"
39#include "llvm/Support/raw_ostream.h"
40#include "llvm/Support/IRBuilder.h"
41using namespace llvm;
42using namespace llvm::PatternMatch;
43
44namespace {
45  class CodeGenPrepare : public FunctionPass {
46    /// TLI - Keep a pointer to the TargetLowering to consult for determining
47    /// transformation profitability.
48    const TargetLowering *TLI;
49    ProfileInfo *PFI;
50
51    /// BackEdges - Keep a set of all the loop back edges.
52    ///
53    SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges;
54  public:
55    static char ID; // Pass identification, replacement for typeid
56    explicit CodeGenPrepare(const TargetLowering *tli = 0)
57      : FunctionPass(&ID), TLI(tli) {}
58    bool runOnFunction(Function &F);
59
60    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
61      AU.addPreserved<ProfileInfo>();
62    }
63
64    virtual void releaseMemory() {
65      BackEdges.clear();
66    }
67
68  private:
69    bool EliminateMostlyEmptyBlocks(Function &F);
70    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
71    void EliminateMostlyEmptyBlock(BasicBlock *BB);
72    bool OptimizeBlock(BasicBlock &BB);
73    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
74                            DenseMap<Value*,Value*> &SunkAddrs);
75    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
76                               DenseMap<Value*,Value*> &SunkAddrs);
77    bool OptimizeCallInst(CallInst *CI);
78    bool MoveExtToFormExtLoad(Instruction *I);
79    bool OptimizeExtUses(Instruction *I);
80    void findLoopBackEdges(const Function &F);
81  };
82}
83
84char CodeGenPrepare::ID = 0;
85static RegisterPass<CodeGenPrepare> X("codegenprepare",
86                                      "Optimize for code generation");
87
88FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
89  return new CodeGenPrepare(TLI);
90}
91
92/// findLoopBackEdges - Do a DFS walk to find loop back edges.
93///
94void CodeGenPrepare::findLoopBackEdges(const Function &F) {
95  SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
96  FindFunctionBackedges(F, Edges);
97
98  BackEdges.insert(Edges.begin(), Edges.end());
99}
100
101
102bool CodeGenPrepare::runOnFunction(Function &F) {
103  bool EverMadeChange = false;
104
105  PFI = getAnalysisIfAvailable<ProfileInfo>();
106  // First pass, eliminate blocks that contain only PHI nodes and an
107  // unconditional branch.
108  EverMadeChange |= EliminateMostlyEmptyBlocks(F);
109
110  // Now find loop back edges.
111  findLoopBackEdges(F);
112
113  bool MadeChange = true;
114  while (MadeChange) {
115    MadeChange = false;
116    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
117      MadeChange |= OptimizeBlock(*BB);
118    EverMadeChange |= MadeChange;
119  }
120  return EverMadeChange;
121}
122
123/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
124/// debug info directives, and an unconditional branch.  Passes before isel
125/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
126/// isel.  Start by eliminating these blocks so we can split the edges the
127/// way we want them.
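///
/// For illustration only (a hypothetical sketch, not taken from this source):
/// a block of the form
///   bb1:
///     %p = phi i32 [ %a, %bb0a ], [ %b, %bb0b ]
///     br label %bb2
/// whose PHI is only consumed by PHIs in %bb2 can be folded away, with %bb2's
/// PHIs picking up %bb1's incoming edges directly.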
128bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
129  bool MadeChange = false;
130  // Note that this intentionally skips the entry block.
131  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
132    BasicBlock *BB = I++;
133
134    // If this block doesn't end with an uncond branch, ignore it.
135    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
136    if (!BI || !BI->isUnconditional())
137      continue;
138
139    // If the instruction before the branch (skipping debug info) isn't a phi
140    // node, then other stuff is happening here.
141    BasicBlock::iterator BBI = BI;
142    if (BBI != BB->begin()) {
143      --BBI;
144      while (isa<DbgInfoIntrinsic>(BBI)) {
145        if (BBI == BB->begin())
146          break;
147        --BBI;
148      }
149      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
150        continue;
151    }
152
153    // Do not break infinite loops.
154    BasicBlock *DestBB = BI->getSuccessor(0);
155    if (DestBB == BB)
156      continue;
157
158    if (!CanMergeBlocks(BB, DestBB))
159      continue;
160
161    EliminateMostlyEmptyBlock(BB);
162    MadeChange = true;
163  }
164  return MadeChange;
165}
166
167/// CanMergeBlocks - Return true if we can merge BB into DestBB when there is
168/// a single unconditional branch between them and BB contains no other
169/// non-PHI instructions.
170bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
171                                    const BasicBlock *DestBB) const {
172  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
173  // the successor.  If there are more complex conditions (e.g. preheaders),
174  // don't mess around with them.
175  BasicBlock::const_iterator BBI = BB->begin();
176  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
177    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
178         UI != E; ++UI) {
179      const Instruction *User = cast<Instruction>(*UI);
180      if (User->getParent() != DestBB || !isa<PHINode>(User))
181        return false;
182      // If User is inside the DestBB block and it is a PHINode, then check the
183      // incoming value. If the incoming value is not from BB, then this is a
184      // complex condition (e.g. preheaders) we want to avoid here.
185      if (User->getParent() == DestBB) {
186        if (const PHINode *UPN = dyn_cast<PHINode>(User))
187          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
188            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
189            if (Insn && Insn->getParent() == BB &&
190                Insn->getParent() != UPN->getIncomingBlock(I))
191              return false;
192          }
193      }
194    }
195  }
196
197  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
198  // and DestBB may have conflicting incoming values for the block.  If so, we
199  // can't merge the block.
200  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
201  if (!DestBBPN) return true;  // no conflict.
202
203  // Collect the preds of BB.
204  SmallPtrSet<const BasicBlock*, 16> BBPreds;
205  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
206    // It is faster to get preds from a PHI than with pred_iterator.
207    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
208      BBPreds.insert(BBPN->getIncomingBlock(i));
209  } else {
210    BBPreds.insert(pred_begin(BB), pred_end(BB));
211  }
212
213  // Walk the preds of DestBB.
214  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
215    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
216    if (BBPreds.count(Pred)) {   // Common predecessor?
217      BBI = DestBB->begin();
218      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
219        const Value *V1 = PN->getIncomingValueForBlock(Pred);
220        const Value *V2 = PN->getIncomingValueForBlock(BB);
221
222        // If V2 is a phi node in BB, look up what the mapped value will be.
223        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
224          if (V2PN->getParent() == BB)
225            V2 = V2PN->getIncomingValueForBlock(Pred);
226
227        // If there is a conflict, bail out.
228        if (V1 != V2) return false;
229      }
230    }
231  }
232
233  return true;
234}
235
236
237/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only PHI nodes
238/// and an unconditional branch in it.
239void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
240  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
241  BasicBlock *DestBB = BI->getSuccessor(0);
242
243  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
244
245  // If the destination block has a single pred, then this is a trivial edge,
246  // just collapse it.
247  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
248    if (SinglePred != DestBB) {
249      // Remember if SinglePred was the entry block of the function.  If so, we
250      // will need to move BB back to the entry position.
251      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
252      MergeBasicBlockIntoOnlyPred(DestBB, this);
253
254      if (isEntry && BB != &BB->getParent()->getEntryBlock())
255        BB->moveBefore(&BB->getParent()->getEntryBlock());
256
257      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
258      return;
259    }
260  }
261
262  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
263  // to handle the new incoming edges it is about to have.
264  PHINode *PN;
265  for (BasicBlock::iterator BBI = DestBB->begin();
266       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
267    // Remove the incoming value for BB, and remember it.
268    Value *InVal = PN->removeIncomingValue(BB, false);
269
270    // Two options: either the InVal is a phi node defined in BB or it is some
271    // value that dominates BB.
272    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
273    if (InValPhi && InValPhi->getParent() == BB) {
274      // Add all of the input values of the input PHI as inputs of this phi.
275      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
276        PN->addIncoming(InValPhi->getIncomingValue(i),
277                        InValPhi->getIncomingBlock(i));
278    } else {
279      // Otherwise, add one instance of the dominating value for each edge that
280      // we will be adding.
281      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
282        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
283          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
284      } else {
285        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
286          PN->addIncoming(InVal, *PI);
287      }
288    }
289  }
290
291  // The PHIs are now updated, change everything that refers to BB to use
292  // DestBB and remove BB.
293  BB->replaceAllUsesWith(DestBB);
294  if (PFI) {
295    PFI->replaceAllUses(BB, DestBB);
296    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
297  }
298  BB->eraseFromParent();
299
300  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
301}
302
303/// FindReusablePredBB - Check all of the predecessors of the block DestPHI
304/// lives in to see if there is a block that we can reuse as the target of the
305/// critical edge from TIBB.
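///
/// For illustration only (a hypothetical sketch): if Dest already has an empty
/// predecessor P that ends in "br label %Dest" and whose PHI entries in Dest
/// match the values TIBB would provide, P can stand in for a new split block
/// and TIBB is simply redirected to P.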
306static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) {
307  BasicBlock *Dest = DestPHI->getParent();
308
309  /// TIPHIValues - This array is lazily computed to determine the values of
310  /// PHIs in Dest that TI would provide.
311  SmallVector<Value*, 32> TIPHIValues;
312
313  /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB.
314  unsigned TIBBEntryNo = 0;
315
316  // Check to see if Dest has any blocks that can be used as a split edge for
317  // this terminator.
318  for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) {
319    BasicBlock *Pred = DestPHI->getIncomingBlock(pi);
320    // To be usable, the pred has to end with an uncond branch to the dest.
321    BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
322    if (!PredBr || !PredBr->isUnconditional())
323      continue;
324    // Must be empty other than the branch and debug info.
325    BasicBlock::iterator I = Pred->begin();
326    while (isa<DbgInfoIntrinsic>(I))
327      I++;
328    if (&*I != PredBr)
329      continue;
330    // Cannot be the entry block; its label does not get emitted.
331    if (Pred == &Dest->getParent()->getEntryBlock())
332      continue;
333
334    // Finally, since we know that Dest has phi nodes in it, we have to make
335    // sure that jumping to Pred will have the same effect as going to Dest in
336    // terms of PHI values.
337    PHINode *PN;
338    unsigned PHINo = 0;
339    unsigned PredEntryNo = pi;
340
341    bool FoundMatch = true;
342    for (BasicBlock::iterator I = Dest->begin();
343         (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
344      if (PHINo == TIPHIValues.size()) {
345        if (PN->getIncomingBlock(TIBBEntryNo) != TIBB)
346          TIBBEntryNo = PN->getBasicBlockIndex(TIBB);
347        TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo));
348      }
349
350      // If the PHI entry doesn't work, we can't use this pred.
351      if (PN->getIncomingBlock(PredEntryNo) != Pred)
352        PredEntryNo = PN->getBasicBlockIndex(Pred);
353
354      if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) {
355        FoundMatch = false;
356        break;
357      }
358    }
359
360    // If we found a workable predecessor, return it; the caller redirects TI.
361    if (FoundMatch)
362      return Pred;
363  }
364  return 0;
365}
366
367
368/// SplitEdgeNicely - Split the critical edge from TI to its specified
369/// successor if it will improve codegen.  We only do this if the successor has
370/// phi nodes (otherwise critical edges are ok).  If there is already another
371/// predecessor of the succ that is empty (and thus has no phi nodes), use it
372/// instead of introducing a new block.
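///
/// For illustration only (hypothetical CFG, not from this source): if %bb0 ends
/// in a conditional branch to %merge and %merge begins with PHI nodes, the
/// %bb0 -> %merge edge is critical; it is split by routing it through either an
/// existing empty predecessor with matching PHI inputs or a newly created block.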
373static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
374                     SmallSet<std::pair<const BasicBlock*,
375                                        const BasicBlock*>, 8> &BackEdges,
376                             Pass *P) {
377  BasicBlock *TIBB = TI->getParent();
378  BasicBlock *Dest = TI->getSuccessor(SuccNum);
379  assert(isa<PHINode>(Dest->begin()) &&
380         "This should only be called if Dest has a PHI!");
381  PHINode *DestPHI = cast<PHINode>(Dest->begin());
382
383  // Do not split edges to EH landing pads.
384  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI))
385    if (Invoke->getSuccessor(1) == Dest)
386      return;
387
388  // As a hack, never split backedges of loops.  Even though the copy for any
389  // PHIs inserted on the backedge would be dead for exits from the loop, we
390  // assume that the cost of *splitting* the backedge would be too high.
391  if (BackEdges.count(std::make_pair(TIBB, Dest)))
392    return;
393
394  if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) {
395    ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>();
396    if (PFI)
397      PFI->splitEdge(TIBB, Dest, ReuseBB);
398    Dest->removePredecessor(TIBB);
399    TI->setSuccessor(SuccNum, ReuseBB);
400    return;
401  }
402
403  SplitCriticalEdge(TI, SuccNum, P, true);
404}
405
406
407/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
408/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
409/// sink it into user blocks to reduce the number of virtual
410/// registers that must be created and coalesced.
411///
412/// Return true if any changes are made.
413///
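///
/// For illustration only (a hypothetical sketch, not from this source): given
///   entry:
///     %c = bitcast i32* %p to i8*
///     br label %use
///   use:
///     call void @f(i8* %c)
/// an equivalent bitcast is re-created in %use and the use is rewritten to it,
/// so the no-op copy never has to live across the block boundary.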
414static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
415  // Determine whether this is a noop copy by comparing the source and dest types.
416  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
417  EVT DstVT = TLI.getValueType(CI->getType());
418
419  // If this is an fp<->int conversion, it is not a noop copy.
420  if (SrcVT.isInteger() != DstVT.isInteger())
421    return false;
422
423  // If this is an extension, it will be a zero or sign extension, which
424  // isn't a noop.
425  if (SrcVT.bitsLT(DstVT)) return false;
426
427  // If these values will be promoted, find out what they will be promoted
428  // to.  This helps us consider truncates on PPC as noop copies when they
429  // are.
430  if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
431    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
432  if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
433    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
434
435  // If, after promotion, these are the same types, this is a noop copy.
436  if (SrcVT != DstVT)
437    return false;
438
439  BasicBlock *DefBB = CI->getParent();
440
441  /// InsertedCasts - Only insert a cast in each block once.
442  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
443
444  bool MadeChange = false;
445  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
446       UI != E; ) {
447    Use &TheUse = UI.getUse();
448    Instruction *User = cast<Instruction>(*UI);
449
450    // Figure out which BB this cast is used in.  For PHIs this is the
451    // appropriate predecessor block.
452    BasicBlock *UserBB = User->getParent();
453    if (PHINode *PN = dyn_cast<PHINode>(User)) {
454      UserBB = PN->getIncomingBlock(UI);
455    }
456
457    // Preincrement use iterator so we don't invalidate it.
458    ++UI;
459
460    // If this user is in the same block as the cast, don't change the cast.
461    if (UserBB == DefBB) continue;
462
463    // If we have already inserted a cast into this block, use it.
464    CastInst *&InsertedCast = InsertedCasts[UserBB];
465
466    if (!InsertedCast) {
467      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
468
469      InsertedCast =
470        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
471                         InsertPt);
472      MadeChange = true;
473    }
474
475    // Replace a use of the cast with a use of the new cast.
476    TheUse = InsertedCast;
477  }
478
479  // If we removed all uses, nuke the cast.
480  if (CI->use_empty()) {
481    CI->eraseFromParent();
482    MadeChange = true;
483  }
484
485  return MadeChange;
486}
487
488/// OptimizeCmpExpression - Sink the given CmpInst into user blocks to reduce
489/// the number of virtual registers that must be created and coalesced.  This is
490/// a clear win except on targets with multiple condition code registers
491/// (e.g. PowerPC), where it might lose; some adjustment may be wanted there.
492///
493/// Return true if any changes are made.
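///
/// For illustration only (hypothetical IR): if
///   %cond = icmp eq i32 %x, 0
/// is defined in one block but only branched on in another, a copy of the icmp
/// is emitted in the using block so the flag-setting instruction ends up next
/// to its branch during selection.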
494static bool OptimizeCmpExpression(CmpInst *CI) {
495  BasicBlock *DefBB = CI->getParent();
496
497  /// InsertedCmp - Only insert a cmp in each block once.
498  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
499
500  bool MadeChange = false;
501  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
502       UI != E; ) {
503    Use &TheUse = UI.getUse();
504    Instruction *User = cast<Instruction>(*UI);
505
506    // Preincrement use iterator so we don't invalidate it.
507    ++UI;
508
509    // Don't bother for PHI nodes.
510    if (isa<PHINode>(User))
511      continue;
512
513    // Figure out which BB this cmp is used in.
514    BasicBlock *UserBB = User->getParent();
515
516    // If this user is in the same block as the cmp, don't change the cmp.
517    if (UserBB == DefBB) continue;
518
519    // If we have already inserted a cmp into this block, use it.
520    CmpInst *&InsertedCmp = InsertedCmps[UserBB];
521
522    if (!InsertedCmp) {
523      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
524
525      InsertedCmp =
526        CmpInst::Create(CI->getOpcode(),
527                        CI->getPredicate(),  CI->getOperand(0),
528                        CI->getOperand(1), "", InsertPt);
529      MadeChange = true;
530    }
531
532    // Replace a use of the cmp with a use of the new cmp.
533    TheUse = InsertedCmp;
534  }
535
536  // If we removed all uses, nuke the cmp.
537  if (CI->use_empty())
538    CI->eraseFromParent();
539
540  return MadeChange;
541}
542
543bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
544  bool MadeChange = false;
545
546  // Lower all uses of llvm.objectsize.*
547  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
548  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
549    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
550    const Type *ReturnTy = CI->getType();
551    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
552    CI->replaceAllUsesWith(RetVal);
553    CI->eraseFromParent();
554    return true;
555  }
556
557  // From here on out we're working with named functions.
558  if (CI->getCalledFunction() == 0) return false;
559
560  // We'll need TargetData from here on out.
561  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
562  if (!TD) return false;
563
564  // Lower all default uses of _chk calls.  This is very similar to the code
565  // in InstCombineCalls, but here we are only lowering calls
566  // that have the default "don't know" as the objectsize.  Anything else
567  // should be left alone.
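  //
  // For illustration only (a hypothetical call, not from this source): a call
  // written by the front end as
  //   __memcpy_chk(dst, src, n, (size_t)-1)
  // carries no object-size information, so it is lowered below to a plain
  // memcpy; calls with a real size are left alone so the runtime check remains.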
568  StringRef Name = CI->getCalledFunction()->getName();
569  BasicBlock *BB = CI->getParent();
570  IRBuilder<> B(CI->getParent()->getContext());
571
572  // Position the builder at the call; new instructions go right before it.
573  B.SetInsertPoint(BB, CI);
574
575  if (Name == "__memcpy_chk") {
576    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
577    if (!SizeCI)
578      return 0;
579    if (SizeCI->isAllOnesValue()) {
580      EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
581                 1, B, TD);
582      CI->replaceAllUsesWith(CI->getOperand(1));
583      CI->eraseFromParent();
584      return true;
585    }
586    return 0;
587  }
588
589  // Should be similar to memcpy.
590  if (Name == "__mempcpy_chk") {
591    return 0;
592  }
593
594  if (Name == "__memmove_chk") {
595    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
596    if (!SizeCI)
597      return 0;
598    if (SizeCI->isAllOnesValue()) {
599      EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
600                  1, B, TD);
601      CI->replaceAllUsesWith(CI->getOperand(1));
602      CI->eraseFromParent();
603      return true;
604    }
605    return 0;
606  }
607
608  if (Name == "__memset_chk") {
609    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
610    if (!SizeCI)
611      return 0;
612    if (SizeCI->isAllOnesValue()) {
613      Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
614                                   false);
615      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B, TD);
616      CI->replaceAllUsesWith(CI->getOperand(1));
617      CI->eraseFromParent();
618      return true;
619    }
620    return 0;
621  }
622
623  if (Name == "__strcpy_chk") {
624    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
625    if (!SizeCI)
626      return 0;
627    // If a) we don't have any length information, or b) we know this will
628    // fit then just lower to a plain strcpy. Otherwise we'll keep our
629    // strcpy_chk call which may fail at runtime if the size is too long.
630    // TODO: It might be nice to get a maximum length out of the possible
631    // string lengths for varying.
632    if (SizeCI->isAllOnesValue()) {
633      Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD);
634      CI->replaceAllUsesWith(Ret);
635      CI->eraseFromParent();
636      return true;
637    }
638    return 0;
639  }
640
641  // Should be similar to strcpy.
642  if (Name == "__stpcpy_chk") {
643    return 0;
644  }
645
646  if (Name == "__strncpy_chk") {
647    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
648    if (!SizeCI)
649      return 0;
650    if (SizeCI->isAllOnesValue()) {
651      Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2),
652                               CI->getOperand(3), B, TD);
653      CI->replaceAllUsesWith(Ret);
654      CI->eraseFromParent();
655      return true;
656    }
657    return 0;
658  }
659
660  if (Name == "__strcat_chk") {
661    return 0;
662  }
663
664  if (Name == "__strncat_chk") {
665    return 0;
666  }
667
668  return MadeChange;
669}
670//===----------------------------------------------------------------------===//
671// Memory Optimization
672//===----------------------------------------------------------------------===//
673
674/// IsNonLocalValue - Return true if the specified value is defined in a
675/// different basic block than BB.
676static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
677  if (Instruction *I = dyn_cast<Instruction>(V))
678    return I->getParent() != BB;
679  return false;
680}
681
682/// OptimizeMemoryInst - Load and store instructions often have
683/// addressing modes that can do significant amounts of computation.  As such,
684/// instruction selection will try to get the load or store to do as much
685/// computation as possible for the program.  The problem is that isel can only
686/// see within a single block.  To compensate, we sink as much legal addressing
687/// mode computation into the block as possible.
688///
689/// This method is used to optimize both load/store and inline asms with memory
690/// operands.
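///
/// For illustration only (a hypothetical sketch, not from this source): if an
/// address like
///   %addr = getelementptr i8* %base, i64 %idx
/// is computed in a different block than the load that uses it, the matched
/// addressing computation is re-materialized next to the load (as ptrtoint,
/// mul/add, and inttoptr instructions named "sunkaddr" below) so isel can fold
/// it into the load's addressing mode.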
691bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
692                                        const Type *AccessTy,
693                                        DenseMap<Value*,Value*> &SunkAddrs) {
694  // Figure out what addressing mode will be built up for this operation.
695  SmallVector<Instruction*, 16> AddrModeInsts;
696  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
697                                                      AddrModeInsts, *TLI);
698
699  // Check to see if any of the instructions subsumed by this addr mode are
700  // non-local to MemoryInst's BB.
701  bool AnyNonLocal = false;
702  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
703    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
704      AnyNonLocal = true;
705      break;
706    }
707  }
708
709  // If all the instructions matched are already in this BB, don't do anything.
710  if (!AnyNonLocal) {
711    DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
712    return false;
713  }
714
715  // Insert this computation right after this user.  Since our caller is
716  // scanning from the top of the BB to the bottom, reuses of the expression are
717  // guaranteed to happen later.
718  BasicBlock::iterator InsertPt = MemoryInst;
719
720  // We have now determined the addressing expression we want to use and know
721  // that we have to sink it into this block.  Check to see if we have already
722  // done this for some other load/store instruction in this block; if so, reuse
723  // the computation.
724  Value *&SunkAddr = SunkAddrs[Addr];
725  if (SunkAddr) {
726    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
727                 << *MemoryInst);
728    if (SunkAddr->getType() != Addr->getType())
729      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
730  } else {
731    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
732                 << *MemoryInst);
733    const Type *IntPtrTy =
734          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());
735
736    Value *Result = 0;
737
738    // Start with the base register. Do this first so that subsequent address
739    // matching finds it last, which will prevent it from trying to match it
740    // as the scaled value in case it happens to be a mul. That would be
741    // problematic if we've sunk a different mul for the scale, because then
742    // we'd end up sinking both muls.
743    if (AddrMode.BaseReg) {
744      Value *V = AddrMode.BaseReg;
745      if (V->getType()->isPointerTy())
746        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
747      if (V->getType() != IntPtrTy)
748        V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
749                                        "sunkaddr", InsertPt);
750      Result = V;
751    }
752
753    // Add the scale value.
754    if (AddrMode.Scale) {
755      Value *V = AddrMode.ScaledReg;
756      if (V->getType() == IntPtrTy) {
757        // done.
758      } else if (V->getType()->isPointerTy()) {
759        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
760      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
761                 cast<IntegerType>(V->getType())->getBitWidth()) {
762        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
763      } else {
764        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
765      }
766      if (AddrMode.Scale != 1)
767        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
768                                                                AddrMode.Scale),
769                                      "sunkaddr", InsertPt);
770      if (Result)
771        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
772      else
773        Result = V;
774    }
775
776    // Add in the BaseGV if present.
777    if (AddrMode.BaseGV) {
778      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
779                                  InsertPt);
780      if (Result)
781        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
782      else
783        Result = V;
784    }
785
786    // Add in the Base Offset if present.
787    if (AddrMode.BaseOffs) {
788      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
789      if (Result)
790        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
791      else
792        Result = V;
793    }
794
795    if (Result == 0)
796      SunkAddr = Constant::getNullValue(Addr->getType());
797    else
798      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
799  }
800
801  MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
802
803  if (Addr->use_empty())
804    RecursivelyDeleteTriviallyDeadInstructions(Addr);
805  return true;
806}
807
808/// OptimizeInlineAsmInst - If there are any memory operands, use
809/// OptimizeMemoryInst to sink their address computing into the block when
810/// possible / profitable.
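///
/// For illustration only (a hypothetical example, not from this source): an
/// inline asm call with an indirect memory constraint such as "=*m"(i32* %addr)
/// uses %addr as a memory operand, so the address computation feeding %addr is
/// handed to OptimizeMemoryInst just like a load or store address.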
811bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
812                                           DenseMap<Value*,Value*> &SunkAddrs) {
813  bool MadeChange = false;
814  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
815
816  // Do a prepass over the constraints, canonicalizing them, and building up the
817  // ConstraintOperands list.
818  std::vector<InlineAsm::ConstraintInfo>
819    ConstraintInfos = IA->ParseConstraints();
820
821  /// ConstraintOperands - Information about all of the constraints.
822  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
823  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
824  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
825    ConstraintOperands.
826      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
827    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();
828
829    // Compute the value type for each operand.
830    switch (OpInfo.Type) {
831    case InlineAsm::isOutput:
832      if (OpInfo.isIndirect)
833        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
834      break;
835    case InlineAsm::isInput:
836      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
837      break;
838    case InlineAsm::isClobber:
839      // Nothing to do.
840      break;
841    }
842
843    // Compute the constraint code and ConstraintType to use.
844    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
845                             OpInfo.ConstraintType == TargetLowering::C_Memory);
846
847    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
848        OpInfo.isIndirect) {
849      Value *OpVal = OpInfo.CallOperandVal;
850      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
851    }
852  }
853
854  return MadeChange;
855}
856
857/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
858/// basic block as the load, unless conditions are unfavorable. This allows
859/// SelectionDAG to fold the extend into the load.
860///
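///
/// For illustration only (hypothetical IR): with
///   bb0:  %v = load i16* %p
///         br label %bb1
///   bb1:  %e = sext i16 %v to i32
/// the sext is moved into bb0 right after the load, letting isel select a
/// single sign-extending load.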
861bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
862  // Look for a load being extended.
863  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
864  if (!LI) return false;
865
866  // If they're already in the same block, there's nothing to do.
867  if (LI->getParent() == I->getParent())
868    return false;
869
870  // If the load has other users and the truncate is not free, this probably
871  // isn't worthwhile.
872  if (!LI->hasOneUse() &&
873      TLI && !TLI->isTruncateFree(I->getType(), LI->getType()))
874    return false;
875
876  // Check whether the target supports casts folded into loads.
877  unsigned LType;
878  if (isa<ZExtInst>(I))
879    LType = ISD::ZEXTLOAD;
880  else {
881    assert(isa<SExtInst>(I) && "Unexpected ext type!");
882    LType = ISD::SEXTLOAD;
883  }
884  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
885    return false;
886
887  // Move the extend into the same block as the load, so that SelectionDAG
888  // can fold it.
889  I->removeFromParent();
890  I->insertAfter(LI);
891  return true;
892}
893
894bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
895  BasicBlock *DefBB = I->getParent();
896
897  // If both the result of the {s|z}ext and its source are live out, rewrite
898  // all other uses of the source with the result of the extension.
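  //
  // For illustration only (hypothetical IR): if %s and %x = zext i16 %s to i32
  // are both used in another block, the out-of-block uses of %s are rewritten
  // to use "trunc i32 %x to i16" instead, so only %x needs to be live out.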
899  Value *Src = I->getOperand(0);
900  if (Src->hasOneUse())
901    return false;
902
903  // Only do this xform if truncating is free.
904  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
905    return false;
906
907  // Only safe to perform the optimization if the source is also defined in
908  // this block.
909  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
910    return false;
911
912  bool DefIsLiveOut = false;
913  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
914       UI != E; ++UI) {
915    Instruction *User = cast<Instruction>(*UI);
916
917    // Figure out which BB this ext is used in.
918    BasicBlock *UserBB = User->getParent();
919    if (UserBB == DefBB) continue;
920    DefIsLiveOut = true;
921    break;
922  }
923  if (!DefIsLiveOut)
924    return false;
925
926  // Make sure none of the uses are PHI nodes.
927  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
928       UI != E; ++UI) {
929    Instruction *User = cast<Instruction>(*UI);
930    BasicBlock *UserBB = User->getParent();
931    if (UserBB == DefBB) continue;
932    // Be conservative. We don't want this xform to end up introducing
933    // reloads just before load / store instructions.
934    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
935      return false;
936  }
937
938  // InsertedTruncs - Only insert one trunc in each block.
939  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
940
941  bool MadeChange = false;
942  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
943       UI != E; ++UI) {
944    Use &TheUse = UI.getUse();
945    Instruction *User = cast<Instruction>(*UI);
946
947    // Figure out which BB this ext is used in.
948    BasicBlock *UserBB = User->getParent();
949    if (UserBB == DefBB) continue;
950
951    // Both src and def are live in this block. Rewrite the use.
952    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
953
954    if (!InsertedTrunc) {
955      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();
956
957      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
958    }
959
960    // Replace a use of the {s|z}ext source with a use of the result.
961    TheUse = InsertedTrunc;
962
963    MadeChange = true;
964  }
965
966  return MadeChange;
967}
968
969// In this pass we look for instructions (e.g. GEPs, casts, and compares) that
970// are used across basic blocks and rewrite them to improve
971// basic-block-at-a-time selection.
972bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
973  bool MadeChange = false;
974
975  // Split all critical edges where the dest block has a PHI.
976  TerminatorInst *BBTI = BB.getTerminator();
977  if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
978    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
979      BasicBlock *SuccBB = BBTI->getSuccessor(i);
980      if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
981        SplitEdgeNicely(BBTI, i, BackEdges, this);
982    }
983  }
984
985  // Keep track of non-local addresses that have been sunk into this block.
986  // This allows us to avoid inserting duplicate code for blocks with multiple
987  // load/stores of the same address.
988  DenseMap<Value*, Value*> SunkAddrs;
989
990  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
991    Instruction *I = BBI++;
992
993    if (CastInst *CI = dyn_cast<CastInst>(I)) {
994      // If the source of the cast is a constant, then this should have
995      // already been constant folded.  The only reason NOT to constant fold
996      // it is if something (e.g. LSR) was careful to place the constant
997      // evaluation in a block other than the one that uses it (e.g. to hoist
998      // the address of globals out of a loop).  If this is the case, we don't
999      // want to forward-subst the cast.
1000      if (isa<Constant>(CI->getOperand(0)))
1001        continue;
1002
1003      bool Change = false;
1004      if (TLI) {
1005        Change = OptimizeNoopCopyExpression(CI, *TLI);
1006        MadeChange |= Change;
1007      }
1008
1009      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I))) {
1010        MadeChange |= MoveExtToFormExtLoad(I);
1011        MadeChange |= OptimizeExtUses(I);
1012      }
1013    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
1014      MadeChange |= OptimizeCmpExpression(CI);
1015    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1016      if (TLI)
1017        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
1018                                         SunkAddrs);
1019    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1020      if (TLI)
1021        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
1022                                         SI->getOperand(0)->getType(),
1023                                         SunkAddrs);
1024    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
1025      if (GEPI->hasAllZeroIndices()) {
1026        // The GEP operand must be a pointer, and so must its result; use a BitCast.
1027        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
1028                                          GEPI->getName(), GEPI);
1029        GEPI->replaceAllUsesWith(NC);
1030        GEPI->eraseFromParent();
1031        MadeChange = true;
1032        BBI = NC;
1033      }
1034    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
1035      // If we found an inline asm expression, and if the target knows how to
1036      // lower it to normal LLVM code, do so now.
1037      if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1038        if (TLI->ExpandInlineAsm(CI)) {
1039          BBI = BB.begin();
1040          // Avoid processing instructions out of order, which could cause
1041          // reuse before a value is defined.
1042          SunkAddrs.clear();
1043        } else
1044          // Sink address computing for memory operands into the block.
1045          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
1046      } else {
1047        // Other CallInst optimizations that don't need to muck with the
1048        // enclosing iterator here.
1049        MadeChange |= OptimizeCallInst(CI);
1050      }
1051    }
1052  }
1053
1054  return MadeChange;
1055}
1056