CodeGenPrepare.cpp revision ab63152871f4144050d0a58d592a95e089fe40d4
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
                                       cl::init(false), cl::Hidden);

namespace {
  class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;

    /// BackEdges - Keep a set of all the loop back edges.
    ///
    SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> BackEdges;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(&ID), TLI(tli) {}
    bool runOnFunction(Function &F);

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, const Type *AccessTy,
                            DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                               DenseMap<Value*,Value*> &SunkAddrs);
    bool OptimizeExtUses(Instruction *I);
    void findLoopBackEdges(Function &F);
  };
}

char CodeGenPrepare::ID = 0;
static RegisterPass<CodeGenPrepare> X("codegenprepare",
                                      "Optimize for code generation");

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}

/// findLoopBackEdges - Do a DFS walk to find loop back edges.
///
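/// As an illustrative note (a sketch, not from the original comments): an edge
/// (ParentBB, BB) is a back edge when BB is still on the DFS stack at the time
/// the edge is examined.  For a CFG like entry -> header -> body -> header,
/// the pair (body, header) ends up in BackEdges.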
void CodeGenPrepare::findLoopBackEdges(Function &F) {
  SmallPtrSet<BasicBlock*, 8> Visited;
  SmallVector<std::pair<BasicBlock*, succ_iterator>, 8> VisitStack;
  SmallPtrSet<BasicBlock*, 8> InStack;

  BasicBlock *BB = &F.getEntryBlock();
  if (succ_begin(BB) == succ_end(BB))
    return;
  Visited.insert(BB);
  VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
  InStack.insert(BB);
  do {
    std::pair<BasicBlock*, succ_iterator> &Top = VisitStack.back();
    BasicBlock *ParentBB = Top.first;
    succ_iterator &I = Top.second;

    bool FoundNew = false;
    while (I != succ_end(ParentBB)) {
      BB = *I++;
      if (Visited.insert(BB)) {
        FoundNew = true;
        break;
      }
      // Successor is in VisitStack, it's a back edge.
      if (InStack.count(BB))
        BackEdges.insert(std::make_pair(ParentBB, BB));
    }

    if (FoundNew) {
      // Go down one level if there is an unvisited successor.
      InStack.insert(BB);
      VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
    } else {
      // Go up one level.
      std::pair<BasicBlock*, succ_iterator> &Pop = VisitStack.back();
      InStack.erase(Pop.first);
      VisitStack.pop_back();
    }
  } while (!VisitStack.empty());
}


bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  findLoopBackEdges(F);

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes
/// and an unconditional branch.  Passes before isel (e.g. LSR/loopsimplify)
/// often split edges in ways that are non-optimal for isel.  Start by
/// eliminating these blocks so we can split them the way we want them.
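///
/// For illustration (assumed IR, not from the original source), a "mostly
/// empty" block that this transformation removes looks like:
///
///   bb:                               ; preds = %a, %b
///     %v = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
///
/// After merging, %bb's predecessors branch straight to %dest, and %dest's
/// PHIs pick up %v's incoming values directly.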
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch isn't a phi node, then other stuff
    // is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      if (!isa<PHINode>(BBI)) continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
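///
/// As a hedged example of the conflict rejected below (assumed IR, not from
/// the original comments): if block %p branches to both BB and DestBB, a PHI
/// in DestBB already has an incoming value for %p.  Merging BB in would route
/// a second, possibly different, value for %p into the same PHI, so the merge
/// must be refused unless the two values agree.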
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB and is a PHINode, then check each incoming
      // value.  If an incoming value is not from BB, then this is a complex
      // condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
/// an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function.  If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
      return;
    }
  }

  // Otherwise, DestBB has multiple predecessors.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  BB->eraseFromParent();

  DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
}


/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen.  We only do this if the successor has
/// phi nodes (otherwise critical edges are ok).  If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
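///
/// Sketch of the reuse case (illustrative only): if Dest already has a
/// predecessor that is an empty block ending in "br label %Dest", and jumping
/// to that block yields the same PHI values in Dest as jumping to Dest
/// directly, the terminator is simply retargeted at that block instead of
/// creating a new critical-edge block.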
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
                     SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> &BackEdges,
                             Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");

  // As a hack, never split backedges of loops.  Even though the copy for any
  // PHIs inserted on the backedge would be dead for exits from the loop, we
  // assume that the cost of *splitting* the backedge would be too high.
  if (BackEdges.count(std::make_pair(TIBB, Dest)))
    return;

  if (!FactorCommonPreds) {
    /// TIPHIValues - This array is lazily computed to determine the values of
    /// PHIs in Dest that TI would provide.
    SmallVector<Value*, 32> TIPHIValues;

    // Check to see if Dest has any blocks that can be used as a split edge for
    // this terminator.
    for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
      BasicBlock *Pred = *PI;
      // To be usable, the pred has to end with an uncond branch to the dest.
      BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!PredBr || !PredBr->isUnconditional() ||
          // Must be empty other than the branch.
          &Pred->front() != PredBr ||
          // Cannot be the entry block; its label does not get emitted.
          Pred == &(Dest->getParent()->getEntryBlock()))
        continue;

      // Finally, since we know that Dest has phi nodes in it, we have to make
      // sure that jumping to Pred will have the same effect as going to Dest in
      // terms of PHI values.
      PHINode *PN;
      unsigned PHINo = 0;
      bool FoundMatch = true;
      for (BasicBlock::iterator I = Dest->begin();
           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
        if (PHINo == TIPHIValues.size())
          TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

        // If the PHI entry doesn't work, we can't use this pred.
        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
          FoundMatch = false;
          break;
        }
      }

      // If we found a workable predecessor, change TI to branch to Pred
      // instead of Dest.
      if (FoundMatch) {
        Dest->removePredecessor(TIBB);
        TI->setSuccessor(SuccNum, Pred);
        return;
      }
    }

    SplitCriticalEdge(TI, SuccNum, P, true);
    return;
  }

  PHINode *PN;
  SmallVector<Value*, 8> TIPHIValues;
  for (BasicBlock::iterator I = Dest->begin();
       (PN = dyn_cast<PHINode>(I)); ++I)
    TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

  SmallVector<BasicBlock*, 8> IdenticalPreds;
  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (BackEdges.count(std::make_pair(Pred, Dest)))
      continue;
    if (Pred == TIBB)
      IdenticalPreds.push_back(Pred);
    else {
      bool Identical = true;
      unsigned PHINo = 0;
      for (BasicBlock::iterator I = Dest->begin();
           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
          Identical = false;
          break;
        }
      if (Identical)
        IdenticalPreds.push_back(Pred);
    }
  }

  assert(!IdenticalPreds.empty());
  SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
                         ".critedge", P);
}


/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, int->uint, or
/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
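/// Illustrative sketch of the sinking (assumed IR, not from the original
/// source):
///
///   defbb:
///     %c = bitcast i32* %p to i8*     ; noop pointer-to-pointer copy
///     ...
///   usebb:
///     store i8 0, i8* %c
///
/// A fresh "bitcast i32* %p to i8*" is inserted at the top of %usebb, so the
/// copy no longer has to live across the block boundary in its own vreg.
///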
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // Check whether this is a noop copy.
  MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  MVT DstVT = TLI.getValueType(CI->getType());

  // If this is an fp<->int conversion, it is not a noop.
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to.  This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
    SrcVT = TLI.getTypeToTransformTo(SrcVT);
  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
    DstVT = TLI.getTypeToTransformTo(DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      unsigned OpVal = UI.getOperandNo()/2;
      UserBB = PN->getIncomingBlock(OpVal);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced.  This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
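///
/// Illustrative sketch (assumed IR, not from the original source): given
///
///   bb1:
///     %c = icmp eq i32 %x, 0
///     br label %bb2
///   bb2:
///     br i1 %c, label %t, label %f
///
/// a duplicate icmp is inserted in %bb2 and the branch rewritten to use it,
/// letting isel match the compare and the branch within one block.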
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedCmp =
        CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

//===----------------------------------------------------------------------===//
// Addressing Mode Analysis and Optimization
//===----------------------------------------------------------------------===//

namespace {
  /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
  /// which holds actual Value*'s for register values.
  struct ExtAddrMode : public TargetLowering::AddrMode {
    Value *BaseReg;
    Value *ScaledReg;
    ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
    void print(OStream &OS) const;
    void dump() const {
      print(cerr);
      cerr << '\n';
    }
  };
} // end anonymous namespace

static inline OStream &operator<<(OStream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}

void ExtAddrMode::print(OStream &OS) const {
  bool NeedPlus = false;
  OS << "[";
  if (BaseGV)
    OS << (NeedPlus ? " + " : "")
       << "GV:%" << BaseGV->getName(), NeedPlus = true;

  if (BaseOffs)
    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;

  if (BaseReg)
    OS << (NeedPlus ? " + " : "")
       << "Base:%" << BaseReg->getName(), NeedPlus = true;
  if (Scale)
    OS << (NeedPlus ? " + " : "")
       << Scale << "*%" << ScaledReg->getName(), NeedPlus = true;

  OS << ']';
}

namespace {
/// AddressingModeMatcher - This class exposes a single public method, which is
/// used to construct a "maximal munch" of the addressing mode for the target
/// specified by TLI for an access to "V" with an access type of AccessTy.  This
/// returns the addressing mode that is actually matched by value, but also
/// returns the list of instructions involved in that addressing computation in
/// AddrModeInsts.
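///
/// As a rough illustration (not from the original comments), on a target with
/// [base + scale*index + disp] addressing, matching the address
///   %a = getelementptr i32* %p, i32 %i
/// could produce an ExtAddrMode with BaseReg = %p, Scale = 4 and
/// ScaledReg = %i, with the GEP recorded in AddrModeInsts.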
class AddressingModeMatcher {
  SmallVectorImpl<Instruction*> &AddrModeInsts;
  const TargetLowering &TLI;

  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
  /// the memory instruction that we're computing this address for.
  const Type *AccessTy;
  Instruction *MemoryInst;

  /// AddrMode - This is the addressing mode that we're building up.  This is
  /// part of the return value of this addressing mode matching stuff.
  ExtAddrMode &AddrMode;

  /// IgnoreProfitability - This is set to true when we should not do
  /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
  /// always returns true.
  bool IgnoreProfitability;

  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
                        const TargetLowering &T, const Type *AT,
                        Instruction *MI, ExtAddrMode &AM)
    : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
    IgnoreProfitability = false;
  }
public:

  /// Match - Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy.  This returns a list of involved
  /// instructions in AddrModeInsts.
  static ExtAddrMode Match(Value *V, const Type *AccessTy,
                           Instruction *MemoryInst,
                           SmallVectorImpl<Instruction*> &AddrModeInsts,
                           const TargetLowering &TLI) {
    ExtAddrMode Result;

    bool Success =
      AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
                            MemoryInst, Result).MatchAddr(V, 0);
    Success = Success; assert(Success && "Couldn't select *anything*?");
    return Result;
  }
private:
  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
  bool MatchAddr(Value *V, unsigned Depth);
  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
                                            ExtAddrMode &AMBefore,
                                            ExtAddrMode &AMAfter);
  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
};
} // end anonymous namespace

/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
                                             unsigned Depth) {
  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
  // mode.  Just process that directly.
  if (Scale == 1)
    return MatchAddr(ScaleReg, Depth);

  // If the scale is 0, it takes nothing to add this.
  if (Scale == 0)
    return true;

  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.
  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
    return false;

  ExtAddrMode TestAddrMode = AddrMode;

  // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
  // [A+B + A*7] -> [B+A*8].
  TestAddrMode.Scale += Scale;
  TestAddrMode.ScaledReg = ScaleReg;

  // If the new address isn't legal, bail out.
  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
    return false;

  // It was legal, so commit it.
  AddrMode = TestAddrMode;

  // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
  // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
  // X*Scale + C*Scale to addr mode.
  ConstantInt *CI; Value *AddLHS;
  if (match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
    TestAddrMode.ScaledReg = AddLHS;
    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;

    // If this addressing mode is legal, commit it and remember that we folded
    // this instruction.
    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
      AddrMode = TestAddrMode;
      return true;
    }
  }

  // Otherwise, not (x+c)*scale, just return what we have.
  return true;
}

/// MightBeFoldableInst - This is a little filter, which returns true if an
/// addressing computation involving I might be folded into a load/store
/// accessing it.  This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::BitCast:
    // Don't touch identity bitcasts.
    if (I->getType() == I->getOperand(0)->getType())
      return false;
    return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType());
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return true;
  case Instruction::IntToPtr:
    // We know the input is intptr_t, so this is foldable.
    return true;
  case Instruction::Add:
    return true;
  case Instruction::Mul:
  case Instruction::Shl:
    // Can only handle X*C and X << C.
    return isa<ConstantInt>(I->getOperand(1));
  case Instruction::GetElementPtr:
    return true;
  default:
    return false;
  }
}


/// MatchOperationAddr - Given an instruction or constant expr, see if we can
/// fold the operation into the addressing mode.  If so, update the addressing
/// mode and return true, otherwise return false without modifying AddrMode.
bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth) {
  // Avoid exponential behavior on extremely deep expression trees.
  if (Depth >= 5) return false;

  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return MatchAddr(AddrInst->getOperand(0), Depth);
  case Instruction::IntToPtr:
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
        TLI.getPointerTy())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::BitCast:
    // BitCast is always a noop, and we can handle it as long as it is
    // int->int or pointer->pointer (we don't want int<->fp or something).
    if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) ||
         isa<IntegerType>(AddrInst->getOperand(0)->getType())) &&
        // Don't touch identity bitcasts.  These were probably put here by LSR,
        // and we don't want to mess around with them.  Assume it knows what it
        // is doing.
        AddrInst->getOperand(0)->getType() != AddrInst->getType())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
        MatchAddr(AddrInst->getOperand(0), Depth+1))
      return true;

    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);

    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
        MatchAddr(AddrInst->getOperand(1), Depth+1))
      return true;

    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    break;
  }
  //case Instruction::Or:
  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  //break;
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) return false;
    int64_t Scale = RHS->getSExtValue();
    if (Opcode == Instruction::Shl)
      Scale = 1 << Scale;

    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP.  We check if it contains constant offsets and at most
    // one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    const TargetData *TD = TLI.getTargetData();
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD->getStructLayout(STy);
        unsigned Idx =
          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = TD->getABITypeSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          ConstantOffset += CI->getSExtValue()*TypeSize;
        } else if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
          if (VariableOperand != -1)
            return false;

          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }

    // A common case is for the GEP to only do a constant offset.  In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
        // Check to see if we can fold the base pointer in too.
        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
          return true;
      }
      AddrMode.BaseOffs -= ConstantOffset;
      return false;
    }

    // Save the valid addressing mode in case we can't match.
    ExtAddrMode BackupAddrMode = AddrMode;

    // Check that this has no base reg yet.  If it does, we won't have a place
    // to put the base of the GEP (assuming it is not a null ptr).
    bool SetBaseReg = true;
    if (isa<ConstantPointerNull>(AddrInst->getOperand(0)))
      SetBaseReg = false;   // null pointer base doesn't need representation.
    else if (AddrMode.HasBaseReg)
      return false;  // Base register already specified, can't match GEP.
    else {
      // Otherwise, we'll use the GEP base as the BaseReg.
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
    }

    // See if the scale and offset amount is valid for this target.
    AddrMode.BaseOffs += ConstantOffset;

    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
                          Depth)) {
      AddrMode = BackupAddrMode;
      return false;
    }

    // If we have a null as the base of the GEP, folding in the constant offset
    // plus variable scale is all we can do.
    if (!SetBaseReg) return true;

    // If this match succeeded, we know that we can form an address with the
    // GepBase as the basereg.  Match the base pointer of the GEP more
    // aggressively by zeroing out BaseReg and rematching.  If the base is
    // (for example) another GEP, this allows merging in that other GEP into
    // the addressing mode we're forming.
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = 0;
    bool Success = MatchAddr(AddrInst->getOperand(0), Depth+1);
    assert(Success && "MatchAddr should be able to fill in BaseReg!");
    Success = Success;
    return true;
  }
  }
  return false;
}

/// MatchAddr - If we can, try to add the value of 'Addr' into the current
/// addressing mode.  If Addr can't be added to AddrMode this returns false and
/// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
/// or intptr_t for the target.
///
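/// As a rough illustration (not from the original comments): for
/// "%a = add i32 %base, 42", MatchAddr lets MatchOperationAddr try to fold
/// the operands (BaseOffs += 42, then BaseReg = %base); if the folded form is
/// not legal for the target, it falls back to treating the whole value as a
/// plain [reg] base or, failing that, as a [r+r] scaled register.
///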
bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
    // Fold in immediates if legal for the target.
    AddrMode.BaseOffs += CI->getSExtValue();
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.BaseOffs -= CI->getSExtValue();
  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
    // If this is a global variable, try to fold it into the addressing mode.
    if (AddrMode.BaseGV == 0) {
      AddrMode.BaseGV = GV;
      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
        return true;
      AddrMode.BaseGV = 0;
    }
  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // Check to see if it is possible to fold this operation.
    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
      // Okay, it's possible to fold this.  Check to see if it is actually
      // *profitable* to do so.  We use a simple cost model to avoid increasing
      // register pressure too much.
      if (I->hasOneUse() ||
          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
        AddrModeInsts.push_back(I);
        return true;
      }

      // It isn't profitable to do this, roll back.
      //cerr << "NOT FOLDING: " << *I;
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
    }
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
      return true;
  } else if (isa<ConstantPointerNull>(Addr)) {
    // Null pointer gets folded without affecting the addressing mode.
    return true;
  }

  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    AddrMode.BaseReg = Addr;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = 0;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    AddrMode.ScaledReg = Addr;
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.Scale = 0;
    AddrMode.ScaledReg = 0;
  }
  // Couldn't match.
  return false;
}


/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
/// inline asm call are due to memory operands.  If so, return true, otherwise
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                    const TargetLowering &TLI) {
  std::vector<InlineAsm::ConstraintInfo>
  Constraints = IA->ParseConstraints();

  unsigned ArgNo = 1;   // ArgNo - The operand of the CallInst.
  for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
      case InlineAsm::isOutput:
        if (OpInfo.isIndirect)
          OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
        break;
      case InlineAsm::isInput:
        OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
        break;
      case InlineAsm::isClobber:
        // Nothing to do.
        break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(OpInfo, SDValue(),
                             OpInfo.ConstraintType == TargetLowering::C_Memory);

    // If this asm operand is our Value*, and if it isn't an indirect memory
    // operand, we can't fold it!
    if (OpInfo.CallOperandVal == OpVal &&
        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
         !OpInfo.isIndirect))
      return false;
  }

  return true;
}


/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use.  If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
                              const TargetLowering &TLI) {
  // If we already considered this instruction, we're done.
  if (!ConsideredInsts.insert(I))
    return false;

  // If this is an obviously unfoldable instruction, bail out.
  if (!MightBeFoldableInst(I))
    return true;

  // Loop over all the uses, recursively processing them.
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
      if (UI.getOperandNo() == 0) return true; // Storing addr, not into addr.
      MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo()));
      continue;
    }

    if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
      if (IA == 0) return true;

      // If this is a memory operand, we're cool, otherwise bail out.
      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
        return true;
      continue;
    }

    if (FindAllMemoryUses(cast<Instruction>(*UI), MemoryUses, ConsideredInsts,
                          TLI))
      return true;
  }

  return false;
}


/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
/// the use site that we're folding it into.  If so, there is no cost to
/// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
/// that we know are live at the instruction already.
bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
                                                   Value *KnownLive2) {
  // If Val is either of the known-live values, we know it is live!
  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
    return true;

  // All values other than instructions and arguments (e.g. constants) are live.
  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;

  // If Val is a constant sized alloca in the entry block, it is live, this is
  // true because it is just a reference to the stack/frame pointer, which is
  // live for the whole function.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
    if (AI->isStaticAlloca())
      return true;

  // Check to see if this value is already used in the memory instruction's
  // block.  If so, it's already live into the block at the very least, so we
  // can reasonably fold it.
  BasicBlock *MemBB = MemoryInst->getParent();
  for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
       UI != E; ++UI)
    // We know that uses of arguments and instructions have to be instructions.
    if (cast<Instruction>(*UI)->getParent() == MemBB)
      return true;

  return false;
}



/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
/// mode of the machine to fold the specified instruction into a load or store
/// that ultimately uses it.  However, the specified instruction has multiple
/// uses.  Given this, it may actually increase register pressure to fold it
/// into the load.  For example, consider this code:
///
///     X = ...
///     Y = X+1
///     use(Y)   -> nonload/store
///     Z = Y+1
///     load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case.  This would make Y die earlier.
bool AddressingModeMatcher::
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                     ExtAddrMode &AMAfter) {
  if (IgnoreProfitability) return true;

  // AMBefore is the addressing mode before this instruction was folded into it,
  // and AMAfter is the addressing mode after the instruction was folded.  Get
  // the set of registers referenced by AMAfter and subtract out those
  // referenced by AMBefore: this is the set of values which folding in this
  // address extends the lifetime of.
  //
  // Note that there are only two potential values being referenced here,
  // BaseReg and ScaleReg (global addresses are always available, as are any
  // folded immediates).
  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;

  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
  // lifetime wasn't extended by adding this instruction.
  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    BaseReg = 0;
  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    ScaledReg = 0;

  // If folding this instruction (and its subexprs) didn't extend any live
  // ranges, we're ok with it.
  if (BaseReg == 0 && ScaledReg == 0)
    return true;

  // If all uses of this instruction are ultimately load/store/inlineasm's,
  // check to see if their addressing modes will include this instruction.  If
  // so, we can fold it into all uses, so it doesn't matter if it has multiple
  // uses.
  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
  SmallPtrSet<Instruction*, 16> ConsideredInsts;
  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
    return false;  // Has a non-memory, non-foldable use!

  // Now that we know that all uses of this instruction are part of a chain of
  // computation involving only operations that could theoretically be folded
  // into a memory use, loop over each of these uses and see if they could
  // *actually* fold the instruction.
  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
    Instruction *User = MemoryUses[i].first;
    unsigned OpNo = MemoryUses[i].second;

    // Get the access type of this use.  If the use isn't a pointer, we don't
    // know what it accesses.
    Value *Address = User->getOperand(OpNo);
    if (!isa<PointerType>(Address->getType()))
      return false;
    const Type *AddressAccessTy =
      cast<PointerType>(Address->getType())->getElementType();

    // Do a match against the root of this address, ignoring profitability. This
    // will tell us if the addressing mode for the memory operation will
    // *actually* cover the shared instruction.
    ExtAddrMode Result;
    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
                                  MemoryInst, Result);
    Matcher.IgnoreProfitability = true;
    bool Success = Matcher.MatchAddr(Address, 0);
    Success = Success; assert(Success && "Couldn't select *anything*?");

    // If the match didn't cover I, then it won't be shared by it.
    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
                  I) == MatchedAddrModeInsts.end())
      return false;

    MatchedAddrModeInsts.clear();
  }

  return true;
}


//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and store instructions often have
/// addressing modes that can do significant amounts of computation.  As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program.  The problem is that isel can only
/// see within a single block.  As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
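///
/// A hedged sketch of the effect (assumed IR, not from the original source):
/// if a load in %bb uses an address computed in another block as
///
///   %off  = mul i32 %i, 4
///   %addr = add i32 %base, %off
///
/// the matched computation is rebuilt as "sunkaddr" instructions immediately
/// before the load, so isel can fold the whole expression into the load's
/// addressing mode.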
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        const Type *AccessTy,
                                        DenseMap<Value*,Value*> &SunkAddrs) {
  // Figure out what addressing mode will be built up for this operation.
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode = AddressingModeMatcher::Match(Addr, AccessTy,MemoryInst,
                                                      AddrModeInsts, *TLI);

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(cerr << "CGP: Found      local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, reuse of the expr is
  // guaranteed to happen later.
  BasicBlock::iterator InsertPt = MemoryInst;

  // Now that we have determined the addressing expression we want to use, we
  // know that we have to sink it into this block.  Check to see if we have
  // already done this for some other load/store instr in this block; if so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << "\n");
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);
  } else {
    DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << "\n");
    const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType();

    Value *Result = 0;
    // Start with the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (isa<PointerType>(V->getType())) {
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = new TruncInst(V, IntPtrTy, "sunkaddr", InsertPt);
      } else {
        V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);
      }
      if (AddrMode.Scale != 1)
        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,
                                                          AddrMode.Scale),
                                      "sunkaddr", InsertPt);
      Result = V;
    }

    // Add in the base register.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType() != IntPtrTy)
        V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = new PtrToIntInst(AddrMode.BaseGV, IntPtrTy, "sunkaddr",
                                  InsertPt);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);
  }

  MemoryInst->replaceUsesOfWith(Addr, SunkAddr);

  if (Addr->use_empty())
    RecursivelyDeleteTriviallyDeadInstructions(Addr);
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;
  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  std::vector<InlineAsm::ConstraintInfo>
    ConstraintInfos = IA->ParseConstraints();

  /// ConstraintOperands - Information about all of the constraints.
  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.
      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
                             OpInfo.ConstraintType == TargetLowering::C_Memory);

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = OpInfo.CallOperandVal;
      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
    }
  }

  return MadeChange;
}

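/// OptimizeExtUses - If the result of a {s|z}ext and its source value are both
/// live out of the defining block, rewrite the out-of-block uses of the source
/// in terms of the extension, inserting a (free) truncate in each use block,
/// so that only the extended value, rather than both values, needs to be live
/// out of the block.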
bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If both the result of the {s|z}xt and its source are live out, rewrite all
  // other uses of the source with the result of the extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();

      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;

    MadeChange = true;
  }

  return MadeChange;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI.
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
      BasicBlock *SuccBB = BBTI->getSuccessor(i);
      if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, BackEdges, this);
    }
  }

  // Keep track of non-local addresses that have been sunk into this block.
  // This allows us to avoid inserting duplicate code for blocks with multiple
  // load/stores of the same address.
  DenseMap<Value*, Value*> SunkAddrs;

  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded.  The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop).  If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      bool Change = false;
      if (TLI) {
        Change = OptimizeNoopCopyExpression(CI, *TLI);
        MadeChange |= Change;
      }

      if (!Change && (isa<ZExtInst>(I) || isa<SExtInst>(I)))
        MadeChange |= OptimizeExtUses(I);
    } else if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
      MadeChange |= OptimizeCmpExpression(CI);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType(),
                                         SunkAddrs);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (TLI)
        MadeChange |= OptimizeMemoryInst(I, SI->getOperand(1),
                                         SI->getOperand(0)->getType(),
                                         SunkAddrs);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      if (GEPI->hasAllZeroIndices()) {
        // The GEP operand must be a pointer, so must its result -> BitCast
        Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                          GEPI->getName(), GEPI);
        GEPI->replaceAllUsesWith(NC);
        GEPI->eraseFromParent();
        MadeChange = true;
        BBI = NC;
      }
    } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue()))
        if (const TargetAsmInfo *TAI =
            TLI->getTargetMachine().getTargetAsmInfo()) {
          if (TAI->ExpandInlineAsm(CI))
            BBI = BB.begin();
          else
            // Sink address computing for memory operands into the block.
            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
        }
    }
  }

  return MadeChange;
}