InstCombineLoadStoreAlloca.cpp revision 05d62537276197b3351d9887f4967590b6a3b901
1//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the visit functions for load, store and alloca.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombine.h"
15#include "llvm/IntrinsicInst.h"
16#include "llvm/Target/TargetData.h"
17#include "llvm/Transforms/Utils/BasicBlockUtils.h"
18#include "llvm/Transforms/Utils/Local.h"
19#include "llvm/ADT/Statistic.h"
20using namespace llvm;
21
22STATISTIC(NumDeadStore, "Number of dead stores eliminated");
23
24Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
25  // Ensure that the alloca array size argument has type intptr_t, so that
26  // any casting is exposed early.
27  if (TD) {
28    const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
29    if (AI.getArraySize()->getType() != IntPtrTy) {
30      Value *V = Builder->CreateIntCast(AI.getArraySize(),
31                                        IntPtrTy, false);
32      AI.setOperand(0, V);
33      return &AI;
34    }
35  }
36
37  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
38  if (AI.isArrayAllocation()) {  // Check C != 1
39    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
40      const Type *NewTy =
41        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
42      assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
43      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
44      New->setAlignment(AI.getAlignment());
45
46      // Scan to the end of the allocation instructions, to skip over a block of
47      // allocas if possible...also skip interleaved debug info
48      //
49      BasicBlock::iterator It = New;
50      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
51
52      // Now that I is pointing to the first non-allocation-inst in the block,
53      // insert our getelementptr instruction...
54      //
55      Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
56      Value *Idx[2];
57      Idx[0] = NullIdx;
58      Idx[1] = NullIdx;
59      Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
60                                                   New->getName()+".sub", It);
61
62      // Now make everything use the getelementptr instead of the original
63      // allocation.
64      return ReplaceInstUsesWith(AI, V);
65    } else if (isa<UndefValue>(AI.getArraySize())) {
66      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
67    }
68  }
69
70  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
71    // If alloca'ing a zero byte object, replace the alloca with a null pointer.
72    // Note that we only do this for alloca's, because malloc should allocate
73    // and return a unique pointer, even for a zero byte allocation.
74    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
75      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
76
77    // If the alignment is 0 (unspecified), assign it the preferred alignment.
78    if (AI.getAlignment() == 0)
79      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
80  }
81
82  return 0;
83}
84
85
86/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
///
/// Only pointer-to-pointer casts within the same address space are handled,
/// and only when the source and destination pointee types are integer,
/// pointer, or vector types of identical bit width, so that loading through
/// the source pointer and bitcasting the value is equivalent.
///
/// NOTE(review): the TD parameter is unused here; the routine queries
/// IC.getTargetData() directly. The signature is kept for the callers.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const TargetData *TD) {
  // The load's pointer operand is the cast (CastInst or ConstantExpr cast,
  // both are Users); CastOp is the pointer being casted.
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  const PointerType *DestTy = cast<PointerType>(CI->getType());
  const Type *DestPTy = DestTy->getElementType();
  if (const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    const Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
         DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (const ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            // Step through the array with a constant gep, then retry with the
            // element type as the new source pointee.
            Value *Idxs[2];
            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
            Idxs[1] = Idxs[0];
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getTargetData() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
            SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // casted to a pointer, this pessimizes pointer analysis a lot.
          (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
          IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
               IC.getTargetData()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}
140
/// visitLoadInst - Simplify a load: improve its alignment, fold loads through
/// casts, do trivial store-to-load forwarding / load CSE, turn loads of
/// null/undef addresses into unreachable markers, and push loads through
/// single-use selects.  Returns the replacement instruction or 0.
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
    // Only raise the alignment; an unspecified (0) alignment is treated as
    // the ABI alignment of the loaded type.
    if (KnownAlign >
        (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
                                  LI.getAlignment()))
      LI.setAlignment(KnownAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile loads.
  if (LI.isVolatile()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consequtive memory accesses to the same location,
  // separated by a few arithmetic operations.
  // The scan is limited to 6 instructions back from the load.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many others simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        // Both new loads inherit the original load's alignment.
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}
244
245/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
246/// when possible.  This makes it generally easy to do alias analysis and/or
247/// SROA/mem2reg of the memory object.
/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
///
/// Handles integer<->pointer pointee types of identical bit width in the same
/// address space.  If the source pointee is an aggregate, a "noop gep" down to
/// its first scalar element is emitted first (see NewGEPIndices below).
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  // The store's pointer operand is the cast; CastOp is the pointer under it.
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  const Type *SrcPTy = SrcTy->getElementType();

  // Only rewrite stores of integer or pointer values.
  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    // Descend through nested arrays/structs, collecting a zero index per
    // level, until a non-aggregate (or empty struct) is reached.
    while (1) {
      if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    // SrcTy now describes a pointer to the innermost first element.
    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces or if they point to
  // values with different sizes, we can't do the transformation.
  if (!IC.getTargetData() ||
      SrcTy->getAddressSpace() !=
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
      IC.getTargetData()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  // Pick the right cast opcode for the value: bitcast by default, or
  // inttoptr/ptrtoint when the value and destination kinds differ.
  Instruction::CastOps opcode = Instruction::BitCast;
  const Type* CastSrcTy = SIOp0->getType();
  const Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // SIOp0 is a pointer to aggregate and this is a store to the first field,
  // emit a GEP to index into its first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
                                           NewGEPIndices.end());

  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  return new StoreInst(NewCast, CastOp);
}
330
331/// equivalentAddressValues - Test if A and B will obviously have the same
332/// value. This includes recognizing that %t0 and %t1 will have the same
333/// value in code like this:
334///   %t0 = getelementptr \@a, 0, 3
335///   store i32 0, i32* %t0
336///   %t1 = getelementptr \@a, 0, 3
337///   %t2 = load i32* %t1
338///
339static bool equivalentAddressValues(Value *A, Value *B) {
340  // Test if the values are trivially equivalent.
341  if (A == B) return true;
342
343  // Test if the values come form identical arithmetic instructions.
344  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
345  // its only used to compare two uses within the same basic block, which
346  // means that they'll always either have the same value or one of them
347  // will have an undefined value.
348  if (isa<BinaryOperator>(A) ||
349      isa<CastInst>(A) ||
350      isa<PHINode>(A) ||
351      isa<GetElementPtrInst>(A))
352    if (Instruction *BI = dyn_cast<Instruction>(B))
353      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
354        return true;
355
356  // Otherwise they may not be equivalent.
357  return false;
358}
359
360// If this instruction has two uses, one of which is a llvm.dbg.declare,
361// return the llvm.dbg.declare.
362DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
363  if (!V->hasNUses(2))
364    return 0;
365  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
366       UI != E; ++UI) {
367    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
368      return DI;
369    if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
370      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
371        return DI;
372      }
373  }
374  return 0;
375}
376
/// visitStoreInst - Simplify a store: delete dead stores to otherwise-unused
/// allocas, improve alignment, perform a short-range backward dead-store scan,
/// fold stores through casts, mark stores to null as unreachable code, and
/// finally try to sink the store into the successor block.
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  // If the RHS is an alloca with a two uses, the other one being a
  // llvm.dbg.declare, zapify the store and the declare, making the
  // alloca dead.  We must do this to prevent declares from affecting
  // codegen.
  if (!SI.isVolatile()) {
    if (Ptr->hasOneUse()) {
      if (isa<AllocaInst>(Ptr))
        return EraseInstFromFunction(SI);
      // Same idea through a single-use GEP off the alloca.
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
        if (isa<AllocaInst>(GEP->getOperand(0))) {
          if (GEP->getOperand(0)->hasOneUse())
            return EraseInstFromFunction(SI);
          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
            EraseInstFromFunction(*DI);
            return EraseInstFromFunction(SI);
          }
        }
      }
    }
    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
      EraseInstFromFunction(*DI);
      return EraseInstFromFunction(SI);
    }
  }

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
    // Only raise alignment; 0 means "unspecified", treated as ABI alignment.
    if (KnownAlign >
        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
                                  SI.getAlignment()))
      SI.setAlignment(KnownAlign);
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;  // Refund the budget consumed by this skipped instruction.
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1),
                                                          SI.getOperand(1))) {
        ++NumDeadStore;
        // Re-advance past the instruction we are about to erase so the
        // iterator stays valid.
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          !SI.isVolatile())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }


  if (SI.isVolatile()) return 0;  // Don't hack volatile stores.

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      // Replace the stored value with undef, but keep the store itself so
      // SimplifyCFG can turn it into unreachable.
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}
506
507/// SimplifyStoreAtEndOfBlock - Turn things like:
508///   if () { *P = v1; } else { *P = v2 }
509/// into a phi node with a store in the successor.
510///
511/// Simplify things like:
512///   *P = v1; if () { *P = v2; }
513/// into a phi node with a store in the successor.
514///
/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
/// Returns true if the transformation was performed (both original stores are
/// erased and a merged store is inserted in the successor).
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // if so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *OtherBB = 0;
  if (*PI != StoreBB)
    OtherBB = *PI;
  ++PI;
  if (PI == pred_end(DestBB))
    return false;

  if (*PI != StoreBB) {
    if (OtherBB)
      return false;  // Two predecessors, neither of which is StoreBB.
    OtherBB = *PI;
  }
  if (++PI != pred_end(DestBB))
    return false;    // More than two predecessors.

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  there is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or if the
    // alignments differ, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        OtherStore->getAlignment() != SI.getAlignment())
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    // Scan backward from the terminator.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            OtherStore->getAlignment() != SI.getAlignment())
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  // If both blocks store the same value, no PHI is necessary.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
    PN->reserveOperandSpace(2);
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  // NOTE(review): only OtherStore's volatility is propagated here; SI is
  // known non-volatile at the (sole visible) call site in visitStoreInst.
  BBI = DestBB->getFirstNonPHI();
  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
                                    OtherStore->isVolatile(),
                                    SI.getAlignment()), *BBI);

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}
627