//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
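///
/// As an illustrative (not original) example, this accepts a constant
/// expression such as
///   getelementptr inbounds ([8 x i32]* @table, i64 0, i64 2)
/// when the hypothetical @table is declared 'constant', but it rejects GEP
/// *instructions* and any global that is not marked constant.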
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::AddrSpaceCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  }
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer; return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffset pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
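///
/// A sketch of the pattern this recognizes (illustrative IR only; %buf, %p and
/// @init are made-up names):
///   %buf = alloca [16 x i8]
///   %p = bitcast [16 x i8]* %buf to i8*
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p,
///          i8* bitcast ([16 x i8]* @init to i8*), i64 16, i32 1, i1 false)
///   ; ... only reads of %buf afterwards ...
/// TheCopy is set to the memcpy, and the caller may then replace %buf with
/// @init, provided @init is a constant global.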
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
  ValuesToInspect.push_back(std::make_pair(V, false));
  while (!ValuesToInspect.empty()) {
    auto ValuePair = ValuesToInspect.pop_back_val();
    const bool IsOffset = ValuePair.second;
    for (auto &U : ValuePair.first->uses()) {
      Instruction *I = cast<Instruction>(U.getUser());

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Simple (non-volatile, non-atomic) loads are just reads and are
        // always ok; anything else is rejected.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        ValuesToInspect.push_back(std::make_pair(I, IsOffset));
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // A GEP with all zero indices does not offset the pointer; any other
        // GEP does.
        ValuesToInspect.push_back(
            std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
        continue;
      }

      if (CallSite CS = I) {
        // If this is the function being called then we treat it like a load and
        // ignore it.
        if (CS.isCallee(&U))
          continue;

        // Inalloca arguments are clobbered by the call.
        unsigned ArgNo = CS.getArgumentNo(&U);
        if (CS.isInAllocaArgument(ArgNo))
          return false;

        // If this is a readonly/readnone call site, then we know it is just a
        // load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        if (CS.onlyReadsMemory() &&
            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (CS.isByValArgument(ArgNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
            II->getIntrinsicID() == Intrinsic::lifetime_end) {
          assert(II->use_empty() && "Lifetime markers have no result to use!");
          ToDelete.push_back(II);
          continue;
        }
      }

      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it since it is a load (unless the transfer is volatile).
      if (U.getOperandNo() == 1) {
        if (MI->isVolatile()) return false;
        continue;
      }

      // If we have already seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/move is not a constant global, reject it.
      if (!pointsToConstantGlobal(MI->getSource()))
        return false;

      // Otherwise, the transform is safe.  Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return the copy (memcpy/memmove) from a
/// constant global if the specified alloca is only modified by such a copy;
/// otherwise return null.  If we can prove this, we can replace any uses of
/// the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (DL) {
    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
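  // For example (illustrative IR; the names are made up):
  //   %buf = alloca i32, i32 4, align 4
  // becomes
  //   %buf1 = alloca [4 x i32], align 4
  //   %buf1.sub = getelementptr inbounds [4 x i32]* %buf1, i64 0, i64 0
  // and all uses of %buf are replaced with %buf1.sub.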
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It points to the first non-allocation instruction in the
      // block, insert our getelementptr instruction...
      //
      Type *IdxTy = DL
                  ? DL->getIntPtrType(AI.getType())
                  : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (DL && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero-byte objects to the entry block and merge them
    // together.  Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero-byte allocation.
    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero-sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance, as the array size was already forced to a constant above.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use the
    // constant global instead.  This is commonly produced by the CFE from
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), DL);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // Finally, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
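///
/// For example (illustrative IR, assuming i32 and float have the same size in
/// the target data layout):
///   %c = bitcast float* %p to i32*
///   %v = load i32* %c
/// becomes
///   %v.f = load float* %p
///   %v   = bitcast float %v.f to i32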
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *DL) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return nullptr;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
         DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Type *IdxTy = DL
                        ? DL->getIntPtrType(SrcTy)
                        : Type::getInt64Ty(SrcTy->getContext());
            Value *Idx = Constant::getNullValue(IdxTy);
            Value *Idxs[2] = { Idx, Idx };
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
            SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer that is then
          // cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPtrOrPtrVectorTy() ==
           LI.getType()->isPtrOrPtrVectorTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
               IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the loaded value afterwards.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        PointerType *OldTy = dyn_cast<PointerType>(NewLoad->getType());
        PointerType *NewTy = dyn_cast<PointerType>(LI.getType());
        if (OldTy && NewTy &&
            OldTy->getAddressSpace() != NewTy->getAddressSpace()) {
          return new AddrSpaceCastInst(NewLoad, LI.getType());
        }

        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return nullptr;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, DL->getPrefTypeAlignment(LI.getType()),DL);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      DL->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return nullptr;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
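  //
  // For example (illustrative IR):
  //   store i32 %v, i32* %p
  //   %x = add i32 %a, %b          ; no intervening write to %p
  //   %r = load i32* %p            ; %r is replaced by %v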
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, DL))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, enables many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
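    // For example (illustrative IR, assuming both %a and %b are known to be
    // safe to load from):
    //   %p = select i1 %cond, i32* %a, i32* %b
    //   %v = load i32* %p
    // becomes
    //   %a.val = load i32* %a
    //   %b.val = load i32* %b
    //   %v = select i1 %cond, i32 %a.val, i32 %b.val
    //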
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return nullptr;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
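///
/// For example (illustrative IR, assuming i32 and float have the same size in
/// the target data layout):
///   %c = bitcast float* %p to i32*
///   store i32 %v, i32* %c
/// becomes
///   %v.f = bitcast i32 %v to float
///   store float %v.f, float* %p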
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (!SrcTy) return nullptr;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return nullptr;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source points to an aggregate (array or struct), descend through
  // trivial 'gep P, 0, 0, ...' indices to its first scalar element and see if
  // that helps matters.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return nullptr;

  // If the pointers point into different address spaces don't do the
  // transformation.
  if (SrcTy->getAddressSpace() !=
      cast<PointerType>(CI->getType())->getAddressSpace())
    return nullptr;

  // If the pointers point to values of different sizes don't do the
  // transformation.
  if (!IC.getDataLayout() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return nullptr;

  // If the pointers point to pointers to different address spaces don't do the
  // transformation. It is not safe to introduce an addrspacecast instruction in
  // this case since, depending on the target, addrspacecast may not be a no-op
  // cast.
  if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
      SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
    return nullptr;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = DestPTy;
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (CastSrcTy->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If the destination points to an aggregate, emit a GEP that indexes down to
  // its first scalar field and store through that pointer instead.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  Value *SIOp0 = SI.getOperand(0);
  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (DL) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, DL->getPrefTypeAlignment(Val->getType()),
                                 DL);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      DL->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return nullptr;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
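  //
  // For example (illustrative IR):
  //   store i32 1, i32* %p
  //   %x = or i32 %y, 16           ; no intervening memory access
  //   store i32 %x, i32* %p        ; makes the first store dead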
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the value being stored
    // is the result of a load from the same pointer, then *this* store is dead
    // (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return nullptr;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return nullptr;  // xform done!

  return nullptr;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
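/// For the if/then/else form, this roughly produces (illustrative IR; the
/// value and block names are made up):
///   Dest:
///     %storemerge = phi i32 [ %v1, %ThenBB ], [ %v2, %ElseBB ]
///     store i32 %storemerge, i32* %P
///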
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the
  // 'if/then/else' case, where there is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to make sure nothing
    // reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If both stores carry TBAA tags, merge them into the most generic tag and
  // attach it to the new store.
  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
                               OtherStore->getMetadata(LLVMContext::MD_tbaa))))
      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);


  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}
