//===- DeadStoreElimination.cpp - Fast Dead Store Elimination ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a trivial dead store elimination that only considers
// basic-block local redundant stores.
//
// FIXME: This should eventually be extended to be a post-dominator tree
// traversal.  Doing so would be pretty trivial.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h" // for CallSite, used directly below
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <map>
using namespace llvm;

#define DEBUG_TYPE "dse"

STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther, "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");

static cl::opt<bool>
EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
  cl::init(true), cl::Hidden,
  cl::desc("Enable partial-overwrite tracking in DSE"));
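
// Usage sketch (illustrative): the tracking can be toggled when running this
// pass through 'opt', e.g.
//   opt -dse -enable-dse-partial-overwrite-tracking=false -S in.ll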

//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//

/// Delete this instruction.  Before we do, go through and zero out all the
/// operands of this instruction.  If any of them become dead, delete them and
/// the computation tree that feeds them.
/// If ValueSet is non-null, remove any deleted instructions from it as well.
static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
                      MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
                      SmallSetVector<Value *, 16> *ValueSet = nullptr) {
  SmallVector<Instruction*, 32> NowDeadInsts;

  NowDeadInsts.push_back(I);
  --NumFastOther;

  // Keeping the iterator straight is a pain, so we let this routine tell the
  // caller what the next instruction is after we're done mucking about.
  BasicBlock::iterator NewIter = *BBI;

  // Before we touch this instruction, remove it from memdep!
  do {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();
    ++NumFastOther;

    // This instruction is dead, zap it, in stages.  Start by removing it from
    // MemDep, which needs to know the operands and needs it to be in the
    // function.
    MD.removeInstruction(DeadInst);

    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
      Value *Op = DeadInst->getOperand(op);
      DeadInst->setOperand(op, nullptr);

      // If this operand just became dead, add it to the NowDeadInsts list.
      if (!Op->use_empty()) continue;

      if (Instruction *OpI = dyn_cast<Instruction>(Op))
        if (isInstructionTriviallyDead(OpI, &TLI))
          NowDeadInsts.push_back(OpI);
    }

    if (NewIter == DeadInst->getIterator())
      NewIter = DeadInst->eraseFromParent();
    else
      DeadInst->eraseFromParent();

    if (ValueSet) ValueSet->remove(DeadInst);
  } while (!NowDeadInsts.empty());
  *BBI = NewIter;
}

/// Does this instruction write some memory?  This only returns true for things
/// that we can analyze with other helpers below.
static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
  if (isa<StoreInst>(I))
    return true;
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      return false;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
    case Intrinsic::init_trampoline:
    case Intrinsic::lifetime_end:
      return true;
    }
  }
  if (auto CS = CallSite(I)) {
    if (Function *F = CS.getCalledFunction()) {
      StringRef FnName = F->getName();
      if (TLI.has(LibFunc::strcpy) && FnName == TLI.getName(LibFunc::strcpy))
        return true;
      if (TLI.has(LibFunc::strncpy) && FnName == TLI.getName(LibFunc::strncpy))
        return true;
      if (TLI.has(LibFunc::strcat) && FnName == TLI.getName(LibFunc::strcat))
        return true;
      if (TLI.has(LibFunc::strncat) && FnName == TLI.getName(LibFunc::strncat))
        return true;
    }
  }
  return false;
}

/// Return a Location stored to by the specified instruction. If isRemovable
/// returns true, this function and getLocForRead completely describe the
/// memory operations for this instruction.
static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    return MemoryLocation::get(SI);

  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
    // memcpy/memmove/memset.
    MemoryLocation Loc = MemoryLocation::getForDest(MI);
    return Loc;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
  if (!II)
    return MemoryLocation();

  switch (II->getIntrinsicID()) {
  default:
    return MemoryLocation(); // Unhandled intrinsic.
  case Intrinsic::init_trampoline:
    // FIXME: We don't know the size of the trampoline, so we can't really
    // handle it here.
    return MemoryLocation(II->getArgOperand(0));
  case Intrinsic::lifetime_end: {
    uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
    return MemoryLocation(II->getArgOperand(1), Len);
  }
  }
}

/// Return the location read by the specified "hasMemoryWrite" instruction if
/// any.
static MemoryLocation getLocForRead(Instruction *Inst,
                                    const TargetLibraryInfo &TLI) {
  assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case");

  // The only instructions that both read and write are the mem transfer
  // instructions (memcpy/memmove).
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
    return MemoryLocation::getForSource(MTI);
  return MemoryLocation();
}

/// If the value of this instruction and the memory it writes to are unused,
/// may we delete this instruction?
static bool isRemovable(Instruction *I) {
  // Don't remove volatile/atomic stores.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->isUnordered();

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
    case Intrinsic::lifetime_end:
      // Never remove dead lifetime_end's, e.g. because it is followed by a
      // free.
      return false;
    case Intrinsic::init_trampoline:
      // Always safe to remove init_trampoline.
      return true;

    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      // Don't remove volatile memory intrinsics.
      return !cast<MemIntrinsic>(II)->isVolatile();
    }
  }

  if (auto CS = CallSite(I))
    return CS.getInstruction()->use_empty();

  return false;
}

/// Returns true if the end of this instruction can be safely shortened in
/// length.
static bool isShortenableAtTheEnd(Instruction *I) {
  // Don't shorten stores for now.
  if (isa<StoreInst>(I))
    return false;

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
      default: return false;
      case Intrinsic::memset:
      case Intrinsic::memcpy:
        // Do shorten memory intrinsics.
        // FIXME: Add memmove if it's also safe to transform.
        return true;
    }
  }

  // Don't shorten libcall calls for now.
  return false;
}

/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  // easily done by offsetting the source address.
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
  return II && II->getIntrinsicID() == Intrinsic::memset;
}

/// Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperand();
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
    return MI->getDest();

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::init_trampoline:
      return II->getArgOperand(0);
    }
  }

  CallSite CS(I);
  // All the supported functions so far happen to have dest as their first
  // argument.
  return CS.getArgument(0);
}

static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI) {
  uint64_t Size;
  if (getObjectSize(V, Size, DL, &TLI))
    return Size;
  return MemoryLocation::UnknownSize;
}
namespace {
enum OverwriteResult {
  OverwriteBegin,
  OverwriteComplete,
  OverwriteEnd,
  OverwriteUnknown
};
}

typedef DenseMap<Instruction *,
                 std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
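
// For example (illustrative): each inner map records, keyed by interval end
// with the interval start as the value, which parts of an earlier store have
// already been covered by later partial overwrites. If later stores cover
// [0, 4) and then [4, 8) of an earlier 8-byte store, the entry {4 -> 0} and
// the new interval merge into {8 -> 0}, proving the earlier store is fully
// overwritten.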

/// Return 'OverwriteComplete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
/// the 'Earlier' location is completely overwritten by 'Later',
/// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten
/// by 'Later', or 'OverwriteUnknown' if nothing can be determined.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
                                   const MemoryLocation &Earlier,
                                   const DataLayout &DL,
                                   const TargetLibraryInfo &TLI,
                                   int64_t &EarlierOff, int64_t &LaterOff,
                                   Instruction *DepWrite,
                                   InstOverlapIntervalsTy &IOL) {
  // If we don't know the sizes of either access, then we can't do a
  // comparison.
  if (Later.Size == MemoryLocation::UnknownSize ||
      Earlier.Size == MemoryLocation::UnknownSize)
    return OverwriteUnknown;

  const Value *P1 = Earlier.Ptr->stripPointerCasts();
  const Value *P2 = Later.Ptr->stripPointerCasts();

  // If the start pointers are the same, we just have to compare sizes to see
  // if the later store was larger than the earlier store.
  if (P1 == P2) {
    // Make sure that the Later size is >= the Earlier size.
    if (Later.Size >= Earlier.Size)
      return OverwriteComplete;
  }

  // Check to see if the later store is to the entire object (either a global,
  // an alloca, or a byval/inalloca argument).  If so, then it clearly
  // overwrites any other store to the same object.
  const Value *UO1 = GetUnderlyingObject(P1, DL),
              *UO2 = GetUnderlyingObject(P2, DL);

  // If we can't resolve the same pointers to the same object, then we can't
  // analyze them at all.
  if (UO1 != UO2)
    return OverwriteUnknown;

  // If the "Later" store is to a recognizable object, get its size.
  uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
  if (ObjectSize != MemoryLocation::UnknownSize)
    if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
      return OverwriteComplete;

  // Okay, we have stores to two completely different pointers.  Try to
  // decompose the pointer into a "base + constant_offset" form.  If the base
  // pointers are equal, then we can reason about the two stores.
  EarlierOff = 0;
  LaterOff = 0;
  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);

  // If the base pointers still differ, we have two completely different
  // stores.
  if (BP1 != BP2)
    return OverwriteUnknown;

  // The later store completely overlaps the earlier store if:
  //
  // 1. Both start at the same offset and the later one's size is greater than
  //    or equal to the earlier one's, or
  //
  //      |--earlier--|
  //      |--   later   --|
  //
  // 2. The earlier store has an offset greater than the later offset, but
  //    which still lies completely within the later store.
  //
  //        |--earlier--|
  //    |-----  later  ------|
  //
  // We have to be careful here as *Off is signed while *.Size is unsigned.
  if (EarlierOff >= LaterOff &&
      Later.Size >= Earlier.Size &&
      uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
    return OverwriteComplete;

  // We may now overlap, although the overlap is not complete. There might also
  // be other incomplete overlaps, and together, they might cover the complete
  // earlier write.
  // Note: The correctness of this logic depends on the fact that this function
  // is never called with DepWrite when there are any intervening reads.
  if (EnablePartialOverwriteTracking &&
      LaterOff < int64_t(EarlierOff + Earlier.Size) &&
      int64_t(LaterOff + Later.Size) >= EarlierOff) {

    // Insert our part of the overlap into the map.
    auto &IM = IOL[DepWrite];
    DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff << ", " <<
                    int64_t(EarlierOff + Earlier.Size) << ") Later [" <<
                    LaterOff << ", " << int64_t(LaterOff + Later.Size) << ")\n");

    // Make sure that we only insert non-overlapping intervals and combine
    // adjacent intervals. The intervals are stored in the map with the ending
    // offset as the key (in the half-open sense) and the starting offset as
    // the value.
    int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + Later.Size;

    // Find any intervals ending at, or after, LaterIntStart which start
    // before LaterIntEnd.
    auto ILI = IM.lower_bound(LaterIntStart);
    if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
      // This existing interval is overlapped with the current store somewhere
      // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
      // interval and adjusting our start and end.
      LaterIntStart = std::min(LaterIntStart, ILI->second);
      LaterIntEnd = std::max(LaterIntEnd, ILI->first);
      ILI = IM.erase(ILI);

      // Continue erasing and adjusting our end in case other previous
      // intervals are also overlapped with the current store.
      //
      // |--- earlier 1 ---|  |--- earlier 2 ---|
      //     |------- later---------|
      //
      while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
        assert(ILI->second > LaterIntStart && "Unexpected interval");
        LaterIntEnd = std::max(LaterIntEnd, ILI->first);
        ILI = IM.erase(ILI);
      }
    }

    IM[LaterIntEnd] = LaterIntStart;

    ILI = IM.begin();
    if (ILI->second <= EarlierOff &&
        ILI->first >= int64_t(EarlierOff + Earlier.Size)) {
      DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier [" <<
                      EarlierOff << ", " <<
                      int64_t(EarlierOff + Earlier.Size) <<
                      ") Composite Later [" <<
                      ILI->second << ", " << ILI->first << ")\n");
      ++NumCompletePartials;
      return OverwriteComplete;
    }
  }

  // Another interesting case is if the later store overwrites the end of the
  // earlier store.
  //
  //      |--earlier--|
  //                |--   later   --|
  //
  // In this case we may want to trim the size of earlier to avoid generating
  // writes to addresses which will definitely be overwritten later.
  if (LaterOff > EarlierOff &&
      LaterOff < int64_t(EarlierOff + Earlier.Size) &&
      int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
    return OverwriteEnd;

  // Finally, we also need to check if the later store overwrites the beginning
  // of the earlier store.
  //
  //                |--earlier--|
  //      |--   later   --|
  //
  // In this case we may want to move the destination address and trim the size
  // of earlier to avoid generating writes to addresses which will definitely
  // be overwritten later.
  if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
    assert(int64_t(LaterOff + Later.Size) <
               int64_t(EarlierOff + Earlier.Size) &&
           "Expect to be handled as OverwriteComplete");
    return OverwriteBegin;
  }
  // Otherwise, they don't completely overlap.
  return OverwriteUnknown;
}

/// If 'Inst' might be a self read (i.e. a noop copy of a
/// memory region into an identical pointer) then it doesn't actually make its
/// input dead in the traditional sense.  Consider this case:
///
///   memcpy(A <- B)
///   memcpy(A <- A)
///
/// In this case, the second store to A does not make the first store to A dead.
/// The usual situation isn't an explicit A<-A store like this (which can be
/// trivially removed) but a case where two pointers may alias.
///
/// This function detects when it is unsafe to remove a dependent instruction
/// because the DSE inducing instruction may be a self-read.
static bool isPossibleSelfRead(Instruction *Inst,
                               const MemoryLocation &InstStoreLoc,
                               Instruction *DepWrite,
                               const TargetLibraryInfo &TLI,
                               AliasAnalysis &AA) {
  // Self reads can only happen for instructions that read memory.  Get the
  // location read.
  MemoryLocation InstReadLoc = getLocForRead(Inst, TLI);
  if (!InstReadLoc.Ptr) return false;  // Not a reading instruction.

  // If the read and written loc obviously don't alias, it isn't a read.
  if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;

  // Okay, 'Inst' may copy over itself.  However, we can still remove the
  // DepWrite instruction if we can prove that it reads from the same location
  // as Inst.  This handles useful cases like:
  //   memcpy(A <- B)
  //   memcpy(A <- B)
  // Here we don't know if A/B may alias, but we do know that B/B are must
  // aliases, so removing the first memcpy is safe (assuming it writes <= #
  // bytes as the second one).
  MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI);

  if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr))
    return false;

  // If DepWrite doesn't read memory or if we can't prove it is a must alias,
  // then it can't be considered dead.
  return true;
}

/// Returns true if the memory that is accessed by the second instruction is
/// not modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
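/// For example (illustrative IR, FirstI being the load and SecondI the store),
/// the intervening may-write call makes this return false:
///   %v = load i32, i32* %p
///   call void @f()            ; may modify *%p
///   store i32 %v, i32* %p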
static bool memoryIsNotModifiedBetween(Instruction *FirstI,
                                       Instruction *SecondI,
                                       AliasAnalysis *AA) {
  SmallVector<BasicBlock *, 16> WorkList;
  SmallPtrSet<BasicBlock *, 8> Visited;
  BasicBlock::iterator FirstBBI(FirstI);
  ++FirstBBI;
  BasicBlock::iterator SecondBBI(SecondI);
  BasicBlock *FirstBB = FirstI->getParent();
  BasicBlock *SecondBB = SecondI->getParent();
  MemoryLocation MemLoc = MemoryLocation::get(SecondI);

  // Start checking the block of the second instruction.
  WorkList.push_back(SecondBB);
  bool isFirstBlock = true;

  // Check all blocks going backward until we reach the block of the first
  // instruction.
  while (!WorkList.empty()) {
    BasicBlock *B = WorkList.pop_back_val();

    // Ignore instructions before FirstI if this is FirstBB.
    BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());

    BasicBlock::iterator EI;
    if (isFirstBlock) {
      // Ignore instructions after SecondI if this is the first visit of
      // SecondBB.
      assert(B == SecondBB && "first block is not the store block");
      EI = SecondBBI;
      isFirstBlock = false;
    } else {
      // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
      // In this case we also have to look at instructions after SecondI.
      EI = B->end();
    }
    for (; BI != EI; ++BI) {
      Instruction *I = &*BI;
      if (I->mayWriteToMemory() && I != SecondI) {
        auto Res = AA->getModRefInfo(I, MemLoc);
        if (Res != MRI_NoModRef)
          return false;
      }
    }
    if (B != FirstBB) {
      assert(B != &FirstBB->getParent()->getEntryBlock() &&
             "Should not hit the entry block because SecondI must be "
             "dominated by FirstI");
      for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
        if (!Visited.insert(*PredI).second)
          continue;
        WorkList.push_back(*PredI);
      }
    }
  }
  return true;
}

/// Find all blocks that will unconditionally lead to the block BB and append
/// them to Blocks.
static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
                                   BasicBlock *BB, DominatorTree *DT) {
  for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
    BasicBlock *Pred = *I;
    if (Pred == BB) continue;
    TerminatorInst *PredTI = Pred->getTerminator();
    if (PredTI->getNumSuccessors() != 1)
      continue;

    if (DT->isReachableFromEntry(Pred))
      Blocks.push_back(Pred);
  }
}

/// Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
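/// For example (illustrative IR), the store below is dead because the whole
/// object is freed immediately afterwards with no intervening read:
///   %p = call i8* @malloc(i64 4)
///   store i8 0, i8* %p
///   call void @free(i8* %p)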
static bool handleFree(CallInst *F, AliasAnalysis *AA,
                       MemoryDependenceResults *MD, DominatorTree *DT,
                       const TargetLibraryInfo *TLI) {
  bool MadeChange = false;

  MemoryLocation Loc = MemoryLocation(F->getOperand(0));
  SmallVector<BasicBlock *, 16> Blocks;
  Blocks.push_back(F->getParent());
  const DataLayout &DL = F->getModule()->getDataLayout();

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.pop_back_val();
    Instruction *InstPt = BB->getTerminator();
    if (BB == F->getParent()) InstPt = F;

    MemDepResult Dep =
        MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB);
    while (Dep.isDef() || Dep.isClobber()) {
      Instruction *Dependency = Dep.getInst();
      if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency))
        break;

      Value *DepPointer =
          GetUnderlyingObject(getStoredPointerOperand(Dependency), DL);

      // Check for aliasing.
      if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
        break;

      // DCE instructions only used to calculate that store.
      BasicBlock::iterator BBI(Dependency);
      deleteDeadInstruction(Dependency, &BBI, *MD, *TLI);
      ++NumFastStores;
      MadeChange = true;

      // Inst's old Dependency is now deleted. Compute the next dependency,
      // which may also be dead, as in
      //    s[0] = 0;
      //    s[1] = 0; // This has just been deleted.
      //    free(s);
      Dep = MD->getPointerDependencyFrom(Loc, false, BBI, BB);
    }

    if (Dep.isNonLocal())
      findUnconditionalPreds(Blocks, BB, DT);
  }

  return MadeChange;
}

/// Check to see if the specified location may alias any of the stack objects
/// in the DeadStackObjects set. If so, they become live because the location
/// is being loaded.
static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
                                  SmallSetVector<Value *, 16> &DeadStackObjects,
                                  const DataLayout &DL, AliasAnalysis *AA,
                                  const TargetLibraryInfo *TLI) {
  const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);

  // A constant can't be in the dead pointer set.
  if (isa<Constant>(UnderlyingPointer))
    return;

  // If the kill pointer can be easily reduced to an alloca, don't bother doing
  // extraneous AA queries.
  if (isa<AllocaInst>(UnderlyingPointer) || isa<Argument>(UnderlyingPointer)) {
    DeadStackObjects.remove(const_cast<Value*>(UnderlyingPointer));
    return;
  }

  // Remove objects that could alias LoadedLoc.
  DeadStackObjects.remove_if([&](Value *I) {
    // See if the loaded location could alias the stack location.
    MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI));
    return !AA->isNoAlias(StackLoc, LoadedLoc);
  });
}

/// Remove dead stores to stack-allocated locations in the function end block.
/// Ex:
/// %A = alloca i32
/// ...
/// store i32 1, i32* %A
/// ret void
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
                           MemoryDependenceResults *MD,
                           const TargetLibraryInfo *TLI) {
  bool MadeChange = false;

  // Keep track of all of the stack objects that are dead at the end of the
  // function.
  SmallSetVector<Value*, 16> DeadStackObjects;

  // Find all of the alloca'd pointers in the entry block.
  BasicBlock &Entry = BB.getParent()->front();
  for (Instruction &I : Entry) {
    if (isa<AllocaInst>(&I))
      DeadStackObjects.insert(&I);

    // Okay, so these are dead heap objects, but if the pointer never escapes
    // then it's leaked by this function anyways.
    else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
      DeadStackObjects.insert(&I);
  }

  // Treat byval or inalloca arguments the same; stores to them are dead at the
  // end of the function.
  for (Argument &AI : BB.getParent()->args())
    if (AI.hasByValOrInAllocaAttr())
      DeadStackObjects.insert(&AI);

  const DataLayout &DL = BB.getModule()->getDataLayout();

  // Scan the basic block backwards.
  for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ) {
    --BBI;

    // If we find a store, check to see if it points into a dead stack value.
    if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) {
      // See through pointer-to-pointer bitcasts.
      SmallVector<Value *, 4> Pointers;
      GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL);

      // Stores to stack values are valid candidates for removal.
      bool AllDead = true;
      for (Value *Pointer : Pointers)
        if (!DeadStackObjects.count(Pointer)) {
          AllDead = false;
          break;
        }

      if (AllDead) {
        Instruction *Dead = &*BBI;

        DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n  DEAD: "
                     << *Dead << "\n  Objects: ";
              for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
                   E = Pointers.end(); I != E; ++I) {
                dbgs() << **I;
                if (std::next(I) != E)
                  dbgs() << ", ";
              }
              dbgs() << '\n');

        // DCE instructions only used to calculate that store.
        deleteDeadInstruction(Dead, &BBI, *MD, *TLI, &DeadStackObjects);
        ++NumFastStores;
        MadeChange = true;
        continue;
      }
    }

    // Remove any dead non-memory-mutating instructions.
    if (isInstructionTriviallyDead(&*BBI, TLI)) {
      deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, &DeadStackObjects);
      ++NumFastOther;
      MadeChange = true;
      continue;
    }

    if (isa<AllocaInst>(BBI)) {
      // Remove allocas from the list of dead stack objects; there can't be
      // any references before the definition.
      DeadStackObjects.remove(&*BBI);
      continue;
    }

    if (auto CS = CallSite(&*BBI)) {
      // Remove allocation function calls from the list of dead stack objects;
      // there can't be any references before the definition.
      if (isAllocLikeFn(&*BBI, TLI))
        DeadStackObjects.remove(&*BBI);

      // If this call does not access memory, it can't be loading any of our
      // pointers.
      if (AA->doesNotAccessMemory(CS))
        continue;

      // If the call might load from any of our allocas, then any store above
      // the call is live.
      DeadStackObjects.remove_if([&](Value *I) {
        // See if the call site touches the value.
        ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI));

        return A == MRI_ModRef || A == MRI_Ref;
      });

      // If all of the allocas were clobbered by the call then we're not going
      // to find anything else to process.
      if (DeadStackObjects.empty())
        break;

      continue;
    }

    // We can remove the dead stores, irrespective of the fence and its
    // ordering (release/acquire/seq_cst). Fences only constrain the ordering
    // of already visible stores; they do not make a store visible to other
    // threads. So, skipping over a fence does not change a store from being
    // dead.
    if (isa<FenceInst>(*BBI))
      continue;

    MemoryLocation LoadedLoc;

    // If we encounter a use of the pointer, it is no longer considered dead.
    if (LoadInst *L = dyn_cast<LoadInst>(BBI)) {
      if (!L->isUnordered()) // Be conservative with atomic/volatile load.
        break;
      LoadedLoc = MemoryLocation::get(L);
    } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
      LoadedLoc = MemoryLocation::get(V);
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
      LoadedLoc = MemoryLocation::getForSource(MTI);
    } else if (!BBI->mayReadFromMemory()) {
      // Instruction doesn't read memory.  Note that stores that weren't
      // removed above will hit this case.
      continue;
    } else {
      // Unknown inst; assume it clobbers everything.
      break;
    }

    // Remove any allocas from the DeadPointer set that are loaded, as this
    // makes any stores above the access live.
    removeAccessedObjects(LoadedLoc, DeadStackObjects, DL, AA, TLI);

    // If all of the allocas were clobbered by the access then we're not going
    // to find anything else to process.
    if (DeadStackObjects.empty())
      break;
  }

  return MadeChange;
}

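/// Remove a store if it is a no-op: either it stores back a value just loaded
/// from the same pointer, or it stores zero into memory newly returned by a
/// calloc-like function. For example (illustrative IR):
///   %v = load i32, i32* %p
///   store i32 %v, i32* %p   ; removable if %p is not modified in between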
static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
                               AliasAnalysis *AA, MemoryDependenceResults *MD,
                               const DataLayout &DL,
                               const TargetLibraryInfo *TLI) {
  // Must be a store instruction.
  StoreInst *SI = dyn_cast<StoreInst>(Inst);
  if (!SI)
    return false;

  // If we're storing the same value back to a pointer that we just loaded
  // from, then the store can be removed.
  if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
    if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
        isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) {

      DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  LOAD: "
                   << *DepLoad << "\n  STORE: " << *SI << '\n');

      deleteDeadInstruction(SI, &BBI, *MD, *TLI);
      ++NumRedundantStores;
      return true;
    }
  }

  // Remove null stores into the calloc'ed objects.
  Constant *StoredConstant = dyn_cast<Constant>(SI->getValueOperand());
  if (StoredConstant && StoredConstant->isNullValue() && isRemovable(SI)) {
    Instruction *UnderlyingPointer =
        dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));

    if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
        memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {
      DEBUG(
          dbgs() << "DSE: Remove null store to the calloc'ed object:\n  DEAD: "
                 << *Inst << "\n  OBJECT: " << *UnderlyingPointer << '\n');

      deleteDeadInstruction(SI, &BBI, *MD, *TLI);
      ++NumRedundantStores;
      return true;
    }
  }
  return false;
}

static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
                                MemoryDependenceResults *MD, DominatorTree *DT,
                                const TargetLibraryInfo *TLI) {
  const DataLayout &DL = BB.getModule()->getDataLayout();
  bool MadeChange = false;

  // A map of interval maps representing partially-overwritten value parts.
  InstOverlapIntervalsTy IOL;

  // Do a top-down walk on the BB.
  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
    // Handle 'free' calls specially.
    if (CallInst *F = isFreeCall(&*BBI, TLI)) {
      MadeChange |= handleFree(F, AA, MD, DT, TLI);
      // Increment BBI after handleFree has potentially deleted instructions.
      // This ensures we maintain a valid iterator.
      ++BBI;
      continue;
    }

    Instruction *Inst = &*BBI++;

    // Check to see if Inst writes to memory.  If not, continue.
    if (!hasMemoryWrite(Inst, *TLI))
      continue;

    // eliminateNoopStore will update the iterator, if necessary.
    if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI)) {
      MadeChange = true;
      continue;
    }

    // If we find something that writes memory, get its memory dependence.
    MemDepResult InstDep = MD->getDependency(Inst);

    // Ignore any store where we can't find a local dependence.
    // FIXME: cross-block DSE would be fun. :)
    if (!InstDep.isDef() && !InstDep.isClobber())
      continue;

    // Figure out what location is being stored to.
    MemoryLocation Loc = getLocForWrite(Inst, *AA);

    // If we didn't get a useful location, fail.
    if (!Loc.Ptr)
      continue;

    while (InstDep.isDef() || InstDep.isClobber()) {
      // Get the memory clobbered by the instruction we depend on.  MemDep will
      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
      // end up depending on a may- or must-aliased load, then we can't optimize
      // away the store and we bail out.  However, if we depend on something
      // that overwrites the memory location we *can* potentially optimize it.
      //
      // Find out what memory location the dependent instruction stores.
      Instruction *DepWrite = InstDep.getInst();
      MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
      // If we didn't get a useful location, bail out.
      if (!DepLoc.Ptr)
        break;

      // If we find a write that is a) removable (i.e., non-volatile), b) is
      // completely obliterated by the store to 'Loc', and c) which we know that
      // 'Inst' doesn't load from, then we can remove it.
      if (isRemovable(DepWrite) &&
          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
        int64_t InstWriteOffset, DepWriteOffset;
        OverwriteResult OR =
            isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset,
                        DepWrite, IOL);
        if (OR == OverwriteComplete) {
          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                << *DepWrite << "\n  KILLER: " << *Inst << '\n');

          // Delete the store and now-dead instructions that feed it.
          deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
          ++NumFastStores;
          MadeChange = true;

          // We erased DepWrite; start over.
          InstDep = MD->getDependency(Inst);
          continue;
        } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
                   ((OR == OverwriteBegin &&
                     isShortenableAtTheBeginning(DepWrite)))) {
          // TODO: Base this on the target vector size so that if the earlier
          // store was too small to get vector writes anyway, then it's likely
          // a good idea to shorten it.
          // Shortening power-of-2 sized writes is probably always a bad idea,
          // as any store/memset/memcpy is likely using vector instructions,
          // and shortening them to a non-vector size is likely to be slower.
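          // For example (illustrative): a 16-byte memset whose last 8 bytes
          // are overwritten by a later store could be trimmed to an 8-byte
          // memset, subject to the alignment checks below.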
          MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
          bool IsOverwriteEnd = (OR == OverwriteEnd);
          if (!IsOverwriteEnd)
            InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);

          if ((llvm::isPowerOf2_64(InstWriteOffset) &&
               DepWriteAlign <= InstWriteOffset) ||
              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {

            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
                         << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
                         << *DepWrite << "\n  KILLER (offset "
                         << InstWriteOffset << ", " << DepLoc.Size << ")"
                         << *Inst << '\n');

            int64_t NewLength =
                IsOverwriteEnd
                    ? InstWriteOffset - DepWriteOffset
                    : DepLoc.Size - (InstWriteOffset - DepWriteOffset);

            Value *DepWriteLength = DepIntrinsic->getLength();
            Value *TrimmedLength =
                ConstantInt::get(DepWriteLength->getType(), NewLength);
            DepIntrinsic->setLength(TrimmedLength);

            if (!IsOverwriteEnd) {
              int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
              Value *Indices[1] = {
                  ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
              GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
                  DepIntrinsic->getRawDest(), Indices, "", DepWrite);
              DepIntrinsic->setDest(NewDestGEP);
            }
            MadeChange = true;
          }
        }
      }

      // If this is a may-aliased store that is clobbering the store value, we
      // can keep searching past it for another must-aliased pointer that stores
      // to the same location.  For example, in:
      //   store -> P
      //   store -> Q
      //   store -> P
      // we can remove the first store to P even though we don't know if P and Q
      // alias.
      if (DepWrite == &BB.front()) break;

      // Can't look past this instruction if it might read 'Loc'.
      if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
        break;

      InstDep = MD->getPointerDependencyFrom(Loc, false,
                                             DepWrite->getIterator(), &BB);
    }
  }

  // If this block ends in a return, unwind, or unreachable, all allocas are
  // dead at its end, which means stores to them are also dead.
  if (BB.getTerminator()->getNumSuccessors() == 0)
    MadeChange |= handleEndBlock(BB, AA, MD, TLI);

  return MadeChange;
}

static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
                                MemoryDependenceResults *MD, DominatorTree *DT,
                                const TargetLibraryInfo *TLI) {
  bool MadeChange = false;
  for (BasicBlock &BB : F)
    // Only check non-dead blocks.  Dead blocks may have strange pointer
    // cycles that will confuse alias analysis.
    if (DT->isReachableFromEntry(&BB))
      MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
  return MadeChange;
}

//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  AliasAnalysis *AA = &AM.getResult<AAManager>(F);
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
  const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);

  if (!eliminateDeadStores(F, AA, MD, DT, TLI))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<GlobalsAA>();
  PA.preserve<MemoryDependenceAnalysis>();
  return PA;
}

namespace {
/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
class DSELegacyPass : public FunctionPass {
public:
  DSELegacyPass() : FunctionPass(ID) {
    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    MemoryDependenceResults *MD =
        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
    const TargetLibraryInfo *TLI =
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

    return eliminateDeadStores(F, AA, MD, DT, TLI);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<MemoryDependenceWrapperPass>();
  }

  static char ID; // Pass identification, replacement for typeid
};
} // end anonymous namespace

char DSELegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                    false)

FunctionPass *llvm::createDeadStoreEliminationPass() {
  return new DSELegacyPass();
}