MemoryDependenceAnalysis.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
// operation, what preceding memory operations it depends on.  It builds on
// alias analysis information, and tries to provide a lazy, caching interface to
// a common kind of alias information query.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "memdep"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");

STATISTIC(NumCacheNonLocalPtr,
          "Number of fully cached non-local ptr responses");
STATISTIC(NumCacheDirtyNonLocalPtr,
          "Number of cached, but dirty, non-local ptr responses");
STATISTIC(NumUncacheNonLocalPtr,
          "Number of uncached non-local ptr responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
          "Number of block queries that were completely cached");

// Limit for the number of instructions to scan in a block.
static const int BlockScanLimit = 100;

char MemoryDependenceAnalysis::ID = 0;

// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
                "Memory Dependence Analysis", false, true)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
                      "Memory Dependence Analysis", false, true)

MemoryDependenceAnalysis::MemoryDependenceAnalysis()
    : FunctionPass(ID), PredCache() {
  initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
}
MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
}

/// Clean up memory in between runs
void MemoryDependenceAnalysis::releaseMemory() {
  LocalDeps.clear();
  NonLocalDeps.clear();
  NonLocalPointerDeps.clear();
  ReverseLocalDeps.clear();
  ReverseNonLocalDeps.clear();
  ReverseNonLocalPtrDeps.clear();
  PredCache->clear();
}



/// getAnalysisUsage - Does not modify anything.  It uses Alias Analysis.
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequiredTransitive<AliasAnalysis>();
}

bool MemoryDependenceAnalysis::runOnFunction(Function &) {
  AA = &getAnalysis<AliasAnalysis>();
  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  DL = DLP ? &DLP->getDataLayout() : 0;
  DominatorTreeWrapperPass *DTWP =
      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DT = DTWP ? &DTWP->getDomTree() : 0;
  if (!PredCache)
    PredCache.reset(new PredIteratorCache());
  return false;
}

/// RemoveFromReverseMap - This is a helper function that removes Val from
/// 'Inst's set in ReverseMap.  If the set becomes empty, remove Inst's entry.
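/// (For instance, ReverseLocalDeps maps a dependency instruction to the set
/// of query instructions whose cached local result points at it.)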
template <typename KeyTy>
static void RemoveFromReverseMap(DenseMap<Instruction*,
                                 SmallPtrSet<KeyTy, 4> > &ReverseMap,
                                 Instruction *Inst, KeyTy Val) {
  typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
  InstIt = ReverseMap.find(Inst);
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = InstIt->second.erase(Val);
  assert(Found && "Invalid reverse map!"); (void)Found;
  if (InstIt->second.empty())
    ReverseMap.erase(InstIt);
}

/// GetLocation - If the given instruction references a specific memory
/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
/// Return a ModRefInfo value describing the general behavior of the
/// instruction.
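/// For example, an unordered load yields the loaded location with Ref, an
/// unordered store yields the stored location with Mod, and a call to free()
/// yields the freed pointer's location with Mod.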
static
AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
                                        AliasAnalysis::Location &Loc,
                                        AliasAnalysis *AA) {
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    if (LI->isUnordered()) {
      Loc = AA->getLocation(LI);
      return AliasAnalysis::Ref;
    }
    if (LI->getOrdering() == Monotonic) {
      Loc = AA->getLocation(LI);
      return AliasAnalysis::ModRef;
    }
    Loc = AliasAnalysis::Location();
    return AliasAnalysis::ModRef;
  }

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->isUnordered()) {
      Loc = AA->getLocation(SI);
      return AliasAnalysis::Mod;
    }
    if (SI->getOrdering() == Monotonic) {
      Loc = AA->getLocation(SI);
      return AliasAnalysis::ModRef;
    }
    Loc = AliasAnalysis::Location();
    return AliasAnalysis::ModRef;
  }

  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    Loc = AA->getLocation(V);
    return AliasAnalysis::ModRef;
  }

  if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
    // calls to free() deallocate the entire structure
    Loc = AliasAnalysis::Location(CI->getArgOperand(0));
    return AliasAnalysis::Mod;
  }

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      Loc = AliasAnalysis::Location(II->getArgOperand(1),
                                    cast<ConstantInt>(II->getArgOperand(0))
                                      ->getZExtValue(),
                                    II->getMetadata(LLVMContext::MD_tbaa));
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return AliasAnalysis::Mod;
    case Intrinsic::invariant_end:
      Loc = AliasAnalysis::Location(II->getArgOperand(2),
                                    cast<ConstantInt>(II->getArgOperand(1))
                                      ->getZExtValue(),
                                    II->getMetadata(LLVMContext::MD_tbaa));
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return AliasAnalysis::Mod;
    default:
      break;
    }

  // Otherwise, just do the coarse-grained thing that always works.
  if (Inst->mayWriteToMemory())
    return AliasAnalysis::ModRef;
  if (Inst->mayReadFromMemory())
    return AliasAnalysis::Ref;
  return AliasAnalysis::NoModRef;
}

/// getCallSiteDependencyFrom - Private helper for finding the local
/// dependencies of a call site.
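/// For example, when the query is a read-only call, an identical earlier
/// read-only call that does not interfere is returned as a Def (so the later
/// call can be treated as redundant); any other interfering memory operation
/// is returned as a Clobber.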
MemDepResult MemoryDependenceAnalysis::
getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
  unsigned Limit = BlockScanLimit;

  // Walk backwards through the block, looking for dependencies
  while (ScanIt != BB->begin()) {
    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    Instruction *Inst = --ScanIt;

    // If this inst is a memory op, get the pointer it accessed
    AliasAnalysis::Location Loc;
    AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
    if (Loc.Ptr) {
      // A simple instruction.
      if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
        return MemDepResult::getClobber(Inst);
      continue;
    }

    if (CallSite InstCS = cast<Value>(Inst)) {
      // Debug intrinsics don't cause dependences.
      if (isa<DbgInfoIntrinsic>(Inst)) continue;
      // If these two calls do not interfere, look past it.
      switch (AA->getModRefInfo(CS, InstCS)) {
      case AliasAnalysis::NoModRef:
        // If the two calls are the same, return InstCS as a Def, so that
        // CS can be found redundant and eliminated.
        if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
            CS.getInstruction()->isIdenticalToWhenDefined(Inst))
          return MemDepResult::getDef(Inst);

        // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
        // keep scanning.
        continue;
      default:
        return MemDepResult::getClobber(Inst);
      }
    }

    // If we could not obtain a pointer for the instruction and the instruction
    // touches memory then assume that this is a dependency.
    if (MR != AliasAnalysis::NoModRef)
      return MemDepResult::getClobber(Inst);
  }

  // No dependence found.  If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
/// would fully overlap MemLoc if done as a wider legal integer load.
///
/// MemLocBase and MemLocOffs are lazily computed here the first time the
/// base/offset of MemLoc is needed.
static bool
isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
                                       const Value *&MemLocBase,
                                       int64_t &MemLocOffs,
                                       const LoadInst *LI,
                                       const DataLayout *DL) {
  // If we have no target data, we can't do this.
  if (DL == 0) return false;

  // If we haven't already computed the base/offset of MemLoc, do so now.
  if (MemLocBase == 0)
    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);

  unsigned Size = MemoryDependenceAnalysis::
    getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
                                    LI, *DL);
  return Size != 0;
}

/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
/// looks at a memory location for a load (specified by MemLocBase, MemLocOffs,
/// and MemLocSize) and compares it against a load.  If the specified load could
/// be safely widened to a larger integer load that is 1) still efficient,
/// 2) safe for the target, and 3) would provide the specified memory
/// location value, then this function returns the size in bytes of the
/// load width to use.  If not, this returns zero.
unsigned MemoryDependenceAnalysis::
getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
                                unsigned MemLocSize, const LoadInst *LI,
                                const DataLayout &DL) {
  // We can only extend simple integer loads.
  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;

  // Load widening is hostile to ThreadSanitizer: it may cause false positives
  // or make the reports more cryptic (access sizes are wrong).
  if (LI->getParent()->getParent()->getAttributes().
      hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
    return 0;

  // Get the base of this load.
  int64_t LIOffs = 0;
  const Value *LIBase =
    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL);

  // If the two pointers are not based on the same pointer, we can't tell that
  // they are related.
  if (LIBase != MemLocBase) return 0;

  // Okay, the two values are based on the same pointer, but returned as
  // no-alias.  This happens when we have things like two byte loads at "P+1"
  // and "P+3".  Check to see if increasing the size of the "LI" load up to its
  // alignment (or the largest native integer type) will allow us to load all
  // the bits required by MemLoc.

  // If MemLoc is before LI, then no widening of LI will help us out.
  if (MemLocOffs < LIOffs) return 0;

  // Get the alignment of the load in bytes.  We assume that it is safe to load
  // any legal integer up to this size without a problem.  For example, if we're
  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
  // widen it up to an i32 load.  If it is known 2-byte aligned, we can widen it
  // to i16.
  unsigned LoadAlign = LI->getAlignment();

  int64_t MemLocEnd = MemLocOffs+MemLocSize;

  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
  if (LIOffs+LoadAlign < MemLocEnd) return 0;

  // This is the size of the load to try.  Start with the next larger power of
  // two.
  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U;
  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);

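  // Worked example (hypothetical values): an i8 load at offset 0 with 4-byte
  // alignment, queried against a 1-byte location at offset 3 (MemLocEnd == 4).
  // NewLoadByteSize starts at 1 and is bumped to 2 above; the loop below
  // doubles it to 4, at which point LIOffs+4 >= MemLocEnd, so 4 is returned
  // (i.e. widen to an i32 load).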
  while (1) {
    // If this load size is bigger than our known alignment or would not fit
    // into a native integer register, then we fail.
    if (NewLoadByteSize > LoadAlign ||
        !DL.fitsInLegalInteger(NewLoadByteSize*8))
      return 0;

    if (LIOffs+NewLoadByteSize > MemLocEnd &&
        LI->getParent()->getParent()->getAttributes().
          hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
      // We will be reading past the location accessed by the original program.
      // While this is safe in a regular build, Address Safety analysis tools
      // may start reporting false warnings. So, don't do widening.
      return 0;

    // If a load of this width would include all of MemLoc, then we succeed.
    if (LIOffs+NewLoadByteSize >= MemLocEnd)
      return NewLoadByteSize;

    NewLoadByteSize <<= 1;
  }
}

/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends.  If isLoad is true, this routine ignores may-aliases with
/// read-only operations.  If isLoad is false, this routine ignores may-aliases
/// with reads from read-only locations.  If possible, pass the query
/// instruction as well; this function may take advantage of the metadata
/// annotated to the query instruction to refine the result.
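///
/// For example (illustrative IR only): given "store i32 0, i32* %P" followed
/// by "%v = load i32* %P", querying the load reports the store as a Def; a
/// store through a pointer that only may-alias %P is reported as a Clobber.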
MemDepResult MemoryDependenceAnalysis::
getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
                         BasicBlock::iterator ScanIt, BasicBlock *BB,
                         Instruction *QueryInst) {

  const Value *MemLocBase = 0;
  int64_t MemLocOffset = 0;
  unsigned Limit = BlockScanLimit;
  bool isInvariantLoad = false;
  if (isLoad && QueryInst) {
    LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0)
      isInvariantLoad = true;
  }

  // Walk backwards through the basic block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = --ScanIt;

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      // Debug intrinsics don't (and can't) cause dependencies.
      if (isa<DbgInfoIntrinsic>(II)) continue;

    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
      // If we reach a lifetime begin or end marker, then the query ends here
      // because the value is undefined.
      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
        // FIXME: This only considers queries directly on the invariant-tagged
        // pointer, not on query pointers that are indexed off of them.  It'd
        // be nice to handle that at some point (the right approach is to use
        // GetPointerBaseWithConstantOffset).
        if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)),
                            MemLoc))
          return MemDepResult::getDef(II);
        continue;
      }
    }

    // Values depend on loads if the pointers are must aliased.  This means that
    // a load depends on another must aliased load from the same value.
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      // Atomic loads have complications involved.
      // FIXME: This is overly conservative.
      if (!LI->isUnordered())
        return MemDepResult::getClobber(LI);

      AliasAnalysis::Location LoadLoc = AA->getLocation(LI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);

      if (isLoad) {
        if (R == AliasAnalysis::NoAlias) {
          // If this is an over-aligned integer load (for example,
          // "load i8* %P, align 4") see if it would obviously overlap with the
          // queried location if widened to a larger load (e.g. if the queried
          // location is 1 byte at P+1).  If so, return it as a load/load
          // clobber result, allowing the client to decide to widen the load if
          // it wants to.
          if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
            if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
                isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
                                                       MemLocOffset, LI, DL))
              return MemDepResult::getClobber(Inst);

          continue;
        }

        // Must aliased loads are defs of each other.
        if (R == AliasAnalysis::MustAlias)
          return MemDepResult::getDef(Inst);

#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
      // in terms of clobbering loads, but since it does this by looking
      // at the clobbering load directly, it doesn't know about any
      // phi translation that may have happened along the way.

        // If we have a partial alias, then return this as a clobber for the
        // client to handle.
        if (R == AliasAnalysis::PartialAlias)
          return MemDepResult::getClobber(Inst);
#endif

        // Two loads that merely may-alias each other do not create a
        // dependence; keep scanning.
        continue;
      }

      // Stores don't depend on other no-aliased accesses.
      if (R == AliasAnalysis::NoAlias)
        continue;

      // Stores don't alias loads from read-only memory.
      if (AA->pointsToConstantMemory(LoadLoc))
        continue;

      // Stores depend on may/must aliased loads.
      return MemDepResult::getDef(Inst);
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      // Atomic stores have complications involved.
      // FIXME: This is overly conservative.
      if (!SI->isUnordered())
        return MemDepResult::getClobber(SI);

      // If alias analysis can tell that this store is guaranteed to not modify
      // the query pointer, ignore it.  Use getModRefInfo to handle cases where
      // the query pointer points to constant memory etc.
      if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
        continue;

      // Ok, this store might clobber the query pointer.  Check to see if it is
      // a must alias: in this case, we want to return this as a def.
      AliasAnalysis::Location StoreLoc = AA->getLocation(SI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);

      if (R == AliasAnalysis::NoAlias)
        continue;
      if (R == AliasAnalysis::MustAlias)
        return MemDepResult::getDef(Inst);
      if (isInvariantLoad)
       continue;
      return MemDepResult::getClobber(Inst);
    }

    // If this is an allocation, and if we know that the accessed pointer is to
    // the allocation, return Def.  This means that there is no dependence and
    // the access can be optimized based on that.  For example, a load could
    // turn into undef.
    // Note: Only determine this to be a malloc if Inst is the malloc call, not
    // a subsequent bitcast of the malloc call result.  There can be stores to
    // the malloced memory between the malloc call and its bitcast uses, and we
    // need to continue scanning until the malloc call.
    const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
    if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);

      if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
        return MemDepResult::getDef(Inst);
      // Be conservative if the accessed pointer may alias the allocation.
      if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
        return MemDepResult::getClobber(Inst);
      // If the allocation is not aliased and does not read memory (like
      // strdup), it is safe to ignore.
      if (isa<AllocaInst>(Inst) ||
          isMallocLikeFn(Inst, TLI) || isCallocLikeFn(Inst, TLI))
        continue;
    }

    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
    // If necessary, perform additional analysis.
    if (MR == AliasAnalysis::ModRef)
      MR = AA->callCapturesBefore(Inst, MemLoc, DT);
    switch (MR) {
    case AliasAnalysis::NoModRef:
      // If the call has no effect on the queried pointer, just ignore it.
      continue;
    case AliasAnalysis::Mod:
      return MemDepResult::getClobber(Inst);
    case AliasAnalysis::Ref:
      // If the call is known to never store to the pointer, and if this is a
      // load query, we can safely ignore it (scan past it).
      if (isLoad)
        continue;
    default:
      // Otherwise, there is a potential dependence.  Return a clobber.
      return MemDepResult::getClobber(Inst);
    }
  }

  // No dependence found.  If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

/// getDependency - Return the instruction on which a memory operation
/// depends.
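///
/// A typical client query (sketch):
///   MemDepResult R = MD->getDependency(Load);
///   if (R.isDef()) { /* reuse the value written/read by R.getInst() */ }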
MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
  Instruction *ScanPos = QueryInst;

  // Check for a cached result
  MemDepResult &LocalCache = LocalDeps[QueryInst];

  // If the cached entry is non-dirty, just return it.  Note that this depends
  // on MemDepResult's default constructing to 'dirty'.
  if (!LocalCache.isDirty())
    return LocalCache;

  // Otherwise, if we have a dirty entry, we know we can start the scan at that
  // instruction, which may save us some work.
  if (Instruction *Inst = LocalCache.getInst()) {
    ScanPos = Inst;

    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
  }

  BasicBlock *QueryParent = QueryInst->getParent();

  // Do the scan.
  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    // No dependence found.  If this is the entry block of the function, it is
    // unknown, otherwise it is non-local.
    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
      LocalCache = MemDepResult::getNonLocal();
    else
      LocalCache = MemDepResult::getNonFuncLocal();
  } else {
    AliasAnalysis::Location MemLoc;
    AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
    if (MemLoc.Ptr) {
      // If we can do a pointer scan, make it happen.
      bool isLoad = !(MR & AliasAnalysis::Mod);
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

      LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
                                            QueryParent, QueryInst);
    } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
      CallSite QueryCS(QueryInst);
      bool isReadOnly = AA->onlyReadsMemory(QueryCS);
      LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
                                             QueryParent);
    } else
      // Non-memory instruction.
      LocalCache = MemDepResult::getUnknown();
  }

  // Remember the result!
  if (Instruction *I = LocalCache.getInst())
    ReverseLocalDeps[I].insert(QueryInst);

  return LocalCache;
}

#ifndef NDEBUG
/// AssertSorted - This method is used when -debug is specified to verify that
/// cache arrays are properly kept sorted.
static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
                         int Count = -1) {
  if (Count == -1) Count = Cache.size();
  if (Count == 0) return;

  for (unsigned i = 1; i != unsigned(Count); ++i)
    assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
}
#endif

/// getNonLocalCallDependency - Perform a full dependency query for the
/// specified call, returning the set of blocks that the value is
/// potentially live across.  The returned set of results will include a
/// "NonLocal" result for all blocks where the value is live across.
///
/// This method assumes the instruction returns a "NonLocal" dependency
/// within its own block.
///
/// This returns a reference to an internal data structure that may be
/// invalidated on the next non-local query or when an instruction is
/// removed.  Clients must copy this data if they want it around longer than
/// that.
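///
/// A typical query (sketch), for a call CS whose local result is NonLocal:
///   const NonLocalDepInfo &Deps = MD->getNonLocalCallDependency(CS);
/// Each entry pairs a BasicBlock with the dependency found for that block.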
const MemoryDependenceAnalysis::NonLocalDepInfo &
MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
 "getNonLocalCallDependency should only be used on calls with non-local deps!");
  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
  NonLocalDepInfo &Cache = CacheP.first;

  /// DirtyBlocks - This is the set of blocks that need to be recomputed.  In
  /// the cached case, this can happen due to instructions being deleted etc. In
  /// the uncached case, this starts out as the set of predecessors we care
  /// about.
  SmallVector<BasicBlock*, 32> DirtyBlocks;

  if (!Cache.empty()) {
    // Okay, we have a cache entry.  If we know it is not dirty, just return it
    // with no computation.
    if (!CacheP.second) {
      ++NumCacheNonLocal;
      return Cache;
    }

    // If we already have a partially computed set of results, scan them to
    // determine what is dirty, seeding our initial DirtyBlocks worklist.
    for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
       I != E; ++I)
      if (I->getResult().isDirty())
        DirtyBlocks.push_back(I->getBB());

    // Sort the cache so that we can do fast binary search lookups below.
    std::sort(Cache.begin(), Cache.end());

    ++NumCacheDirtyNonLocal;
    //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
    //     << Cache.size() << " cached: " << *QueryInst;
  } else {
    // Seed DirtyBlocks with each of the preds of QueryInst's block.
    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
    for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
      DirtyBlocks.push_back(*PI);
    ++NumUncacheNonLocal;
  }

  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);

  SmallPtrSet<BasicBlock*, 64> Visited;

  unsigned NumSortedEntries = Cache.size();
  DEBUG(AssertSorted(Cache));

  // Iterate while we still have blocks to update.
  while (!DirtyBlocks.empty()) {
    BasicBlock *DirtyBB = DirtyBlocks.back();
    DirtyBlocks.pop_back();

    // Already processed this block?
    if (!Visited.insert(DirtyBB))
      continue;

    // Do a binary search to see if we already have an entry for this block in
    // the cache set.  If so, find it.
    DEBUG(AssertSorted(Cache, NumSortedEntries));
    NonLocalDepInfo::iterator Entry =
      std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
                       NonLocalDepEntry(DirtyBB));
    if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
      --Entry;

    NonLocalDepEntry *ExistingResult = 0;
    if (Entry != Cache.begin()+NumSortedEntries &&
        Entry->getBB() == DirtyBB) {
      // If we already have an entry, and if it isn't already dirty, the block
      // is done.
      if (!Entry->getResult().isDirty())
        continue;

      // Otherwise, remember this slot so we can update the value.
      ExistingResult = &*Entry;
    }

    // If the dirty entry has a pointer, start scanning from it so we don't have
    // to rescan the entire block.
    BasicBlock::iterator ScanPos = DirtyBB->end();
    if (ExistingResult) {
      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
        ScanPos = Inst;
        // We're removing QueryInst's use of Inst.
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
                             QueryCS.getInstruction());
      }
    }

    // Find out if this block has a local dependency for QueryInst.
    MemDepResult Dep;

    if (ScanPos != DirtyBB->begin()) {
      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
      // No dependence found.  If this is the entry block of the function, it is
      // unknown (non-function-local), otherwise it is non-local.
      Dep = MemDepResult::getNonLocal();
    } else {
      Dep = MemDepResult::getNonFuncLocal();
    }

    // If we had a dirty entry for the block, update it.  Otherwise, just add
    // a new entry.
    if (ExistingResult)
      ExistingResult->setResult(Dep);
    else
      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));

    // If the block has a dependency (i.e. it isn't completely transparent to
    // the value), remember the association!
    if (!Dep.isNonLocal()) {
      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
      // update this when we remove instructions.
      if (Instruction *Inst = Dep.getInst())
        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
    } else {

      // If the block *is* completely transparent to the call, we need to check
      // the predecessors of this block.  Add them to our worklist.
      for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
        DirtyBlocks.push_back(*PI);
    }
  }

  return Cache;
}

/// getNonLocalPointerDependency - Perform a full dependency query for an
/// access to the specified (non-volatile) memory location, returning the
/// set of instructions that either define or clobber the value.
///
/// This method assumes the pointer has a "NonLocal" dependency within its
/// own block.
///
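/// A typical client query (sketch, assuming AA and a load LI whose local
/// result is NonLocal):
///   SmallVector<NonLocalDepResult, 8> Deps;
///   MD->getNonLocalPointerDependency(AA->getLocation(LI), /*isLoad=*/true,
///                                    LI->getParent(), Deps);
///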
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
                             BasicBlock *FromBB,
                             SmallVectorImpl<NonLocalDepResult> &Result) {
  assert(Loc.Ptr->getType()->isPointerTy() &&
         "Can't get pointer deps of a non-pointer!");
  Result.clear();

  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL);

  // This is the set of blocks we've inspected, and the pointer we consider in
  // each block.  Because of critical edges, we currently bail out if querying
  // a block with multiple different pointers.  This can happen during PHI
  // translation.
  DenseMap<BasicBlock*, Value*> Visited;
  if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
                                   Result, Visited, true))
    return;
  Result.clear();
  Result.push_back(NonLocalDepResult(FromBB,
                                     MemDepResult::getUnknown(),
                                     const_cast<Value *>(Loc.Ptr)));
}

/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
/// Pointer/PointeeSize using either cached information in Cache or by doing a
/// lookup (which may use dirty cache info if available).  If we do a lookup,
/// add the result to the cache.
MemDepResult MemoryDependenceAnalysis::
GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
                        bool isLoad, BasicBlock *BB,
                        NonLocalDepInfo *Cache, unsigned NumSortedEntries) {

  // Do a binary search to see if we already have an entry for this block in
  // the cache set.  If so, find it.
  NonLocalDepInfo::iterator Entry =
    std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
                     NonLocalDepEntry(BB));
  if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
    --Entry;

  NonLocalDepEntry *ExistingResult = 0;
  if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
    ExistingResult = &*Entry;

  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    ++NumCacheNonLocalPtr;
    return ExistingResult->getResult();
  }

  // Otherwise, we have to scan for the value.  If we have a dirty cache
  // entry, start scanning from its position, otherwise we scan from the end
  // of the block.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getResult().getInst()) {
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getResult().getInst();

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
  } else {
    ++NumUncacheNonLocalPtr;
  }

  // Scan the block for the dependency.
  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);

  // If we had a dirty entry for the block, update it.  Otherwise, just add
  // a new entry.
  if (ExistingResult)
    ExistingResult->setResult(Dep);
  else
    Cache->push_back(NonLocalDepEntry(BB, Dep));

  // If the block has a dependency (i.e. it isn't completely transparent to
  // the value), remember the reverse association because we just added it
  // to Cache!
  if (!Dep.isDef() && !Dep.isClobber())
    return Dep;

  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  return Dep;
}

/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered.  This is
/// optimized for the case when only a few entries are added.
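///
/// For example, with N entries already sorted and two appended, the "case 2"
/// code below inserts the last new entry into the sorted prefix and then
/// falls through to "case 1" to place the remaining new entry.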
static void
SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
                         unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  case 0:
    // done, no new entries.
    break;
  case 2: {
    // Two new entries, insert the last one into place.
    NonLocalDepEntry Val = Cache.back();
    Cache.pop_back();
    MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
      std::upper_bound(Cache.begin(), Cache.end()-1, Val);
    Cache.insert(Entry, Val);
    // FALL THROUGH.
  }
  case 1:
    // One new entry, just insert the new value at the appropriate position.
    if (Cache.size() != 1) {
      NonLocalDepEntry Val = Cache.back();
      Cache.pop_back();
      MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.end(), Val);
      Cache.insert(Entry, Val);
    }
    break;
  default:
    // Added many values, do a full scale sort.
    std::sort(Cache.begin(), Cache.end());
    break;
  }
}

/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
/// results to the results vector and keep track of which blocks are visited in
/// 'Visited'.
///
/// This has special behavior for the first block queries (when SkipFirstBlock
/// is true).  In this special case, it ignores the contents of the specified
/// block and starts returning dependence info for its predecessors.
///
/// This function returns false on success, or true to indicate that it could
/// not compute dependence information for some reason.  This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
                            const AliasAnalysis::Location &Loc,
                            bool isLoad, BasicBlock *StartBB,
                            SmallVectorImpl<NonLocalDepResult> &Result,
                            DenseMap<BasicBlock*, Value*> &Visited,
                            bool SkipFirstBlock) {
  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);

  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
  // CacheKey, this value will be inserted as the associated value. Otherwise,
  // it'll be ignored, and we'll have to check to see if the cached size and
  // tbaa tag are consistent with the current query.
  NonLocalPointerInfo InitialNLPI;
  InitialNLPI.Size = Loc.Size;
  InitialNLPI.TBAATag = Loc.TBAATag;

  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  // already have one.
  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
    NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  NonLocalPointerInfo *CacheInfo = &Pair.first->second;

  // If we already have a cache entry for this CacheKey, we may need to do some
  // work to reconcile the cache entry and the current query.
  if (!Pair.second) {
    if (CacheInfo->Size < Loc.Size) {
      // The query's Size is greater than the cached one. Throw out the
      // cached data and proceed with the query at the greater size.
      CacheInfo->Pair = BBSkipFirstBlockPair();
      CacheInfo->Size = Loc.Size;
      for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
           DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
        if (Instruction *Inst = DI->getResult().getInst())
          RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
      CacheInfo->NonLocalDeps.clear();
    } else if (CacheInfo->Size > Loc.Size) {
      // This query's Size is less than the cached one. Conservatively restart
      // the query using the greater size.
      return getNonLocalPointerDepFromBB(Pointer,
                                         Loc.getWithNewSize(CacheInfo->Size),
                                         isLoad, StartBB, Result, Visited,
                                         SkipFirstBlock);
    }
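
    // For example, if a 4-byte query for this pointer was cached and an
    // 8-byte query arrives, the cached data is thrown out and recomputed at
    // the larger size; a narrower query is instead rerun conservatively at
    // the cached (larger) size.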

    // If the query's TBAATag is inconsistent with the cached one,
    // conservatively throw out the cached data and restart the query with
    // no tag if needed.
    if (CacheInfo->TBAATag != Loc.TBAATag) {
      if (CacheInfo->TBAATag) {
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->TBAATag = 0;
        for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
             DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
          if (Instruction *Inst = DI->getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
      }
      if (Loc.TBAATag)
        return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
                                           isLoad, StartBB, Result, Visited,
                                           SkipFirstBlock);
    }
  }

  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;

  // If we have valid cached information for exactly the block we are
  // investigating, just return it with no recomputation.
  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    // We have a fully cached result for this query then we can just return the
    // cached results and populate the visited set.  However, we have to verify
    // that we don't already have conflicting results for these blocks.  Check
    // to ensure that if a block in the results set is in the visited set that
    // it was for the same pointer query.
    if (!Visited.empty()) {
      for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
           I != E; ++I) {
        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
        if (VI == Visited.end() || VI->second == Pointer.getAddr())
          continue;

        // We have a pointer mismatch in a block.  Just return clobber, saying
        // that something was clobbered in this result.  We could also do a
        // non-fully cached query, but there is little point in doing this.
        return true;
      }
    }

    Value *Addr = Pointer.getAddr();
    for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
         I != E; ++I) {
      Visited.insert(std::make_pair(I->getBB(), Addr));
      if (I->getResult().isNonLocal()) {
        continue;
      }

      if (!DT) {
        Result.push_back(NonLocalDepResult(I->getBB(),
                                           MemDepResult::getUnknown(),
                                           Addr));
      } else if (DT->isReachableFromEntry(I->getBB())) {
        Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
      }
    }
    ++NumCacheCompleteNonLocalPtr;
    return false;
  }

  // Otherwise, either this is a new block, a block with an invalid cache
  // pointer or one that we're about to invalidate by putting more info into it
  // than its valid cache info.  If empty, the result will be valid cache info,
  // otherwise it isn't.
  if (Cache->empty())
    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
  else
    CacheInfo->Pair = BBSkipFirstBlockPair();

  SmallVector<BasicBlock*, 32> Worklist;
  Worklist.push_back(StartBB);

  // PredList used inside loop.
  SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList;

  // Keep track of the entries that we know are sorted.  Previously cached
  // entries will all be sorted.  The entries we add we only sort on demand (we
  // don't insert every element into its sorted position).  We know that we
  // won't get any reuse from currently inserted values, because we don't
  // revisit blocks after we insert info for them.
  unsigned NumSortedEntries = Cache->size();
  DEBUG(AssertSorted(*Cache));

  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in FromBB.  See if we already have
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB.  If we have cached
      // information, we will use it, otherwise we compute it.
      DEBUG(AssertSorted(*Cache, NumSortedEntries));
      MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
                                                 NumSortedEntries);

      // If we got a Def or Clobber, add this to the list of results.
      if (!Dep.isNonLocal()) {
        if (!DT) {
          Result.push_back(NonLocalDepResult(BB,
                                             MemDepResult::getUnknown(),
                                             Pointer.getAddr()));
          continue;
        } else if (DT->isReachableFromEntry(BB)) {
          Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
          continue;
        }
      }
    }

    // If 'Pointer' is an instruction defined in this block, then we need to do
    // phi translation to change it into a value live in the predecessor block.
    // If not, we just add the predecessors to the worklist and scan them with
    // the same Pointer.
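    // For example (illustrative IR), if Pointer is "%p = phi [%a, %pred1],
    // [%b, %pred2]" defined in BB, the query must continue as %a in %pred1
    // and as %b in %pred2; a pointer defined above BB is used unchanged.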
    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
      SkipFirstBlock = false;
      SmallVector<BasicBlock*, 16> NewBlocks;
      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
          InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
        if (InsertRes.second) {
          // First time we've looked at *PI.
          NewBlocks.push_back(*PI);
          continue;
        }

        // If we have seen this block before, but it was with a different
        // pointer then we have a phi translation failure and we have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer.getAddr()) {
          // Make sure to clean up the Visited map before continuing on to
          // PredTranslationFailure.
          for (unsigned i = 0; i < NewBlocks.size(); i++)
            Visited.erase(NewBlocks[i]);
          goto PredTranslationFailure;
        }
      }
      Worklist.append(NewBlocks.begin(), NewBlocks.end());
      continue;
    }

    // We do need to do phi translation, if we know ahead of time we can't phi
    // translate this value, don't even try.
    if (!Pointer.IsPotentiallyPHITranslatable())
      goto PredTranslationFailure;

    // We may have added values to the cache list before this PHI translation.
    // If so, we haven't done anything to ensure that the cache remains sorted.
    // Sort it now (if needed) so that recursive invocations of
    // getNonLocalPointerDepFromBB and other routines that could reuse the cache
    // value will only see properly sorted cache arrays.
    if (Cache && NumSortedEntries != Cache->size()) {
      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      NumSortedEntries = Cache->size();
    }
    Cache = 0;

    PredList.clear();
    for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
      BasicBlock *Pred = *PI;
      PredList.push_back(std::make_pair(Pred, Pointer));

      // Get the PHI translated pointer in this predecessor.  This can fail if
      // not translatable, in which case the getAddr() returns null.
      PHITransAddr &PredPointer = PredList.back().second;
      PredPointer.PHITranslateValue(BB, Pred, 0);

      Value *PredPtrVal = PredPointer.getAddr();

      // Check to see if we have already visited this pred block with another
      // pointer.  If so, we can't do this lookup.  This failure can occur
      // with PHI translation when a critical edge exists and the PHI node in
      // the successor translates to a pointer value different than the
      // pointer the block was first analyzed with.
      std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
        InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));

      if (!InsertRes.second) {
        // We found the pred; take it off the list of preds to visit.
        PredList.pop_back();

        // If the predecessor was visited with PredPtr, then we already did
        // the analysis and can ignore it.
        if (InsertRes.first->second == PredPtrVal)
          continue;

        // Otherwise, the block was previously analyzed with a different
        // pointer.  We can't represent the result of this case, so we just
        // treat this as a phi translation failure.

        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0, n = PredList.size(); i < n; ++i)
          Visited.erase(PredList[i].first);

        goto PredTranslationFailure;
      }
    }

    // Actually process results here; this needs to be a separate loop to avoid
    // calling getNonLocalPointerDepFromBB for blocks we don't want to return
    // any results for.  (getNonLocalPointerDepFromBB will modify our
    // datastructures in ways the code after the PredTranslationFailure label
    // doesn't expect.)
    for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
      BasicBlock *Pred = PredList[i].first;
      PHITransAddr &PredPointer = PredList[i].second;
      Value *PredPtrVal = PredPointer.getAddr();

      bool CanTranslate = true;
      // If PHI translation was unable to find an available pointer in this
      // predecessor, then we have to assume that the pointer is clobbered in
      // that predecessor.  We can still do PRE of the load, which would insert
      // a computation of the pointer in this predecessor.
      if (PredPtrVal == 0)
        CanTranslate = false;

      // FIXME: it is entirely possible that PHI translating will end up with
      // the same value.  Consider PHI translating something like:
      // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
      // to recurse here, pedantically speaking.

      // If getNonLocalPointerDepFromBB fails here, that means the cached
      // result conflicted with the Visited list; we have to conservatively
      // assume it is unknown, but this also does not block PRE of the load.
      if (!CanTranslate ||
          getNonLocalPointerDepFromBB(PredPointer,
                                      Loc.getWithNewPtr(PredPtrVal),
                                      isLoad, Pred,
                                      Result, Visited)) {
        // Add the entry to the Result list.
        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
        Result.push_back(Entry);

        // Since we had a phi translation failure, the cache for CacheKey won't
        // include all of the entries that we need to immediately satisfy future
        // queries.  Mark this in NonLocalPointerDeps by setting the
        // BBSkipFirstBlockPair pointer to null.  Any future reuse of the
        // cached value will then have to redo this work rather than miss the
        // phi translation failure.
1203        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1204        NLPI.Pair = BBSkipFirstBlockPair();
1205        continue;
1206      }
1207    }
1208
1209    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1210    CacheInfo = &NonLocalPointerDeps[CacheKey];
1211    Cache = &CacheInfo->NonLocalDeps;
1212    NumSortedEntries = Cache->size();
1213
1214    // Since we did phi translation, the "Cache" set won't contain all of the
1215    // results for the query.  This is ok (we can still use it to accelerate
1216    // specific block queries) but we can't do the fastpath "return all
1217    // results from the set"  Clear out the indicator for this.
1218    CacheInfo->Pair = BBSkipFirstBlockPair();
1219    SkipFirstBlock = false;
1220    continue;
1221
1222  PredTranslationFailure:
1223    // The following code is "failure"; we can't produce a sane translation
1224    // for the given block.  It assumes that we haven't modified any of
1225    // our datastructures while processing the current block.
1226
1227    if (Cache == 0) {
1228      // Refresh the CacheInfo/Cache pointer if it got invalidated.
1229      CacheInfo = &NonLocalPointerDeps[CacheKey];
1230      Cache = &CacheInfo->NonLocalDeps;
1231      NumSortedEntries = Cache->size();
1232    }
1233
1234    // Since we failed phi translation, the "Cache" set won't contain all of the
1235    // results for the query.  This is ok (we can still use it to accelerate
1236    // specific block queries) but we can't do the fastpath "return all
1237    // results from the set".  Clear out the indicator for this.
1238    CacheInfo->Pair = BBSkipFirstBlockPair();
1239
1240    // If *nothing* works, mark the pointer as unknown.
1241    //
1242    // If this is the magic first block, return this as a clobber of the whole
1243    // incoming value.  Since we can't phi translate to one of the predecessors,
1244    // we have to bail out.
1245    if (SkipFirstBlock)
1246      return true;
1247
1248    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
1249      assert(I != Cache->rend() && "Didn't find current block??");
1250      if (I->getBB() != BB)
1251        continue;
1252
1253      assert(I->getResult().isNonLocal() &&
1254             "Should only be here with transparent block");
1255      I->setResult(MemDepResult::getUnknown());
1256      Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
1257                                         Pointer.getAddr()));
1258      break;
1259    }
1260  }
1261
1262  // Okay, we're done now.  If we added new values to the cache, re-sort it.
1263  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1264  DEBUG(AssertSorted(*Cache));
1265  return false;
1266}
1267
1268/// RemoveCachedNonLocalPointerDependencies - If P exists in
1269/// CachedNonLocalPointerInfo, remove it.
1270void MemoryDependenceAnalysis::
1271RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
1272  CachedNonLocalPointerInfo::iterator It =
1273    NonLocalPointerDeps.find(P);
1274  if (It == NonLocalPointerDeps.end()) return;
1275
1276  // Remove all of the entries in the BB->val map.  This involves removing
1277  // instructions from the reverse map.
1278  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1279
1280  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1281    Instruction *Target = PInfo[i].getResult().getInst();
1282    if (!Target) continue;  // Ignore non-local dep results.
1283    assert(Target->getParent() == PInfo[i].getBB());
1284
1285    // Eliminating the dirty entry from 'Cache', so update the reverse info.
1286    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1287  }
1288
1289  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1290  NonLocalPointerDeps.erase(It);
1291}
1292
1293
1294/// invalidateCachedPointerInfo - This method is used to invalidate cached
1295/// information about the specified pointer, because it may be too
1296/// conservative in memdep.  This is an optional call that can be used when
1297/// the client detects an equivalence between the pointer and some other
1298/// value and replaces the other value with Ptr.  This can make Ptr
1299/// available in more places than the cached information reflects.
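///
/// A minimal sketch of the intended call pattern (the client code below is
/// hypothetical, not part of this pass):
///
///   // GVN-style client: OtherV has been proven equal to Ptr.
///   OtherV->replaceAllUsesWith(Ptr);
///   MD->invalidateCachedPointerInfo(Ptr);  // MD points at this analysis.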
1300void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
1301  // If Ptr isn't really a pointer, just ignore it.
1302  if (!Ptr->getType()->isPointerTy()) return;
1303  // Flush store info for the pointer.
1304  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1305  // Flush load info for the pointer.
1306  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1307}
1308
1309/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
1310/// This needs to be done when the CFG changes, e.g., due to splitting
1311/// critical edges.
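///
/// A sketch of the expected usage, assuming a hypothetical client pass that
/// changes the CFG with a utility such as SplitCriticalEdge:
///
///   if (SplitCriticalEdge(TI, SuccNum, this)) {
///     MD->invalidateCachedPredecessors();  // predecessor lists changed
///     // ... issue further non-local queries ...
///   }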
1312void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
1313  PredCache->clear();
1314}
1315
1316/// removeInstruction - Remove an instruction from the dependence analysis,
1317/// updating the dependence of instructions that previously depended on it.
1318/// This method attempts to keep the cache coherent using the reverse map.
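///
/// A minimal usage sketch (the client code is hypothetical): the analysis must
/// be told about the instruction before it is actually erased from the IR:
///
///   MD->removeInstruction(DeadInst);
///   DeadInst->eraseFromParent();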
1319void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
1320  // Walk through the Non-local dependencies, removing this one as the value
1321  // for any cached queries.
1322  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1323  if (NLDI != NonLocalDeps.end()) {
1324    NonLocalDepInfo &BlockMap = NLDI->second.first;
1325    for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
1326         DI != DE; ++DI)
1327      if (Instruction *Inst = DI->getResult().getInst())
1328        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1329    NonLocalDeps.erase(NLDI);
1330  }
1331
1332  // If we have a cached local dependence query for this instruction, remove it.
1333  //
1334  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1335  if (LocalDepEntry != LocalDeps.end()) {
1336    // Remove us from DepInst's reverse set now that the local dep info is gone.
1337    if (Instruction *Inst = LocalDepEntry->second.getInst())
1338      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1339
1340    // Remove this local dependency info.
1341    LocalDeps.erase(LocalDepEntry);
1342  }
1343
1344  // If we have any cached pointer dependencies on this instruction, remove
1345  // them.  If the instruction has non-pointer type, then it can't be a pointer
1346  // base.
1347
1348  // Remove it from both the load info and the store info.  The instruction
1349  // can't be in either of these maps if it is non-pointer.
1350  if (RemInst->getType()->isPointerTy()) {
1351    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1352    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1353  }
1354
1355  // Loop over all of the things that depend on the instruction we're removing.
1356  //
1357  SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
1358
1359  // If we find RemInst as a clobber or Def in any of the maps for other values,
1360  // we need to replace its entry with a dirty version of the instruction after
1361  // it.  If RemInst is a terminator, we use a null dirty value.
1362  //
1363  // Using a dirty version of the instruction after RemInst saves having to scan
1364  // the entire block to get to this point.
1365  MemDepResult NewDirtyVal;
1366  if (!RemInst->isTerminator())
1367    NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
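  // For example, if some cached query currently resolves to RemInst, its entry
  // is rewritten to a dirty result pointing at the instruction after RemInst,
  // so a later query only re-scans upward from that point rather than from the
  // end of the block.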
1368
1369  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1370  if (ReverseDepIt != ReverseLocalDeps.end()) {
1371    SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
1372    // RemInst can't be the terminator if it has local stuff depending on it.
1373    assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
1374           "Nothing can locally depend on a terminator");
1375
1376    for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
1377         E = ReverseDeps.end(); I != E; ++I) {
1378      Instruction *InstDependingOnRemInst = *I;
1379      assert(InstDependingOnRemInst != RemInst &&
1380             "Already removed our local dep info");
1381
1382      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1383
1384      // Remember that new things now depend on NewDirtyVal's instruction.
1385      assert(NewDirtyVal.getInst() && "There is no way something else can have "
1386             "a local dep on this if it is a terminator!");
1387      ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
1388                                                InstDependingOnRemInst));
1389    }
1390
1391    ReverseLocalDeps.erase(ReverseDepIt);
1392
1393    // Add new reverse deps after scanning the set, to avoid invalidating the
1394    // 'ReverseDeps' reference.
1395    while (!ReverseDepsToAdd.empty()) {
1396      ReverseLocalDeps[ReverseDepsToAdd.back().first]
1397        .insert(ReverseDepsToAdd.back().second);
1398      ReverseDepsToAdd.pop_back();
1399    }
1400  }
1401
1402  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1403  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1404    SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
1405    for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
1406         I != E; ++I) {
1407      assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
1408
1409      PerInstNLInfo &INLD = NonLocalDeps[*I];
1410      // The information is now dirty!
1411      INLD.second = true;
1412
1413      for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
1414           DE = INLD.first.end(); DI != DE; ++DI) {
1415        if (DI->getResult().getInst() != RemInst) continue;
1416
1417        // Convert to a dirty entry for the subsequent instruction.
1418        DI->setResult(NewDirtyVal);
1419
1420        if (Instruction *NextI = NewDirtyVal.getInst())
1421          ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
1422      }
1423    }
1424
1425    ReverseNonLocalDeps.erase(ReverseDepIt);
1426
1427    // Add new reverse deps after scanning the set, to avoid invalidating 'Set'.
1428    while (!ReverseDepsToAdd.empty()) {
1429      ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
1430        .insert(ReverseDepsToAdd.back().second);
1431      ReverseDepsToAdd.pop_back();
1432    }
1433  }
1434
1435  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1436  // value in the NonLocalPointerDeps info.
1437  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1438    ReverseNonLocalPtrDeps.find(RemInst);
1439  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1440    SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
1441    SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
1442
1443    for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
1444         E = Set.end(); I != E; ++I) {
1445      ValueIsLoadPair P = *I;
1446      assert(P.getPointer() != RemInst &&
1447             "Already removed NonLocalPointerDeps info for RemInst");
1448
1449      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1450
1451      // The cache is not valid for any specific block anymore.
1452      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1453
1454      // Update any entries for RemInst to use the instruction after it.
1455      for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
1456           DI != DE; ++DI) {
1457        if (DI->getResult().getInst() != RemInst) continue;
1458
1459        // Convert to a dirty entry for the subsequent instruction.
1460        DI->setResult(NewDirtyVal);
1461
1462        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1463          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1464      }
1465
1466      // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
1467      // subsequent value may invalidate the sort order.
1468      std::sort(NLPDI.begin(), NLPDI.end());
1469    }
1470
1471    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1472
1473    while (!ReversePtrDepsToAdd.empty()) {
1474      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
1475        .insert(ReversePtrDepsToAdd.back().second);
1476      ReversePtrDepsToAdd.pop_back();
1477    }
1478  }
1479
1480
1481  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1482  AA->deleteValue(RemInst);
1483  DEBUG(verifyRemoved(RemInst));
1484}

1485/// verifyRemoved - Verify that the specified instruction does not occur
1486/// in our internal data structures.
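///
/// This is a debugging aid: it is invoked through the DEBUG() macro (see
/// removeInstruction above), so the check is skipped in release (NDEBUG)
/// builds.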
1487void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
1488  for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
1489       E = LocalDeps.end(); I != E; ++I) {
1490    assert(I->first != D && "Inst occurs in data structures");
1491    assert(I->second.getInst() != D &&
1492           "Inst occurs in data structures");
1493  }
1494
1495  for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
1496       E = NonLocalPointerDeps.end(); I != E; ++I) {
1497    assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
1498    const NonLocalDepInfo &Val = I->second.NonLocalDeps;
1499    for (NonLocalDepInfo::const_iterator II = Val.begin(), EE = Val.end();
1500         II != EE; ++II)
1501      assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
1502  }
1503
1504  for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
1505       E = NonLocalDeps.end(); I != E; ++I) {
1506    assert(I->first != D && "Inst occurs in data structures");
1507    const PerInstNLInfo &INLD = I->second;
1508    for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
1509         EE = INLD.first.end(); II  != EE; ++II)
1510      assert(II->getResult().getInst() != D && "Inst occurs in data structures");
1511  }
1512
1513  for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
1514       E = ReverseLocalDeps.end(); I != E; ++I) {
1515    assert(I->first != D && "Inst occurs in data structures");
1516    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
1517         EE = I->second.end(); II != EE; ++II)
1518      assert(*II != D && "Inst occurs in data structures");
1519  }
1520
1521  for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
1522       E = ReverseNonLocalDeps.end();
1523       I != E; ++I) {
1524    assert(I->first != D && "Inst occurs in data structures");
1525    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
1526         EE = I->second.end(); II != EE; ++II)
1527      assert(*II != D && "Inst occurs in data structures");
1528  }
1529
1530  for (ReverseNonLocalPtrDepTy::const_iterator
1531       I = ReverseNonLocalPtrDeps.begin(),
1532       E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
1533    assert(I->first != D && "Inst occurs in rev NLPD map");
1534
1535    for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
1536         EE = I->second.end(); II != EE; ++II)
1537      assert(*II != ValueIsLoadPair(D, false) &&
1538             *II != ValueIsLoadPair(D, true) &&
1539             "Inst occurs in ReverseNonLocalPtrDeps map");
1540  }
1541
1542}
1543