MemoryDependenceAnalysis.cpp revision 9ff5a23186f8761d9e5b4b5adf6fae9ce7d63860
1//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation  --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements an analysis that determines, for a given memory
11// operation, what preceding memory operations it depends on.  It builds on
12// alias analysis information, and tries to provide a lazy, caching interface to
13// a common kind of alias information query.
14//
15//===----------------------------------------------------------------------===//
16
17#define DEBUG_TYPE "memdep"
18#include "llvm/Analysis/MemoryDependenceAnalysis.h"
19#include "llvm/Instructions.h"
20#include "llvm/IntrinsicInst.h"
21#include "llvm/Function.h"
22#include "llvm/Analysis/AliasAnalysis.h"
23#include "llvm/Analysis/Dominators.h"
24#include "llvm/Analysis/InstructionSimplify.h"
25#include "llvm/Analysis/MemoryBuiltins.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/STLExtras.h"
28#include "llvm/Support/PredIteratorCache.h"
29#include "llvm/Support/Debug.h"
30using namespace llvm;
31
32STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
33STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
34STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
35
36STATISTIC(NumCacheNonLocalPtr,
37          "Number of fully cached non-local ptr responses");
38STATISTIC(NumCacheDirtyNonLocalPtr,
39          "Number of cached, but dirty, non-local ptr responses");
40STATISTIC(NumUncacheNonLocalPtr,
41          "Number of uncached non-local ptr responses");
42STATISTIC(NumCacheCompleteNonLocalPtr,
43          "Number of block queries that were completely cached");
44
45char MemoryDependenceAnalysis::ID = 0;
46
47// Register this pass...
48static RegisterPass<MemoryDependenceAnalysis> X("memdep",
49                                     "Memory Dependence Analysis", false, true);
50
51MemoryDependenceAnalysis::MemoryDependenceAnalysis()
52: FunctionPass(&ID), PredCache(0) {
53}
54MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
55}
56
57/// Clean up memory in between runs
58void MemoryDependenceAnalysis::releaseMemory() {
59  LocalDeps.clear();
60  NonLocalDeps.clear();
61  NonLocalPointerDeps.clear();
62  ReverseLocalDeps.clear();
63  ReverseNonLocalDeps.clear();
64  ReverseNonLocalPtrDeps.clear();
65  PredCache->clear();
66}
67
68
69
70/// getAnalysisUsage - Does not modify anything.  It uses Alias Analysis.
71///
72void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
73  AU.setPreservesAll();
74  AU.addRequiredTransitive<AliasAnalysis>();
75}
76
77bool MemoryDependenceAnalysis::runOnFunction(Function &) {
78  AA = &getAnalysis<AliasAnalysis>();
79  if (PredCache == 0)
80    PredCache.reset(new PredIteratorCache());
81  return false;
82}
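// Usage sketch (hypothetical client pass; MyPass is a placeholder, not part of
// LLVM): a transformation that wants these lazy queries requires the analysis
// and fetches it in its own runOnFunction:
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<MemoryDependenceAnalysis>();
//   }
//
//   bool MyPass::runOnFunction(Function &F) {
//     MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
//     // ... issue getDependency() / non-local queries on instructions in F ...
//     return false;
//   }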
83
84/// RemoveFromReverseMap - This is a helper function that removes Val from
85/// 'Inst's set in ReverseMap.  If the set becomes empty, remove Inst's entry.
86template <typename KeyTy>
87static void RemoveFromReverseMap(DenseMap<Instruction*,
88                                 SmallPtrSet<KeyTy, 4> > &ReverseMap,
89                                 Instruction *Inst, KeyTy Val) {
90  typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
91  InstIt = ReverseMap.find(Inst);
92  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
93  bool Found = InstIt->second.erase(Val);
94  assert(Found && "Invalid reverse map!"); (void)Found; // unused in NDEBUG.
95  if (InstIt->second.empty())
96    ReverseMap.erase(InstIt);
97}
98
99
100/// getCallSiteDependencyFrom - Private helper for finding the local
101/// dependencies of a call site.
102MemDepResult MemoryDependenceAnalysis::
103getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
104                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
105  // Walk backwards through the block, looking for dependencies
106  while (ScanIt != BB->begin()) {
107    Instruction *Inst = --ScanIt;
108
109    // If this inst is a memory op, get the pointer it accessed
110    Value *Pointer = 0;
111    uint64_t PointerSize = 0;
112    if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
113      Pointer = S->getPointerOperand();
114      PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
115    } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
116      Pointer = V->getOperand(0);
117      PointerSize = AA->getTypeStoreSize(V->getType());
118    } else if (isFreeCall(Inst)) {
119      Pointer = Inst->getOperand(1);
120      // calls to free() erase the entire structure
121      PointerSize = ~0ULL;
122    } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
123      // Debug intrinsics don't cause dependences.
124      if (isa<DbgInfoIntrinsic>(Inst)) continue;
125      CallSite InstCS = CallSite::get(Inst);
126      // If these two calls do not interfere, look past it.
127      switch (AA->getModRefInfo(CS, InstCS)) {
128      case AliasAnalysis::NoModRef:
129        // If the two calls don't interact (e.g. InstCS is readnone) keep
130        // scanning.
131        continue;
132      case AliasAnalysis::Ref:
133        // If the two calls read the same memory locations and CS is a readonly
134        // function, then we have two cases: 1) the calls may not interfere with
135        // each other at all.  2) the calls may produce the same value.  In case
136// #1 we want to ignore the call, in case #2, we want to return Inst
137        // as a Def dependence.  This allows us to CSE in cases like:
138        //   X = strlen(P);
139        //    memchr(...);
140        //   Y = strlen(P);  // Y = X
141        if (isReadOnlyCall) {
142          if (CS.getCalledFunction() != 0 &&
143              CS.getCalledFunction() == InstCS.getCalledFunction())
144            return MemDepResult::getDef(Inst);
145          // Ignore unrelated read/read call dependences.
146          continue;
147        }
148        // FALL THROUGH
149      default:
150        return MemDepResult::getClobber(Inst);
151      }
152    } else {
153      // Non-memory instruction.
154      continue;
155    }
156
157    if (AA->getModRefInfo(CS, Pointer, PointerSize) != AliasAnalysis::NoModRef)
158      return MemDepResult::getClobber(Inst);
159  }
160
161  // No dependence found.  If this is the entry block of the function, it is a
162  // clobber, otherwise it is non-local.
163  if (BB != &BB->getParent()->getEntryBlock())
164    return MemDepResult::getNonLocal();
165  return MemDepResult::getClobber(ScanIt);
166}
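// Illustration of the Ref case above (hypothetical client code; MD and the
// call instructions are placeholders): for the strlen/memchr example in the
// comment, querying the second strlen scans past memchr and returns the first
// strlen as a Def, so a CSE-style client can do roughly:
//
//   MemDepResult Res = MD.getDependency(SecondStrlen);
//   if (Res.isDef() && Res.getInst()->isIdenticalTo(SecondStrlen))
//     SecondStrlen->replaceAllUsesWith(Res.getInst());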
167
168/// getPointerDependencyFrom - Return the instruction on which a memory
169/// location depends.  If isLoad is true, this routine ignores may-aliases with
170/// read-only operations.
171MemDepResult MemoryDependenceAnalysis::
172getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
173                         BasicBlock::iterator ScanIt, BasicBlock *BB) {
174
175  Value *InvariantTag = 0;
176
177  // Walk backwards through the basic block, looking for dependencies.
178  while (ScanIt != BB->begin()) {
179    Instruction *Inst = --ScanIt;
180
181    // If we're in an invariant region, no dependencies can be found before
182    // we pass an invariant-begin marker.
183    if (InvariantTag == Inst) {
184      InvariantTag = 0;
185      continue;
186    }
187
188    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
189      // Debug intrinsics don't cause dependences.
190      if (isa<DbgInfoIntrinsic>(Inst)) continue;
191
192      // If we pass an invariant-end marker, then we've just entered an
193      // invariant region and can start ignoring dependencies.
194      if (II->getIntrinsicID() == Intrinsic::invariant_end) {
195        // FIXME: This only considers queries directly on the invariant-tagged
196        // pointer, not on query pointers that are indexed off of them.  It'd
197        // be nice to handle that at some point.
198        AliasAnalysis::AliasResult R =
199          AA->alias(II->getOperand(3), ~0ULL, MemPtr, ~0ULL);
200        if (R == AliasAnalysis::MustAlias) {
201          InvariantTag = II->getOperand(1);
202          continue;
203        }
204
205      // If we reach a lifetime begin or end marker, then the query ends here
206      // because the value is undefined.
207      } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
208        // FIXME: This only considers queries directly on the invariant-tagged
209        // pointer, not on query pointers that are indexed off of them.  It'd
210        // be nice to handle that at some point.
211        AliasAnalysis::AliasResult R =
212          AA->alias(II->getOperand(2), ~0ULL, MemPtr, ~0ULL);
213        if (R == AliasAnalysis::MustAlias)
214          return MemDepResult::getDef(II);
215      }
216    }
217
218    // If we're querying on a load and we're in an invariant region, skip this
219    // instruction: nothing a load depends on can live in an invariant region.
220    if (isLoad && InvariantTag) continue;
221
222    // A query depends on a load if the pointers must alias.  In particular,
223    // a load depends on another must-aliased load from the same pointer.
224    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
225      Value *Pointer = LI->getPointerOperand();
226      uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());
227
228      // If we found a pointer, check if it could be the same as our pointer.
229      AliasAnalysis::AliasResult R =
230        AA->alias(Pointer, PointerSize, MemPtr, MemSize);
231      if (R == AliasAnalysis::NoAlias)
232        continue;
233
234      // A load doesn't depend on a prior load that only may-aliases it.
235      if (isLoad && R == AliasAnalysis::MayAlias)
236        continue;
237      // Stores depend on may- and must-aliased loads; loads depend only on
238      // must-aliased loads.
239      return MemDepResult::getDef(Inst);
240    }
241
242    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
243      // There can't be stores to the value we care about inside an
244      // invariant region.
245      if (InvariantTag) continue;
246
247      // If alias analysis can tell that this store is guaranteed to not modify
248      // the query pointer, ignore it.  Use getModRefInfo to handle cases where
249      // the query pointer points to constant memory etc.
250      if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef)
251        continue;
252
253      // Ok, this store might clobber the query pointer.  Check to see if it is
254      // a must alias: in this case, we want to return this as a def.
255      Value *Pointer = SI->getPointerOperand();
256      uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
257
258      // If we found a pointer, check if it could be the same as our pointer.
259      AliasAnalysis::AliasResult R =
260        AA->alias(Pointer, PointerSize, MemPtr, MemSize);
261
262      if (R == AliasAnalysis::NoAlias)
263        continue;
264      if (R == AliasAnalysis::MayAlias)
265        return MemDepResult::getClobber(Inst);
266      return MemDepResult::getDef(Inst);
267    }
268
269    // If this is an allocation, and if we know that the accessed pointer is to
270    // the allocation, return Def.  This means that there is no dependence and
271    // the access can be optimized based on that.  For example, a load could
272    // turn into undef.
273    // Note: Only determine this to be a malloc if Inst is the malloc call, not
274    // a subsequent bitcast of the malloc call result.  There can be stores to
275    // the malloced memory between the malloc call and its bitcast uses, and we
276    // need to continue scanning until the malloc call.
277    if (isa<AllocaInst>(Inst) || extractMallocCall(Inst)) {
278      Value *AccessPtr = MemPtr->getUnderlyingObject();
279
280      if (AccessPtr == Inst ||
281          AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
282        return MemDepResult::getDef(Inst);
283      continue;
284    }
285
286    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
287    switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
288    case AliasAnalysis::NoModRef:
289      // If the call has no effect on the queried pointer, just ignore it.
290      continue;
291    case AliasAnalysis::Mod:
292      // If we're in an invariant region, we can ignore calls that ONLY
293      // modify the pointer.
294      if (InvariantTag) continue;
295      return MemDepResult::getClobber(Inst);
296    case AliasAnalysis::Ref:
297      // If the call is known to never store to the pointer, and if this is a
298      // load query, we can safely ignore it (scan past it).
299      if (isLoad)
300        continue;
301    default:
302      // Otherwise, there is a potential dependence.  Return a clobber.
303      return MemDepResult::getClobber(Inst);
304    }
305  }
306
307  // No dependence found.  If this is the entry block of the function, it is a
308  // clobber, otherwise it is non-local.
309  if (BB != &BB->getParent()->getEntryBlock())
310    return MemDepResult::getNonLocal();
311  return MemDepResult::getClobber(ScanIt);
312}
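// Illustration of how a client typically consumes Def vs. Clobber from the
// scan above (hypothetical code; MD and Load are placeholders).  A Def from a
// must-aliased store enables store-to-load forwarding, assuming the value
// types match; a Clobber (may-aliased store, interfering call, etc.) only
// marks where the scan had to stop:
//
//   MemDepResult Res = MD.getDependency(Load);
//   if (Res.isDef())
//     if (StoreInst *DepSI = dyn_cast<StoreInst>(Res.getInst()))
//       Load->replaceAllUsesWith(DepSI->getOperand(0)); // forward stored value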
313
314/// getDependency - Return the instruction on which a memory operation
315/// depends.
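///
/// For illustration, a hypothetical caller might use it roughly as (MD being a
/// reference to this analysis):
/// \code
///   MemDepResult Res = MD.getDependency(QueryInst);
///   if (Res.isNonLocal())
///     ... ;  // nothing in QueryInst's block; see the non-local queries below
///   else if (Instruction *DepInst = Res.getInst())
///     ... ;  // QueryInst is defined or clobbered by DepInst
/// \endcode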
316MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
317  Instruction *ScanPos = QueryInst;
318
319  // Check for a cached result
320  MemDepResult &LocalCache = LocalDeps[QueryInst];
321
322  // If the cached entry is non-dirty, just return it.  Note that this depends
323  // on MemDepResult's default constructing to 'dirty'.
324  if (!LocalCache.isDirty())
325    return LocalCache;
326
327  // Otherwise, if we have a dirty entry, we know we can start the scan at that
328  // instruction, which may save us some work.
329  if (Instruction *Inst = LocalCache.getInst()) {
330    ScanPos = Inst;
331
332    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
333  }
334
335  BasicBlock *QueryParent = QueryInst->getParent();
336
337  Value *MemPtr = 0;
338  uint64_t MemSize = 0;
339
340  // Do the scan.
341  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
342    // No dependence found.  If this is the entry block of the function, it is a
343    // clobber, otherwise it is non-local.
344    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
345      LocalCache = MemDepResult::getNonLocal();
346    else
347      LocalCache = MemDepResult::getClobber(QueryInst);
348  } else if (StoreInst *SI = dyn_cast<StoreInst>(QueryInst)) {
349    // If this is a volatile store, don't mess around with it.  Just return the
350    // previous instruction as a clobber.
351    if (SI->isVolatile())
352      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
353    else {
354      MemPtr = SI->getPointerOperand();
355      MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
356    }
357  } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
358    // If this is a volatile load, don't mess around with it.  Just return the
359    // previous instruction as a clobber.
360    if (LI->isVolatile())
361      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
362    else {
363      MemPtr = LI->getPointerOperand();
364      MemSize = AA->getTypeStoreSize(LI->getType());
365    }
366  } else if (isFreeCall(QueryInst)) {
367    MemPtr = QueryInst->getOperand(1);
368    // calls to free() erase the entire structure, not just a field.
369    MemSize = ~0ULL;
370  } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
371    int IntrinsicID = 0;  // Intrinsic IDs start at 1.
372    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
373      IntrinsicID = II->getIntrinsicID();
374
375    switch (IntrinsicID) {
376      case Intrinsic::lifetime_start:
377      case Intrinsic::lifetime_end:
378      case Intrinsic::invariant_start:
379        MemPtr = QueryInst->getOperand(2);
380        MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue();
381        break;
382      case Intrinsic::invariant_end:
383        MemPtr = QueryInst->getOperand(3);
384        MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue();
385        break;
386      default:
387        CallSite QueryCS = CallSite::get(QueryInst);
388        bool isReadOnly = AA->onlyReadsMemory(QueryCS);
389        LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
390                                               QueryParent);
391    }
392  } else {
393    // Non-memory instruction.
394    LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
395  }
396
397  // If we need to do a pointer scan, make it happen.
398  if (MemPtr) {
399    bool isLoad = !QueryInst->mayWriteToMemory();
400    if (IntrinsicInst *II = dyn_cast<MemoryUseIntrinsic>(QueryInst)) {
401      isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
402    }
403    LocalCache = getPointerDependencyFrom(MemPtr, MemSize, isLoad, ScanPos,
404                                          QueryParent);
405  }
406
407  // Remember the result!
408  if (Instruction *I = LocalCache.getInst())
409    ReverseLocalDeps[I].insert(QueryInst);
410
411  return LocalCache;
412}
413
414#ifndef NDEBUG
415/// AssertSorted - This method is used when -debug is specified to verify that
416/// cache arrays are properly kept sorted.
417static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
418                         int Count = -1) {
419  if (Count == -1) Count = Cache.size();
420  if (Count == 0) return;
421
422  for (unsigned i = 1; i != unsigned(Count); ++i)
423    assert(Cache[i-1] <= Cache[i] && "Cache isn't sorted!");
424}
425#endif
426
427/// getNonLocalCallDependency - Perform a full dependency query for the
428/// specified call, returning the set of blocks that the value is
429/// potentially live across.  The returned set of results will include a
430/// "NonLocal" result for all blocks where the value is live across.
431///
432/// This method assumes the instruction returns a "NonLocal" dependency
433/// within its own block.
434///
435/// This returns a reference to an internal data structure that may be
436/// invalidated on the next non-local query or when an instruction is
437/// removed.  Clients must copy this data if they want it around longer than
438/// that.
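///
/// For illustration, a hypothetical caller could consume the result as:
/// \code
///   const NonLocalDepInfo &Deps = MD.getNonLocalCallDependency(QueryCS);
///   for (unsigned i = 0, e = Deps.size(); i != e; ++i)
///     ... ;  // Deps[i].first is the BasicBlock*, Deps[i].second the result
/// \endcode
/// Per the note above, the reference must not be held across another non-local
/// query or an instruction removal.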
439const MemoryDependenceAnalysis::NonLocalDepInfo &
440MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
441  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
442 "getNonLocalCallDependency should only be used on calls with non-local deps!");
443  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
444  NonLocalDepInfo &Cache = CacheP.first;
445
446  /// DirtyBlocks - This is the set of blocks that need to be recomputed.  In
447  /// the cached case, this can happen due to instructions being deleted etc. In
448  /// the uncached case, this starts out as the set of predecessors we care
449  /// about.
450  SmallVector<BasicBlock*, 32> DirtyBlocks;
451
452  if (!Cache.empty()) {
453    // Okay, we have a cache entry.  If we know it is not dirty, just return it
454    // with no computation.
455    if (!CacheP.second) {
456      NumCacheNonLocal++;
457      return Cache;
458    }
459
460    // If we already have a partially computed set of results, scan them to
461    // determine what is dirty, seeding our initial DirtyBlocks worklist.
462    for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
463       I != E; ++I)
464      if (I->second.isDirty())
465        DirtyBlocks.push_back(I->first);
466
467    // Sort the cache so that we can do fast binary search lookups below.
468    std::sort(Cache.begin(), Cache.end());
469
470    ++NumCacheDirtyNonLocal;
471    //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
472    //     << Cache.size() << " cached: " << *QueryInst;
473  } else {
474    // Seed DirtyBlocks with each of the preds of QueryInst's block.
475    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
476    for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
477      DirtyBlocks.push_back(*PI);
478    NumUncacheNonLocal++;
479  }
480
481  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
482  bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
483
484  SmallPtrSet<BasicBlock*, 64> Visited;
485
486  unsigned NumSortedEntries = Cache.size();
487  DEBUG(AssertSorted(Cache));
488
489  // Iterate while we still have blocks to update.
490  while (!DirtyBlocks.empty()) {
491    BasicBlock *DirtyBB = DirtyBlocks.back();
492    DirtyBlocks.pop_back();
493
494    // Already processed this block?
495    if (!Visited.insert(DirtyBB))
496      continue;
497
498    // Do a binary search to see if we already have an entry for this block in
499    // the cache set.  If so, find it.
500    DEBUG(AssertSorted(Cache, NumSortedEntries));
501    NonLocalDepInfo::iterator Entry =
502      std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
503                       std::make_pair(DirtyBB, MemDepResult()));
504    if (Entry != Cache.begin() && prior(Entry)->first == DirtyBB)
505      --Entry;
506
507    MemDepResult *ExistingResult = 0;
508    if (Entry != Cache.begin()+NumSortedEntries &&
509        Entry->first == DirtyBB) {
510      // If we already have an entry, and if it isn't already dirty, the block
511      // is done.
512      if (!Entry->second.isDirty())
513        continue;
514
515      // Otherwise, remember this slot so we can update the value.
516      ExistingResult = &Entry->second;
517    }
518
519    // If the dirty entry has a pointer, start scanning from it so we don't have
520    // to rescan the entire block.
521    BasicBlock::iterator ScanPos = DirtyBB->end();
522    if (ExistingResult) {
523      if (Instruction *Inst = ExistingResult->getInst()) {
524        ScanPos = Inst;
525        // We're removing QueryInst's use of Inst.
526        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
527                             QueryCS.getInstruction());
528      }
529    }
530
531    // Find out if this block has a local dependency for QueryInst.
532    MemDepResult Dep;
533
534    if (ScanPos != DirtyBB->begin()) {
535      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
536    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
537      // No dependence found.  If this is the entry block of the function, it is
538      // a clobber, otherwise it is non-local.
539      Dep = MemDepResult::getNonLocal();
540    } else {
541      Dep = MemDepResult::getClobber(ScanPos);
542    }
543
544    // If we had a dirty entry for the block, update it.  Otherwise, just add
545    // a new entry.
546    if (ExistingResult)
547      *ExistingResult = Dep;
548    else
549      Cache.push_back(std::make_pair(DirtyBB, Dep));
550
551    // If the block has a dependency (i.e. it isn't completely transparent to
552    // the value), remember the association!
553    if (!Dep.isNonLocal()) {
554      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
555      // update this when we remove instructions.
556      if (Instruction *Inst = Dep.getInst())
557        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
558    } else {
559
560      // If the block *is* completely transparent to the load, we need to check
561      // the predecessors of this block.  Add them to our worklist.
562      for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
563        DirtyBlocks.push_back(*PI);
564    }
565  }
566
567  return Cache;
568}
569
570/// getNonLocalPointerDependency - Perform a full dependency query for an
571/// access to the specified (non-volatile) memory location, returning the
572/// set of instructions that either define or clobber the value.
573///
574/// This method assumes the pointer has a "NonLocal" dependency within its
575/// own block.
576///
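/// For illustration, a hypothetical caller querying a load LI might do:
/// \code
///   SmallVector<NonLocalDepEntry, 64> Deps;
///   MD.getNonLocalPointerDependency(LI->getPointerOperand(), true,
///                                   LI->getParent(), Deps);
///   for (unsigned i = 0, e = Deps.size(); i != e; ++i)
///     ... ;  // Deps[i].second is the def/clobber of the pointer in the
///            // block Deps[i].first
/// \endcode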
577void MemoryDependenceAnalysis::
578getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
579                             SmallVectorImpl<NonLocalDepEntry> &Result) {
580  assert(isa<PointerType>(Pointer->getType()) &&
581         "Can't get pointer deps of a non-pointer!");
582  Result.clear();
583
584  // We know that the pointer value is live into FromBB; find the def/clobbers
585  // from its predecessors.
586  const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
587  uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
588
589  // This is the set of blocks we've inspected, and the pointer we consider in
590  // each block.  Because of critical edges, we currently bail out if querying
591  // a block with multiple different pointers.  This can happen during PHI
592  // translation.
593  DenseMap<BasicBlock*, Value*> Visited;
594  if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
595                                   Result, Visited, true))
596    return;
597  Result.clear();
598  Result.push_back(std::make_pair(FromBB,
599                                  MemDepResult::getClobber(FromBB->begin())));
600}
601
602/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
603/// Pointer/PointeeSize using either cached information in Cache or by doing a
604/// lookup (which may use dirty cache info if available).  If we do a lookup,
605/// add the result to the cache.
606MemDepResult MemoryDependenceAnalysis::
607GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
608                        bool isLoad, BasicBlock *BB,
609                        NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
610
611  // Do a binary search to see if we already have an entry for this block in
612  // the cache set.  If so, find it.
613  NonLocalDepInfo::iterator Entry =
614    std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
615                     std::make_pair(BB, MemDepResult()));
616  if (Entry != Cache->begin() && prior(Entry)->first == BB)
617    --Entry;
618
619  MemDepResult *ExistingResult = 0;
620  if (Entry != Cache->begin()+NumSortedEntries && Entry->first == BB)
621    ExistingResult = &Entry->second;
622
623  // If we have a cached entry, and it is non-dirty, use it as the value for
624  // this dependency.
625  if (ExistingResult && !ExistingResult->isDirty()) {
626    ++NumCacheNonLocalPtr;
627    return *ExistingResult;
628  }
629
630  // Otherwise, we have to scan for the value.  If we have a dirty cache
631  // entry, start scanning from its position, otherwise we scan from the end
632  // of the block.
633  BasicBlock::iterator ScanPos = BB->end();
634  if (ExistingResult && ExistingResult->getInst()) {
635    assert(ExistingResult->getInst()->getParent() == BB &&
636           "Instruction invalidated?");
637    ++NumCacheDirtyNonLocalPtr;
638    ScanPos = ExistingResult->getInst();
639
640    // Eliminating the dirty entry from 'Cache', so update the reverse info.
641    ValueIsLoadPair CacheKey(Pointer, isLoad);
642    RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
643  } else {
644    ++NumUncacheNonLocalPtr;
645  }
646
647  // Scan the block for the dependency.
648  MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad,
649                                              ScanPos, BB);
650
651  // If we had a dirty entry for the block, update it.  Otherwise, just add
652  // a new entry.
653  if (ExistingResult)
654    *ExistingResult = Dep;
655  else
656    Cache->push_back(std::make_pair(BB, Dep));
657
658  // If the block has a dependency (i.e. it isn't completely transparent to
659  // the value), remember the reverse association because we just added it
660  // to Cache!
661  if (Dep.isNonLocal())
662    return Dep;
663
664  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
665  // update MemDep when we remove instructions.
666  Instruction *Inst = Dep.getInst();
667  assert(Inst && "Didn't depend on anything?");
668  ValueIsLoadPair CacheKey(Pointer, isLoad);
669  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
670  return Dep;
671}
672
673/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
674/// number of elements in the array that are already properly ordered.  This is
675/// optimized for the case when only a few entries are added.
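/// For example (illustrative): with NumSortedEntries == 3 and two entries
/// appended since the last sort, the "case 2" path binary-inserts the last
/// entry and falls through to "case 1" for the remaining one, avoiding a full
/// std::sort of the whole array.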
676static void
677SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
678                         unsigned NumSortedEntries) {
679  switch (Cache.size() - NumSortedEntries) {
680  case 0:
681    // done, no new entries.
682    break;
683  case 2: {
684    // Two new entries, insert the last one into place.
685    MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
686    Cache.pop_back();
687    MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
688      std::upper_bound(Cache.begin(), Cache.end()-1, Val);
689    Cache.insert(Entry, Val);
690    // FALL THROUGH.
691  }
692  case 1:
693    // One new entry; just insert the new value at the appropriate position.
694    if (Cache.size() != 1) {
695      MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
696      Cache.pop_back();
697      MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
698        std::upper_bound(Cache.begin(), Cache.end(), Val);
699      Cache.insert(Entry, Val);
700    }
701    break;
702  default:
703    // Added many values, do a full scale sort.
704    std::sort(Cache.begin(), Cache.end());
705    break;
706  }
707}
708
709/// isPHITranslatable - Return true if the specified computation is derived from
710/// a PHI node in the current block and if it is simple enough for us to handle.
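///
/// For example (illustrative pseudo-IR): with the current block containing
/// \code
///   %p   = phi i8* [ %a, %pred1 ], [ %b, %pred2 ]
///   %gep = getelementptr i8* %p, i64 4
/// \endcode
/// the computation %gep is PHI translatable, since its only operand defined in
/// this block is the PHI itself.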
711static bool isPHITranslatable(Instruction *Inst) {
712  if (isa<PHINode>(Inst))
713    return true;
714
715  // We can handle a bitcast if its operand is either live into this block or
716  // is itself PHI translatable.
717  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
718    Instruction *OpI = dyn_cast<Instruction>(BC->getOperand(0));
719    if (OpI == 0 || OpI->getParent() != Inst->getParent())
720      return true;
721    return isPHITranslatable(OpI);
722  }
723
724  // We can translate a GEP if all of its operands defined in this block are phi
725  // translatable.
726  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
727    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
728      Instruction *OpI = dyn_cast<Instruction>(GEP->getOperand(i));
729      if (OpI == 0 || OpI->getParent() != Inst->getParent())
730        continue;
731
732      if (!isPHITranslatable(OpI))
733        return false;
734    }
735    return true;
736  }
737
738  if (Inst->getOpcode() == Instruction::Add &&
739      isa<ConstantInt>(Inst->getOperand(1))) {
740    Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
741    if (OpI == 0 || OpI->getParent() != Inst->getParent())
742      return true;
743    return isPHITranslatable(OpI);
744  }
745
746  //   cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
747  //   if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
748  //     cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
749
750  return false;
751}
752
753/// GetPHITranslatedValue - Given a computation that satisfies the
754/// isPHITranslatable predicate, see if we can translate the computation into
755/// the specified predecessor block.  If so, return that value.
756Value *MemoryDependenceAnalysis::
757GetPHITranslatedValue(Value *InVal, BasicBlock *CurBB, BasicBlock *Pred,
758                      const TargetData *TD) const {
759  // If the input value is not an instruction, or if it is not defined in CurBB,
760  // then we don't need to phi translate it.
761  Instruction *Inst = dyn_cast<Instruction>(InVal);
762  if (Inst == 0 || Inst->getParent() != CurBB)
763    return InVal;
764
765  if (PHINode *PN = dyn_cast<PHINode>(Inst))
766    return PN->getIncomingValueForBlock(Pred);
767
768  // Handle bitcast of PHI.
769  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
770    // PHI translate the input operand.
771    Value *PHIIn = GetPHITranslatedValue(BC->getOperand(0), CurBB, Pred, TD);
772    if (PHIIn == 0) return 0;
773
774    // Constants are trivial to phi translate.
775    if (Constant *C = dyn_cast<Constant>(PHIIn))
776      return ConstantExpr::getBitCast(C, BC->getType());
777
778    // Otherwise we have to see if a bitcasted version of the incoming pointer
779    // is available.  If so, we can use it, otherwise we have to fail.
780    for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
781         UI != E; ++UI) {
782      if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
783        if (BCI->getType() == BC->getType())
784          return BCI;
785    }
786    return 0;
787  }
788
789  // Handle getelementptr with at least one PHI translatable operand.
790  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
791    SmallVector<Value*, 8> GEPOps;
792    BasicBlock *CurBB = GEP->getParent();
793    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
794      Value *GEPOp = GEP->getOperand(i);
795      // No PHI translation is needed for operands whose values are live into
796      // the predecessor block.
797      if (!isa<Instruction>(GEPOp) ||
798          cast<Instruction>(GEPOp)->getParent() != CurBB) {
799        GEPOps.push_back(GEPOp);
800        continue;
801      }
802
803      // If the operand is a phi node, do phi translation.
804      Value *InOp = GetPHITranslatedValue(GEPOp, CurBB, Pred, TD);
805      if (InOp == 0) return 0;
806
807      GEPOps.push_back(InOp);
808    }
809
810    // Simplify the GEP to handle 'gep x, 0' -> x etc.
811    if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD))
812      return V;
813
814    // Scan to see if we have this GEP available.
815    Value *APHIOp = GEPOps[0];
816    for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
817         UI != E; ++UI) {
818      if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
819        if (GEPI->getType() == GEP->getType() &&
820            GEPI->getNumOperands() == GEPOps.size() &&
821            GEPI->getParent()->getParent() == CurBB->getParent()) {
822          bool Mismatch = false;
823          for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
824            if (GEPI->getOperand(i) != GEPOps[i]) {
825              Mismatch = true;
826              break;
827            }
828          if (!Mismatch)
829            return GEPI;
830        }
831    }
832    return 0;
833  }
834
835  // Handle add with a constant RHS.
836  if (Inst->getOpcode() == Instruction::Add &&
837      isa<ConstantInt>(Inst->getOperand(1))) {
838    // PHI translate the LHS.
839    Value *LHS;
840    Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
841    Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
842    bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
843    bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
844
845    if (OpI == 0 || OpI->getParent() != Inst->getParent())
846      LHS = Inst->getOperand(0);
847    else {
848      LHS = GetPHITranslatedValue(Inst->getOperand(0), CurBB, Pred, TD);
849      if (LHS == 0)
850        return 0;
851    }
852
853    // If the PHI translated LHS is an add of a constant, fold the immediates.
854    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
855      if (BOp->getOpcode() == Instruction::Add)
856        if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
857          LHS = BOp->getOperand(0);
858          RHS = ConstantExpr::getAdd(RHS, CI);
859          isNSW = isNUW = false;
860        }
861
862    // See if the add simplifies away.
863    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD))
864      return Res;
865
866    // Otherwise, see if we have this add available somewhere.
867    for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
868         UI != E; ++UI) {
869      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
870        if (BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
871            BO->getParent()->getParent() == CurBB->getParent())
872          return BO;
873    }
874
875    return 0;
876  }
877
878  return 0;
879}
880
881/// GetAvailablePHITranslatedValue - Return the value computed by
882/// GetPHITranslatedValue if it dominates PredBB, otherwise return null.
883Value *MemoryDependenceAnalysis::
884GetAvailablePHITranslatedValue(Value *V,
885                               BasicBlock *CurBB, BasicBlock *PredBB,
886                               const TargetData *TD,
887                               const DominatorTree &DT) const {
888  // See if PHI translation succeeds.
889  V = GetPHITranslatedValue(V, CurBB, PredBB, TD);
890  if (V == 0) return 0;
891
892  // Make sure the value is live in the predecessor.
893  if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
894    if (!DT.dominates(Inst->getParent(), PredBB))
895      return 0;
896  return V;
897}
898
899
900/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
901/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
902/// block.  All newly created instructions are added to the NewInsts list.
903///
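/// For illustration, a hypothetical PRE-style caller might use it as:
/// \code
///   SmallVector<Instruction*, 8> NewInsts;
///   Value *PredV = MD.InsertPHITranslatedPointer(Ptr, CurBB, PredBB, TD, DT,
///                                                NewInsts);
///   if (PredV == 0)
///     while (!NewInsts.empty())             // undo on failure, newest first
///       NewInsts.pop_back_val()->eraseFromParent();
/// \endcode
///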
904Value *MemoryDependenceAnalysis::
905InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB,
906                           BasicBlock *PredBB, const TargetData *TD,
907                           const DominatorTree &DT,
908                           SmallVectorImpl<Instruction*> &NewInsts) const {
909  // See if we have a version of this value already available and dominating
910  // PredBB.  If so, there is no need to insert a new copy.
911  if (Value *Res = GetAvailablePHITranslatedValue(InVal, CurBB, PredBB, TD, DT))
912    return Res;
913
914  // If we don't have an available version of this value, it must be an
915  // instruction.
916  Instruction *Inst = cast<Instruction>(InVal);
917
918  // Handle bitcast of PHI translatable value.
919  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
920    Value *OpVal = InsertPHITranslatedPointer(BC->getOperand(0),
921                                              CurBB, PredBB, TD, DT, NewInsts);
922    if (OpVal == 0) return 0;
923
924    // Otherwise insert a bitcast at the end of PredBB.
925    BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
926                                       InVal->getName()+".phi.trans.insert",
927                                       PredBB->getTerminator());
928    NewInsts.push_back(New);
929    return New;
930  }
931
932  // Handle getelementptr with at least one PHI operand.
933  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
934    SmallVector<Value*, 8> GEPOps;
935    BasicBlock *CurBB = GEP->getParent();
936    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
937      Value *OpVal = InsertPHITranslatedPointer(GEP->getOperand(i),
938                                                CurBB, PredBB, TD, DT, NewInsts);
939      if (OpVal == 0) return 0;
940      GEPOps.push_back(OpVal);
941    }
942
943    GetElementPtrInst *Result =
944      GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
945                                InVal->getName()+".phi.trans.insert",
946                                PredBB->getTerminator());
947    Result->setIsInBounds(GEP->isInBounds());
948    NewInsts.push_back(Result);
949    return Result;
950  }
951
952#if 0
953  // FIXME: This code works, but it is unclear that we actually want to insert
954  // a big chain of computation in order to make a value available in a block.
955  // This needs to be evaluated carefully to consider its cost trade offs.
956
957  // Handle add with a constant RHS.
958  if (Inst->getOpcode() == Instruction::Add &&
959      isa<ConstantInt>(Inst->getOperand(1))) {
960    // PHI translate the LHS.
961    Value *OpVal = InsertPHITranslatedPointer(Inst->getOperand(0),
962                                              CurBB, PredBB, TD, DT, NewInsts);
963    if (OpVal == 0) return 0;
964
965    BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
966                                           InVal->getName()+".phi.trans.insert",
967                                                    PredBB->getTerminator());
968    Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
969    Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
970    NewInsts.push_back(Res);
971    return Res;
972  }
973#endif
974
975  return 0;
976}
977
978/// getNonLocalPointerDepFromBB - Perform a dependency query based on
979/// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
980/// results to the results vector and keep track of which blocks are visited in
981/// 'Visited'.
982///
983/// This has special behavior for the first block query (when SkipFirstBlock
984/// is true).  In this special case, it ignores the contents of the specified
985/// block and starts returning dependence info for its predecessors.
986///
987/// This function returns false on success, or true to indicate that it could
988/// not compute dependence information for some reason.  This should be treated
989/// as a clobber dependence on the first instruction in the predecessor block.
990bool MemoryDependenceAnalysis::
991getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
992                            bool isLoad, BasicBlock *StartBB,
993                            SmallVectorImpl<NonLocalDepEntry> &Result,
994                            DenseMap<BasicBlock*, Value*> &Visited,
995                            bool SkipFirstBlock) {
996
997  // Look up the cached info for Pointer.
998  ValueIsLoadPair CacheKey(Pointer, isLoad);
999
1000  std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
1001    &NonLocalPointerDeps[CacheKey];
1002  NonLocalDepInfo *Cache = &CacheInfo->second;
1003
1004  // If we have valid cached information for exactly the block we are
1005  // investigating, just return it with no recomputation.
1006  if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1007    // If we have a fully cached result for this query, we can just return the
1008    // cached results and populate the visited set.  However, we have to verify
1009    // that we don't already have conflicting results for these blocks.  Check
1010    // to ensure that if a block in the results set is in the visited set that
1011    // it was for the same pointer query.
1012    if (!Visited.empty()) {
1013      for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
1014           I != E; ++I) {
1015        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first);
1016        if (VI == Visited.end() || VI->second == Pointer) continue;
1017
1018        // We have a pointer mismatch in a block.  Just return clobber, saying
1019        // that something was clobbered in this result.  We could also do a
1020        // non-fully cached query, but there is little point in doing this.
1021        return true;
1022      }
1023    }
1024
1025    for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
1026         I != E; ++I) {
1027      Visited.insert(std::make_pair(I->first, Pointer));
1028      if (!I->second.isNonLocal())
1029        Result.push_back(*I);
1030    }
1031    ++NumCacheCompleteNonLocalPtr;
1032    return false;
1033  }
1034
1035  // Otherwise, this is either a new block, a block with an invalid cache
1036  // pointer, or one that we're about to invalidate by putting more info into
1037  // it than its valid cache info covers.  If the cache is empty, the result
1038  // will be valid cache info; otherwise it won't be.
1039  if (Cache->empty())
1040    CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1041  else
1042    CacheInfo->first = BBSkipFirstBlockPair();
1043
1044  SmallVector<BasicBlock*, 32> Worklist;
1045  Worklist.push_back(StartBB);
1046
1047  // Keep track of the entries that we know are sorted.  Previously cached
1048  // entries will all be sorted.  The entries we add we only sort on demand (we
1049  // don't insert every element into its sorted position).  We know that we
1050  // won't get any reuse from currently inserted values, because we don't
1051  // revisit blocks after we insert info for them.
1052  unsigned NumSortedEntries = Cache->size();
1053  DEBUG(AssertSorted(*Cache));
1054
1055  while (!Worklist.empty()) {
1056    BasicBlock *BB = Worklist.pop_back_val();
1057
1058    // Skip the first block if we have it.
1059    if (!SkipFirstBlock) {
1060      // Analyze the dependency of *Pointer in FromBB.  See if we already have
1061      // been here.
1062      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1063
1064      // Get the dependency info for Pointer in BB.  If we have cached
1065      // information, we will use it, otherwise we compute it.
1066      DEBUG(AssertSorted(*Cache, NumSortedEntries));
1067      MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad,
1068                                                 BB, Cache, NumSortedEntries);
1069
1070      // If we got a Def or Clobber, add this to the list of results.
1071      if (!Dep.isNonLocal()) {
1072        Result.push_back(NonLocalDepEntry(BB, Dep));
1073        continue;
1074      }
1075    }
1076
1077    // If 'Pointer' is an instruction defined in this block, then we need to do
1078    // phi translation to change it into a value live in the predecessor block.
1079    // If phi translation fails, then we can't continue dependence analysis.
1080    Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
1081    bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;
1082
1083    // If no PHI translation is needed, just add all the predecessors of this
1084    // block to scan them as well.
1085    if (!NeedsPHITranslation) {
1086      SkipFirstBlock = false;
1087      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
1088        // Verify that we haven't looked at this block yet.
1089        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
1090          InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
1091        if (InsertRes.second) {
1092          // First time we've looked at *PI.
1093          Worklist.push_back(*PI);
1094          continue;
1095        }
1096
1097        // If we have seen this block before, but it was with a different
1098        // pointer then we have a phi translation failure and we have to treat
1099        // this as a clobber.
1100        if (InsertRes.first->second != Pointer)
1101          goto PredTranslationFailure;
1102      }
1103      continue;
1104    }
1105
1106    // If we do need to do phi translation, then there are a bunch of different
1107    // cases, because we have to find a Value* live in the predecessor block. We
1108    // know that PtrInst is defined in this block at least.
1109
1110    // We may have added values to the cache list before this PHI translation.
1111    // If so, we haven't done anything to ensure that the cache remains sorted.
1112    // Sort it now (if needed) so that recursive invocations of
1113    // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1114    // value will only see properly sorted cache arrays.
1115    if (Cache && NumSortedEntries != Cache->size()) {
1116      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1117      NumSortedEntries = Cache->size();
1118    }
1119
1120    // If this is a computation derived from a PHI node, use the suitably
1121    // translated incoming values for each pred as the phi translated version.
1122    if (!isPHITranslatable(PtrInst))
1123      goto PredTranslationFailure;
1124
1125    Cache = 0;
1126
1127    for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
1128      BasicBlock *Pred = *PI;
1129      // Get the PHI translated pointer in this predecessor.  This can fail and
1130      // return null if not translatable.
1131      Value *PredPtr = GetPHITranslatedValue(PtrInst, BB, Pred, TD);
1132
1133      // Check to see if we have already visited this pred block with another
1134      // pointer.  If so, we can't do this lookup.  This failure can occur
1135      // with PHI translation when a critical edge exists and the PHI node in
1136      // the successor translates to a pointer value different than the
1137      // pointer the block was first analyzed with.
1138      std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
1139        InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
1140
1141      if (!InsertRes.second) {
1142        // If the predecessor was visited with PredPtr, then we already did
1143        // the analysis and can ignore it.
1144        if (InsertRes.first->second == PredPtr)
1145          continue;
1146
1147        // Otherwise, the block was previously analyzed with a different
1148        // pointer.  We can't represent the result of this case, so we just
1149        // treat this as a phi translation failure.
1150        goto PredTranslationFailure;
1151      }
1152
1153      // If PHI translation was unable to find an available pointer in this
1154      // predecessor, then we have to assume that the pointer is clobbered in
1155      // that predecessor.  We can still do PRE of the load, which would insert
1156      // a computation of the pointer in this predecessor.
1157      if (PredPtr == 0) {
1158        // Add the entry to the Result list.
1159        NonLocalDepEntry Entry(Pred,
1160                               MemDepResult::getClobber(Pred->getTerminator()));
1161        Result.push_back(Entry);
1162
1163        // Add it to the cache for this CacheKey so that subsequent queries get
1164        // this result.
1165        Cache = &NonLocalPointerDeps[CacheKey].second;
1166        MemoryDependenceAnalysis::NonLocalDepInfo::iterator It =
1167          std::upper_bound(Cache->begin(), Cache->end(), Entry);
1168
1169        if (It != Cache->begin() && prior(It)->first == Pred)
1170          --It;
1171
1172        if (It == Cache->end() || It->first != Pred) {
1173          Cache->insert(It, Entry);
1174          // Add it to the reverse map.
1175          ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
1176        } else if (!It->second.isDirty()) {
1177          // noop
1178        } else if (It->second.getInst() == Pred->getTerminator()) {
1179          // Same instruction, clear the dirty marker.
1180          It->second = Entry.second;
1181        } else if (It->second.getInst() == 0) {
1182          // Dirty, with no instruction, just add this.
1183          It->second = Entry.second;
1184          ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
1185        } else {
1186          // Otherwise, dirty with a different instruction.
1187          RemoveFromReverseMap(ReverseNonLocalPtrDeps, It->second.getInst(),
1188                               CacheKey);
1189          It->second = Entry.second;
1190          ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
1191        }
1192        Cache = 0;
1193        continue;
1194      }
1195
1196      // FIXME: it is entirely possible that PHI translating will end up with
1197      // the same value.  Consider PHI translating something like:
1198      // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
1199      // to recurse here, pedantically speaking.
1200
1201      // If we have a problem phi translating, fall through to the code below
1202      // to handle the failure condition.
1203      if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
1204                                      Result, Visited))
1205        goto PredTranslationFailure;
1206    }
1207
1208    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1209    CacheInfo = &NonLocalPointerDeps[CacheKey];
1210    Cache = &CacheInfo->second;
1211    NumSortedEntries = Cache->size();
1212
1213    // Since we did phi translation, the "Cache" set won't contain all of the
1214    // results for the query.  This is ok (we can still use it to accelerate
1215    // specific block queries) but we can't do the fastpath "return all
1216    // results from the set".  Clear out the indicator for this.
1217    CacheInfo->first = BBSkipFirstBlockPair();
1218    SkipFirstBlock = false;
1219    continue;
1220
1221  PredTranslationFailure:
1222
1223    if (Cache == 0) {
1224      // Refresh the CacheInfo/Cache pointer if it got invalidated.
1225      CacheInfo = &NonLocalPointerDeps[CacheKey];
1226      Cache = &CacheInfo->second;
1227      NumSortedEntries = Cache->size();
1228    }
1229
1230    // Since we did phi translation, the "Cache" set won't contain all of the
1231    // results for the query.  This is ok (we can still use it to accelerate
1232    // specific block queries) but we can't do the fastpath "return all
1233    // results from the set".  Clear out the indicator for this.
1234    CacheInfo->first = BBSkipFirstBlockPair();
1235
1236    // If *nothing* works, mark the pointer as being clobbered by the first
1237    // instruction in this block.
1238    //
1239    // If this is the magic first block, return this as a clobber of the whole
1240    // incoming value.  Since we can't phi translate to one of the predecessors,
1241    // we have to bail out.
1242    if (SkipFirstBlock)
1243      return true;
1244
1245    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
1246      assert(I != Cache->rend() && "Didn't find current block??");
1247      if (I->first != BB)
1248        continue;
1249
1250      assert(I->second.isNonLocal() &&
1251             "Should only be here with transparent block");
1252      I->second = MemDepResult::getClobber(BB->begin());
1253      ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
1254      Result.push_back(*I);
1255      break;
1256    }
1257  }
1258
1259  // Okay, we're done now.  If we added new values to the cache, re-sort it.
1260  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1261  DEBUG(AssertSorted(*Cache));
1262  return false;
1263}
1264
1265/// RemoveCachedNonLocalPointerDependencies - If P exists in
1266/// CachedNonLocalPointerInfo, remove it.
1267void MemoryDependenceAnalysis::
1268RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
1269  CachedNonLocalPointerInfo::iterator It =
1270    NonLocalPointerDeps.find(P);
1271  if (It == NonLocalPointerDeps.end()) return;
1272
1273  // Remove all of the entries in the BB->val map.  This involves removing
1274  // instructions from the reverse map.
1275  NonLocalDepInfo &PInfo = It->second.second;
1276
1277  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1278    Instruction *Target = PInfo[i].second.getInst();
1279    if (Target == 0) continue;  // Ignore non-local dep results.
1280    assert(Target->getParent() == PInfo[i].first);
1281
1282    // Eliminating the dirty entry from 'Cache', so update the reverse info.
1283    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1284  }
1285
1286  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1287  NonLocalPointerDeps.erase(It);
1288}
1289
1290
1291/// invalidateCachedPointerInfo - This method is used to invalidate cached
1292/// information about the specified pointer, because it may be too
1293/// conservative in memdep.  This is an optional call that can be used when
1294/// the client detects an equivalence between the pointer and some other
1295/// value and replaces the other value with Ptr.  This can make Ptr available
1296/// in more places than the cached info reflects.
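///
/// A hypothetical client replacing a value would use it roughly as:
/// \code
///   OtherV->replaceAllUsesWith(Ptr);
///   MD.invalidateCachedPointerInfo(Ptr);
/// \endcode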
1297void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
1298  // If Ptr isn't really a pointer, just ignore it.
1299  if (!isa<PointerType>(Ptr->getType())) return;
1300  // Flush store info for the pointer.
1301  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1302  // Flush load info for the pointer.
1303  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1304}
1305
1306/// removeInstruction - Remove an instruction from the dependence analysis,
1307/// updating the dependence of instructions that previously depended on it.
1308/// This method attempts to keep the cache coherent using the reverse map.
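///
/// Usage sketch (hypothetical client): notify the analysis before actually
/// deleting the instruction, e.g.
/// \code
///   MD.removeInstruction(DeadInst);
///   DeadInst->eraseFromParent();
/// \endcode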
1309void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
1310  // Walk through the Non-local dependencies, removing this one as the value
1311  // for any cached queries.
1312  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1313  if (NLDI != NonLocalDeps.end()) {
1314    NonLocalDepInfo &BlockMap = NLDI->second.first;
1315    for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
1316         DI != DE; ++DI)
1317      if (Instruction *Inst = DI->second.getInst())
1318        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1319    NonLocalDeps.erase(NLDI);
1320  }
1321
1322  // If we have a cached local dependence query for this instruction, remove it.
1323  //
1324  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1325  if (LocalDepEntry != LocalDeps.end()) {
1326    // Remove us from DepInst's reverse set now that the local dep info is gone.
1327    if (Instruction *Inst = LocalDepEntry->second.getInst())
1328      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1329
1330    // Remove this local dependency info.
1331    LocalDeps.erase(LocalDepEntry);
1332  }
1333
1334  // If we have any cached pointer dependencies on this instruction, remove
1335  // them.  If the instruction has non-pointer type, then it can't be a pointer
1336  // base.
1337
1338  // Remove it from both the load info and the store info.  The instruction
1339  // can't be in either of these maps if it is non-pointer.
1340  if (isa<PointerType>(RemInst->getType())) {
1341    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1342    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1343  }
1344
1345  // Loop over all of the things that depend on the instruction we're removing.
1346  //
1347  SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;
1348
1349  // If we find RemInst as a clobber or Def in any of the maps for other values,
1350  // we need to replace its entry with a dirty version of the instruction after
1351  // it.  If RemInst is a terminator, we use a null dirty value.
1352  //
1353  // Using a dirty version of the instruction after RemInst saves having to scan
1354  // the entire block to get to this point.
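  //
  // Note: ++BasicBlock::iterator(RemInst) is simply the instruction that
  // immediately follows RemInst in its block; a terminator has no such
  // successor instruction, which is why it gets the null dirty value.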
1355  MemDepResult NewDirtyVal;
1356  if (!RemInst->isTerminator())
1357    NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
1358
1359  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1360  if (ReverseDepIt != ReverseLocalDeps.end()) {
1361    SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
1362    // RemInst can't be the terminator if it has local stuff depending on it.
1363    assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
1364           "Nothing can locally depend on a terminator");
1365
1366    for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
1367         E = ReverseDeps.end(); I != E; ++I) {
1368      Instruction *InstDependingOnRemInst = *I;
1369      assert(InstDependingOnRemInst != RemInst &&
1370             "Already removed our local dep info");
1371
1372      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1373
1374      // Make sure to remember that new things depend on NewDirtyVal.
1375      assert(NewDirtyVal.getInst() && "There is no way something else can have "
1376             "a local dep on this if it is a terminator!");
1377      ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(),
1378                                                InstDependingOnRemInst));
1379    }
1380
1381    ReverseLocalDeps.erase(ReverseDepIt);
1382
1383    // Add new reverse deps after scanning the set, to avoid invalidating the
1384    // 'ReverseDeps' reference.
1385    while (!ReverseDepsToAdd.empty()) {
1386      ReverseLocalDeps[ReverseDepsToAdd.back().first]
1387        .insert(ReverseDepsToAdd.back().second);
1388      ReverseDepsToAdd.pop_back();
1389    }
1390  }
1391
1392  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1393  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1394    SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
1395    for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
1396         I != E; ++I) {
1397      assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
1398
1399      PerInstNLInfo &INLD = NonLocalDeps[*I];
1400      // The information is now dirty!
1401      INLD.second = true;
1402
1403      for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
1404           DE = INLD.first.end(); DI != DE; ++DI) {
1405        if (DI->second.getInst() != RemInst) continue;
1406
1407        // Convert to a dirty entry for the subsequent instruction.
1408        DI->second = NewDirtyVal;
1409
1410        if (Instruction *NextI = NewDirtyVal.getInst())
1411          ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
1412      }
1413    }
1414
1415    ReverseNonLocalDeps.erase(ReverseDepIt);
1416
1417    // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1418    while (!ReverseDepsToAdd.empty()) {
1419      ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
1420        .insert(ReverseDepsToAdd.back().second);
1421      ReverseDepsToAdd.pop_back();
1422    }
1423  }
1424
1425  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1426  // value in the NonLocalPointerDeps info.
1427  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1428    ReverseNonLocalPtrDeps.find(RemInst);
1429  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1430    SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
1431    SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
1432
1433    for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
1434         E = Set.end(); I != E; ++I) {
1435      ValueIsLoadPair P = *I;
1436      assert(P.getPointer() != RemInst &&
1437             "Already removed NonLocalPointerDeps info for RemInst");
1438
1439      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;
1440
1441      // The cache is not valid for any specific block anymore.
1442      NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();
1443
1444      // Update any entries for RemInst to use the instruction after it.
1445      for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
1446           DI != DE; ++DI) {
1447        if (DI->second.getInst() != RemInst) continue;
1448
1449        // Convert to a dirty entry for the subsequent instruction.
1450        DI->second = NewDirtyVal;
1451
1452        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1453          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1454      }
1455
1456      // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
1457      // subsequent value may have broken the sorted order.
1458      std::sort(NLPDI.begin(), NLPDI.end());
1459    }
1460
1461    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1462
1463    while (!ReversePtrDepsToAdd.empty()) {
1464      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
1465        .insert(ReversePtrDepsToAdd.back().second);
1466      ReversePtrDepsToAdd.pop_back();
1467    }
1468  }
1469
1470
1471  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1472  AA->deleteValue(RemInst);
1473  DEBUG(verifyRemoved(RemInst));
1474}
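
// A minimal usage sketch (illustrative only; 'MD' and 'DeadInst' are
// hypothetical names).  A transform that deletes an instruction should notify
// memdep *before* erasing it, so the cached dependence info stays coherent:
//
//   MD.removeInstruction(DeadInst);
//   DeadInst->eraseFromParent();
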
1475/// verifyRemoved - Verify that the specified instruction does not occur
1476/// in our internal data structures.
1477void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
1478  for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
1479       E = LocalDeps.end(); I != E; ++I) {
1480    assert(I->first != D && "Inst occurs in data structures");
1481    assert(I->second.getInst() != D &&
1482           "Inst occurs in data structures");
1483  }
1484
1485  for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
1486       E = NonLocalPointerDeps.end(); I != E; ++I) {
1487    assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
1488    const NonLocalDepInfo &Val = I->second.second;
1489    for (NonLocalDepInfo::const_iterator II = Val.begin(), EE = Val.end();
1490         II != EE; ++II)
1491      assert(II->second.getInst() != D && "Inst occurs as NLPD value");
1492  }
1493
1494  for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
1495       E = NonLocalDeps.end(); I != E; ++I) {
1496    assert(I->first != D && "Inst occurs in data structures");
1497    const PerInstNLInfo &INLD = I->second;
1498    for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
1499         EE = INLD.first.end(); II != EE; ++II)
1500      assert(II->second.getInst() != D && "Inst occurs in data structures");
1501  }
1502
1503  for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
1504       E = ReverseLocalDeps.end(); I != E; ++I) {
1505    assert(I->first != D && "Inst occurs in data structures");
1506    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
1507         EE = I->second.end(); II != EE; ++II)
1508      assert(*II != D && "Inst occurs in data structures");
1509  }
1510
1511  for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
1512       E = ReverseNonLocalDeps.end();
1513       I != E; ++I) {
1514    assert(I->first != D && "Inst occurs in data structures");
1515    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
1516         EE = I->second.end(); II != EE; ++II)
1517      assert(*II != D && "Inst occurs in data structures");
1518  }
1519
1520  for (ReverseNonLocalPtrDepTy::const_iterator
1521       I = ReverseNonLocalPtrDeps.begin(),
1522       E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
1523    assert(I->first != D && "Inst occurs in rev NLPD map");
1524
1525    for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
1526         EE = I->second.end(); II != EE; ++II)
1527      assert(*II != ValueIsLoadPair(D, false) &&
1528             *II != ValueIsLoadPair(D, true) &&
1529             "Inst occurs in ReverseNonLocalPtrDeps map");
1530  }
1531
1532}
1533