1//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the TypeBasedAliasAnalysis pass, which implements
11// metadata-based TBAA.
12//
13// In LLVM IR, memory does not have types, so LLVM's own type system is not
14// suitable for doing TBAA. Instead, metadata is added to the IR to describe
15// a type system of a higher level language. This can be used to implement
16// typical C/C++ TBAA, but it can also be used to implement custom alias
17// analysis behavior for other languages.
18//
19// The current metadata format is very simple. TBAA MDNodes have up to
20// three fields, e.g.:
21//   !0 = metadata !{ metadata !"an example type tree" }
22//   !1 = metadata !{ metadata !"int", metadata !0 }
23//   !2 = metadata !{ metadata !"float", metadata !0 }
24//   !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
25//
26// The first field is an identity field. It can be any value, usually
27// an MDString, which uniquely identifies the type. The most important
28// name in the tree is the name of the root node. Two trees with
29// different root node names are entirely disjoint, even if they
30// have leaves with common names.
31//
32// The second field identifies the type's parent node in the tree, or
33// is null or omitted for a root node. A type is considered to alias
34// all of its descendants and all of its ancestors in the tree. Also,
35// a type is considered to alias all types in other trees, so that
36// bitcode produced from multiple front-ends is handled conservatively.
37//
38// If the third field is present, it's an integer which if equal to 1
39// indicates that the type is "constant" (meaning pointsToConstantMemory
40// should return true; see
41// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
42//
43// TODO: The current metadata format doesn't support struct
44// fields. For example:
45//   struct X {
46//     double d;
47//     int i;
48//   };
49//   void foo(struct X *x, struct X *y, double *p) {
50//     *x = *y;
51//     *p = 0.0;
52//   }
53// Struct X has a double member, so the store to *x can alias the store to *p.
54// Currently it's not possible to precisely describe all the things struct X
55// aliases, so struct assignments must use conservative TBAA nodes. There's
56// no scheme for attaching metadata to @llvm.memcpy yet either.
57//
58//===----------------------------------------------------------------------===//
59
60#include "llvm/Analysis/Passes.h"
61#include "llvm/Analysis/AliasAnalysis.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/LLVMContext.h"
64#include "llvm/IR/Metadata.h"
65#include "llvm/IR/Module.h"
66#include "llvm/Pass.h"
67#include "llvm/Support/CommandLine.h"
68using namespace llvm;
69
70// A handy option for disabling TBAA functionality. The same effect can also be
71// achieved by stripping the !tbaa tags from IR, but this option is sometimes
72// more convenient.
73static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
74static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
75
76namespace {
77  /// TBAANode - This is a simple wrapper around an MDNode which provides a
78  /// higher-level interface by hiding the details of how alias analysis
79  /// information is encoded in its operands.
80  class TBAANode {
81    const MDNode *Node;
82
83  public:
84    TBAANode() : Node(0) {}
85    explicit TBAANode(const MDNode *N) : Node(N) {}
86
87    /// getNode - Get the MDNode for this TBAANode.
88    const MDNode *getNode() const { return Node; }
89
90    /// getParent - Get this TBAANode's Alias tree parent.
91    TBAANode getParent() const {
92      if (Node->getNumOperands() < 2)
93        return TBAANode();
94      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
95      if (!P)
96        return TBAANode();
97      // Ok, this node has a valid parent. Return it.
98      return TBAANode(P);
99    }
100
101    /// TypeIsImmutable - Test if this TBAANode represents a type for objects
102    /// which are not modified (by any means) in the context where this
103    /// AliasAnalysis is relevant.
104    bool TypeIsImmutable() const {
105      if (Node->getNumOperands() < 3)
106        return false;
107      ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
108      if (!CI)
109        return false;
110      return CI->getValue()[0];
111    }
112  };
113
114  /// This is a simple wrapper around an MDNode which provides a
115  /// higher-level interface by hiding the details of how alias analysis
116  /// information is encoded in its operands.
117  class TBAAStructTagNode {
118    /// This node should be created with createTBAAStructTagNode.
119    const MDNode *Node;
120
121  public:
122    TBAAStructTagNode() : Node(0) {}
123    explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
124
125    /// Get the MDNode for this TBAAStructTagNode.
126    const MDNode *getNode() const { return Node; }
127
128    const MDNode *getBaseType() const {
129      return dyn_cast_or_null<MDNode>(Node->getOperand(0));
130    }
131    const MDNode *getAccessType() const {
132      return dyn_cast_or_null<MDNode>(Node->getOperand(1));
133    }
134    uint64_t getOffset() const {
135      return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
136    }
137    /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
138    /// objects which are not modified (by any means) in the context where this
139    /// AliasAnalysis is relevant.
140    bool TypeIsImmutable() const {
141      if (Node->getNumOperands() < 4)
142        return false;
143      ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3));
144      if (!CI)
145        return false;
146      return CI->getValue()[0];
147    }
148  };
149
150  /// This is a simple wrapper around an MDNode which provides a
151  /// higher-level interface by hiding the details of how alias analysis
152  /// information is encoded in its operands.
153  class TBAAStructTypeNode {
154    /// This node should be created with createTBAAStructTypeNode.
155    const MDNode *Node;
156
157  public:
158    TBAAStructTypeNode() : Node(0) {}
159    explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
160
161    /// Get the MDNode for this TBAAStructTypeNode.
162    const MDNode *getNode() const { return Node; }
163
164    /// Get this TBAAStructTypeNode's field in the type DAG with
165    /// given offset. Update the offset to be relative to the field type.
166    TBAAStructTypeNode getParent(uint64_t &Offset) const {
167      // Parent can be omitted for the root node.
168      if (Node->getNumOperands() < 2)
169        return TBAAStructTypeNode();
170
171      // Special handling for a scalar type node.
172      if (Node->getNumOperands() <= 3) {
173        MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
174        if (!P)
175          return TBAAStructTypeNode();
176        return TBAAStructTypeNode(P);
177      }
178
179      // Assume the offsets are in order. We return the previous field if
180      // the current offset is bigger than the given offset.
181      unsigned TheIdx = 0;
182      for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
183        uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))->
184                         getZExtValue();
185        if (Cur > Offset) {
186          assert(Idx >= 3 &&
187                 "TBAAStructTypeNode::getParent should have an offset match!");
188          TheIdx = Idx - 2;
189          break;
190        }
191      }
192      // Move along the last field.
193      if (TheIdx == 0)
194        TheIdx = Node->getNumOperands() - 2;
195      uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))->
196                       getZExtValue();
197      Offset -= Cur;
198      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
199      if (!P)
200        return TBAAStructTypeNode();
201      return TBAAStructTypeNode(P);
202    }
203  };
204}
205
206namespace {
207  /// TypeBasedAliasAnalysis - This is a simple alias analysis
208  /// implementation that uses TypeBased to answer queries.
209  class TypeBasedAliasAnalysis : public ImmutablePass,
210                                 public AliasAnalysis {
211  public:
212    static char ID; // Class identification, replacement for typeinfo
213    TypeBasedAliasAnalysis() : ImmutablePass(ID) {
214      initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
215    }
216
217    virtual void initializePass() {
218      InitializeAliasAnalysis(this);
219    }
220
221    /// getAdjustedAnalysisPointer - This method is used when a pass implements
222    /// an analysis interface through multiple inheritance.  If needed, it
223    /// should override this to adjust the this pointer as needed for the
224    /// specified pass info.
225    virtual void *getAdjustedAnalysisPointer(const void *PI) {
226      if (PI == &AliasAnalysis::ID)
227        return (AliasAnalysis*)this;
228      return this;
229    }
230
231    bool Aliases(const MDNode *A, const MDNode *B) const;
232    bool PathAliases(const MDNode *A, const MDNode *B) const;
233
234  private:
235    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
236    virtual AliasResult alias(const Location &LocA, const Location &LocB);
237    virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
238    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
239    virtual ModRefBehavior getModRefBehavior(const Function *F);
240    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
241                                       const Location &Loc);
242    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
243                                       ImmutableCallSite CS2);
244  };
245}  // End of anonymous namespace
246
247// Register this pass...
248char TypeBasedAliasAnalysis::ID = 0;
249INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
250                   "Type-Based Alias Analysis", false, true, false)
251
252ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
253  return new TypeBasedAliasAnalysis();
254}
255
256void
257TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
258  AU.setPreservesAll();
259  AliasAnalysis::getAnalysisUsage(AU);
260}
261
262/// Aliases - Test whether the type represented by A may alias the
263/// type represented by B.
264bool
265TypeBasedAliasAnalysis::Aliases(const MDNode *A,
266                                const MDNode *B) const {
267  if (EnableStructPathTBAA)
268    return PathAliases(A, B);
269
270  // Keep track of the root node for A and B.
271  TBAANode RootA, RootB;
272
273  // Climb the tree from A to see if we reach B.
274  for (TBAANode T(A); ; ) {
275    if (T.getNode() == B)
276      // B is an ancestor of A.
277      return true;
278
279    RootA = T;
280    T = T.getParent();
281    if (!T.getNode())
282      break;
283  }
284
285  // Climb the tree from B to see if we reach A.
286  for (TBAANode T(B); ; ) {
287    if (T.getNode() == A)
288      // A is an ancestor of B.
289      return true;
290
291    RootB = T;
292    T = T.getParent();
293    if (!T.getNode())
294      break;
295  }
296
297  // Neither node is an ancestor of the other.
298
299  // If they have different roots, they're part of different potentially
300  // unrelated type systems, so we must be conservative.
301  if (RootA.getNode() != RootB.getNode())
302    return true;
303
304  // If they have the same root, then we've proved there's no alias.
305  return false;
306}
307
308/// Test whether the struct-path tag represented by A may alias the
309/// struct-path tag represented by B.
310bool
311TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
312                                    const MDNode *B) const {
313  // Keep track of the root node for A and B.
314  TBAAStructTypeNode RootA, RootB;
315  TBAAStructTagNode TagA(A), TagB(B);
316
317  // TODO: We need to check if AccessType of TagA encloses AccessType of
318  // TagB to support aggregate AccessType. If yes, return true.
319
320  // Start from the base type of A, follow the edge with the correct offset in
321  // the type DAG and adjust the offset until we reach the base type of B or
322  // until we reach the Root node.
323  // Compare the adjusted offset once we have the same base.
324
325  // Climb the type DAG from base type of A to see if we reach base type of B.
326  const MDNode *BaseA = TagA.getBaseType();
327  const MDNode *BaseB = TagB.getBaseType();
328  uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
329  for (TBAAStructTypeNode T(BaseA); ; ) {
330    if (T.getNode() == BaseB)
331      // Base type of A encloses base type of B, check if the offsets match.
332      return OffsetA == OffsetB;
333
334    RootA = T;
335    // Follow the edge with the correct offset, OffsetA will be adjusted to
336    // be relative to the field type.
337    T = T.getParent(OffsetA);
338    if (!T.getNode())
339      break;
340  }
341
342  // Reset OffsetA and climb the type DAG from base type of B to see if we reach
343  // base type of A.
344  OffsetA = TagA.getOffset();
345  for (TBAAStructTypeNode T(BaseB); ; ) {
346    if (T.getNode() == BaseA)
347      // Base type of B encloses base type of A, check if the offsets match.
348      return OffsetA == OffsetB;
349
350    RootB = T;
351    // Follow the edge with the correct offset, OffsetB will be adjusted to
352    // be relative to the field type.
353    T = T.getParent(OffsetB);
354    if (!T.getNode())
355      break;
356  }
357
358  // Neither node is an ancestor of the other.
359
360  // If they have different roots, they're part of different potentially
361  // unrelated type systems, so we must be conservative.
362  if (RootA.getNode() != RootB.getNode())
363    return true;
364
365  // If they have the same root, then we've proved there's no alias.
366  return false;
367}
368
369AliasAnalysis::AliasResult
370TypeBasedAliasAnalysis::alias(const Location &LocA,
371                              const Location &LocB) {
372  if (!EnableTBAA)
373    return AliasAnalysis::alias(LocA, LocB);
374
375  // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
376  // be conservative.
377  const MDNode *AM = LocA.TBAATag;
378  if (!AM) return AliasAnalysis::alias(LocA, LocB);
379  const MDNode *BM = LocB.TBAATag;
380  if (!BM) return AliasAnalysis::alias(LocA, LocB);
381
382  // If they may alias, chain to the next AliasAnalysis.
383  if (Aliases(AM, BM))
384    return AliasAnalysis::alias(LocA, LocB);
385
386  // Otherwise return a definitive result.
387  return NoAlias;
388}
389
390bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
391                                                    bool OrLocal) {
392  if (!EnableTBAA)
393    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
394
395  const MDNode *M = Loc.TBAATag;
396  if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
397
398  // If this is an "immutable" type, we can assume the pointer is pointing
399  // to constant memory.
400  if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
401      (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
402    return true;
403
404  return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
405}
406
407AliasAnalysis::ModRefBehavior
408TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
409  if (!EnableTBAA)
410    return AliasAnalysis::getModRefBehavior(CS);
411
412  ModRefBehavior Min = UnknownModRefBehavior;
413
414  // If this is an "immutable" type, we can assume the call doesn't write
415  // to memory.
416  if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
417    if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
418        (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
419      Min = OnlyReadsMemory;
420
421  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
422}
423
424AliasAnalysis::ModRefBehavior
425TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
426  // Functions don't have metadata. Just chain to the next implementation.
427  return AliasAnalysis::getModRefBehavior(F);
428}
429
430AliasAnalysis::ModRefResult
431TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
432                                      const Location &Loc) {
433  if (!EnableTBAA)
434    return AliasAnalysis::getModRefInfo(CS, Loc);
435
436  if (const MDNode *L = Loc.TBAATag)
437    if (const MDNode *M =
438          CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
439      if (!Aliases(L, M))
440        return NoModRef;
441
442  return AliasAnalysis::getModRefInfo(CS, Loc);
443}
444
445AliasAnalysis::ModRefResult
446TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
447                                      ImmutableCallSite CS2) {
448  if (!EnableTBAA)
449    return AliasAnalysis::getModRefInfo(CS1, CS2);
450
451  if (const MDNode *M1 =
452        CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
453    if (const MDNode *M2 =
454          CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
455      if (!Aliases(M1, M2))
456        return NoModRef;
457
458  return AliasAnalysis::getModRefInfo(CS1, CS2);
459}
460
461MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
462  if (!A || !B)
463    return NULL;
464
465  if (A == B)
466    return A;
467
468  // For struct-path aware TBAA, we use the access type of the tag.
469  if (EnableStructPathTBAA) {
470    A = cast_or_null<MDNode>(A->getOperand(1));
471    if (!A) return 0;
472    B = cast_or_null<MDNode>(B->getOperand(1));
473    if (!B) return 0;
474  }
475
476  SmallVector<MDNode *, 4> PathA;
477  MDNode *T = A;
478  while (T) {
479    PathA.push_back(T);
480    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
481  }
482
483  SmallVector<MDNode *, 4> PathB;
484  T = B;
485  while (T) {
486    PathB.push_back(T);
487    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
488  }
489
490  int IA = PathA.size() - 1;
491  int IB = PathB.size() - 1;
492
493  MDNode *Ret = 0;
494  while (IA >= 0 && IB >=0) {
495    if (PathA[IA] == PathB[IB])
496      Ret = PathA[IA];
497    else
498      break;
499    --IA;
500    --IB;
501  }
502  if (!EnableStructPathTBAA)
503    return Ret;
504
505  if (!Ret)
506    return 0;
507  // We need to convert from a type node to a tag node.
508  Type *Int64 = IntegerType::get(A->getContext(), 64);
509  Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
510  return MDNode::get(A->getContext(), Ops);
511}
512