TypeBasedAliasAnalysis.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the TypeBasedAliasAnalysis pass, which implements
11// metadata-based TBAA.
12//
13// In LLVM IR, memory does not have types, so LLVM's own type system is not
14// suitable for doing TBAA. Instead, metadata is added to the IR to describe
15// a type system of a higher level language. This can be used to implement
16// typical C/C++ TBAA, but it can also be used to implement custom alias
17// analysis behavior for other languages.
18//
19// We now support two types of metadata format: scalar TBAA and struct-path
// aware TBAA. After all test cases are upgraded to use struct-path aware
21// TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA
22// can be dropped.
23//
24// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
25// three fields, e.g.:
26//   !0 = metadata !{ metadata !"an example type tree" }
27//   !1 = metadata !{ metadata !"int", metadata !0 }
28//   !2 = metadata !{ metadata !"float", metadata !0 }
29//   !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
30//
31// The first field is an identity field. It can be any value, usually
32// an MDString, which uniquely identifies the type. The most important
33// name in the tree is the name of the root node. Two trees with
34// different root node names are entirely disjoint, even if they
35// have leaves with common names.
36//
37// The second field identifies the type's parent node in the tree, or
38// is null or omitted for a root node. A type is considered to alias
39// all of its descendants and all of its ancestors in the tree. Also,
40// a type is considered to alias all types in other trees, so that
41// bitcode produced from multiple front-ends is handled conservatively.
42//
43// If the third field is present, it's an integer which if equal to 1
44// indicates that the type is "constant" (meaning pointsToConstantMemory
45// should return true; see
46// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
47//
48// With struct-path aware TBAA, the MDNodes attached to an instruction using
49// "!tbaa" are called path tag nodes.
50//
51// The path tag node has 4 fields with the last field being optional.
52//
53// The first field is the base type node, it can be a struct type node
54// or a scalar type node. The second field is the access type node, it
55// must be a scalar type node. The third field is the offset into the base type.
56// The last field has the same meaning as the last field of our scalar TBAA:
57// it's an integer which if equal to 1 indicates that the access is "constant".
58//
59// The struct type node has a name and a list of pairs, one pair for each member
60// of the struct. The first element of each pair is a type node (a struct type
// node or a scalar type node), specifying the type of the member, the second
62// element of each pair is the offset of the member.
63//
64// Given an example
65// typedef struct {
66//   short s;
67// } A;
68// typedef struct {
69//   uint16_t s;
70//   A a;
71// } B;
72//
// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
74// instruction. The base type is !4 (struct B), the access type is !2 (scalar
75// type short) and the offset is 4.
76//
77// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
78// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
79// !2 = metadata !{metadata !"short", metadata !1}           // Scalar type node
80// !3 = metadata !{metadata !"A", metadata !2, i64 0}        // Struct type node
81// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
82//                                                           // Struct type node
83// !5 = metadata !{metadata !4, metadata !2, i64 4}          // Path tag node
84//
85// The struct type nodes and the scalar type nodes form a type DAG.
86//         Root (!0)
87//         char (!1)  -- edge to Root
88//         short (!2) -- edge to char
89//         A (!3) -- edge with offset 0 to short
90//         B (!4) -- edge with offset 0 to short and edge with offset 4 to A
91//
92// To check if two tags (tagX and tagY) can alias, we start from the base type
93// of tagX, follow the edge with the correct offset in the type DAG and adjust
94// the offset until we reach the base type of tagY or until we reach the Root
95// node.
96// If we reach the base type of tagY, compare the adjusted offset with
97// offset of tagY, return Alias if the offsets are the same, return NoAlias
98// otherwise.
99// If we reach the Root node, perform the above starting from base type of tagY
100// to see if we reach base type of tagX.
101//
102// If they have different roots, they're part of different potentially
103// unrelated type systems, so we return Alias to be conservative.
104// If neither node is an ancestor of the other and they have the same root,
105// then we say NoAlias.
106//
107// TODO: The current metadata format doesn't support struct
108// fields. For example:
109//   struct X {
110//     double d;
111//     int i;
112//   };
113//   void foo(struct X *x, struct X *y, double *p) {
114//     *x = *y;
115//     *p = 0.0;
116//   }
117// Struct X has a double member, so the store to *x can alias the store to *p.
118// Currently it's not possible to precisely describe all the things struct X
119// aliases, so struct assignments must use conservative TBAA nodes. There's
120// no scheme for attaching metadata to @llvm.memcpy yet either.
121//
122//===----------------------------------------------------------------------===//
123
124#include "llvm/Analysis/Passes.h"
125#include "llvm/Analysis/AliasAnalysis.h"
126#include "llvm/IR/Constants.h"
127#include "llvm/IR/LLVMContext.h"
128#include "llvm/IR/Metadata.h"
129#include "llvm/IR/Module.h"
130#include "llvm/Pass.h"
131#include "llvm/Support/CommandLine.h"
132using namespace llvm;
133
// A handy option for disabling TBAA functionality. The same effect can also be
// achieved by stripping the !tbaa tags from IR, but this option is sometimes
// more convenient.
//
// The flag is spelled "enable-tbaa" and is initialized to true. The query
// methods of TypeBasedAliasAnalysis in this file that test it forward straight
// to the corresponding AliasAnalysis implementation when it is false.
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
138
139namespace {
140  /// TBAANode - This is a simple wrapper around an MDNode which provides a
141  /// higher-level interface by hiding the details of how alias analysis
142  /// information is encoded in its operands.
143  class TBAANode {
144    const MDNode *Node;
145
146  public:
147    TBAANode() : Node(nullptr) {}
148    explicit TBAANode(const MDNode *N) : Node(N) {}
149
150    /// getNode - Get the MDNode for this TBAANode.
151    const MDNode *getNode() const { return Node; }
152
153    /// getParent - Get this TBAANode's Alias tree parent.
154    TBAANode getParent() const {
155      if (Node->getNumOperands() < 2)
156        return TBAANode();
157      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
158      if (!P)
159        return TBAANode();
160      // Ok, this node has a valid parent. Return it.
161      return TBAANode(P);
162    }
163
164    /// TypeIsImmutable - Test if this TBAANode represents a type for objects
165    /// which are not modified (by any means) in the context where this
166    /// AliasAnalysis is relevant.
167    bool TypeIsImmutable() const {
168      if (Node->getNumOperands() < 3)
169        return false;
170      ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
171      if (!CI)
172        return false;
173      return CI->getValue()[0];
174    }
175  };
176
177  /// This is a simple wrapper around an MDNode which provides a
178  /// higher-level interface by hiding the details of how alias analysis
179  /// information is encoded in its operands.
180  class TBAAStructTagNode {
181    /// This node should be created with createTBAAStructTagNode.
182    const MDNode *Node;
183
184  public:
185    explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
186
187    /// Get the MDNode for this TBAAStructTagNode.
188    const MDNode *getNode() const { return Node; }
189
190    const MDNode *getBaseType() const {
191      return dyn_cast_or_null<MDNode>(Node->getOperand(0));
192    }
193    const MDNode *getAccessType() const {
194      return dyn_cast_or_null<MDNode>(Node->getOperand(1));
195    }
196    uint64_t getOffset() const {
197      return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
198    }
199    /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
200    /// objects which are not modified (by any means) in the context where this
201    /// AliasAnalysis is relevant.
202    bool TypeIsImmutable() const {
203      if (Node->getNumOperands() < 4)
204        return false;
205      ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3));
206      if (!CI)
207        return false;
208      return CI->getValue()[0];
209    }
210  };
211
212  /// This is a simple wrapper around an MDNode which provides a
213  /// higher-level interface by hiding the details of how alias analysis
214  /// information is encoded in its operands.
215  class TBAAStructTypeNode {
216    /// This node should be created with createTBAAStructTypeNode.
217    const MDNode *Node;
218
219  public:
220    TBAAStructTypeNode() : Node(nullptr) {}
221    explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
222
223    /// Get the MDNode for this TBAAStructTypeNode.
224    const MDNode *getNode() const { return Node; }
225
226    /// Get this TBAAStructTypeNode's field in the type DAG with
227    /// given offset. Update the offset to be relative to the field type.
228    TBAAStructTypeNode getParent(uint64_t &Offset) const {
229      // Parent can be omitted for the root node.
230      if (Node->getNumOperands() < 2)
231        return TBAAStructTypeNode();
232
233      // Fast path for a scalar type node and a struct type node with a single
234      // field.
235      if (Node->getNumOperands() <= 3) {
236        uint64_t Cur = Node->getNumOperands() == 2 ? 0 :
237                       cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
238        Offset -= Cur;
239        MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
240        if (!P)
241          return TBAAStructTypeNode();
242        return TBAAStructTypeNode(P);
243      }
244
245      // Assume the offsets are in order. We return the previous field if
246      // the current offset is bigger than the given offset.
247      unsigned TheIdx = 0;
248      for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
249        uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))->
250                         getZExtValue();
251        if (Cur > Offset) {
252          assert(Idx >= 3 &&
253                 "TBAAStructTypeNode::getParent should have an offset match!");
254          TheIdx = Idx - 2;
255          break;
256        }
257      }
258      // Move along the last field.
259      if (TheIdx == 0)
260        TheIdx = Node->getNumOperands() - 2;
261      uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))->
262                       getZExtValue();
263      Offset -= Cur;
264      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
265      if (!P)
266        return TBAAStructTypeNode();
267      return TBAAStructTypeNode(P);
268    }
269  };
270}
271
272namespace {
273  /// TypeBasedAliasAnalysis - This is a simple alias analysis
274  /// implementation that uses TypeBased to answer queries.
275  class TypeBasedAliasAnalysis : public ImmutablePass,
276                                 public AliasAnalysis {
277  public:
278    static char ID; // Class identification, replacement for typeinfo
279    TypeBasedAliasAnalysis() : ImmutablePass(ID) {
280      initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
281    }
282
283    void initializePass() override {
284      InitializeAliasAnalysis(this);
285    }
286
287    /// getAdjustedAnalysisPointer - This method is used when a pass implements
288    /// an analysis interface through multiple inheritance.  If needed, it
289    /// should override this to adjust the this pointer as needed for the
290    /// specified pass info.
291    void *getAdjustedAnalysisPointer(const void *PI) override {
292      if (PI == &AliasAnalysis::ID)
293        return (AliasAnalysis*)this;
294      return this;
295    }
296
297    bool Aliases(const MDNode *A, const MDNode *B) const;
298    bool PathAliases(const MDNode *A, const MDNode *B) const;
299
300  private:
301    void getAnalysisUsage(AnalysisUsage &AU) const override;
302    AliasResult alias(const Location &LocA, const Location &LocB) override;
303    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
304    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
305    ModRefBehavior getModRefBehavior(const Function *F) override;
306    ModRefResult getModRefInfo(ImmutableCallSite CS,
307                               const Location &Loc) override;
308    ModRefResult getModRefInfo(ImmutableCallSite CS1,
309                               ImmutableCallSite CS2) override;
310  };
311}  // End of anonymous namespace
312
// Register this pass...
// ID is the static member used to identify the pass; the constructor hands it
// to ImmutablePass. The macro registers the pass under the command-line name
// "tbaa" with the description "Type-Based Alias Analysis" and ties it to the
// AliasAnalysis interface.
// NOTE(review): the trailing false/true/false flags are presumably
// (CFG-only, is-analysis, is-default-implementation) - confirm against the
// INITIALIZE_AG_PASS definition.
char TypeBasedAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
                   "Type-Based Alias Analysis", false, true, false)
317
/// Factory for the pass: returns a newly heap-allocated
/// TypeBasedAliasAnalysis.
/// NOTE(review): ownership presumably passes to the caller (normally a pass
/// manager) - confirm against the callers.
ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
  return new TypeBasedAliasAnalysis();
}
321
/// Declare this pass's analysis usage: mark everything as preserved via
/// setPreservesAll, then let the AliasAnalysis base add its own entries.
void
TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AliasAnalysis::getAnalysisUsage(AU);
}
327
328/// Check the first operand of the tbaa tag node, if it is a MDNode, we treat
329/// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA
330/// format.
331static bool isStructPathTBAA(const MDNode *MD) {
332  // Anonymous TBAA root starts with a MDNode and dragonegg uses it as
333  // a TBAA tag.
334  return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
335}
336
337/// Aliases - Test whether the type represented by A may alias the
338/// type represented by B.
339bool
340TypeBasedAliasAnalysis::Aliases(const MDNode *A,
341                                const MDNode *B) const {
342  // Make sure that both MDNodes are struct-path aware.
343  if (isStructPathTBAA(A) && isStructPathTBAA(B))
344    return PathAliases(A, B);
345
346  // Keep track of the root node for A and B.
347  TBAANode RootA, RootB;
348
349  // Climb the tree from A to see if we reach B.
350  for (TBAANode T(A); ; ) {
351    if (T.getNode() == B)
352      // B is an ancestor of A.
353      return true;
354
355    RootA = T;
356    T = T.getParent();
357    if (!T.getNode())
358      break;
359  }
360
361  // Climb the tree from B to see if we reach A.
362  for (TBAANode T(B); ; ) {
363    if (T.getNode() == A)
364      // A is an ancestor of B.
365      return true;
366
367    RootB = T;
368    T = T.getParent();
369    if (!T.getNode())
370      break;
371  }
372
373  // Neither node is an ancestor of the other.
374
375  // If they have different roots, they're part of different potentially
376  // unrelated type systems, so we must be conservative.
377  if (RootA.getNode() != RootB.getNode())
378    return true;
379
380  // If they have the same root, then we've proved there's no alias.
381  return false;
382}
383
384/// Test whether the struct-path tag represented by A may alias the
385/// struct-path tag represented by B.
386bool
387TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
388                                    const MDNode *B) const {
389  // Verify that both input nodes are struct-path aware.
390  assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
391  assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
392
393  // Keep track of the root node for A and B.
394  TBAAStructTypeNode RootA, RootB;
395  TBAAStructTagNode TagA(A), TagB(B);
396
397  // TODO: We need to check if AccessType of TagA encloses AccessType of
398  // TagB to support aggregate AccessType. If yes, return true.
399
400  // Start from the base type of A, follow the edge with the correct offset in
401  // the type DAG and adjust the offset until we reach the base type of B or
402  // until we reach the Root node.
403  // Compare the adjusted offset once we have the same base.
404
405  // Climb the type DAG from base type of A to see if we reach base type of B.
406  const MDNode *BaseA = TagA.getBaseType();
407  const MDNode *BaseB = TagB.getBaseType();
408  uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
409  for (TBAAStructTypeNode T(BaseA); ; ) {
410    if (T.getNode() == BaseB)
411      // Base type of A encloses base type of B, check if the offsets match.
412      return OffsetA == OffsetB;
413
414    RootA = T;
415    // Follow the edge with the correct offset, OffsetA will be adjusted to
416    // be relative to the field type.
417    T = T.getParent(OffsetA);
418    if (!T.getNode())
419      break;
420  }
421
422  // Reset OffsetA and climb the type DAG from base type of B to see if we reach
423  // base type of A.
424  OffsetA = TagA.getOffset();
425  for (TBAAStructTypeNode T(BaseB); ; ) {
426    if (T.getNode() == BaseA)
427      // Base type of B encloses base type of A, check if the offsets match.
428      return OffsetA == OffsetB;
429
430    RootB = T;
431    // Follow the edge with the correct offset, OffsetB will be adjusted to
432    // be relative to the field type.
433    T = T.getParent(OffsetB);
434    if (!T.getNode())
435      break;
436  }
437
438  // Neither node is an ancestor of the other.
439
440  // If they have different roots, they're part of different potentially
441  // unrelated type systems, so we must be conservative.
442  if (RootA.getNode() != RootB.getNode())
443    return true;
444
445  // If they have the same root, then we've proved there's no alias.
446  return false;
447}
448
449AliasAnalysis::AliasResult
450TypeBasedAliasAnalysis::alias(const Location &LocA,
451                              const Location &LocB) {
452  if (!EnableTBAA)
453    return AliasAnalysis::alias(LocA, LocB);
454
455  // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
456  // be conservative.
457  const MDNode *AM = LocA.TBAATag;
458  if (!AM) return AliasAnalysis::alias(LocA, LocB);
459  const MDNode *BM = LocB.TBAATag;
460  if (!BM) return AliasAnalysis::alias(LocA, LocB);
461
462  // If they may alias, chain to the next AliasAnalysis.
463  if (Aliases(AM, BM))
464    return AliasAnalysis::alias(LocA, LocB);
465
466  // Otherwise return a definitive result.
467  return NoAlias;
468}
469
470bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
471                                                    bool OrLocal) {
472  if (!EnableTBAA)
473    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
474
475  const MDNode *M = Loc.TBAATag;
476  if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
477
478  // If this is an "immutable" type, we can assume the pointer is pointing
479  // to constant memory.
480  if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
481      (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
482    return true;
483
484  return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
485}
486
487AliasAnalysis::ModRefBehavior
488TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
489  if (!EnableTBAA)
490    return AliasAnalysis::getModRefBehavior(CS);
491
492  ModRefBehavior Min = UnknownModRefBehavior;
493
494  // If this is an "immutable" type, we can assume the call doesn't write
495  // to memory.
496  if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
497    if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
498        (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
499      Min = OnlyReadsMemory;
500
501  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
502}
503
/// Function-level mod/ref behavior query. TBAA contributes nothing here, so
/// the query is forwarded unchanged, regardless of the enable-tbaa option.
AliasAnalysis::ModRefBehavior
TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
  // Functions don't have metadata. Just chain to the next implementation.
  return AliasAnalysis::getModRefBehavior(F);
}
509
510AliasAnalysis::ModRefResult
511TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
512                                      const Location &Loc) {
513  if (!EnableTBAA)
514    return AliasAnalysis::getModRefInfo(CS, Loc);
515
516  if (const MDNode *L = Loc.TBAATag)
517    if (const MDNode *M =
518          CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
519      if (!Aliases(L, M))
520        return NoModRef;
521
522  return AliasAnalysis::getModRefInfo(CS, Loc);
523}
524
525AliasAnalysis::ModRefResult
526TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
527                                      ImmutableCallSite CS2) {
528  if (!EnableTBAA)
529    return AliasAnalysis::getModRefInfo(CS1, CS2);
530
531  if (const MDNode *M1 =
532        CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
533    if (const MDNode *M2 =
534          CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
535      if (!Aliases(M1, M2))
536        return NoModRef;
537
538  return AliasAnalysis::getModRefInfo(CS1, CS2);
539}
540
541bool MDNode::isTBAAVtableAccess() const {
542  if (!isStructPathTBAA(this)) {
543    if (getNumOperands() < 1) return false;
544    if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
545      if (Tag1->getString() == "vtable pointer") return true;
546    }
547    return false;
548  }
549
550  // For struct-path aware TBAA, we use the access type of the tag.
551  if (getNumOperands() < 2) return false;
552  MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
553  if (!Tag) return false;
554  if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
555    if (Tag1->getString() == "vtable pointer") return true;
556  }
557  return false;
558}
559
560MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
561  if (!A || !B)
562    return nullptr;
563
564  if (A == B)
565    return A;
566
567  // For struct-path aware TBAA, we use the access type of the tag.
568  bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
569  if (StructPath) {
570    A = cast_or_null<MDNode>(A->getOperand(1));
571    if (!A) return nullptr;
572    B = cast_or_null<MDNode>(B->getOperand(1));
573    if (!B) return nullptr;
574  }
575
576  SmallVector<MDNode *, 4> PathA;
577  MDNode *T = A;
578  while (T) {
579    PathA.push_back(T);
580    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
581                                 : nullptr;
582  }
583
584  SmallVector<MDNode *, 4> PathB;
585  T = B;
586  while (T) {
587    PathB.push_back(T);
588    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
589                                 : nullptr;
590  }
591
592  int IA = PathA.size() - 1;
593  int IB = PathB.size() - 1;
594
595  MDNode *Ret = nullptr;
596  while (IA >= 0 && IB >=0) {
597    if (PathA[IA] == PathB[IB])
598      Ret = PathA[IA];
599    else
600      break;
601    --IA;
602    --IB;
603  }
604  if (!StructPath)
605    return Ret;
606
607  if (!Ret)
608    return nullptr;
609  // We need to convert from a type node to a tag node.
610  Type *Int64 = IntegerType::get(A->getContext(), 64);
611  Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
612  return MDNode::get(A->getContext(), Ops);
613}
614