1//======- CFLGraph.h - Abstract stratified sets implementation. --------======//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This file defines CFLGraph, an auxiliary data structure used by CFL-based
11/// alias analysis.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ANALYSIS_CFLGRAPH_H
16#define LLVM_ANALYSIS_CFLGRAPH_H
17
18#include "AliasAnalysisSummary.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/Analysis/MemoryBuiltins.h"
21#include "llvm/IR/InstVisitor.h"
22#include "llvm/IR/Instructions.h"
23
24namespace llvm {
25namespace cflaa {
26
27/// \brief The Program Expression Graph (PEG) of CFL analysis
28/// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
29/// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
30/// the main purpose of this graph is to abstract away unrelated facts and
31/// translate the rest into a form that can be easily digested by CFL analyses.
32/// Each Node in the graph is an InstantiatedValue, and each edge represent a
33/// pointer assignment between InstantiatedValue. Pointer
34/// references/dereferences are not explicitly stored in the graph: we
35/// implicitly assume that for each node (X, I) it has a dereference edge to (X,
36/// I+1) and a reference edge to (X, I-1).
37class CFLGraph {
38public:
39  typedef InstantiatedValue Node;
40
41  struct Edge {
42    Node Other;
43  };
44
45  typedef std::vector<Edge> EdgeList;
46
47  struct NodeInfo {
48    EdgeList Edges;
49    AliasAttrs Attr;
50  };
51
52  class ValueInfo {
53    std::vector<NodeInfo> Levels;
54
55  public:
56    bool addNodeToLevel(unsigned Level) {
57      auto NumLevels = Levels.size();
58      if (NumLevels > Level)
59        return false;
60      Levels.resize(Level + 1);
61      return true;
62    }
63
64    NodeInfo &getNodeInfoAtLevel(unsigned Level) {
65      assert(Level < Levels.size());
66      return Levels[Level];
67    }
68    const NodeInfo &getNodeInfoAtLevel(unsigned Level) const {
69      assert(Level < Levels.size());
70      return Levels[Level];
71    }
72
73    unsigned getNumLevels() const { return Levels.size(); }
74  };
75
76private:
77  typedef DenseMap<Value *, ValueInfo> ValueMap;
78  ValueMap ValueImpls;
79
80  const NodeInfo *getNode(Node N) const {
81    auto Itr = ValueImpls.find(N.Val);
82    if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
83      return nullptr;
84    return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
85  }
86  NodeInfo *getNode(Node N) {
87    auto Itr = ValueImpls.find(N.Val);
88    if (Itr == ValueImpls.end() || Itr->second.getNumLevels() <= N.DerefLevel)
89      return nullptr;
90    return &Itr->second.getNodeInfoAtLevel(N.DerefLevel);
91  }
92
93public:
94  typedef ValueMap::const_iterator const_value_iterator;
95
96  bool addNode(Node N, AliasAttrs Attr = AliasAttrs()) {
97    assert(N.Val != nullptr);
98    auto &ValInfo = ValueImpls[N.Val];
99    auto Changed = ValInfo.addNodeToLevel(N.DerefLevel);
100    ValInfo.getNodeInfoAtLevel(N.DerefLevel).Attr |= Attr;
101    return Changed;
102  }
103
104  void addAttr(Node N, AliasAttrs Attr) {
105    auto *Info = getNode(N);
106    assert(Info != nullptr);
107    Info->Attr |= Attr;
108  }
109
110  void addEdge(Node From, Node To, int64_t Offset = 0) {
111    assert(getNode(To) != nullptr);
112
113    auto *FromInfo = getNode(From);
114    assert(FromInfo != nullptr);
115    FromInfo->Edges.push_back(Edge{To});
116  }
117
118  AliasAttrs attrFor(Node N) const {
119    auto *Info = getNode(N);
120    assert(Info != nullptr);
121    return Info->Attr;
122  }
123
124  iterator_range<const_value_iterator> value_mappings() const {
125    return make_range<const_value_iterator>(ValueImpls.begin(),
126                                            ValueImpls.end());
127  }
128};
129
130///\brief A builder class used to create CFLGraph instance from a given function
131/// The CFL-AA that uses this builder must provide its own type as a template
132/// argument. This is necessary for interprocedural processing: CFLGraphBuilder
133/// needs a way of obtaining the summary of other functions when callinsts are
134/// encountered.
135/// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
136/// member function that takes a Function& and returns the corresponding summary
137/// as a const AliasSummary*.
138template <typename CFLAA> class CFLGraphBuilder {
139  // Input of the builder
140  CFLAA &Analysis;
141  const TargetLibraryInfo &TLI;
142
143  // Output of the builder
144  CFLGraph Graph;
145  SmallVector<Value *, 4> ReturnedValues;
146
147  // Helper class
148  /// Gets the edges our graph should have, based on an Instruction*
149  class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
150    CFLAA &AA;
151    const TargetLibraryInfo &TLI;
152
153    CFLGraph &Graph;
154    SmallVectorImpl<Value *> &ReturnValues;
155
156    static bool hasUsefulEdges(ConstantExpr *CE) {
157      // ConstantExpr doesn't have terminators, invokes, or fences, so only
158      // needs
159      // to check for compares.
160      return CE->getOpcode() != Instruction::ICmp &&
161             CE->getOpcode() != Instruction::FCmp;
162    }
163
164    // Returns possible functions called by CS into the given SmallVectorImpl.
165    // Returns true if targets found, false otherwise.
166    static bool getPossibleTargets(CallSite CS,
167                                   SmallVectorImpl<Function *> &Output) {
168      if (auto *Fn = CS.getCalledFunction()) {
169        Output.push_back(Fn);
170        return true;
171      }
172
173      // TODO: If the call is indirect, we might be able to enumerate all
174      // potential
175      // targets of the call and return them, rather than just failing.
176      return false;
177    }
178
179    void addNode(Value *Val, AliasAttrs Attr = AliasAttrs()) {
180      assert(Val != nullptr && Val->getType()->isPointerTy());
181      if (auto GVal = dyn_cast<GlobalValue>(Val)) {
182        if (Graph.addNode(InstantiatedValue{GVal, 0},
183                          getGlobalOrArgAttrFromValue(*GVal)))
184          Graph.addNode(InstantiatedValue{GVal, 1}, getAttrUnknown());
185      } else if (auto CExpr = dyn_cast<ConstantExpr>(Val)) {
186        if (hasUsefulEdges(CExpr)) {
187          if (Graph.addNode(InstantiatedValue{CExpr, 0}))
188            visitConstantExpr(CExpr);
189        }
190      } else
191        Graph.addNode(InstantiatedValue{Val, 0}, Attr);
192    }
193
194    void addAssignEdge(Value *From, Value *To, int64_t Offset = 0) {
195      assert(From != nullptr && To != nullptr);
196      if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
197        return;
198      addNode(From);
199      if (To != From) {
200        addNode(To);
201        Graph.addEdge(InstantiatedValue{From, 0}, InstantiatedValue{To, 0},
202                      Offset);
203      }
204    }
205
206    void addDerefEdge(Value *From, Value *To) {
207      assert(From != nullptr && To != nullptr);
208      if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
209        return;
210      addNode(From);
211      addNode(To);
212      Graph.addNode(InstantiatedValue{From, 1});
213      Graph.addEdge(InstantiatedValue{From, 1}, InstantiatedValue{To, 0});
214    }
215
216  public:
217    GetEdgesVisitor(CFLGraphBuilder &Builder)
218        : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
219          ReturnValues(Builder.ReturnedValues) {}
220
221    void visitInstruction(Instruction &) {
222      llvm_unreachable("Unsupported instruction encountered");
223    }
224
225    void visitReturnInst(ReturnInst &Inst) {
226      if (auto RetVal = Inst.getReturnValue()) {
227        if (RetVal->getType()->isPointerTy()) {
228          addNode(RetVal);
229          ReturnValues.push_back(RetVal);
230        }
231      }
232    }
233
234    void visitPtrToIntInst(PtrToIntInst &Inst) {
235      auto *Ptr = Inst.getOperand(0);
236      addNode(Ptr, getAttrEscaped());
237    }
238
239    void visitIntToPtrInst(IntToPtrInst &Inst) {
240      auto *Ptr = &Inst;
241      addNode(Ptr, getAttrUnknown());
242    }
243
244    void visitCastInst(CastInst &Inst) {
245      auto *Src = Inst.getOperand(0);
246      addAssignEdge(Src, &Inst);
247    }
248
249    void visitBinaryOperator(BinaryOperator &Inst) {
250      auto *Op1 = Inst.getOperand(0);
251      auto *Op2 = Inst.getOperand(1);
252      addAssignEdge(Op1, &Inst);
253      addAssignEdge(Op2, &Inst);
254    }
255
256    void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
257      auto *Ptr = Inst.getPointerOperand();
258      auto *Val = Inst.getNewValOperand();
259      addDerefEdge(Ptr, Val);
260    }
261
262    void visitAtomicRMWInst(AtomicRMWInst &Inst) {
263      auto *Ptr = Inst.getPointerOperand();
264      auto *Val = Inst.getValOperand();
265      addDerefEdge(Ptr, Val);
266    }
267
268    void visitPHINode(PHINode &Inst) {
269      for (Value *Val : Inst.incoming_values())
270        addAssignEdge(Val, &Inst);
271    }
272
273    void visitGetElementPtrInst(GetElementPtrInst &Inst) {
274      auto *Op = Inst.getPointerOperand();
275      addAssignEdge(Op, &Inst);
276    }
277
278    void visitSelectInst(SelectInst &Inst) {
279      // Condition is not processed here (The actual statement producing
280      // the condition result is processed elsewhere). For select, the
281      // condition is evaluated, but not loaded, stored, or assigned
282      // simply as a result of being the condition of a select.
283
284      auto *TrueVal = Inst.getTrueValue();
285      auto *FalseVal = Inst.getFalseValue();
286      addAssignEdge(TrueVal, &Inst);
287      addAssignEdge(FalseVal, &Inst);
288    }
289
290    void visitAllocaInst(AllocaInst &Inst) { addNode(&Inst); }
291
292    void visitLoadInst(LoadInst &Inst) {
293      auto *Ptr = Inst.getPointerOperand();
294      auto *Val = &Inst;
295      addDerefEdge(Ptr, Val);
296    }
297
298    void visitStoreInst(StoreInst &Inst) {
299      auto *Ptr = Inst.getPointerOperand();
300      auto *Val = Inst.getValueOperand();
301      addDerefEdge(Ptr, Val);
302    }
303
304    void visitVAArgInst(VAArgInst &Inst) {
305      // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
306      // does
307      // two things:
308      //  1. Loads a value from *((T*)*Ptr).
309      //  2. Increments (stores to) *Ptr by some target-specific amount.
310      // For now, we'll handle this like a landingpad instruction (by placing
311      // the
312      // result in its own group, and having that group alias externals).
313      addNode(&Inst, getAttrUnknown());
314    }
315
316    static bool isFunctionExternal(Function *Fn) {
317      return !Fn->hasExactDefinition();
318    }
319
320    bool tryInterproceduralAnalysis(CallSite CS,
321                                    const SmallVectorImpl<Function *> &Fns) {
322      assert(Fns.size() > 0);
323
324      if (CS.arg_size() > MaxSupportedArgsInSummary)
325        return false;
326
327      // Exit early if we'll fail anyway
328      for (auto *Fn : Fns) {
329        if (isFunctionExternal(Fn) || Fn->isVarArg())
330          return false;
331        // Fail if the caller does not provide enough arguments
332        assert(Fn->arg_size() <= CS.arg_size());
333        if (!AA.getAliasSummary(*Fn))
334          return false;
335      }
336
337      for (auto *Fn : Fns) {
338        auto Summary = AA.getAliasSummary(*Fn);
339        assert(Summary != nullptr);
340
341        auto &RetParamRelations = Summary->RetParamRelations;
342        for (auto &Relation : RetParamRelations) {
343          auto IRelation = instantiateExternalRelation(Relation, CS);
344          if (IRelation.hasValue()) {
345            Graph.addNode(IRelation->From);
346            Graph.addNode(IRelation->To);
347            Graph.addEdge(IRelation->From, IRelation->To);
348          }
349        }
350
351        auto &RetParamAttributes = Summary->RetParamAttributes;
352        for (auto &Attribute : RetParamAttributes) {
353          auto IAttr = instantiateExternalAttribute(Attribute, CS);
354          if (IAttr.hasValue())
355            Graph.addNode(IAttr->IValue, IAttr->Attr);
356        }
357      }
358
359      return true;
360    }
361
362    void visitCallSite(CallSite CS) {
363      auto Inst = CS.getInstruction();
364
365      // Make sure all arguments and return value are added to the graph first
366      for (Value *V : CS.args())
367        if (V->getType()->isPointerTy())
368          addNode(V);
369      if (Inst->getType()->isPointerTy())
370        addNode(Inst);
371
372      // Check if Inst is a call to a library function that
373      // allocates/deallocates
374      // on the heap. Those kinds of functions do not introduce any aliases.
375      // TODO: address other common library functions such as realloc(),
376      // strdup(),
377      // etc.
378      if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
379          isFreeCall(Inst, &TLI))
380        return;
381
382      // TODO: Add support for noalias args/all the other fun function
383      // attributes
384      // that we can tack on.
385      SmallVector<Function *, 4> Targets;
386      if (getPossibleTargets(CS, Targets))
387        if (tryInterproceduralAnalysis(CS, Targets))
388          return;
389
390      // Because the function is opaque, we need to note that anything
391      // could have happened to the arguments (unless the function is marked
392      // readonly or readnone), and that the result could alias just about
393      // anything, too (unless the result is marked noalias).
394      if (!CS.onlyReadsMemory())
395        for (Value *V : CS.args()) {
396          if (V->getType()->isPointerTy()) {
397            // The argument itself escapes.
398            Graph.addAttr(InstantiatedValue{V, 0}, getAttrEscaped());
399            // The fate of argument memory is unknown. Note that since
400            // AliasAttrs is transitive with respect to dereference, we only
401            // need to specify it for the first-level memory.
402            Graph.addNode(InstantiatedValue{V, 1}, getAttrUnknown());
403          }
404        }
405
406      if (Inst->getType()->isPointerTy()) {
407        auto *Fn = CS.getCalledFunction();
408        if (Fn == nullptr || !Fn->doesNotAlias(0))
409          // No need to call addNode() since we've added Inst at the
410          // beginning of this function and we know it is not a global.
411          Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown());
412      }
413    }
414
415    /// Because vectors/aggregates are immutable and unaddressable, there's
416    /// nothing we can do to coax a value out of them, other than calling
417    /// Extract{Element,Value}. We can effectively treat them as pointers to
418    /// arbitrary memory locations we can store in and load from.
419    void visitExtractElementInst(ExtractElementInst &Inst) {
420      auto *Ptr = Inst.getVectorOperand();
421      auto *Val = &Inst;
422      addDerefEdge(Ptr, Val);
423    }
424
425    void visitInsertElementInst(InsertElementInst &Inst) {
426      auto *Vec = Inst.getOperand(0);
427      auto *Val = Inst.getOperand(1);
428      addAssignEdge(Vec, &Inst);
429      addDerefEdge(&Inst, Val);
430    }
431
432    void visitLandingPadInst(LandingPadInst &Inst) {
433      // Exceptions come from "nowhere", from our analysis' perspective.
434      // So we place the instruction its own group, noting that said group may
435      // alias externals
436      addNode(&Inst, getAttrUnknown());
437    }
438
439    void visitInsertValueInst(InsertValueInst &Inst) {
440      auto *Agg = Inst.getOperand(0);
441      auto *Val = Inst.getOperand(1);
442      addAssignEdge(Agg, &Inst);
443      addDerefEdge(&Inst, Val);
444    }
445
446    void visitExtractValueInst(ExtractValueInst &Inst) {
447      auto *Ptr = Inst.getAggregateOperand();
448      addDerefEdge(Ptr, &Inst);
449    }
450
451    void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
452      auto *From1 = Inst.getOperand(0);
453      auto *From2 = Inst.getOperand(1);
454      addAssignEdge(From1, &Inst);
455      addAssignEdge(From2, &Inst);
456    }
457
458    void visitConstantExpr(ConstantExpr *CE) {
459      switch (CE->getOpcode()) {
460      default:
461        llvm_unreachable("Unknown instruction type encountered!");
462// Build the switch statement using the Instruction.def file.
463#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
464  case Instruction::OPCODE:                                                    \
465    this->visit##OPCODE(*(CLASS *)CE);                                         \
466    break;
467#include "llvm/IR/Instruction.def"
468      }
469    }
470  };
471
472  // Helper functions
473
474  // Determines whether or not we an instruction is useless to us (e.g.
475  // FenceInst)
476  static bool hasUsefulEdges(Instruction *Inst) {
477    bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
478                                    !isa<InvokeInst>(Inst) &&
479                                    !isa<ReturnInst>(Inst);
480    return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
481           !IsNonInvokeRetTerminator;
482  }
483
484  void addArgumentToGraph(Argument &Arg) {
485    if (Arg.getType()->isPointerTy()) {
486      Graph.addNode(InstantiatedValue{&Arg, 0},
487                    getGlobalOrArgAttrFromValue(Arg));
488      // Pointees of a formal parameter is known to the caller
489      Graph.addNode(InstantiatedValue{&Arg, 1}, getAttrCaller());
490    }
491  }
492
493  // Given an Instruction, this will add it to the graph, along with any
494  // Instructions that are potentially only available from said Instruction
495  // For example, given the following line:
496  //   %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
497  // addInstructionToGraph would add both the `load` and `getelementptr`
498  // instructions to the graph appropriately.
499  void addInstructionToGraph(GetEdgesVisitor &Visitor, Instruction &Inst) {
500    if (!hasUsefulEdges(&Inst))
501      return;
502
503    Visitor.visit(Inst);
504  }
505
506  // Builds the graph needed for constructing the StratifiedSets for the given
507  // function
508  void buildGraphFrom(Function &Fn) {
509    GetEdgesVisitor Visitor(*this);
510
511    for (auto &Bb : Fn.getBasicBlockList())
512      for (auto &Inst : Bb.getInstList())
513        addInstructionToGraph(Visitor, Inst);
514
515    for (auto &Arg : Fn.args())
516      addArgumentToGraph(Arg);
517  }
518
519public:
520  CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
521      : Analysis(Analysis), TLI(TLI) {
522    buildGraphFrom(Fn);
523  }
524
525  const CFLGraph &getCFLGraph() const { return Graph; }
526  const SmallVector<Value *, 4> &getReturnValues() const {
527    return ReturnedValues;
528  }
529};
530}
531}
532
533#endif
534