ArrayBoundCheckerV2.cpp revision 55fc873017f10f6f566b182b70f6fc22aefa3464
1//== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines ArrayBoundCheckerV2, which is a path-sensitive check
11// which looks for an out-of-bound array element access.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ClangSACheckers.h"
16#include "clang/AST/CharUnits.h"
17#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
22#include "llvm/ADT/SmallString.h"
23#include "llvm/Support/raw_ostream.h"
24
25using namespace clang;
26using namespace ento;
27
28namespace {
// Path-sensitive checker that reports loads/stores whose byte offset lies
// outside the extent of the accessed base memory region.
class ArrayBoundCheckerV2 :
    public Checker<check::Location> {
  // Lazily-created bug type shared by all reports from this checker.
  mutable OwningPtr<BuiltinBug> BT;

  // Kind of out-of-bounds access: before the start of the region, past its
  // end, or an under-constrained access with a tainted index.
  enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted };

  // Generate a sink node for 'errorState' and emit a bug report describing
  // the kind of out-of-bounds access.
  void reportOOB(CheckerContext &C, ProgramStateRef errorState,
                 OOB_Kind kind) const;

public:
  // Checker entry point: invoked for every load/store of location 'l'.
  void checkLocation(SVal l, bool isLoad, const Stmt*S,
                     CheckerContext &C) const;
};
42
// FIXME: Eventually replace RegionRawOffset with this class.
// Pairs a base region with a symbolic byte offset into it; produced by
// 'computeOffset' and consumed by the bounds checks in 'checkLocation'.
class RegionRawOffsetV2 {
private:
  const SubRegion *baseRegion;  // Null when the offset could not be computed.
  SVal byteOffset;              // Byte offset from the start of 'baseRegion'.

  // "Invalid" value: null region, unknown offset.  Callers detect this via
  // getRegion() returning null.
  RegionRawOffsetV2()
    : baseRegion(0), byteOffset(UnknownVal()) {}

public:
  RegionRawOffsetV2(const SubRegion* base, SVal offset)
    : baseRegion(base), byteOffset(offset) {}

  // Precondition: only called on valid offsets (byteOffset is a NonLoc).
  NonLoc getByteOffset() const { return cast<NonLoc>(byteOffset); }
  const SubRegion *getRegion() const { return baseRegion; }

  // Compute a raw byte offset for 'location'; returns an invalid
  // RegionRawOffsetV2 (null region) on failure.
  static RegionRawOffsetV2 computeOffset(ProgramStateRef state,
                                         SValBuilder &svalBuilder,
                                         SVal location);

  void dump() const;
  void dumpToStream(raw_ostream &os) const;
};
66}
67
68static SVal computeExtentBegin(SValBuilder &svalBuilder,
69                               const MemRegion *region) {
70  while (true)
71    switch (region->getKind()) {
72      default:
73        return svalBuilder.makeZeroArrayIndex();
74      case MemRegion::SymbolicRegionKind:
75        // FIXME: improve this later by tracking symbolic lower bounds
76        // for symbolic regions.
77        return UnknownVal();
78      case MemRegion::ElementRegionKind:
79        region = cast<SubRegion>(region)->getSuperRegion();
80        continue;
81    }
82}
83
84void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
85                                        const Stmt* LoadS,
86                                        CheckerContext &checkerContext) const {
87
88  // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
89  // some new logic here that reasons directly about memory region extents.
90  // Once that logic is more mature, we can bring it back to assumeInBound()
91  // for all clients to use.
92  //
93  // The algorithm we are using here for bounds checking is to see if the
94  // memory access is within the extent of the base region.  Since we
95  // have some flexibility in defining the base region, we can achieve
96  // various levels of conservatism in our buffer overflow checking.
97  ProgramStateRef state = checkerContext.getState();
98  ProgramStateRef originalState = state;
99
100  SValBuilder &svalBuilder = checkerContext.getSValBuilder();
101  const RegionRawOffsetV2 &rawOffset =
102    RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
103
104  if (!rawOffset.getRegion())
105    return;
106
107  // CHECK LOWER BOUND: Is byteOffset < extent begin?
108  //  If so, we are doing a load/store
109  //  before the first valid offset in the memory region.
110
111  SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion());
112
113  if (isa<NonLoc>(extentBegin)) {
114    SVal lowerBound
115      = svalBuilder.evalBinOpNN(state, BO_LT, rawOffset.getByteOffset(),
116                                cast<NonLoc>(extentBegin),
117                                svalBuilder.getConditionType());
118
119    NonLoc *lowerBoundToCheck = dyn_cast<NonLoc>(&lowerBound);
120    if (!lowerBoundToCheck)
121      return;
122
123    ProgramStateRef state_precedesLowerBound, state_withinLowerBound;
124    llvm::tie(state_precedesLowerBound, state_withinLowerBound) =
125      state->assume(*lowerBoundToCheck);
126
127    // Are we constrained enough to definitely precede the lower bound?
128    if (state_precedesLowerBound && !state_withinLowerBound) {
129      reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
130      return;
131    }
132
133    // Otherwise, assume the constraint of the lower bound.
134    assert(state_withinLowerBound);
135    state = state_withinLowerBound;
136  }
137
138  do {
139    // CHECK UPPER BOUND: Is byteOffset >= extent(baseRegion)?  If so,
140    // we are doing a load/store after the last valid offset.
141    DefinedOrUnknownSVal extentVal =
142      rawOffset.getRegion()->getExtent(svalBuilder);
143    if (!isa<NonLoc>(extentVal))
144      break;
145
146    SVal upperbound
147      = svalBuilder.evalBinOpNN(state, BO_GE, rawOffset.getByteOffset(),
148                                cast<NonLoc>(extentVal),
149                                svalBuilder.getConditionType());
150
151    NonLoc *upperboundToCheck = dyn_cast<NonLoc>(&upperbound);
152    if (!upperboundToCheck)
153      break;
154
155    ProgramStateRef state_exceedsUpperBound, state_withinUpperBound;
156    llvm::tie(state_exceedsUpperBound, state_withinUpperBound) =
157      state->assume(*upperboundToCheck);
158
159    // If we are under constrained and the index variables are tainted, report.
160    if (state_exceedsUpperBound && state_withinUpperBound) {
161      if (state->isTainted(rawOffset.getByteOffset()))
162        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted);
163        return;
164    }
165
166    // If we are constrained enough to definitely exceed the upper bound, report.
167    if (state_exceedsUpperBound) {
168      assert(!state_withinUpperBound);
169      reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
170      return;
171    }
172
173    assert(state_withinUpperBound);
174    state = state_withinUpperBound;
175  }
176  while (false);
177
178  if (state != originalState)
179    checkerContext.addTransition(state);
180}
181
182void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
183                                    ProgramStateRef errorState,
184                                    OOB_Kind kind) const {
185
186  ExplodedNode *errorNode = checkerContext.generateSink(errorState);
187  if (!errorNode)
188    return;
189
190  if (!BT)
191    BT.reset(new BuiltinBug("Out-of-bound access"));
192
193  // FIXME: This diagnostics are preliminary.  We should get far better
194  // diagnostics for explaining buffer overruns.
195
196  SmallString<256> buf;
197  llvm::raw_svector_ostream os(buf);
198  os << "Out of bound memory access ";
199  switch (kind) {
200  case OOB_Precedes:
201    os << "(accessed memory precedes memory block)";
202    break;
203  case OOB_Excedes:
204    os << "(access exceeds upper limit of memory block)";
205    break;
206  case OOB_Tainted:
207    os << "(index is tainted)";
208    break;
209  }
210
211  checkerContext.emitReport(new BugReport(*BT, os.str(), errorNode));
212}
213
// Debugging aid: print this region/offset pair to standard error.
void RegionRawOffsetV2::dump() const {
  dumpToStream(llvm::errs());
}
217
218void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
219  os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
220}
221
222// FIXME: Merge with the implementation of the same method in Store.cpp
223static bool IsCompleteType(ASTContext &Ctx, QualType Ty) {
224  if (const RecordType *RT = Ty->getAs<RecordType>()) {
225    const RecordDecl *D = RT->getDecl();
226    if (!D->getDefinition())
227      return false;
228  }
229
230  return true;
231}
232
233
234// Lazily computes a value to be used by 'computeOffset'.  If 'val'
235// is unknown or undefined, we lazily substitute '0'.  Otherwise,
236// return 'val'.
237static inline SVal getValue(SVal val, SValBuilder &svalBuilder) {
238  return isa<UndefinedVal>(val) ? svalBuilder.makeArrayIndex(0) : val;
239}
240
241// Scale a base value by a scaling factor, and return the scaled
242// value as an SVal.  Used by 'computeOffset'.
243static inline SVal scaleValue(ProgramStateRef state,
244                              NonLoc baseVal, CharUnits scaling,
245                              SValBuilder &sb) {
246  return sb.evalBinOpNN(state, BO_Mul, baseVal,
247                        sb.makeArrayIndex(scaling.getQuantity()),
248                        sb.getArrayIndexType());
249}
250
251// Add an SVal to another, treating unknown and undefined values as
252// summing to UnknownVal.  Used by 'computeOffset'.
253static SVal addValue(ProgramStateRef state, SVal x, SVal y,
254                     SValBuilder &svalBuilder) {
255  // We treat UnknownVals and UndefinedVals the same here because we
256  // only care about computing offsets.
257  if (x.isUnknownOrUndef() || y.isUnknownOrUndef())
258    return UnknownVal();
259
260  return svalBuilder.evalBinOpNN(state, BO_Add,
261                                 cast<NonLoc>(x), cast<NonLoc>(y),
262                                 svalBuilder.getArrayIndexType());
263}
264
/// Compute a raw byte offset from a base region.  Used for array bounds
/// checking.
///
/// Walks from 'location''s region up through its chain of ElementRegions,
/// accumulating index * sizeof(elementType) at each level, and stops at the
/// first non-element region, which becomes the base.  Returns an invalid
/// RegionRawOffsetV2 (null region) whenever the offset cannot be computed
/// (non-NonLoc index, incomplete element type, or unknown arithmetic).
RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
                                                   SValBuilder &svalBuilder,
                                                   SVal location)
{
  const MemRegion *region = location.getAsRegion();
  // 'offset' starts Undefined; getValue() lazily turns that into index 0
  // the first time it is actually needed.
  SVal offset = UndefinedVal();

  while (region) {
    switch (region->getKind()) {
      default: {
        // Reached the base (non-element) region: finalize the offset.
        if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
          offset = getValue(offset, svalBuilder);
          if (!offset.isUnknownOrUndef())
            return RegionRawOffsetV2(subReg, offset);
        }
        return RegionRawOffsetV2();
      }
      case MemRegion::ElementRegionKind: {
        const ElementRegion *elemReg = cast<ElementRegion>(region);
        SVal index = elemReg->getIndex();
        // Only NonLoc indices can participate in offset arithmetic.
        if (!isa<NonLoc>(index))
          return RegionRawOffsetV2();
        QualType elemType = elemReg->getElementType();
        // If the element is an incomplete type, go no further.
        ASTContext &astContext = svalBuilder.getContext();
        if (!IsCompleteType(astContext, elemType))
          return RegionRawOffsetV2();

        // Update the offset.
        // offset += index * sizeof(elemType), in bytes.
        offset = addValue(state,
                          getValue(offset, svalBuilder),
                          scaleValue(state,
                          cast<NonLoc>(index),
                          astContext.getTypeSizeInChars(elemType),
                          svalBuilder),
                          svalBuilder);

        // Arithmetic collapsed to unknown: give up.
        if (offset.isUnknownOrUndef())
          return RegionRawOffsetV2();

        // Continue the walk on the region this element is based on.
        region = elemReg->getSuperRegion();
        continue;
      }
    }
  }
  // location had no region at all.
  return RegionRawOffsetV2();
}
314
315
// Registration hook called by the checker framework to enable this checker.
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
  mgr.registerChecker<ArrayBoundCheckerV2>();
}
319