RangeConstraintManager.cpp revision 40d8551890bc8454c4e0a28c9072c9c1d1dd588a
1//== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines RangeConstraintManager, a class that tracks simple
11//  equality and inequality constraints on symbolic values of ProgramState.
12//
13//===----------------------------------------------------------------------===//
14
15#include "SimpleConstraintManager.h"
16#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
17#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
18#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
19#include "llvm/Support/Debug.h"
20#include "llvm/ADT/FoldingSet.h"
21#include "llvm/ADT/ImmutableSet.h"
22#include "llvm/Support/raw_ostream.h"
23
24using namespace clang;
25using namespace ento;
26
27/// A Range represents the closed range [from, to].  The caller must
28/// guarantee that from <= to.  Note that Range is immutable, so as not
29/// to subvert RangeSet's immutability.
30namespace {
31class Range : public std::pair<const llvm::APSInt*,
32                                                const llvm::APSInt*> {
33public:
34  Range(const llvm::APSInt &from, const llvm::APSInt &to)
35    : std::pair<const llvm::APSInt*, const llvm::APSInt*>(&from, &to) {
36    assert(from <= to);
37  }
38  bool Includes(const llvm::APSInt &v) const {
39    return *first <= v && v <= *second;
40  }
41  const llvm::APSInt &From() const {
42    return *first;
43  }
44  const llvm::APSInt &To() const {
45    return *second;
46  }
47  const llvm::APSInt *getConcreteValue() const {
48    return &From() == &To() ? &From() : NULL;
49  }
50
51  void Profile(llvm::FoldingSetNodeID &ID) const {
52    ID.AddPointer(&From());
53    ID.AddPointer(&To());
54  }
55};
56
57
58class RangeTrait : public llvm::ImutContainerInfo<Range> {
59public:
60  // When comparing if one Range is less than another, we should compare
61  // the actual APSInt values instead of their pointers.  This keeps the order
62  // consistent (instead of comparing by pointer values) and can potentially
63  // be used to speed up some of the operations in RangeSet.
64  static inline bool isLess(key_type_ref lhs, key_type_ref rhs) {
65    return *lhs.first < *rhs.first || (!(*rhs.first < *lhs.first) &&
66                                       *lhs.second < *rhs.second);
67  }
68};
69
70/// RangeSet contains a set of ranges. If the set is empty, then
71///  there the value of a symbol is overly constrained and there are no
72///  possible values for that symbol.
73class RangeSet {
74  typedef llvm::ImmutableSet<Range, RangeTrait> PrimRangeSet;
75  PrimRangeSet ranges; // no need to make const, since it is an
76                       // ImmutableSet - this allows default operator=
77                       // to work.
78public:
79  typedef PrimRangeSet::Factory Factory;
80  typedef PrimRangeSet::iterator iterator;
81
82  RangeSet(PrimRangeSet RS) : ranges(RS) {}
83
84  iterator begin() const { return ranges.begin(); }
85  iterator end() const { return ranges.end(); }
86
87  bool isEmpty() const { return ranges.isEmpty(); }
88
89  /// Construct a new RangeSet representing '{ [from, to] }'.
90  RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to)
91    : ranges(F.add(F.getEmptySet(), Range(from, to))) {}
92
93  /// Profile - Generates a hash profile of this RangeSet for use
94  ///  by FoldingSet.
95  void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); }
96
97  /// getConcreteValue - If a symbol is contrained to equal a specific integer
98  ///  constant then this method returns that value.  Otherwise, it returns
99  ///  NULL.
100  const llvm::APSInt* getConcreteValue() const {
101    return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0;
102  }
103
104private:
105  void IntersectInRange(BasicValueFactory &BV, Factory &F,
106                        const llvm::APSInt &Lower,
107                        const llvm::APSInt &Upper,
108                        PrimRangeSet &newRanges,
109                        PrimRangeSet::iterator &i,
110                        PrimRangeSet::iterator &e) const {
111    // There are six cases for each range R in the set:
112    //   1. R is entirely before the intersection range.
113    //   2. R is entirely after the intersection range.
114    //   3. R contains the entire intersection range.
115    //   4. R starts before the intersection range and ends in the middle.
116    //   5. R starts in the middle of the intersection range and ends after it.
117    //   6. R is entirely contained in the intersection range.
118    // These correspond to each of the conditions below.
119    for (/* i = begin(), e = end() */; i != e; ++i) {
120      if (i->To() < Lower) {
121        continue;
122      }
123      if (i->From() > Upper) {
124        break;
125      }
126
127      if (i->Includes(Lower)) {
128        if (i->Includes(Upper)) {
129          newRanges = F.add(newRanges, Range(BV.getValue(Lower),
130                                             BV.getValue(Upper)));
131          break;
132        } else
133          newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To()));
134      } else {
135        if (i->Includes(Upper)) {
136          newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper)));
137          break;
138        } else
139          newRanges = F.add(newRanges, *i);
140      }
141    }
142  }
143
144  const llvm::APSInt &getMinValue() const {
145    assert(!isEmpty());
146    return ranges.begin()->From();
147  }
148
149  bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
150    // This function has nine cases, the cartesian product of range-testing
151    // both the upper and lower bounds against the symbol's type.
152    // Each case requires a different pinning operation.
153    // The function returns false if the described range is entirely outside
154    // the range of values for the associated symbol.
155    APSIntType Type(getMinValue());
156    APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower);
157    APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper);
158
159    switch (LowerTest) {
160    case APSIntType::RTR_Below:
161      switch (UpperTest) {
162      case APSIntType::RTR_Below:
163        // The entire range is outside the symbol's set of possible values.
164        // If this is a conventionally-ordered range, the state is infeasible.
165        if (Lower < Upper)
166          return false;
167
168        // However, if the range wraps around, it spans all possible values.
169        Lower = Type.getMinValue();
170        Upper = Type.getMaxValue();
171        break;
172      case APSIntType::RTR_Within:
173        // The range starts below what's possible but ends within it. Pin.
174        Lower = Type.getMinValue();
175        Type.apply(Upper);
176        break;
177      case APSIntType::RTR_Above:
178        // The range spans all possible values for the symbol. Pin.
179        Lower = Type.getMinValue();
180        Upper = Type.getMaxValue();
181        break;
182      }
183      break;
184    case APSIntType::RTR_Within:
185      switch (UpperTest) {
186      case APSIntType::RTR_Below:
187        // The range wraps around, but all lower values are not possible.
188        Type.apply(Lower);
189        Upper = Type.getMaxValue();
190        break;
191      case APSIntType::RTR_Within:
192        // The range may or may not wrap around, but both limits are valid.
193        Type.apply(Lower);
194        Type.apply(Upper);
195        break;
196      case APSIntType::RTR_Above:
197        // The range starts within what's possible but ends above it. Pin.
198        Type.apply(Lower);
199        Upper = Type.getMaxValue();
200        break;
201      }
202      break;
203    case APSIntType::RTR_Above:
204      switch (UpperTest) {
205      case APSIntType::RTR_Below:
206        // The range wraps but is outside the symbol's set of possible values.
207        return false;
208      case APSIntType::RTR_Within:
209        // The range starts above what's possible but ends within it (wrap).
210        Lower = Type.getMinValue();
211        Type.apply(Upper);
212        break;
213      case APSIntType::RTR_Above:
214        // The entire range is outside the symbol's set of possible values.
215        // If this is a conventionally-ordered range, the state is infeasible.
216        if (Lower < Upper)
217          return false;
218
219        // However, if the range wraps around, it spans all possible values.
220        Lower = Type.getMinValue();
221        Upper = Type.getMaxValue();
222        break;
223      }
224      break;
225    }
226
227    return true;
228  }
229
230public:
231  // Returns a set containing the values in the receiving set, intersected with
232  // the closed range [Lower, Upper]. Unlike the Range type, this range uses
233  // modular arithmetic, corresponding to the common treatment of C integer
234  // overflow. Thus, if the Lower bound is greater than the Upper bound, the
235  // range is taken to wrap around. This is equivalent to taking the
236  // intersection with the two ranges [Min, Upper] and [Lower, Max],
237  // or, alternatively, /removing/ all integers between Upper and Lower.
238  RangeSet Intersect(BasicValueFactory &BV, Factory &F,
239                     llvm::APSInt Lower, llvm::APSInt Upper) const {
240    if (!pin(Lower, Upper))
241      return F.getEmptySet();
242
243    PrimRangeSet newRanges = F.getEmptySet();
244
245    PrimRangeSet::iterator i = begin(), e = end();
246    if (Lower <= Upper)
247      IntersectInRange(BV, F, Lower, Upper, newRanges, i, e);
248    else {
249      // The order of the next two statements is important!
250      // IntersectInRange() does not reset the iteration state for i and e.
251      // Therefore, the lower range most be handled first.
252      IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e);
253      IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e);
254    }
255
256    return newRanges;
257  }
258
259  void print(raw_ostream &os) const {
260    bool isFirst = true;
261    os << "{ ";
262    for (iterator i = begin(), e = end(); i != e; ++i) {
263      if (isFirst)
264        isFirst = false;
265      else
266        os << ", ";
267
268      os << '[' << i->From().toString(10) << ", " << i->To().toString(10)
269         << ']';
270    }
271    os << " }";
272  }
273
274  bool operator==(const RangeSet &other) const {
275    return ranges == other.ranges;
276  }
277};
278} // end anonymous namespace
279
280REGISTER_TRAIT_WITH_PROGRAMSTATE(ConstraintRange,
281                                 CLANG_ENTO_PROGRAMSTATE_MAP(SymbolRef,
282                                                             RangeSet))
283
284namespace {
285class RangeConstraintManager : public SimpleConstraintManager{
286  RangeSet GetRange(ProgramStateRef state, SymbolRef sym);
287public:
288  RangeConstraintManager(SubEngine *subengine, BasicValueFactory &BVF)
289    : SimpleConstraintManager(subengine, BVF) {}
290
291  ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym,
292                             const llvm::APSInt& Int,
293                             const llvm::APSInt& Adjustment);
294
295  ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym,
296                             const llvm::APSInt& Int,
297                             const llvm::APSInt& Adjustment);
298
299  ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym,
300                             const llvm::APSInt& Int,
301                             const llvm::APSInt& Adjustment);
302
303  ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym,
304                             const llvm::APSInt& Int,
305                             const llvm::APSInt& Adjustment);
306
307  ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym,
308                             const llvm::APSInt& Int,
309                             const llvm::APSInt& Adjustment);
310
311  ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym,
312                             const llvm::APSInt& Int,
313                             const llvm::APSInt& Adjustment);
314
315  const llvm::APSInt* getSymVal(ProgramStateRef St, SymbolRef sym) const;
316  ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym);
317
318  ProgramStateRef removeDeadBindings(ProgramStateRef St, SymbolReaper& SymReaper);
319
320  void print(ProgramStateRef St, raw_ostream &Out,
321             const char* nl, const char *sep);
322
323private:
324  RangeSet::Factory F;
325};
326
327} // end anonymous namespace
328
329ConstraintManager *
330ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, SubEngine *Eng) {
331  return new RangeConstraintManager(Eng, StMgr.getBasicVals());
332}
333
334const llvm::APSInt* RangeConstraintManager::getSymVal(ProgramStateRef St,
335                                                      SymbolRef sym) const {
336  const ConstraintRangeTy::data_type *T = St->get<ConstraintRange>(sym);
337  return T ? T->getConcreteValue() : NULL;
338}
339
340ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
341                                                    SymbolRef Sym) {
342  const RangeSet *Ranges = State->get<ConstraintRange>(Sym);
343
344  // If we don't have any information about this symbol, it's underconstrained.
345  if (!Ranges)
346    return ConditionTruthVal();
347
348  // If we have a concrete value, see if it's zero.
349  if (const llvm::APSInt *Value = Ranges->getConcreteValue())
350    return *Value == 0;
351
352  BasicValueFactory &BV = getBasicVals();
353  APSIntType IntType = BV.getAPSIntType(Sym->getType());
354  llvm::APSInt Zero = IntType.getZeroValue();
355
356  // Check if zero is in the set of possible values.
357  if (Ranges->Intersect(BV, F, Zero, Zero).isEmpty())
358    return false;
359
360  // Zero is a possible value, but it is not the /only/ possible value.
361  return ConditionTruthVal();
362}
363
364/// Scan all symbols referenced by the constraints. If the symbol is not alive
365/// as marked in LSymbols, mark it as dead in DSymbols.
366ProgramStateRef
367RangeConstraintManager::removeDeadBindings(ProgramStateRef state,
368                                           SymbolReaper& SymReaper) {
369
370  ConstraintRangeTy CR = state->get<ConstraintRange>();
371  ConstraintRangeTy::Factory& CRFactory = state->get_context<ConstraintRange>();
372
373  for (ConstraintRangeTy::iterator I = CR.begin(), E = CR.end(); I != E; ++I) {
374    SymbolRef sym = I.getKey();
375    if (SymReaper.maybeDead(sym))
376      CR = CRFactory.remove(CR, sym);
377  }
378
379  return state->set<ConstraintRange>(CR);
380}
381
382RangeSet
383RangeConstraintManager::GetRange(ProgramStateRef state, SymbolRef sym) {
384  if (ConstraintRangeTy::data_type* V = state->get<ConstraintRange>(sym))
385    return *V;
386
387  // Lazily generate a new RangeSet representing all possible values for the
388  // given symbol type.
389  BasicValueFactory &BV = getBasicVals();
390  QualType T = sym->getType();
391
392  RangeSet Result(F, BV.getMinValue(T), BV.getMaxValue(T));
393
394  // Special case: references are known to be non-zero.
395  if (T->isReferenceType()) {
396    APSIntType IntType = BV.getAPSIntType(T);
397    Result = Result.Intersect(BV, F, ++IntType.getZeroValue(),
398                                     --IntType.getZeroValue());
399  }
400
401  return Result;
402}
403
404//===------------------------------------------------------------------------===
405// assumeSymX methods: public interface for RangeConstraintManager.
406//===------------------------------------------------------------------------===/
407
408// The syntax for ranges below is mathematical, using [x, y] for closed ranges
409// and (x, y) for open ranges. These ranges are modular, corresponding with
410// a common treatment of C integer overflow. This means that these methods
411// do not have to worry about overflow; RangeSet::Intersect can handle such a
412// "wraparound" range.
413// As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
414// UINT_MAX, 0, 1, and 2.
415
416ProgramStateRef
417RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
418                                    const llvm::APSInt &Int,
419                                    const llvm::APSInt &Adjustment) {
420  // Before we do any real work, see if the value can even show up.
421  APSIntType AdjustmentType(Adjustment);
422  if (AdjustmentType.testInRange(Int) != APSIntType::RTR_Within)
423    return St;
424
425  llvm::APSInt Lower = AdjustmentType.convert(Int) - Adjustment;
426  llvm::APSInt Upper = Lower;
427  --Lower;
428  ++Upper;
429
430  // [Int-Adjustment+1, Int-Adjustment-1]
431  // Notice that the lower bound is greater than the upper bound.
432  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Upper, Lower);
433  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
434}
435
436ProgramStateRef
437RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
438                                    const llvm::APSInt &Int,
439                                    const llvm::APSInt &Adjustment) {
440  // Before we do any real work, see if the value can even show up.
441  APSIntType AdjustmentType(Adjustment);
442  if (AdjustmentType.testInRange(Int) != APSIntType::RTR_Within)
443    return NULL;
444
445  // [Int-Adjustment, Int-Adjustment]
446  llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
447  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, AdjInt, AdjInt);
448  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
449}
450
451ProgramStateRef
452RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
453                                    const llvm::APSInt &Int,
454                                    const llvm::APSInt &Adjustment) {
455  // Before we do any real work, see if the value can even show up.
456  APSIntType AdjustmentType(Adjustment);
457  switch (AdjustmentType.testInRange(Int)) {
458  case APSIntType::RTR_Below:
459    return NULL;
460  case APSIntType::RTR_Within:
461    break;
462  case APSIntType::RTR_Above:
463    return St;
464  }
465
466  // Special case for Int == Min. This is always false.
467  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
468  llvm::APSInt Min = AdjustmentType.getMinValue();
469  if (ComparisonVal == Min)
470    return NULL;
471
472  llvm::APSInt Lower = Min-Adjustment;
473  llvm::APSInt Upper = ComparisonVal-Adjustment;
474  --Upper;
475
476  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
477  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
478}
479
480ProgramStateRef
481RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
482                                    const llvm::APSInt &Int,
483                                    const llvm::APSInt &Adjustment) {
484  // Before we do any real work, see if the value can even show up.
485  APSIntType AdjustmentType(Adjustment);
486  switch (AdjustmentType.testInRange(Int)) {
487  case APSIntType::RTR_Below:
488    return St;
489  case APSIntType::RTR_Within:
490    break;
491  case APSIntType::RTR_Above:
492    return NULL;
493  }
494
495  // Special case for Int == Max. This is always false.
496  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
497  llvm::APSInt Max = AdjustmentType.getMaxValue();
498  if (ComparisonVal == Max)
499    return NULL;
500
501  llvm::APSInt Lower = ComparisonVal-Adjustment;
502  llvm::APSInt Upper = Max-Adjustment;
503  ++Lower;
504
505  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
506  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
507}
508
509ProgramStateRef
510RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
511                                    const llvm::APSInt &Int,
512                                    const llvm::APSInt &Adjustment) {
513  // Before we do any real work, see if the value can even show up.
514  APSIntType AdjustmentType(Adjustment);
515  switch (AdjustmentType.testInRange(Int)) {
516  case APSIntType::RTR_Below:
517    return St;
518  case APSIntType::RTR_Within:
519    break;
520  case APSIntType::RTR_Above:
521    return NULL;
522  }
523
524  // Special case for Int == Min. This is always feasible.
525  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
526  llvm::APSInt Min = AdjustmentType.getMinValue();
527  if (ComparisonVal == Min)
528    return St;
529
530  llvm::APSInt Max = AdjustmentType.getMaxValue();
531  llvm::APSInt Lower = ComparisonVal-Adjustment;
532  llvm::APSInt Upper = Max-Adjustment;
533
534  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
535  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
536}
537
538ProgramStateRef
539RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
540                                    const llvm::APSInt &Int,
541                                    const llvm::APSInt &Adjustment) {
542  // Before we do any real work, see if the value can even show up.
543  APSIntType AdjustmentType(Adjustment);
544  switch (AdjustmentType.testInRange(Int)) {
545  case APSIntType::RTR_Below:
546    return NULL;
547  case APSIntType::RTR_Within:
548    break;
549  case APSIntType::RTR_Above:
550    return St;
551  }
552
553  // Special case for Int == Max. This is always feasible.
554  llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
555  llvm::APSInt Max = AdjustmentType.getMaxValue();
556  if (ComparisonVal == Max)
557    return St;
558
559  llvm::APSInt Min = AdjustmentType.getMinValue();
560  llvm::APSInt Lower = Min-Adjustment;
561  llvm::APSInt Upper = ComparisonVal-Adjustment;
562
563  RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
564  return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
565}
566
567//===------------------------------------------------------------------------===
568// Pretty-printing.
569//===------------------------------------------------------------------------===/
570
571void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out,
572                                   const char* nl, const char *sep) {
573
574  ConstraintRangeTy Ranges = St->get<ConstraintRange>();
575
576  if (Ranges.isEmpty()) {
577    Out << nl << sep << "Ranges are empty." << nl;
578    return;
579  }
580
581  Out << nl << sep << "Ranges of symbol values:";
582  for (ConstraintRangeTy::iterator I=Ranges.begin(), E=Ranges.end(); I!=E; ++I){
583    Out << nl << ' ' << I.getKey() << " : ";
584    I.getData().print(Out);
585  }
586  Out << nl;
587}
588