CStringChecker.cpp revision 695fb502825a53ccd178ec1c85c77929d88acb71
11d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-//
21d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling//
31d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling//                     The LLVM Compiler Infrastructure
41d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling//
51d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This defines CStringChecker, which is an assortment of checks on calls
11// to functions in <string.h>.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ClangSACheckers.h"
16#include "clang/StaticAnalyzer/Core/CheckerManager.h"
17#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerVisitor.h"
19#include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h"
20#include "llvm/ADT/StringSwitch.h"
21
22using namespace clang;
23using namespace ento;
24
25namespace {
26class CStringChecker : public CheckerVisitor<CStringChecker> {
27  BugType *BT_Null, *BT_Bounds, *BT_BoundsWrite, *BT_Overlap, *BT_NotCString;
28public:
29  CStringChecker()
30  : BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0)
31  {}
32  static void *getTag() { static int tag; return &tag; }
33
34  bool evalCallExpr(CheckerContext &C, const CallExpr *CE);
35  void PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS);
36  void MarkLiveSymbols(const GRState *state, SymbolReaper &SR);
37  void evalDeadSymbols(CheckerContext &C, SymbolReaper &SR);
38  bool wantsRegionChangeUpdate(const GRState *state);
39
40  const GRState *EvalRegionChanges(const GRState *state,
41                                   const MemRegion * const *Begin,
42                                   const MemRegion * const *End,
43                                   bool*);
44
45  typedef void (CStringChecker::*FnCheck)(CheckerContext &, const CallExpr *);
46
47  void evalMemcpy(CheckerContext &C, const CallExpr *CE);
48  void evalMemmove(CheckerContext &C, const CallExpr *CE);
49  void evalBcopy(CheckerContext &C, const CallExpr *CE);
50  void evalCopyCommon(CheckerContext &C, const GRState *state,
51                      const Expr *Size, const Expr *Source, const Expr *Dest,
52                      bool Restricted = false);
53
54  void evalMemcmp(CheckerContext &C, const CallExpr *CE);
55
56  void evalstrLength(CheckerContext &C, const CallExpr *CE);
57
58  void evalStrcpy(CheckerContext &C, const CallExpr *CE);
59  void evalStpcpy(CheckerContext &C, const CallExpr *CE);
60  void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd);
61
62  // Utility methods
63  std::pair<const GRState*, const GRState*>
64  assumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty);
65
66  const GRState *setCStringLength(const GRState *state, const MemRegion *MR,
67                                  SVal strLength);
68  SVal getCStringLengthForRegion(CheckerContext &C, const GRState *&state,
69                                 const Expr *Ex, const MemRegion *MR);
70  SVal getCStringLength(CheckerContext &C, const GRState *&state,
71                        const Expr *Ex, SVal Buf);
72
73  const GRState *InvalidateBuffer(CheckerContext &C, const GRState *state,
74                                  const Expr *Ex, SVal V);
75
76  bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
77                       const MemRegion *MR);
78
79  // Re-usable checks
80  const GRState *checkNonNull(CheckerContext &C, const GRState *state,
81                               const Expr *S, SVal l);
82  const GRState *CheckLocation(CheckerContext &C, const GRState *state,
83                               const Expr *S, SVal l,
84                               bool IsDestination = false);
85  const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
86                                   const Expr *Size,
87                                   const Expr *FirstBuf,
88                                   const Expr *SecondBuf = NULL,
89                                   bool FirstIsDestination = false);
90  const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
91                              const Expr *Size, const Expr *First,
92                              const Expr *Second);
93  void emitOverlapBug(CheckerContext &C, const GRState *state,
94                      const Stmt *First, const Stmt *Second);
95};
96
97class CStringLength {
98public:
99  typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap;
100};
101} //end anonymous namespace
102
103namespace clang {
104namespace ento {
105  template <>
106  struct GRStateTrait<CStringLength>
107    : public GRStatePartialTrait<CStringLength::EntryMap> {
108    static void *GDMIndex() { return CStringChecker::getTag(); }
109  };
110}
111}
112
113static void RegisterCStringChecker(ExprEngine &Eng) {
114  Eng.registerCheck(new CStringChecker());
115}
116
117void ento::registerCStringChecker(CheckerManager &mgr) {
118  mgr.addCheckerRegisterFunction(RegisterCStringChecker);
119}
120
121//===----------------------------------------------------------------------===//
122// Individual checks and utility methods.
123//===----------------------------------------------------------------------===//
124
125std::pair<const GRState*, const GRState*>
126CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V,
127                           QualType Ty) {
128  DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
129  if (!val)
130    return std::pair<const GRState*, const GRState *>(state, state);
131
132  SValBuilder &svalBuilder = C.getSValBuilder();
133  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
134  return state->assume(svalBuilder.evalEQ(state, *val, zero));
135}
136
137const GRState *CStringChecker::checkNonNull(CheckerContext &C,
138                                            const GRState *state,
139                                            const Expr *S, SVal l) {
140  // If a previous check has failed, propagate the failure.
141  if (!state)
142    return NULL;
143
144  const GRState *stateNull, *stateNonNull;
145  llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
146
147  if (stateNull && !stateNonNull) {
148    ExplodedNode *N = C.generateSink(stateNull);
149    if (!N)
150      return NULL;
151
152    if (!BT_Null)
153      BT_Null = new BuiltinBug("API",
154        "Null pointer argument in call to byte string function");
155
156    // Generate a report for this bug.
157    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null);
158    EnhancedBugReport *report = new EnhancedBugReport(*BT,
159                                                      BT->getDescription(), N);
160
161    report->addRange(S->getSourceRange());
162    report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S);
163    C.EmitReport(report);
164    return NULL;
165  }
166
167  // From here on, assume that the value is non-null.
168  assert(stateNonNull);
169  return stateNonNull;
170}
171
172// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
173const GRState *CStringChecker::CheckLocation(CheckerContext &C,
174                                             const GRState *state,
175                                             const Expr *S, SVal l,
176                                             bool IsDestination) {
177  // If a previous check has failed, propagate the failure.
178  if (!state)
179    return NULL;
180
181  // Check for out of bound array element access.
182  const MemRegion *R = l.getAsRegion();
183  if (!R)
184    return state;
185
186  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
187  if (!ER)
188    return state;
189
190  assert(ER->getValueType() == C.getASTContext().CharTy &&
191    "CheckLocation should only be called with char* ElementRegions");
192
193  // Get the size of the array.
194  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
195  SValBuilder &svalBuilder = C.getSValBuilder();
196  SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
197  DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
198
199  // Get the index of the accessed element.
200  DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
201
202  const GRState *StInBound = state->assumeInBound(Idx, Size, true);
203  const GRState *StOutBound = state->assumeInBound(Idx, Size, false);
204  if (StOutBound && !StInBound) {
205    ExplodedNode *N = C.generateSink(StOutBound);
206    if (!N)
207      return NULL;
208
209    BuiltinBug *BT;
210    if (IsDestination) {
211      if (!BT_BoundsWrite) {
212        BT_BoundsWrite = new BuiltinBug("Out-of-bound array access",
213          "Byte string function overflows destination buffer");
214      }
215      BT = static_cast<BuiltinBug*>(BT_BoundsWrite);
216    } else {
217      if (!BT_Bounds) {
218        BT_Bounds = new BuiltinBug("Out-of-bound array access",
219          "Byte string function accesses out-of-bound array element");
220      }
221      BT = static_cast<BuiltinBug*>(BT_Bounds);
222    }
223
224    // FIXME: It would be nice to eventually make this diagnostic more clear,
225    // e.g., by referencing the original declaration or by saying *why* this
226    // reference is outside the range.
227
228    // Generate a report for this bug.
229    RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);
230
231    report->addRange(S->getSourceRange());
232    C.EmitReport(report);
233    return NULL;
234  }
235
236  // Array bound check succeeded.  From this point forward the array bound
237  // should always succeed.
238  return StInBound;
239}
240
241const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
242                                                 const GRState *state,
243                                                 const Expr *Size,
244                                                 const Expr *FirstBuf,
245                                                 const Expr *SecondBuf,
246                                                 bool FirstIsDestination) {
247  // If a previous check has failed, propagate the failure.
248  if (!state)
249    return NULL;
250
251  SValBuilder &svalBuilder = C.getSValBuilder();
252  ASTContext &Ctx = C.getASTContext();
253
254  QualType sizeTy = Size->getType();
255  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
256
257  // Check that the first buffer is non-null.
258  SVal BufVal = state->getSVal(FirstBuf);
259  state = checkNonNull(C, state, FirstBuf, BufVal);
260  if (!state)
261    return NULL;
262
263  // Get the access length and make sure it is known.
264  SVal LengthVal = state->getSVal(Size);
265  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
266  if (!Length)
267    return state;
268
269  // Compute the offset of the last element to be accessed: size-1.
270  NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
271  NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
272                                                    *Length, One, sizeTy));
273
274  // Check that the first buffer is sufficently long.
275  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
276  if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
277    SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
278                                          LastOffset, PtrTy);
279    state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination);
280
281    // If the buffer isn't large enough, abort.
282    if (!state)
283      return NULL;
284  }
285
286  // If there's a second buffer, check it as well.
287  if (SecondBuf) {
288    BufVal = state->getSVal(SecondBuf);
289    state = checkNonNull(C, state, SecondBuf, BufVal);
290    if (!state)
291      return NULL;
292
293    BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
294    if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
295      SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
296                                            LastOffset, PtrTy);
297      state = CheckLocation(C, state, SecondBuf, BufEnd);
298    }
299  }
300
301  // Large enough or not, return this state!
302  return state;
303}
304
305const GRState *CStringChecker::CheckOverlap(CheckerContext &C,
306                                            const GRState *state,
307                                            const Expr *Size,
308                                            const Expr *First,
309                                            const Expr *Second) {
310  // Do a simple check for overlap: if the two arguments are from the same
311  // buffer, see if the end of the first is greater than the start of the second
312  // or vice versa.
313
314  // If a previous check has failed, propagate the failure.
315  if (!state)
316    return NULL;
317
318  const GRState *stateTrue, *stateFalse;
319
320  // Get the buffer values and make sure they're known locations.
321  SVal firstVal = state->getSVal(First);
322  SVal secondVal = state->getSVal(Second);
323
324  Loc *firstLoc = dyn_cast<Loc>(&firstVal);
325  if (!firstLoc)
326    return state;
327
328  Loc *secondLoc = dyn_cast<Loc>(&secondVal);
329  if (!secondLoc)
330    return state;
331
332  // Are the two values the same?
333  SValBuilder &svalBuilder = C.getSValBuilder();
334  llvm::tie(stateTrue, stateFalse) =
335    state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
336
337  if (stateTrue && !stateFalse) {
338    // If the values are known to be equal, that's automatically an overlap.
339    emitOverlapBug(C, stateTrue, First, Second);
340    return NULL;
341  }
342
343  // assume the two expressions are not equal.
344  assert(stateFalse);
345  state = stateFalse;
346
347  // Which value comes first?
348  ASTContext &Ctx = svalBuilder.getContext();
349  QualType cmpTy = Ctx.IntTy;
350  SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
351                                         *firstLoc, *secondLoc, cmpTy);
352  DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
353  if (!reverseTest)
354    return state;
355
356  llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
357  if (stateTrue) {
358    if (stateFalse) {
359      // If we don't know which one comes first, we can't perform this test.
360      return state;
361    } else {
362      // Switch the values so that firstVal is before secondVal.
363      Loc *tmpLoc = firstLoc;
364      firstLoc = secondLoc;
365      secondLoc = tmpLoc;
366
367      // Switch the Exprs as well, so that they still correspond.
368      const Expr *tmpExpr = First;
369      First = Second;
370      Second = tmpExpr;
371    }
372  }
373
374  // Get the length, and make sure it too is known.
375  SVal LengthVal = state->getSVal(Size);
376  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
377  if (!Length)
378    return state;
379
380  // Convert the first buffer's start address to char*.
381  // Bail out if the cast fails.
382  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
383  SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType());
384  Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
385  if (!FirstStartLoc)
386    return state;
387
388  // Compute the end of the first buffer. Bail out if THAT fails.
389  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
390                                 *FirstStartLoc, *Length, CharPtrTy);
391  Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
392  if (!FirstEndLoc)
393    return state;
394
395  // Is the end of the first buffer past the start of the second buffer?
396  SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
397                                *FirstEndLoc, *secondLoc, cmpTy);
398  DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
399  if (!OverlapTest)
400    return state;
401
402  llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
403
404  if (stateTrue && !stateFalse) {
405    // Overlap!
406    emitOverlapBug(C, stateTrue, First, Second);
407    return NULL;
408  }
409
410  // assume the two expressions don't overlap.
411  assert(stateFalse);
412  return stateFalse;
413}
414
415void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state,
416                                    const Stmt *First, const Stmt *Second) {
417  ExplodedNode *N = C.generateSink(state);
418  if (!N)
419    return;
420
421  if (!BT_Overlap)
422    BT_Overlap = new BugType("Unix API", "Improper arguments");
423
424  // Generate a report for this bug.
425  RangedBugReport *report =
426    new RangedBugReport(*BT_Overlap,
427      "Arguments must not be overlapping buffers", N);
428  report->addRange(First->getSourceRange());
429  report->addRange(Second->getSourceRange());
430
431  C.EmitReport(report);
432}
433
434const GRState *CStringChecker::setCStringLength(const GRState *state,
435                                                const MemRegion *MR,
436                                                SVal strLength) {
437  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
438  if (strLength.isUnknown())
439    return state;
440
441  MR = MR->StripCasts();
442
443  switch (MR->getKind()) {
444  case MemRegion::StringRegionKind:
445    // FIXME: This can happen if we strcpy() into a string region. This is
446    // undefined [C99 6.4.5p6], but we should still warn about it.
447    return state;
448
449  case MemRegion::SymbolicRegionKind:
450  case MemRegion::AllocaRegionKind:
451  case MemRegion::VarRegionKind:
452  case MemRegion::FieldRegionKind:
453  case MemRegion::ObjCIvarRegionKind:
454    return state->set<CStringLength>(MR, strLength);
455
456  case MemRegion::ElementRegionKind:
457    // FIXME: Handle element regions by upper-bounding the parent region's
458    // string length.
459    return state;
460
461  default:
462    // Other regions (mostly non-data) can't have a reliable C string length.
463    // For now, just ignore the change.
464    // FIXME: These are rare but not impossible. We should output some kind of
465    // warning for things like strcpy((char[]){'a', 0}, "b");
466    return state;
467  }
468}
469
470SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
471                                               const GRState *&state,
472                                               const Expr *Ex,
473                                               const MemRegion *MR) {
474  // If there's a recorded length, go ahead and return it.
475  const SVal *Recorded = state->get<CStringLength>(MR);
476  if (Recorded)
477    return *Recorded;
478
479  // Otherwise, get a new symbol and update the state.
480  unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
481  SValBuilder &svalBuilder = C.getSValBuilder();
482  QualType sizeTy = svalBuilder.getContext().getSizeType();
483  SVal strLength = svalBuilder.getMetadataSymbolVal(getTag(), MR, Ex, sizeTy, Count);
484  state = state->set<CStringLength>(MR, strLength);
485  return strLength;
486}
487
488SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state,
489                                      const Expr *Ex, SVal Buf) {
490  const MemRegion *MR = Buf.getAsRegion();
491  if (!MR) {
492    // If we can't get a region, see if it's something we /know/ isn't a
493    // C string. In the context of locations, the only time we can issue such
494    // a warning is for labels.
495    if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
496      if (ExplodedNode *N = C.generateNode(state)) {
497        if (!BT_NotCString)
498          BT_NotCString = new BuiltinBug("API",
499            "Argument is not a null-terminated string.");
500
501        llvm::SmallString<120> buf;
502        llvm::raw_svector_ostream os(buf);
503        os << "Argument to byte string function is the address of the label '"
504           << Label->getLabel()->getName()
505           << "', which is not a null-terminated string";
506
507        // Generate a report for this bug.
508        EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
509                                                          os.str(), N);
510
511        report->addRange(Ex->getSourceRange());
512        C.EmitReport(report);
513      }
514
515      return UndefinedVal();
516    }
517
518    // If it's not a region and not a label, give up.
519    return UnknownVal();
520  }
521
522  // If we have a region, strip casts from it and see if we can figure out
523  // its length. For anything we can't figure out, just return UnknownVal.
524  MR = MR->StripCasts();
525
526  switch (MR->getKind()) {
527  case MemRegion::StringRegionKind: {
528    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
529    // so we can assume that the byte length is the correct C string length.
530    SValBuilder &svalBuilder = C.getSValBuilder();
531    QualType sizeTy = svalBuilder.getContext().getSizeType();
532    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
533    return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
534  }
535  case MemRegion::SymbolicRegionKind:
536  case MemRegion::AllocaRegionKind:
537  case MemRegion::VarRegionKind:
538  case MemRegion::FieldRegionKind:
539  case MemRegion::ObjCIvarRegionKind:
540    return getCStringLengthForRegion(C, state, Ex, MR);
541  case MemRegion::CompoundLiteralRegionKind:
542    // FIXME: Can we track this? Is it necessary?
543    return UnknownVal();
544  case MemRegion::ElementRegionKind:
545    // FIXME: How can we handle this? It's not good enough to subtract the
546    // offset from the base string length; consider "123\x00567" and &a[5].
547    return UnknownVal();
548  default:
549    // Other regions (mostly non-data) can't have a reliable C string length.
550    // In this case, an error is emitted and UndefinedVal is returned.
551    // The caller should always be prepared to handle this case.
552    if (ExplodedNode *N = C.generateNode(state)) {
553      if (!BT_NotCString)
554        BT_NotCString = new BuiltinBug("API",
555          "Argument is not a null-terminated string.");
556
557      llvm::SmallString<120> buf;
558      llvm::raw_svector_ostream os(buf);
559
560      os << "Argument to byte string function is ";
561
562      if (SummarizeRegion(os, C.getASTContext(), MR))
563        os << ", which is not a null-terminated string";
564      else
565        os << "not a null-terminated string";
566
567      // Generate a report for this bug.
568      EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
569                                                        os.str(), N);
570
571      report->addRange(Ex->getSourceRange());
572      C.EmitReport(report);
573    }
574
575    return UndefinedVal();
576  }
577}
578
579const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C,
580                                                const GRState *state,
581                                                const Expr *E, SVal V) {
582  Loc *L = dyn_cast<Loc>(&V);
583  if (!L)
584    return state;
585
586  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
587  // some assumptions about the value that CFRefCount can't. Even so, it should
588  // probably be refactored.
589  if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
590    const MemRegion *R = MR->getRegion()->StripCasts();
591
592    // Are we dealing with an ElementRegion?  If so, we should be invalidating
593    // the super-region.
594    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
595      R = ER->getSuperRegion();
596      // FIXME: What about layers of ElementRegions?
597    }
598
599    // Invalidate this region.
600    unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
601    return state->invalidateRegion(R, E, Count, NULL);
602  }
603
604  // If we have a non-region value by chance, just remove the binding.
605  // FIXME: is this necessary or correct? This handles the non-Region
606  //  cases.  Is it ever valid to store to these?
607  return state->unbindLoc(*L);
608}
609
610bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
611                                     const MemRegion *MR) {
612  const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
613  if (!TR)
614    return false;
615
616  switch (TR->getKind()) {
617  case MemRegion::FunctionTextRegionKind: {
618    const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl();
619    if (FD)
620      os << "the address of the function '" << FD << "'";
621    else
622      os << "the address of a function";
623    return true;
624  }
625  case MemRegion::BlockTextRegionKind:
626    os << "block text";
627    return true;
628  case MemRegion::BlockDataRegionKind:
629    os << "a block";
630    return true;
631  case MemRegion::CXXThisRegionKind:
632  case MemRegion::CXXTempObjectRegionKind:
633    os << "a C++ temp object of type " << TR->getValueType().getAsString();
634    return true;
635  case MemRegion::VarRegionKind:
636    os << "a variable of type" << TR->getValueType().getAsString();
637    return true;
638  case MemRegion::FieldRegionKind:
639    os << "a field of type " << TR->getValueType().getAsString();
640    return true;
641  case MemRegion::ObjCIvarRegionKind:
642    os << "an instance variable of type " << TR->getValueType().getAsString();
643    return true;
644  default:
645    return false;
646  }
647}
648
649//===----------------------------------------------------------------------===//
650// evaluation of individual function calls.
651//===----------------------------------------------------------------------===//
652
653void CStringChecker::evalCopyCommon(CheckerContext &C, const GRState *state,
654                                    const Expr *Size, const Expr *Dest,
655                                    const Expr *Source, bool Restricted) {
656  // See if the size argument is zero.
657  SVal sizeVal = state->getSVal(Size);
658  QualType sizeTy = Size->getType();
659
660  const GRState *stateZeroSize, *stateNonZeroSize;
661  llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy);
662
663  // If the size is zero, there won't be any actual memory access.
664  if (stateZeroSize)
665    C.addTransition(stateZeroSize);
666
667  // If the size can be nonzero, we have to check the other arguments.
668  if (stateNonZeroSize) {
669    state = stateNonZeroSize;
670    state = CheckBufferAccess(C, state, Size, Dest, Source,
671                              /* FirstIsDst = */ true);
672    if (Restricted)
673      state = CheckOverlap(C, state, Size, Dest, Source);
674
675    if (state) {
676      // Invalidate the destination.
677      // FIXME: Even if we can't perfectly model the copy, we should see if we
678      // can use LazyCompoundVals to copy the source values into the destination.
679      // This would probably remove any existing bindings past the end of the
680      // copied region, but that's still an improvement over blank invalidation.
681      state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest));
682      C.addTransition(state);
683    }
684  }
685}
686
687
688void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) {
689  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
690  // The return value is the address of the destination buffer.
691  const Expr *Dest = CE->getArg(0);
692  const GRState *state = C.getState();
693  state = state->BindExpr(CE, state->getSVal(Dest));
694  evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1), true);
695}
696
697void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) {
698  // void *memmove(void *dst, const void *src, size_t n);
699  // The return value is the address of the destination buffer.
700  const Expr *Dest = CE->getArg(0);
701  const GRState *state = C.getState();
702  state = state->BindExpr(CE, state->getSVal(Dest));
703  evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1));
704}
705
706void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) {
707  // void bcopy(const void *src, void *dst, size_t n);
708  evalCopyCommon(C, C.getState(), CE->getArg(2), CE->getArg(1), CE->getArg(0));
709}
710
711void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) {
712  // int memcmp(const void *s1, const void *s2, size_t n);
713  const Expr *Left = CE->getArg(0);
714  const Expr *Right = CE->getArg(1);
715  const Expr *Size = CE->getArg(2);
716
717  const GRState *state = C.getState();
718  SValBuilder &svalBuilder = C.getSValBuilder();
719
720  // See if the size argument is zero.
721  SVal sizeVal = state->getSVal(Size);
722  QualType sizeTy = Size->getType();
723
724  const GRState *stateZeroSize, *stateNonZeroSize;
725  llvm::tie(stateZeroSize, stateNonZeroSize) =
726    assumeZero(C, state, sizeVal, sizeTy);
727
728  // If the size can be zero, the result will be 0 in that case, and we don't
729  // have to check either of the buffers.
730  if (stateZeroSize) {
731    state = stateZeroSize;
732    state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
733    C.addTransition(state);
734  }
735
736  // If the size can be nonzero, we have to check the other arguments.
737  if (stateNonZeroSize) {
738    state = stateNonZeroSize;
739    // If we know the two buffers are the same, we know the result is 0.
740    // First, get the two buffers' addresses. Another checker will have already
741    // made sure they're not undefined.
742    DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left));
743    DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right));
744
745    // See if they are the same.
746    DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
747    const GRState *StSameBuf, *StNotSameBuf;
748    llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
749
750    // If the two arguments might be the same buffer, we know the result is zero,
751    // and we only need to check one size.
752    if (StSameBuf) {
753      state = StSameBuf;
754      state = CheckBufferAccess(C, state, Size, Left);
755      if (state) {
756        state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
757        C.addTransition(state);
758      }
759    }
760
761    // If the two arguments might be different buffers, we have to check the
762    // size of both of them.
763    if (StNotSameBuf) {
764      state = StNotSameBuf;
765      state = CheckBufferAccess(C, state, Size, Left, Right);
766      if (state) {
767        // The return value is the comparison result, which we don't know.
768        unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
769        SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
770        state = state->BindExpr(CE, CmpV);
771        C.addTransition(state);
772      }
773    }
774  }
775}
776
777void CStringChecker::evalstrLength(CheckerContext &C, const CallExpr *CE) {
778  // size_t strlen(const char *s);
779  const GRState *state = C.getState();
780  const Expr *Arg = CE->getArg(0);
781  SVal ArgVal = state->getSVal(Arg);
782
783  // Check that the argument is non-null.
784  state = checkNonNull(C, state, Arg, ArgVal);
785
786  if (state) {
787    SVal strLength = getCStringLength(C, state, Arg, ArgVal);
788
789    // If the argument isn't a valid C string, there's no valid state to
790    // transition to.
791    if (strLength.isUndef())
792      return;
793
794    // If getCStringLength couldn't figure out the length, conjure a return
795    // value, so it can be used in constraints, at least.
796    if (strLength.isUnknown()) {
797      unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
798      strLength = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
799    }
800
801    // Bind the return value.
802    state = state->BindExpr(CE, strLength);
803    C.addTransition(state);
804  }
805}
806
807void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) {
808  // char *strcpy(char *restrict dst, const char *restrict src);
809  evalStrcpyCommon(C, CE, /* returnEnd = */ false);
810}
811
812void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) {
813  // char *stpcpy(char *restrict dst, const char *restrict src);
814  evalStrcpyCommon(C, CE, /* returnEnd = */ true);
815}
816
817void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
818                                      bool returnEnd) {
819  const GRState *state = C.getState();
820
821  // Check that the destination is non-null
822  const Expr *Dst = CE->getArg(0);
823  SVal DstVal = state->getSVal(Dst);
824
825  state = checkNonNull(C, state, Dst, DstVal);
826  if (!state)
827    return;
828
829  // Check that the source is non-null.
830  const Expr *srcExpr = CE->getArg(1);
831  SVal srcVal = state->getSVal(srcExpr);
832  state = checkNonNull(C, state, srcExpr, srcVal);
833  if (!state)
834    return;
835
836  // Get the string length of the source.
837  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
838
839  // If the source isn't a valid C string, give up.
840  if (strLength.isUndef())
841    return;
842
843  SVal Result = (returnEnd ? UnknownVal() : DstVal);
844
845  // If the destination is a MemRegion, try to check for a buffer overflow and
846  // record the new string length.
847  if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
848    // If the length is known, we can check for an overflow.
849    if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&strLength)) {
850      SVal lastElement =
851        C.getSValBuilder().evalBinOpLN(state, BO_Add, *dstRegVal,
852                                       *knownStrLength, Dst->getType());
853
854      state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true);
855      if (!state)
856        return;
857
858      // If this is a stpcpy-style copy, the last element is the return value.
859      if (returnEnd)
860        Result = lastElement;
861    }
862
863    // Invalidate the destination. This must happen before we set the C string
864    // length because invalidation will clear the length.
865    // FIXME: Even if we can't perfectly model the copy, we should see if we
866    // can use LazyCompoundVals to copy the source values into the destination.
867    // This would probably remove any existing bindings past the end of the
868    // string, but that's still an improvement over blank invalidation.
869    state = InvalidateBuffer(C, state, Dst, *dstRegVal);
870
871    // Set the C string length of the destination.
872    state = setCStringLength(state, dstRegVal->getRegion(), strLength);
873  }
874
875  // If this is a stpcpy-style copy, but we were unable to check for a buffer
876  // overflow, we still need a result. Conjure a return value.
877  if (returnEnd && Result.isUnknown()) {
878    SValBuilder &svalBuilder = C.getSValBuilder();
879    unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
880    strLength = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
881  }
882
883  // Set the return value.
884  state = state->BindExpr(CE, Result);
885  C.addTransition(state);
886}
887
888//===----------------------------------------------------------------------===//
889// The driver method, and other Checker callbacks.
890//===----------------------------------------------------------------------===//
891
892bool CStringChecker::evalCallExpr(CheckerContext &C, const CallExpr *CE) {
893  // Get the callee.  All the functions we care about are C functions
894  // with simple identifiers.
895  const GRState *state = C.getState();
896  const Expr *Callee = CE->getCallee();
897  const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl();
898
899  if (!FD)
900    return false;
901
902  // Get the name of the callee. If it's a builtin, strip off the prefix.
903  IdentifierInfo *II = FD->getIdentifier();
904  if (!II)   // if no identifier, not a simple C function
905    return false;
906  llvm::StringRef Name = II->getName();
907  if (Name.startswith("__builtin_"))
908    Name = Name.substr(10);
909
910  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
911    .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy)
912    .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp)
913    .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove)
914    .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy)
915    .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy)
916    .Case("strlen", &CStringChecker::evalstrLength)
917    .Case("bcopy", &CStringChecker::evalBcopy)
918    .Default(NULL);
919
920  // If the callee isn't a string function, let another checker handle it.
921  if (!evalFunction)
922    return false;
923
924  // Check and evaluate the call.
925  (this->*evalFunction)(C, CE);
926  return true;
927}
928
929void CStringChecker::PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS) {
930  // Record string length for char a[] = "abc";
931  const GRState *state = C.getState();
932
933  for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
934       I != E; ++I) {
935    const VarDecl *D = dyn_cast<VarDecl>(*I);
936    if (!D)
937      continue;
938
939    // FIXME: Handle array fields of structs.
940    if (!D->getType()->isArrayType())
941      continue;
942
943    const Expr *Init = D->getInit();
944    if (!Init)
945      continue;
946    if (!isa<StringLiteral>(Init))
947      continue;
948
949    Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext());
950    const MemRegion *MR = VarLoc.getAsRegion();
951    if (!MR)
952      continue;
953
954    SVal StrVal = state->getSVal(Init);
955    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
956    DefinedOrUnknownSVal strLength
957      = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
958
959    state = state->set<CStringLength>(MR, strLength);
960  }
961
962  C.addTransition(state);
963}
964
965bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) {
966  CStringLength::EntryMap Entries = state->get<CStringLength>();
967  return !Entries.isEmpty();
968}
969
970const GRState *CStringChecker::EvalRegionChanges(const GRState *state,
971                                                 const MemRegion * const *Begin,
972                                                 const MemRegion * const *End,
973                                                 bool *) {
974  CStringLength::EntryMap Entries = state->get<CStringLength>();
975  if (Entries.isEmpty())
976    return state;
977
978  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
979  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
980
981  // First build sets for the changed regions and their super-regions.
982  for ( ; Begin != End; ++Begin) {
983    const MemRegion *MR = *Begin;
984    Invalidated.insert(MR);
985
986    SuperRegions.insert(MR);
987    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
988      MR = SR->getSuperRegion();
989      SuperRegions.insert(MR);
990    }
991  }
992
993  CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
994
995  // Then loop over the entries in the current state.
996  for (CStringLength::EntryMap::iterator I = Entries.begin(),
997       E = Entries.end(); I != E; ++I) {
998    const MemRegion *MR = I.getKey();
999
1000    // Is this entry for a super-region of a changed region?
1001    if (SuperRegions.count(MR)) {
1002      Entries = F.remove(Entries, MR);
1003      continue;
1004    }
1005
1006    // Is this entry for a sub-region of a changed region?
1007    const MemRegion *Super = MR;
1008    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1009      Super = SR->getSuperRegion();
1010      if (Invalidated.count(Super)) {
1011        Entries = F.remove(Entries, MR);
1012        break;
1013      }
1014    }
1015  }
1016
1017  return state->set<CStringLength>(Entries);
1018}
1019
1020void CStringChecker::MarkLiveSymbols(const GRState *state, SymbolReaper &SR) {
1021  // Mark all symbols in our string length map as valid.
1022  CStringLength::EntryMap Entries = state->get<CStringLength>();
1023
1024  for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1025       I != E; ++I) {
1026    SVal Len = I.getData();
1027    if (SymbolRef Sym = Len.getAsSymbol())
1028      SR.markInUse(Sym);
1029  }
1030}
1031
1032void CStringChecker::evalDeadSymbols(CheckerContext &C, SymbolReaper &SR) {
1033  if (!SR.hasDeadSymbols())
1034    return;
1035
1036  const GRState *state = C.getState();
1037  CStringLength::EntryMap Entries = state->get<CStringLength>();
1038  if (Entries.isEmpty())
1039    return;
1040
1041  CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1042  for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1043       I != E; ++I) {
1044    SVal Len = I.getData();
1045    if (SymbolRef Sym = Len.getAsSymbol()) {
1046      if (SR.isDead(Sym))
1047        Entries = F.remove(Entries, I.getKey());
1048    }
1049  }
1050
1051  state = state->set<CStringLength>(Entries);
1052  C.generateNode(state);
1053}
1054