CStringChecker.cpp revision 1655bcd052a67a3050fc55df8ecce57342352e68
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This defines CStringChecker, which is an assortment of checks on calls
11// to functions in <string.h>.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ClangSACheckers.h"
16#include "InterCheckerAPI.h"
17#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallString.h"
24#include "llvm/ADT/StringSwitch.h"
25#include "llvm/Support/raw_ostream.h"
26
27using namespace clang;
28using namespace ento;
29
30namespace {
31class CStringChecker : public Checker< eval::Call,
32                                         check::PreStmt<DeclStmt>,
33                                         check::LiveSymbols,
34                                         check::DeadSymbols,
35                                         check::RegionChanges
36                                         > {
37  mutable OwningPtr<BugType> BT_Null,
38                             BT_Bounds,
39                             BT_Overlap,
40                             BT_NotCString,
41                             BT_AdditionOverflow;
42
43  mutable const char *CurrentFunctionDescription;
44
45public:
46  /// The filter is used to filter out the diagnostics which are not enabled by
47  /// the user.
48  struct CStringChecksFilter {
49    DefaultBool CheckCStringNullArg;
50    DefaultBool CheckCStringOutOfBounds;
51    DefaultBool CheckCStringBufferOverlap;
52    DefaultBool CheckCStringNotNullTerm;
53  };
54
55  CStringChecksFilter Filter;
56
57  static void *getTag() { static int tag; return &tag; }
58
59  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
60  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
61  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
62  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
63  bool wantsRegionChangeUpdate(ProgramStateRef state) const;
64
65  ProgramStateRef
66    checkRegionChanges(ProgramStateRef state,
67                       const InvalidatedSymbols *,
68                       ArrayRef<const MemRegion *> ExplicitRegions,
69                       ArrayRef<const MemRegion *> Regions,
70                       const CallEvent *Call) const;
71
72  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
73                                          const CallExpr *) const;
74
75  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
76  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
77  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
78  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
79  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
80                      ProgramStateRef state,
81                      const Expr *Size,
82                      const Expr *Source,
83                      const Expr *Dest,
84                      bool Restricted = false,
85                      bool IsMempcpy = false) const;
86
87  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
88
89  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
90  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
91  void evalstrLengthCommon(CheckerContext &C,
92                           const CallExpr *CE,
93                           bool IsStrnlen = false) const;
94
95  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
96  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
97  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
98  void evalStrcpyCommon(CheckerContext &C,
99                        const CallExpr *CE,
100                        bool returnEnd,
101                        bool isBounded,
102                        bool isAppending) const;
103
104  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
105  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
106
107  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
108  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
109  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
110  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
111  void evalStrcmpCommon(CheckerContext &C,
112                        const CallExpr *CE,
113                        bool isBounded = false,
114                        bool ignoreCase = false) const;
115
116  // Utility methods
117  std::pair<ProgramStateRef , ProgramStateRef >
118  static assumeZero(CheckerContext &C,
119                    ProgramStateRef state, SVal V, QualType Ty);
120
121  static ProgramStateRef setCStringLength(ProgramStateRef state,
122                                              const MemRegion *MR,
123                                              SVal strLength);
124  static SVal getCStringLengthForRegion(CheckerContext &C,
125                                        ProgramStateRef &state,
126                                        const Expr *Ex,
127                                        const MemRegion *MR,
128                                        bool hypothetical);
129  SVal getCStringLength(CheckerContext &C,
130                        ProgramStateRef &state,
131                        const Expr *Ex,
132                        SVal Buf,
133                        bool hypothetical = false) const;
134
135  const StringLiteral *getCStringLiteral(CheckerContext &C,
136                                         ProgramStateRef &state,
137                                         const Expr *expr,
138                                         SVal val) const;
139
140  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
141                                              ProgramStateRef state,
142                                              const Expr *Ex, SVal V);
143
144  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
145                              const MemRegion *MR);
146
147  // Re-usable checks
148  ProgramStateRef checkNonNull(CheckerContext &C,
149                                   ProgramStateRef state,
150                                   const Expr *S,
151                                   SVal l) const;
152  ProgramStateRef CheckLocation(CheckerContext &C,
153                                    ProgramStateRef state,
154                                    const Expr *S,
155                                    SVal l,
156                                    const char *message = NULL) const;
157  ProgramStateRef CheckBufferAccess(CheckerContext &C,
158                                        ProgramStateRef state,
159                                        const Expr *Size,
160                                        const Expr *FirstBuf,
161                                        const Expr *SecondBuf,
162                                        const char *firstMessage = NULL,
163                                        const char *secondMessage = NULL,
164                                        bool WarnAboutSize = false) const;
165
166  ProgramStateRef CheckBufferAccess(CheckerContext &C,
167                                        ProgramStateRef state,
168                                        const Expr *Size,
169                                        const Expr *Buf,
170                                        const char *message = NULL,
171                                        bool WarnAboutSize = false) const {
172    // This is a convenience override.
173    return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL,
174                             WarnAboutSize);
175  }
176  ProgramStateRef CheckOverlap(CheckerContext &C,
177                                   ProgramStateRef state,
178                                   const Expr *Size,
179                                   const Expr *First,
180                                   const Expr *Second) const;
181  void emitOverlapBug(CheckerContext &C,
182                      ProgramStateRef state,
183                      const Stmt *First,
184                      const Stmt *Second) const;
185
186  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
187                                            ProgramStateRef state,
188                                            NonLoc left,
189                                            NonLoc right) const;
190};
191
192} //end anonymous namespace
193
// Program-state map from a memory region to the SVal recorded as that region's
// C string length. This file reads and updates it through
// get<CStringLength>, set<CStringLength> and remove<CStringLength>.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
195
196//===----------------------------------------------------------------------===//
197// Individual checks and utility methods.
198//===----------------------------------------------------------------------===//
199
200std::pair<ProgramStateRef , ProgramStateRef >
201CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
202                           QualType Ty) {
203  DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
204  if (!val)
205    return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
206
207  SValBuilder &svalBuilder = C.getSValBuilder();
208  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
209  return state->assume(svalBuilder.evalEQ(state, *val, zero));
210}
211
212ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
213                                            ProgramStateRef state,
214                                            const Expr *S, SVal l) const {
215  // If a previous check has failed, propagate the failure.
216  if (!state)
217    return NULL;
218
219  ProgramStateRef stateNull, stateNonNull;
220  llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
221
222  if (stateNull && !stateNonNull) {
223    if (!Filter.CheckCStringNullArg)
224      return NULL;
225
226    ExplodedNode *N = C.generateSink(stateNull);
227    if (!N)
228      return NULL;
229
230    if (!BT_Null)
231      BT_Null.reset(new BuiltinBug("Unix API",
232        "Null pointer argument in call to byte string function"));
233
234    SmallString<80> buf;
235    llvm::raw_svector_ostream os(buf);
236    assert(CurrentFunctionDescription);
237    os << "Null pointer argument in call to " << CurrentFunctionDescription;
238
239    // Generate a report for this bug.
240    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
241    BugReport *report = new BugReport(*BT, os.str(), N);
242
243    report->addRange(S->getSourceRange());
244    bugreporter::trackNullOrUndefValue(N, S, *report);
245    C.emitReport(report);
246    return NULL;
247  }
248
249  // From here on, assume that the value is non-null.
250  assert(stateNonNull);
251  return stateNonNull;
252}
253
254// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
255ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
256                                             ProgramStateRef state,
257                                             const Expr *S, SVal l,
258                                             const char *warningMsg) const {
259  // If a previous check has failed, propagate the failure.
260  if (!state)
261    return NULL;
262
263  // Check for out of bound array element access.
264  const MemRegion *R = l.getAsRegion();
265  if (!R)
266    return state;
267
268  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
269  if (!ER)
270    return state;
271
272  assert(ER->getValueType() == C.getASTContext().CharTy &&
273    "CheckLocation should only be called with char* ElementRegions");
274
275  // Get the size of the array.
276  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
277  SValBuilder &svalBuilder = C.getSValBuilder();
278  SVal Extent =
279    svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
280  DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
281
282  // Get the index of the accessed element.
283  DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
284
285  ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
286  ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
287  if (StOutBound && !StInBound) {
288    ExplodedNode *N = C.generateSink(StOutBound);
289    if (!N)
290      return NULL;
291
292    if (!BT_Bounds) {
293      BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
294        "Byte string function accesses out-of-bound array element"));
295    }
296    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
297
298    // Generate a report for this bug.
299    BugReport *report;
300    if (warningMsg) {
301      report = new BugReport(*BT, warningMsg, N);
302    } else {
303      assert(CurrentFunctionDescription);
304      assert(CurrentFunctionDescription[0] != '\0');
305
306      SmallString<80> buf;
307      llvm::raw_svector_ostream os(buf);
308      os << (char)toupper(CurrentFunctionDescription[0])
309         << &CurrentFunctionDescription[1]
310         << " accesses out-of-bound array element";
311      report = new BugReport(*BT, os.str(), N);
312    }
313
314    // FIXME: It would be nice to eventually make this diagnostic more clear,
315    // e.g., by referencing the original declaration or by saying *why* this
316    // reference is outside the range.
317
318    report->addRange(S->getSourceRange());
319    C.emitReport(report);
320    return NULL;
321  }
322
323  // Array bound check succeeded.  From this point forward the array bound
324  // should always succeed.
325  return StInBound;
326}
327
328ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
329                                                 ProgramStateRef state,
330                                                 const Expr *Size,
331                                                 const Expr *FirstBuf,
332                                                 const Expr *SecondBuf,
333                                                 const char *firstMessage,
334                                                 const char *secondMessage,
335                                                 bool WarnAboutSize) const {
336  // If a previous check has failed, propagate the failure.
337  if (!state)
338    return NULL;
339
340  SValBuilder &svalBuilder = C.getSValBuilder();
341  ASTContext &Ctx = svalBuilder.getContext();
342  const LocationContext *LCtx = C.getLocationContext();
343
344  QualType sizeTy = Size->getType();
345  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
346
347  // Check that the first buffer is non-null.
348  SVal BufVal = state->getSVal(FirstBuf, LCtx);
349  state = checkNonNull(C, state, FirstBuf, BufVal);
350  if (!state)
351    return NULL;
352
353  // If out-of-bounds checking is turned off, skip the rest.
354  if (!Filter.CheckCStringOutOfBounds)
355    return state;
356
357  // Get the access length and make sure it is known.
358  // FIXME: This assumes the caller has already checked that the access length
359  // is positive. And that it's unsigned.
360  SVal LengthVal = state->getSVal(Size, LCtx);
361  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
362  if (!Length)
363    return state;
364
365  // Compute the offset of the last element to be accessed: size-1.
366  NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
367  NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
368                                                    *Length, One, sizeTy));
369
370  // Check that the first buffer is sufficiently long.
371  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
372  if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
373    const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
374
375    SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
376                                          LastOffset, PtrTy);
377    state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
378
379    // If the buffer isn't large enough, abort.
380    if (!state)
381      return NULL;
382  }
383
384  // If there's a second buffer, check it as well.
385  if (SecondBuf) {
386    BufVal = state->getSVal(SecondBuf, LCtx);
387    state = checkNonNull(C, state, SecondBuf, BufVal);
388    if (!state)
389      return NULL;
390
391    BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
392    if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
393      const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
394
395      SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
396                                            LastOffset, PtrTy);
397      state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
398    }
399  }
400
401  // Large enough or not, return this state!
402  return state;
403}
404
405ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
406                                            ProgramStateRef state,
407                                            const Expr *Size,
408                                            const Expr *First,
409                                            const Expr *Second) const {
410  if (!Filter.CheckCStringBufferOverlap)
411    return state;
412
413  // Do a simple check for overlap: if the two arguments are from the same
414  // buffer, see if the end of the first is greater than the start of the second
415  // or vice versa.
416
417  // If a previous check has failed, propagate the failure.
418  if (!state)
419    return NULL;
420
421  ProgramStateRef stateTrue, stateFalse;
422
423  // Get the buffer values and make sure they're known locations.
424  const LocationContext *LCtx = C.getLocationContext();
425  SVal firstVal = state->getSVal(First, LCtx);
426  SVal secondVal = state->getSVal(Second, LCtx);
427
428  Loc *firstLoc = dyn_cast<Loc>(&firstVal);
429  if (!firstLoc)
430    return state;
431
432  Loc *secondLoc = dyn_cast<Loc>(&secondVal);
433  if (!secondLoc)
434    return state;
435
436  // Are the two values the same?
437  SValBuilder &svalBuilder = C.getSValBuilder();
438  llvm::tie(stateTrue, stateFalse) =
439    state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
440
441  if (stateTrue && !stateFalse) {
442    // If the values are known to be equal, that's automatically an overlap.
443    emitOverlapBug(C, stateTrue, First, Second);
444    return NULL;
445  }
446
447  // assume the two expressions are not equal.
448  assert(stateFalse);
449  state = stateFalse;
450
451  // Which value comes first?
452  QualType cmpTy = svalBuilder.getConditionType();
453  SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
454                                         *firstLoc, *secondLoc, cmpTy);
455  DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
456  if (!reverseTest)
457    return state;
458
459  llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
460  if (stateTrue) {
461    if (stateFalse) {
462      // If we don't know which one comes first, we can't perform this test.
463      return state;
464    } else {
465      // Switch the values so that firstVal is before secondVal.
466      Loc *tmpLoc = firstLoc;
467      firstLoc = secondLoc;
468      secondLoc = tmpLoc;
469
470      // Switch the Exprs as well, so that they still correspond.
471      const Expr *tmpExpr = First;
472      First = Second;
473      Second = tmpExpr;
474    }
475  }
476
477  // Get the length, and make sure it too is known.
478  SVal LengthVal = state->getSVal(Size, LCtx);
479  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
480  if (!Length)
481    return state;
482
483  // Convert the first buffer's start address to char*.
484  // Bail out if the cast fails.
485  ASTContext &Ctx = svalBuilder.getContext();
486  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
487  SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
488                                         First->getType());
489  Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
490  if (!FirstStartLoc)
491    return state;
492
493  // Compute the end of the first buffer. Bail out if THAT fails.
494  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
495                                 *FirstStartLoc, *Length, CharPtrTy);
496  Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
497  if (!FirstEndLoc)
498    return state;
499
500  // Is the end of the first buffer past the start of the second buffer?
501  SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
502                                *FirstEndLoc, *secondLoc, cmpTy);
503  DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
504  if (!OverlapTest)
505    return state;
506
507  llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
508
509  if (stateTrue && !stateFalse) {
510    // Overlap!
511    emitOverlapBug(C, stateTrue, First, Second);
512    return NULL;
513  }
514
515  // assume the two expressions don't overlap.
516  assert(stateFalse);
517  return stateFalse;
518}
519
520void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
521                                  const Stmt *First, const Stmt *Second) const {
522  ExplodedNode *N = C.generateSink(state);
523  if (!N)
524    return;
525
526  if (!BT_Overlap)
527    BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
528
529  // Generate a report for this bug.
530  BugReport *report =
531    new BugReport(*BT_Overlap,
532      "Arguments must not be overlapping buffers", N);
533  report->addRange(First->getSourceRange());
534  report->addRange(Second->getSourceRange());
535
536  C.emitReport(report);
537}
538
539ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
540                                                     ProgramStateRef state,
541                                                     NonLoc left,
542                                                     NonLoc right) const {
543  // If out-of-bounds checking is turned off, skip the rest.
544  if (!Filter.CheckCStringOutOfBounds)
545    return state;
546
547  // If a previous check has failed, propagate the failure.
548  if (!state)
549    return NULL;
550
551  SValBuilder &svalBuilder = C.getSValBuilder();
552  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
553
554  QualType sizeTy = svalBuilder.getContext().getSizeType();
555  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
556  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
557
558  SVal maxMinusRight;
559  if (isa<nonloc::ConcreteInt>(right)) {
560    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
561                                                 sizeTy);
562  } else {
563    // Try switching the operands. (The order of these two assignments is
564    // important!)
565    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
566                                            sizeTy);
567    left = right;
568  }
569
570  if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
571    QualType cmpTy = svalBuilder.getConditionType();
572    // If left > max - right, we have an overflow.
573    SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
574                                                *maxMinusRightNL, cmpTy);
575
576    ProgramStateRef stateOverflow, stateOkay;
577    llvm::tie(stateOverflow, stateOkay) =
578      state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
579
580    if (stateOverflow && !stateOkay) {
581      // We have an overflow. Emit a bug report.
582      ExplodedNode *N = C.generateSink(stateOverflow);
583      if (!N)
584        return NULL;
585
586      if (!BT_AdditionOverflow)
587        BT_AdditionOverflow.reset(new BuiltinBug("API",
588          "Sum of expressions causes overflow"));
589
590      // This isn't a great error message, but this should never occur in real
591      // code anyway -- you'd have to create a buffer longer than a size_t can
592      // represent, which is sort of a contradiction.
593      const char *warning =
594        "This expression will create a string whose length is too big to "
595        "be represented as a size_t";
596
597      // Generate a report for this bug.
598      BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
599      C.emitReport(report);
600
601      return NULL;
602    }
603
604    // From now on, assume an overflow didn't occur.
605    assert(stateOkay);
606    state = stateOkay;
607  }
608
609  return state;
610}
611
612ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
613                                                const MemRegion *MR,
614                                                SVal strLength) {
615  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
616
617  MR = MR->StripCasts();
618
619  switch (MR->getKind()) {
620  case MemRegion::StringRegionKind:
621    // FIXME: This can happen if we strcpy() into a string region. This is
622    // undefined [C99 6.4.5p6], but we should still warn about it.
623    return state;
624
625  case MemRegion::SymbolicRegionKind:
626  case MemRegion::AllocaRegionKind:
627  case MemRegion::VarRegionKind:
628  case MemRegion::FieldRegionKind:
629  case MemRegion::ObjCIvarRegionKind:
630    // These are the types we can currently track string lengths for.
631    break;
632
633  case MemRegion::ElementRegionKind:
634    // FIXME: Handle element regions by upper-bounding the parent region's
635    // string length.
636    return state;
637
638  default:
639    // Other regions (mostly non-data) can't have a reliable C string length.
640    // For now, just ignore the change.
641    // FIXME: These are rare but not impossible. We should output some kind of
642    // warning for things like strcpy((char[]){'a', 0}, "b");
643    return state;
644  }
645
646  if (strLength.isUnknown())
647    return state->remove<CStringLength>(MR);
648
649  return state->set<CStringLength>(MR, strLength);
650}
651
652SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
653                                               ProgramStateRef &state,
654                                               const Expr *Ex,
655                                               const MemRegion *MR,
656                                               bool hypothetical) {
657  if (!hypothetical) {
658    // If there's a recorded length, go ahead and return it.
659    const SVal *Recorded = state->get<CStringLength>(MR);
660    if (Recorded)
661      return *Recorded;
662  }
663
664  // Otherwise, get a new symbol and update the state.
665  SValBuilder &svalBuilder = C.getSValBuilder();
666  QualType sizeTy = svalBuilder.getContext().getSizeType();
667  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
668                                                    MR, Ex, sizeTy,
669                                                    C.blockCount());
670
671  if (!hypothetical)
672    state = state->set<CStringLength>(MR, strLength);
673
674  return strLength;
675}
676
677SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
678                                      const Expr *Ex, SVal Buf,
679                                      bool hypothetical) const {
680  const MemRegion *MR = Buf.getAsRegion();
681  if (!MR) {
682    // If we can't get a region, see if it's something we /know/ isn't a
683    // C string. In the context of locations, the only time we can issue such
684    // a warning is for labels.
685    if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
686      if (!Filter.CheckCStringNotNullTerm)
687        return UndefinedVal();
688
689      if (ExplodedNode *N = C.addTransition(state)) {
690        if (!BT_NotCString)
691          BT_NotCString.reset(new BuiltinBug("Unix API",
692            "Argument is not a null-terminated string."));
693
694        SmallString<120> buf;
695        llvm::raw_svector_ostream os(buf);
696        assert(CurrentFunctionDescription);
697        os << "Argument to " << CurrentFunctionDescription
698           << " is the address of the label '" << Label->getLabel()->getName()
699           << "', which is not a null-terminated string";
700
701        // Generate a report for this bug.
702        BugReport *report = new BugReport(*BT_NotCString,
703                                                          os.str(), N);
704
705        report->addRange(Ex->getSourceRange());
706        C.emitReport(report);
707      }
708      return UndefinedVal();
709
710    }
711
712    // If it's not a region and not a label, give up.
713    return UnknownVal();
714  }
715
716  // If we have a region, strip casts from it and see if we can figure out
717  // its length. For anything we can't figure out, just return UnknownVal.
718  MR = MR->StripCasts();
719
720  switch (MR->getKind()) {
721  case MemRegion::StringRegionKind: {
722    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
723    // so we can assume that the byte length is the correct C string length.
724    SValBuilder &svalBuilder = C.getSValBuilder();
725    QualType sizeTy = svalBuilder.getContext().getSizeType();
726    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
727    return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
728  }
729  case MemRegion::SymbolicRegionKind:
730  case MemRegion::AllocaRegionKind:
731  case MemRegion::VarRegionKind:
732  case MemRegion::FieldRegionKind:
733  case MemRegion::ObjCIvarRegionKind:
734    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
735  case MemRegion::CompoundLiteralRegionKind:
736    // FIXME: Can we track this? Is it necessary?
737    return UnknownVal();
738  case MemRegion::ElementRegionKind:
739    // FIXME: How can we handle this? It's not good enough to subtract the
740    // offset from the base string length; consider "123\x00567" and &a[5].
741    return UnknownVal();
742  default:
743    // Other regions (mostly non-data) can't have a reliable C string length.
744    // In this case, an error is emitted and UndefinedVal is returned.
745    // The caller should always be prepared to handle this case.
746    if (!Filter.CheckCStringNotNullTerm)
747      return UndefinedVal();
748
749    if (ExplodedNode *N = C.addTransition(state)) {
750      if (!BT_NotCString)
751        BT_NotCString.reset(new BuiltinBug("Unix API",
752          "Argument is not a null-terminated string."));
753
754      SmallString<120> buf;
755      llvm::raw_svector_ostream os(buf);
756
757      assert(CurrentFunctionDescription);
758      os << "Argument to " << CurrentFunctionDescription << " is ";
759
760      if (SummarizeRegion(os, C.getASTContext(), MR))
761        os << ", which is not a null-terminated string";
762      else
763        os << "not a null-terminated string";
764
765      // Generate a report for this bug.
766      BugReport *report = new BugReport(*BT_NotCString,
767                                                        os.str(), N);
768
769      report->addRange(Ex->getSourceRange());
770      C.emitReport(report);
771    }
772
773    return UndefinedVal();
774  }
775}
776
777const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
778  ProgramStateRef &state, const Expr *expr, SVal val) const {
779
780  // Get the memory region pointed to by the val.
781  const MemRegion *bufRegion = val.getAsRegion();
782  if (!bufRegion)
783    return NULL;
784
785  // Strip casts off the memory region.
786  bufRegion = bufRegion->StripCasts();
787
788  // Cast the memory region to a string region.
789  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
790  if (!strRegion)
791    return NULL;
792
793  // Return the actual string in the string region.
794  return strRegion->getStringLiteral();
795}
796
797ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
798                                                ProgramStateRef state,
799                                                const Expr *E, SVal V) {
800  Loc *L = dyn_cast<Loc>(&V);
801  if (!L)
802    return state;
803
804  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
805  // some assumptions about the value that CFRefCount can't. Even so, it should
806  // probably be refactored.
807  if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
808    const MemRegion *R = MR->getRegion()->StripCasts();
809
810    // Are we dealing with an ElementRegion?  If so, we should be invalidating
811    // the super-region.
812    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
813      R = ER->getSuperRegion();
814      // FIXME: What about layers of ElementRegions?
815    }
816
817    // Invalidate this region.
818    const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
819    return state->invalidateRegions(R, E, C.blockCount(), LCtx,
820                                    /*CausedByPointerEscape*/ false);
821  }
822
823  // If we have a non-region value by chance, just remove the binding.
824  // FIXME: is this necessary or correct? This handles the non-Region
825  //  cases.  Is it ever valid to store to these?
826  return state->killBinding(*L);
827}
828
829bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
830                                     const MemRegion *MR) {
831  const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
832
833  switch (MR->getKind()) {
834  case MemRegion::FunctionTextRegionKind: {
835    const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
836    if (FD)
837      os << "the address of the function '" << *FD << '\'';
838    else
839      os << "the address of a function";
840    return true;
841  }
842  case MemRegion::BlockTextRegionKind:
843    os << "block text";
844    return true;
845  case MemRegion::BlockDataRegionKind:
846    os << "a block";
847    return true;
848  case MemRegion::CXXThisRegionKind:
849  case MemRegion::CXXTempObjectRegionKind:
850    os << "a C++ temp object of type " << TVR->getValueType().getAsString();
851    return true;
852  case MemRegion::VarRegionKind:
853    os << "a variable of type" << TVR->getValueType().getAsString();
854    return true;
855  case MemRegion::FieldRegionKind:
856    os << "a field of type " << TVR->getValueType().getAsString();
857    return true;
858  case MemRegion::ObjCIvarRegionKind:
859    os << "an instance variable of type " << TVR->getValueType().getAsString();
860    return true;
861  default:
862    return false;
863  }
864}
865
866//===----------------------------------------------------------------------===//
867// evaluation of individual function calls.
868//===----------------------------------------------------------------------===//
869
870void CStringChecker::evalCopyCommon(CheckerContext &C,
871                                    const CallExpr *CE,
872                                    ProgramStateRef state,
873                                    const Expr *Size, const Expr *Dest,
874                                    const Expr *Source, bool Restricted,
875                                    bool IsMempcpy) const {
876  CurrentFunctionDescription = "memory copy function";
877
878  // See if the size argument is zero.
879  const LocationContext *LCtx = C.getLocationContext();
880  SVal sizeVal = state->getSVal(Size, LCtx);
881  QualType sizeTy = Size->getType();
882
883  ProgramStateRef stateZeroSize, stateNonZeroSize;
884  llvm::tie(stateZeroSize, stateNonZeroSize) =
885    assumeZero(C, state, sizeVal, sizeTy);
886
887  // Get the value of the Dest.
888  SVal destVal = state->getSVal(Dest, LCtx);
889
890  // If the size is zero, there won't be any actual memory access, so
891  // just bind the return value to the destination buffer and return.
892  if (stateZeroSize && !stateNonZeroSize) {
893    stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
894    C.addTransition(stateZeroSize);
895    return;
896  }
897
898  // If the size can be nonzero, we have to check the other arguments.
899  if (stateNonZeroSize) {
900    state = stateNonZeroSize;
901
902    // Ensure the destination is not null. If it is NULL there will be a
903    // NULL pointer dereference.
904    state = checkNonNull(C, state, Dest, destVal);
905    if (!state)
906      return;
907
908    // Get the value of the Src.
909    SVal srcVal = state->getSVal(Source, LCtx);
910
911    // Ensure the source is not null. If it is NULL there will be a
912    // NULL pointer dereference.
913    state = checkNonNull(C, state, Source, srcVal);
914    if (!state)
915      return;
916
917    // Ensure the accesses are valid and that the buffers do not overlap.
918    const char * const writeWarning =
919      "Memory copy function overflows destination buffer";
920    state = CheckBufferAccess(C, state, Size, Dest, Source,
921                              writeWarning, /* sourceWarning = */ NULL);
922    if (Restricted)
923      state = CheckOverlap(C, state, Size, Dest, Source);
924
925    if (!state)
926      return;
927
928    // If this is mempcpy, get the byte after the last byte copied and
929    // bind the expr.
930    if (IsMempcpy) {
931      loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
932      assert(destRegVal && "Destination should be a known MemRegionVal here");
933
934      // Get the length to copy.
935      NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
936
937      if (lenValNonLoc) {
938        // Get the byte after the last byte copied.
939        SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
940                                                          *destRegVal,
941                                                          *lenValNonLoc,
942                                                          Dest->getType());
943
944        // The byte after the last byte copied is the return value.
945        state = state->BindExpr(CE, LCtx, lastElement);
946      } else {
947        // If we don't know how much we copied, we can at least
948        // conjure a return value for later.
949        SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx,
950                                                          C.blockCount());
951        state = state->BindExpr(CE, LCtx, result);
952      }
953
954    } else {
955      // All other copies return the destination buffer.
956      // (Well, bcopy() has a void return type, but this won't hurt.)
957      state = state->BindExpr(CE, LCtx, destVal);
958    }
959
960    // Invalidate the destination.
961    // FIXME: Even if we can't perfectly model the copy, we should see if we
962    // can use LazyCompoundVals to copy the source values into the destination.
963    // This would probably remove any existing bindings past the end of the
964    // copied region, but that's still an improvement over blank invalidation.
965    state = InvalidateBuffer(C, state, Dest,
966                             state->getSVal(Dest, C.getLocationContext()));
967    C.addTransition(state);
968  }
969}
970
971
972void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
973  if (CE->getNumArgs() < 3)
974    return;
975
976  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
977  // The return value is the address of the destination buffer.
978  const Expr *Dest = CE->getArg(0);
979  ProgramStateRef state = C.getState();
980
981  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
982}
983
984void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
985  if (CE->getNumArgs() < 3)
986    return;
987
988  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
989  // The return value is a pointer to the byte following the last written byte.
990  const Expr *Dest = CE->getArg(0);
991  ProgramStateRef state = C.getState();
992
993  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
994}
995
996void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
997  if (CE->getNumArgs() < 3)
998    return;
999
1000  // void *memmove(void *dst, const void *src, size_t n);
1001  // The return value is the address of the destination buffer.
1002  const Expr *Dest = CE->getArg(0);
1003  ProgramStateRef state = C.getState();
1004
1005  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1006}
1007
1008void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1009  if (CE->getNumArgs() < 3)
1010    return;
1011
1012  // void bcopy(const void *src, void *dst, size_t n);
1013  evalCopyCommon(C, CE, C.getState(),
1014                 CE->getArg(2), CE->getArg(1), CE->getArg(0));
1015}
1016
1017void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1018  if (CE->getNumArgs() < 3)
1019    return;
1020
1021  // int memcmp(const void *s1, const void *s2, size_t n);
1022  CurrentFunctionDescription = "memory comparison function";
1023
1024  const Expr *Left = CE->getArg(0);
1025  const Expr *Right = CE->getArg(1);
1026  const Expr *Size = CE->getArg(2);
1027
1028  ProgramStateRef state = C.getState();
1029  SValBuilder &svalBuilder = C.getSValBuilder();
1030
1031  // See if the size argument is zero.
1032  const LocationContext *LCtx = C.getLocationContext();
1033  SVal sizeVal = state->getSVal(Size, LCtx);
1034  QualType sizeTy = Size->getType();
1035
1036  ProgramStateRef stateZeroSize, stateNonZeroSize;
1037  llvm::tie(stateZeroSize, stateNonZeroSize) =
1038    assumeZero(C, state, sizeVal, sizeTy);
1039
1040  // If the size can be zero, the result will be 0 in that case, and we don't
1041  // have to check either of the buffers.
1042  if (stateZeroSize) {
1043    state = stateZeroSize;
1044    state = state->BindExpr(CE, LCtx,
1045                            svalBuilder.makeZeroVal(CE->getType()));
1046    C.addTransition(state);
1047  }
1048
1049  // If the size can be nonzero, we have to check the other arguments.
1050  if (stateNonZeroSize) {
1051    state = stateNonZeroSize;
1052    // If we know the two buffers are the same, we know the result is 0.
1053    // First, get the two buffers' addresses. Another checker will have already
1054    // made sure they're not undefined.
1055    DefinedOrUnknownSVal LV =
1056      cast<DefinedOrUnknownSVal>(state->getSVal(Left, LCtx));
1057    DefinedOrUnknownSVal RV =
1058      cast<DefinedOrUnknownSVal>(state->getSVal(Right, LCtx));
1059
1060    // See if they are the same.
1061    DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1062    ProgramStateRef StSameBuf, StNotSameBuf;
1063    llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1064
1065    // If the two arguments might be the same buffer, we know the result is 0,
1066    // and we only need to check one size.
1067    if (StSameBuf) {
1068      state = StSameBuf;
1069      state = CheckBufferAccess(C, state, Size, Left);
1070      if (state) {
1071        state = StSameBuf->BindExpr(CE, LCtx,
1072                                    svalBuilder.makeZeroVal(CE->getType()));
1073        C.addTransition(state);
1074      }
1075    }
1076
1077    // If the two arguments might be different buffers, we have to check the
1078    // size of both of them.
1079    if (StNotSameBuf) {
1080      state = StNotSameBuf;
1081      state = CheckBufferAccess(C, state, Size, Left, Right);
1082      if (state) {
1083        // The return value is the comparison result, which we don't know.
1084        SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1085        state = state->BindExpr(CE, LCtx, CmpV);
1086        C.addTransition(state);
1087      }
1088    }
1089  }
1090}
1091
1092void CStringChecker::evalstrLength(CheckerContext &C,
1093                                   const CallExpr *CE) const {
1094  if (CE->getNumArgs() < 1)
1095    return;
1096
1097  // size_t strlen(const char *s);
1098  evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1099}
1100
1101void CStringChecker::evalstrnLength(CheckerContext &C,
1102                                    const CallExpr *CE) const {
1103  if (CE->getNumArgs() < 2)
1104    return;
1105
1106  // size_t strnlen(const char *s, size_t maxlen);
1107  evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1108}
1109
1110void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1111                                         bool IsStrnlen) const {
1112  CurrentFunctionDescription = "string length function";
1113  ProgramStateRef state = C.getState();
1114  const LocationContext *LCtx = C.getLocationContext();
1115
1116  if (IsStrnlen) {
1117    const Expr *maxlenExpr = CE->getArg(1);
1118    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1119
1120    ProgramStateRef stateZeroSize, stateNonZeroSize;
1121    llvm::tie(stateZeroSize, stateNonZeroSize) =
1122      assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1123
1124    // If the size can be zero, the result will be 0 in that case, and we don't
1125    // have to check the string itself.
1126    if (stateZeroSize) {
1127      SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1128      stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1129      C.addTransition(stateZeroSize);
1130    }
1131
1132    // If the size is GUARANTEED to be zero, we're done!
1133    if (!stateNonZeroSize)
1134      return;
1135
1136    // Otherwise, record the assumption that the size is nonzero.
1137    state = stateNonZeroSize;
1138  }
1139
1140  // Check that the string argument is non-null.
1141  const Expr *Arg = CE->getArg(0);
1142  SVal ArgVal = state->getSVal(Arg, LCtx);
1143
1144  state = checkNonNull(C, state, Arg, ArgVal);
1145
1146  if (!state)
1147    return;
1148
1149  SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1150
1151  // If the argument isn't a valid C string, there's no valid state to
1152  // transition to.
1153  if (strLength.isUndef())
1154    return;
1155
1156  DefinedOrUnknownSVal result = UnknownVal();
1157
1158  // If the check is for strnlen() then bind the return value to no more than
1159  // the maxlen value.
1160  if (IsStrnlen) {
1161    QualType cmpTy = C.getSValBuilder().getConditionType();
1162
1163    // It's a little unfortunate to be getting this again,
1164    // but it's not that expensive...
1165    const Expr *maxlenExpr = CE->getArg(1);
1166    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1167
1168    NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1169    NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
1170
1171    if (strLengthNL && maxlenValNL) {
1172      ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1173
1174      // Check if the strLength is greater than the maxlen.
1175      llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1176        state->assume(cast<DefinedOrUnknownSVal>
1177                      (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1178                                                      *strLengthNL,
1179                                                      *maxlenValNL,
1180                                                      cmpTy)));
1181
1182      if (stateStringTooLong && !stateStringNotTooLong) {
1183        // If the string is longer than maxlen, return maxlen.
1184        result = *maxlenValNL;
1185      } else if (stateStringNotTooLong && !stateStringTooLong) {
1186        // If the string is shorter than maxlen, return its length.
1187        result = *strLengthNL;
1188      }
1189    }
1190
1191    if (result.isUnknown()) {
1192      // If we don't have enough information for a comparison, there's
1193      // no guarantee the full string length will actually be returned.
1194      // All we know is the return value is the min of the string length
1195      // and the limit. This is better than nothing.
1196      result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1197      NonLoc *resultNL = cast<NonLoc>(&result);
1198
1199      if (strLengthNL) {
1200        state = state->assume(cast<DefinedOrUnknownSVal>
1201                              (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1202                                                              *resultNL,
1203                                                              *strLengthNL,
1204                                                              cmpTy)), true);
1205      }
1206
1207      if (maxlenValNL) {
1208        state = state->assume(cast<DefinedOrUnknownSVal>
1209                              (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1210                                                              *resultNL,
1211                                                              *maxlenValNL,
1212                                                              cmpTy)), true);
1213      }
1214    }
1215
1216  } else {
1217    // This is a plain strlen(), not strnlen().
1218    result = cast<DefinedOrUnknownSVal>(strLength);
1219
1220    // If we don't know the length of the string, conjure a return
1221    // value, so it can be used in constraints, at least.
1222    if (result.isUnknown()) {
1223      result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1224    }
1225  }
1226
1227  // Bind the return value.
1228  assert(!result.isUnknown() && "Should have conjured a value by now");
1229  state = state->BindExpr(CE, LCtx, result);
1230  C.addTransition(state);
1231}
1232
1233void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1234  if (CE->getNumArgs() < 2)
1235    return;
1236
1237  // char *strcpy(char *restrict dst, const char *restrict src);
1238  evalStrcpyCommon(C, CE,
1239                   /* returnEnd = */ false,
1240                   /* isBounded = */ false,
1241                   /* isAppending = */ false);
1242}
1243
1244void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1245  if (CE->getNumArgs() < 3)
1246    return;
1247
1248  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1249  evalStrcpyCommon(C, CE,
1250                   /* returnEnd = */ false,
1251                   /* isBounded = */ true,
1252                   /* isAppending = */ false);
1253}
1254
1255void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1256  if (CE->getNumArgs() < 2)
1257    return;
1258
1259  // char *stpcpy(char *restrict dst, const char *restrict src);
1260  evalStrcpyCommon(C, CE,
1261                   /* returnEnd = */ true,
1262                   /* isBounded = */ false,
1263                   /* isAppending = */ false);
1264}
1265
1266void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1267  if (CE->getNumArgs() < 2)
1268    return;
1269
1270  //char *strcat(char *restrict s1, const char *restrict s2);
1271  evalStrcpyCommon(C, CE,
1272                   /* returnEnd = */ false,
1273                   /* isBounded = */ false,
1274                   /* isAppending = */ true);
1275}
1276
1277void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1278  if (CE->getNumArgs() < 3)
1279    return;
1280
1281  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1282  evalStrcpyCommon(C, CE,
1283                   /* returnEnd = */ false,
1284                   /* isBounded = */ true,
1285                   /* isAppending = */ true);
1286}
1287
1288void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1289                                      bool returnEnd, bool isBounded,
1290                                      bool isAppending) const {
1291  CurrentFunctionDescription = "string copy function";
1292  ProgramStateRef state = C.getState();
1293  const LocationContext *LCtx = C.getLocationContext();
1294
1295  // Check that the destination is non-null.
1296  const Expr *Dst = CE->getArg(0);
1297  SVal DstVal = state->getSVal(Dst, LCtx);
1298
1299  state = checkNonNull(C, state, Dst, DstVal);
1300  if (!state)
1301    return;
1302
1303  // Check that the source is non-null.
1304  const Expr *srcExpr = CE->getArg(1);
1305  SVal srcVal = state->getSVal(srcExpr, LCtx);
1306  state = checkNonNull(C, state, srcExpr, srcVal);
1307  if (!state)
1308    return;
1309
1310  // Get the string length of the source.
1311  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1312
1313  // If the source isn't a valid C string, give up.
1314  if (strLength.isUndef())
1315    return;
1316
1317  SValBuilder &svalBuilder = C.getSValBuilder();
1318  QualType cmpTy = svalBuilder.getConditionType();
1319  QualType sizeTy = svalBuilder.getContext().getSizeType();
1320
1321  // These two values allow checking two kinds of errors:
1322  // - actual overflows caused by a source that doesn't fit in the destination
1323  // - potential overflows caused by a bound that could exceed the destination
1324  SVal amountCopied = UnknownVal();
1325  SVal maxLastElementIndex = UnknownVal();
1326  const char *boundWarning = NULL;
1327
1328  // If the function is strncpy, strncat, etc... it is bounded.
1329  if (isBounded) {
1330    // Get the max number of characters to copy.
1331    const Expr *lenExpr = CE->getArg(2);
1332    SVal lenVal = state->getSVal(lenExpr, LCtx);
1333
1334    // Protect against misdeclared strncpy().
1335    lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1336
1337    NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1338    NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1339
1340    // If we know both values, we might be able to figure out how much
1341    // we're copying.
1342    if (strLengthNL && lenValNL) {
1343      ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1344
1345      // Check if the max number to copy is less than the length of the src.
1346      // If the bound is equal to the source length, strncpy won't null-
1347      // terminate the result!
1348      llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
1349        state->assume(cast<DefinedOrUnknownSVal>
1350                      (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL,
1351                                               *lenValNL, cmpTy)));
1352
1353      if (stateSourceTooLong && !stateSourceNotTooLong) {
1354        // Max number to copy is less than the length of the src, so the actual
1355        // strLength copied is the max number arg.
1356        state = stateSourceTooLong;
1357        amountCopied = lenVal;
1358
1359      } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1360        // The source buffer entirely fits in the bound.
1361        state = stateSourceNotTooLong;
1362        amountCopied = strLength;
1363      }
1364    }
1365
1366    // We still want to know if the bound is known to be too large.
1367    if (lenValNL) {
1368      if (isAppending) {
1369        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1370
1371        // Get the string length of the destination. If the destination is
1372        // memory that can't have a string length, we shouldn't be copying
1373        // into it anyway.
1374        SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1375        if (dstStrLength.isUndef())
1376          return;
1377
1378        if (NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength)) {
1379          maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1380                                                        *lenValNL,
1381                                                        *dstStrLengthNL,
1382                                                        sizeTy);
1383          boundWarning = "Size argument is greater than the free space in the "
1384                         "destination buffer";
1385        }
1386
1387      } else {
1388        // For strncpy, this is just checking that lenVal <= sizeof(dst)
1389        // (Yes, strncpy and strncat differ in how they treat termination.
1390        // strncat ALWAYS terminates, but strncpy doesn't.)
1391
1392        // We need a special case for when the copy size is zero, in which
1393        // case strncpy will do no work at all. Our bounds check uses n-1
1394        // as the last element accessed, so n == 0 is problematic.
1395        ProgramStateRef StateZeroSize, StateNonZeroSize;
1396        llvm::tie(StateZeroSize, StateNonZeroSize) =
1397          assumeZero(C, state, *lenValNL, sizeTy);
1398
1399        // If the size is known to be zero, we're done.
1400        if (StateZeroSize && !StateNonZeroSize) {
1401          StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1402          C.addTransition(StateZeroSize);
1403          return;
1404        }
1405
1406        // Otherwise, go ahead and figure out the last element we'll touch.
1407        // We don't record the non-zero assumption here because we can't
1408        // be sure. We won't warn on a possible zero.
1409        NonLoc one = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
1410        maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1411                                                      one, sizeTy);
1412        boundWarning = "Size argument is greater than the length of the "
1413                       "destination buffer";
1414      }
1415    }
1416
1417    // If we couldn't pin down the copy length, at least bound it.
1418    // FIXME: We should actually run this code path for append as well, but
1419    // right now it creates problems with constraints (since we can end up
1420    // trying to pass constraints from symbol to symbol).
1421    if (amountCopied.isUnknown() && !isAppending) {
1422      // Try to get a "hypothetical" string length symbol, which we can later
1423      // set as a real value if that turns out to be the case.
1424      amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1425      assert(!amountCopied.isUndef());
1426
1427      if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
1428        if (lenValNL) {
1429          // amountCopied <= lenVal
1430          SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1431                                                             *amountCopiedNL,
1432                                                             *lenValNL,
1433                                                             cmpTy);
1434          state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
1435                                true);
1436          if (!state)
1437            return;
1438        }
1439
1440        if (strLengthNL) {
1441          // amountCopied <= strlen(source)
1442          SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1443                                                           *amountCopiedNL,
1444                                                           *strLengthNL,
1445                                                           cmpTy);
1446          state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
1447                                true);
1448          if (!state)
1449            return;
1450        }
1451      }
1452    }
1453
1454  } else {
1455    // The function isn't bounded. The amount copied should match the length
1456    // of the source buffer.
1457    amountCopied = strLength;
1458  }
1459
1460  assert(state);
1461
1462  // This represents the number of characters copied into the destination
1463  // buffer. (It may not actually be the strlen if the destination buffer
1464  // is not terminated.)
1465  SVal finalStrLength = UnknownVal();
1466
1467  // If this is an appending function (strcat, strncat...) then set the
1468  // string length to strlen(src) + strlen(dst) since the buffer will
1469  // ultimately contain both.
1470  if (isAppending) {
1471    // Get the string length of the destination. If the destination is memory
1472    // that can't have a string length, we shouldn't be copying into it anyway.
1473    SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1474    if (dstStrLength.isUndef())
1475      return;
1476
1477    NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
1478    NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1479
1480    // If we know both string lengths, we might know the final string length.
1481    if (srcStrLengthNL && dstStrLengthNL) {
1482      // Make sure the two lengths together don't overflow a size_t.
1483      state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1484      if (!state)
1485        return;
1486
1487      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1488                                               *dstStrLengthNL, sizeTy);
1489    }
1490
1491    // If we couldn't get a single value for the final string length,
1492    // we can at least bound it by the individual lengths.
1493    if (finalStrLength.isUnknown()) {
1494      // Try to get a "hypothetical" string length symbol, which we can later
1495      // set as a real value if that turns out to be the case.
1496      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1497      assert(!finalStrLength.isUndef());
1498
1499      if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
1500        if (srcStrLengthNL) {
1501          // finalStrLength >= srcStrLength
1502          SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1503                                                        *finalStrLengthNL,
1504                                                        *srcStrLengthNL,
1505                                                        cmpTy);
1506          state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
1507                                true);
1508          if (!state)
1509            return;
1510        }
1511
1512        if (dstStrLengthNL) {
1513          // finalStrLength >= dstStrLength
1514          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1515                                                      *finalStrLengthNL,
1516                                                      *dstStrLengthNL,
1517                                                      cmpTy);
1518          state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
1519                                true);
1520          if (!state)
1521            return;
1522        }
1523      }
1524    }
1525
1526  } else {
1527    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1528    // the final string length will match the input string length.
1529    finalStrLength = amountCopied;
1530  }
1531
1532  // The final result of the function will either be a pointer past the last
1533  // copied element, or a pointer to the start of the destination buffer.
1534  SVal Result = (returnEnd ? UnknownVal() : DstVal);
1535
1536  assert(state);
1537
1538  // If the destination is a MemRegion, try to check for a buffer overflow and
1539  // record the new string length.
1540  if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1541    QualType ptrTy = Dst->getType();
1542
1543    // If we have an exact value on a bounded copy, use that to check for
1544    // overflows, rather than our estimate about how much is actually copied.
1545    if (boundWarning) {
1546      if (NonLoc *maxLastNL = dyn_cast<NonLoc>(&maxLastElementIndex)) {
1547        SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1548                                                      *maxLastNL, ptrTy);
1549        state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1550                              boundWarning);
1551        if (!state)
1552          return;
1553      }
1554    }
1555
1556    // Then, if the final length is known...
1557    if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
1558      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1559                                                 *knownStrLength, ptrTy);
1560
1561      // ...and we haven't checked the bound, we'll check the actual copy.
1562      if (!boundWarning) {
1563        const char * const warningMsg =
1564          "String copy function overflows destination buffer";
1565        state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1566        if (!state)
1567          return;
1568      }
1569
1570      // If this is a stpcpy-style copy, the last element is the return value.
1571      if (returnEnd)
1572        Result = lastElement;
1573    }
1574
1575    // Invalidate the destination. This must happen before we set the C string
1576    // length because invalidation will clear the length.
1577    // FIXME: Even if we can't perfectly model the copy, we should see if we
1578    // can use LazyCompoundVals to copy the source values into the destination.
1579    // This would probably remove any existing bindings past the end of the
1580    // string, but that's still an improvement over blank invalidation.
1581    state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1582
1583    // Set the C string length of the destination, if we know it.
1584    if (isBounded && !isAppending) {
1585      // strncpy is annoying in that it doesn't guarantee to null-terminate
1586      // the result string. If the original string didn't fit entirely inside
1587      // the bound (including the null-terminator), we don't know how long the
1588      // result is.
1589      if (amountCopied != strLength)
1590        finalStrLength = UnknownVal();
1591    }
1592    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1593  }
1594
1595  assert(state);
1596
1597  // If this is a stpcpy-style copy, but we were unable to check for a buffer
1598  // overflow, we still need a result. Conjure a return value.
1599  if (returnEnd && Result.isUnknown()) {
1600    Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1601  }
1602
1603  // Set the return value.
1604  state = state->BindExpr(CE, LCtx, Result);
1605  C.addTransition(state);
1606}
1607
1608void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1609  if (CE->getNumArgs() < 2)
1610    return;
1611
1612  //int strcmp(const char *s1, const char *s2);
1613  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1614}
1615
1616void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1617  if (CE->getNumArgs() < 3)
1618    return;
1619
1620  //int strncmp(const char *s1, const char *s2, size_t n);
1621  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1622}
1623
1624void CStringChecker::evalStrcasecmp(CheckerContext &C,
1625                                    const CallExpr *CE) const {
1626  if (CE->getNumArgs() < 2)
1627    return;
1628
1629  //int strcasecmp(const char *s1, const char *s2);
1630  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1631}
1632
1633void CStringChecker::evalStrncasecmp(CheckerContext &C,
1634                                     const CallExpr *CE) const {
1635  if (CE->getNumArgs() < 3)
1636    return;
1637
1638  //int strncasecmp(const char *s1, const char *s2, size_t n);
1639  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1640}
1641
1642void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1643                                      bool isBounded, bool ignoreCase) const {
1644  CurrentFunctionDescription = "string comparison function";
1645  ProgramStateRef state = C.getState();
1646  const LocationContext *LCtx = C.getLocationContext();
1647
1648  // Check that the first string is non-null
1649  const Expr *s1 = CE->getArg(0);
1650  SVal s1Val = state->getSVal(s1, LCtx);
1651  state = checkNonNull(C, state, s1, s1Val);
1652  if (!state)
1653    return;
1654
1655  // Check that the second string is non-null.
1656  const Expr *s2 = CE->getArg(1);
1657  SVal s2Val = state->getSVal(s2, LCtx);
1658  state = checkNonNull(C, state, s2, s2Val);
1659  if (!state)
1660    return;
1661
1662  // Get the string length of the first string or give up.
1663  SVal s1Length = getCStringLength(C, state, s1, s1Val);
1664  if (s1Length.isUndef())
1665    return;
1666
1667  // Get the string length of the second string or give up.
1668  SVal s2Length = getCStringLength(C, state, s2, s2Val);
1669  if (s2Length.isUndef())
1670    return;
1671
1672  // If we know the two buffers are the same, we know the result is 0.
1673  // First, get the two buffers' addresses. Another checker will have already
1674  // made sure they're not undefined.
1675  DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val);
1676  DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val);
1677
1678  // See if they are the same.
1679  SValBuilder &svalBuilder = C.getSValBuilder();
1680  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1681  ProgramStateRef StSameBuf, StNotSameBuf;
1682  llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1683
1684  // If the two arguments might be the same buffer, we know the result is 0,
1685  // and we only need to check one size.
1686  if (StSameBuf) {
1687    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1688                                    svalBuilder.makeZeroVal(CE->getType()));
1689    C.addTransition(StSameBuf);
1690
1691    // If the two arguments are GUARANTEED to be the same, we're done!
1692    if (!StNotSameBuf)
1693      return;
1694  }
1695
1696  assert(StNotSameBuf);
1697  state = StNotSameBuf;
1698
1699  // At this point we can go about comparing the two buffers.
1700  // For now, we only do this if they're both known string literals.
1701
1702  // Attempt to extract string literals from both expressions.
1703  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1704  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1705  bool canComputeResult = false;
1706
1707  if (s1StrLiteral && s2StrLiteral) {
1708    StringRef s1StrRef = s1StrLiteral->getString();
1709    StringRef s2StrRef = s2StrLiteral->getString();
1710
1711    if (isBounded) {
1712      // Get the max number of characters to compare.
1713      const Expr *lenExpr = CE->getArg(2);
1714      SVal lenVal = state->getSVal(lenExpr, LCtx);
1715
1716      // If the length is known, we can get the right substrings.
1717      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1718        // Create substrings of each to compare the prefix.
1719        s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1720        s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1721        canComputeResult = true;
1722      }
1723    } else {
1724      // This is a normal, unbounded strcmp.
1725      canComputeResult = true;
1726    }
1727
1728    if (canComputeResult) {
1729      // Real strcmp stops at null characters.
1730      size_t s1Term = s1StrRef.find('\0');
1731      if (s1Term != StringRef::npos)
1732        s1StrRef = s1StrRef.substr(0, s1Term);
1733
1734      size_t s2Term = s2StrRef.find('\0');
1735      if (s2Term != StringRef::npos)
1736        s2StrRef = s2StrRef.substr(0, s2Term);
1737
1738      // Use StringRef's comparison methods to compute the actual result.
1739      int result;
1740
1741      if (ignoreCase) {
1742        // Compare string 1 to string 2 the same way strcasecmp() does.
1743        result = s1StrRef.compare_lower(s2StrRef);
1744      } else {
1745        // Compare string 1 to string 2 the same way strcmp() does.
1746        result = s1StrRef.compare(s2StrRef);
1747      }
1748
1749      // Build the SVal of the comparison and bind the return value.
1750      SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1751      state = state->BindExpr(CE, LCtx, resultVal);
1752    }
1753  }
1754
1755  if (!canComputeResult) {
1756    // Conjure a symbolic value. It's the best we can do.
1757    SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1758    state = state->BindExpr(CE, LCtx, resultVal);
1759  }
1760
1761  // Record this as a possible path.
1762  C.addTransition(state);
1763}
1764
1765//===----------------------------------------------------------------------===//
1766// The driver method, and other Checker callbacks.
1767//===----------------------------------------------------------------------===//
1768
1769bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1770  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1771
1772  if (!FDecl)
1773    return false;
1774
1775  FnCheck evalFunction = 0;
1776  if (C.isCLibraryFunction(FDecl, "memcpy"))
1777    evalFunction =  &CStringChecker::evalMemcpy;
1778  else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1779    evalFunction =  &CStringChecker::evalMempcpy;
1780  else if (C.isCLibraryFunction(FDecl, "memcmp"))
1781    evalFunction =  &CStringChecker::evalMemcmp;
1782  else if (C.isCLibraryFunction(FDecl, "memmove"))
1783    evalFunction =  &CStringChecker::evalMemmove;
1784  else if (C.isCLibraryFunction(FDecl, "strcpy"))
1785    evalFunction =  &CStringChecker::evalStrcpy;
1786  else if (C.isCLibraryFunction(FDecl, "strncpy"))
1787    evalFunction =  &CStringChecker::evalStrncpy;
1788  else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1789    evalFunction =  &CStringChecker::evalStpcpy;
1790  else if (C.isCLibraryFunction(FDecl, "strcat"))
1791    evalFunction =  &CStringChecker::evalStrcat;
1792  else if (C.isCLibraryFunction(FDecl, "strncat"))
1793    evalFunction =  &CStringChecker::evalStrncat;
1794  else if (C.isCLibraryFunction(FDecl, "strlen"))
1795    evalFunction =  &CStringChecker::evalstrLength;
1796  else if (C.isCLibraryFunction(FDecl, "strnlen"))
1797    evalFunction =  &CStringChecker::evalstrnLength;
1798  else if (C.isCLibraryFunction(FDecl, "strcmp"))
1799    evalFunction =  &CStringChecker::evalStrcmp;
1800  else if (C.isCLibraryFunction(FDecl, "strncmp"))
1801    evalFunction =  &CStringChecker::evalStrncmp;
1802  else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1803    evalFunction =  &CStringChecker::evalStrcasecmp;
1804  else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1805    evalFunction =  &CStringChecker::evalStrncasecmp;
1806  else if (C.isCLibraryFunction(FDecl, "bcopy"))
1807    evalFunction =  &CStringChecker::evalBcopy;
1808  else if (C.isCLibraryFunction(FDecl, "bcmp"))
1809    evalFunction =  &CStringChecker::evalMemcmp;
1810
1811  // If the callee isn't a string function, let another checker handle it.
1812  if (!evalFunction)
1813    return false;
1814
1815  // Make sure each function sets its own description.
1816  // (But don't bother in a release build.)
1817  assert(!(CurrentFunctionDescription = NULL));
1818
1819  // Check and evaluate the call.
1820  (this->*evalFunction)(C, CE);
1821
1822  // If the evaluate call resulted in no change, chain to the next eval call
1823  // handler.
1824  // Note, the custom CString evaluation calls assume that basic safety
1825  // properties are held. However, if the user chooses to turn off some of these
1826  // checks, we ignore the issues and leave the call evaluation to a generic
1827  // handler.
1828  if (!C.isDifferent())
1829    return false;
1830
1831  return true;
1832}
1833
1834void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1835  // Record string length for char a[] = "abc";
1836  ProgramStateRef state = C.getState();
1837
1838  for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1839       I != E; ++I) {
1840    const VarDecl *D = dyn_cast<VarDecl>(*I);
1841    if (!D)
1842      continue;
1843
1844    // FIXME: Handle array fields of structs.
1845    if (!D->getType()->isArrayType())
1846      continue;
1847
1848    const Expr *Init = D->getInit();
1849    if (!Init)
1850      continue;
1851    if (!isa<StringLiteral>(Init))
1852      continue;
1853
1854    Loc VarLoc = state->getLValue(D, C.getLocationContext());
1855    const MemRegion *MR = VarLoc.getAsRegion();
1856    if (!MR)
1857      continue;
1858
1859    SVal StrVal = state->getSVal(Init, C.getLocationContext());
1860    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1861    DefinedOrUnknownSVal strLength
1862      = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1863
1864    state = state->set<CStringLength>(MR, strLength);
1865  }
1866
1867  C.addTransition(state);
1868}
1869
1870bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1871  CStringLengthTy Entries = state->get<CStringLength>();
1872  return !Entries.isEmpty();
1873}
1874
1875ProgramStateRef
1876CStringChecker::checkRegionChanges(ProgramStateRef state,
1877                                   const InvalidatedSymbols *,
1878                                   ArrayRef<const MemRegion *> ExplicitRegions,
1879                                   ArrayRef<const MemRegion *> Regions,
1880                                   const CallEvent *Call) const {
1881  CStringLengthTy Entries = state->get<CStringLength>();
1882  if (Entries.isEmpty())
1883    return state;
1884
1885  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1886  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1887
1888  // First build sets for the changed regions and their super-regions.
1889  for (ArrayRef<const MemRegion *>::iterator
1890       I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1891    const MemRegion *MR = *I;
1892    Invalidated.insert(MR);
1893
1894    SuperRegions.insert(MR);
1895    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1896      MR = SR->getSuperRegion();
1897      SuperRegions.insert(MR);
1898    }
1899  }
1900
1901  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
1902
1903  // Then loop over the entries in the current state.
1904  for (CStringLengthTy::iterator I = Entries.begin(),
1905       E = Entries.end(); I != E; ++I) {
1906    const MemRegion *MR = I.getKey();
1907
1908    // Is this entry for a super-region of a changed region?
1909    if (SuperRegions.count(MR)) {
1910      Entries = F.remove(Entries, MR);
1911      continue;
1912    }
1913
1914    // Is this entry for a sub-region of a changed region?
1915    const MemRegion *Super = MR;
1916    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1917      Super = SR->getSuperRegion();
1918      if (Invalidated.count(Super)) {
1919        Entries = F.remove(Entries, MR);
1920        break;
1921      }
1922    }
1923  }
1924
1925  return state->set<CStringLength>(Entries);
1926}
1927
1928void CStringChecker::checkLiveSymbols(ProgramStateRef state,
1929                                      SymbolReaper &SR) const {
1930  // Mark all symbols in our string length map as valid.
1931  CStringLengthTy Entries = state->get<CStringLength>();
1932
1933  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
1934       I != E; ++I) {
1935    SVal Len = I.getData();
1936
1937    for (SymExpr::symbol_iterator si = Len.symbol_begin(),
1938                                  se = Len.symbol_end(); si != se; ++si)
1939      SR.markInUse(*si);
1940  }
1941}
1942
1943void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1944                                      CheckerContext &C) const {
1945  if (!SR.hasDeadSymbols())
1946    return;
1947
1948  ProgramStateRef state = C.getState();
1949  CStringLengthTy Entries = state->get<CStringLength>();
1950  if (Entries.isEmpty())
1951    return;
1952
1953  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
1954  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
1955       I != E; ++I) {
1956    SVal Len = I.getData();
1957    if (SymbolRef Sym = Len.getAsSymbol()) {
1958      if (SR.isDead(Sym))
1959        Entries = F.remove(Entries, I.getKey());
1960    }
1961  }
1962
1963  state = state->set<CStringLength>(Entries);
1964  C.addTransition(state);
1965}
1966
1967#define REGISTER_CHECKER(name) \
1968void ento::register##name(CheckerManager &mgr) {\
1969  static CStringChecker *TheChecker = 0; \
1970  if (TheChecker == 0) \
1971    TheChecker = mgr.registerChecker<CStringChecker>(); \
1972  TheChecker->Filter.Check##name = true; \
1973}
1974
1975REGISTER_CHECKER(CStringNullArg)
1976REGISTER_CHECKER(CStringOutOfBounds)
1977REGISTER_CHECKER(CStringBufferOverlap)
1978REGISTER_CHECKER(CStringNotNullTerm)
1979
1980void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
1981  registerCStringNullArg(Mgr);
1982}
1983