CStringChecker.cpp revision a93d0f280693b8418bc88cf7a8c93325f7fcf4c6
1c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//
3c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//                     The LLVM Compiler Infrastructure
4c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//
5b64f8b07c104c6cc986570ac8ee0ed16a9f23976Howard Hinnant// This file is distributed under the University of Illinois Open Source
6b64f8b07c104c6cc986570ac8ee0ed16a9f23976Howard Hinnant// License. See LICENSE.TXT for details.
7c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//
8c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//===----------------------------------------------------------------------===//
9c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//
10c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant// This defines CStringChecker, which is an assortment of checks on calls
11c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant// to functions in <string.h>.
12c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//
13c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant//===----------------------------------------------------------------------===//
14c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
15c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "ClangSACheckers.h"
16c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "InterCheckerAPI.h"
17c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "clang/StaticAnalyzer/Core/Checker.h"
18c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "clang/StaticAnalyzer/Core/CheckerManager.h"
19c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
20c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "llvm/ADT/SmallString.h"
23c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "llvm/ADT/STLExtras.h"
24c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "llvm/ADT/StringSwitch.h"
25c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant#include "llvm/Support/raw_ostream.h"
26c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
27c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnantusing namespace clang;
28c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnantusing namespace ento;
29c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
30c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnantnamespace {
31c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnantclass CStringChecker : public Checker< eval::Call,
32c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                                         check::PreStmt<DeclStmt>,
33c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                                         check::LiveSymbols,
34c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                                         check::DeadSymbols,
35c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                                         check::RegionChanges
36c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                                         > {
37c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant  mutable OwningPtr<BugType> BT_Null,
38c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                             BT_Bounds,
39c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                             BT_Overlap,
40c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                             BT_NotCString,
41c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant                             BT_AdditionOverflow;
42c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
43c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant  mutable const char *CurrentFunctionDescription;
44c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant
45c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnantpublic:
46c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant  /// The filter is used to filter out the diagnostics which are not enabled by
47c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant  /// the user.
48c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant  struct CStringChecksFilter {
49c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant    DefaultBool CheckCStringNullArg;
50c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant    DefaultBool CheckCStringOutOfBounds;
51c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant    DefaultBool CheckCStringBufferOverlap;
52c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant    DefaultBool CheckCStringNotNullTerm;
53c52f43e72dfcea03037729649da84c23b3beb04aHoward Hinnant  };
54
55  CStringChecksFilter Filter;
56
57  static void *getTag() { static int tag; return &tag; }
58
59  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
60  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
61  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
62  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
63  bool wantsRegionChangeUpdate(ProgramStateRef state) const;
64
65  ProgramStateRef
66    checkRegionChanges(ProgramStateRef state,
67                       const StoreManager::InvalidatedSymbols *,
68                       ArrayRef<const MemRegion *> ExplicitRegions,
69                       ArrayRef<const MemRegion *> Regions,
70                       const CallEvent *Call) const;
71
72  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
73                                          const CallExpr *) const;
74
75  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
76  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
77  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
78  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
79  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
80                      ProgramStateRef state,
81                      const Expr *Size,
82                      const Expr *Source,
83                      const Expr *Dest,
84                      bool Restricted = false,
85                      bool IsMempcpy = false) const;
86
87  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
88
89  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
90  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
91  void evalstrLengthCommon(CheckerContext &C,
92                           const CallExpr *CE,
93                           bool IsStrnlen = false) const;
94
95  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
96  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
97  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
98  void evalStrcpyCommon(CheckerContext &C,
99                        const CallExpr *CE,
100                        bool returnEnd,
101                        bool isBounded,
102                        bool isAppending) const;
103
104  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
105  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
106
107  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
108  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
109  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
110  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
111  void evalStrcmpCommon(CheckerContext &C,
112                        const CallExpr *CE,
113                        bool isBounded = false,
114                        bool ignoreCase = false) const;
115
116  // Utility methods
117  std::pair<ProgramStateRef , ProgramStateRef >
118  static assumeZero(CheckerContext &C,
119                    ProgramStateRef state, SVal V, QualType Ty);
120
121  static ProgramStateRef setCStringLength(ProgramStateRef state,
122                                              const MemRegion *MR,
123                                              SVal strLength);
124  static SVal getCStringLengthForRegion(CheckerContext &C,
125                                        ProgramStateRef &state,
126                                        const Expr *Ex,
127                                        const MemRegion *MR,
128                                        bool hypothetical);
129  SVal getCStringLength(CheckerContext &C,
130                        ProgramStateRef &state,
131                        const Expr *Ex,
132                        SVal Buf,
133                        bool hypothetical = false) const;
134
135  const StringLiteral *getCStringLiteral(CheckerContext &C,
136                                         ProgramStateRef &state,
137                                         const Expr *expr,
138                                         SVal val) const;
139
140  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
141                                              ProgramStateRef state,
142                                              const Expr *Ex, SVal V);
143
144  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
145                              const MemRegion *MR);
146
147  // Re-usable checks
148  ProgramStateRef checkNonNull(CheckerContext &C,
149                                   ProgramStateRef state,
150                                   const Expr *S,
151                                   SVal l) const;
152  ProgramStateRef CheckLocation(CheckerContext &C,
153                                    ProgramStateRef state,
154                                    const Expr *S,
155                                    SVal l,
156                                    const char *message = NULL) const;
157  ProgramStateRef CheckBufferAccess(CheckerContext &C,
158                                        ProgramStateRef state,
159                                        const Expr *Size,
160                                        const Expr *FirstBuf,
161                                        const Expr *SecondBuf,
162                                        const char *firstMessage = NULL,
163                                        const char *secondMessage = NULL,
164                                        bool WarnAboutSize = false) const;
165
166  ProgramStateRef CheckBufferAccess(CheckerContext &C,
167                                        ProgramStateRef state,
168                                        const Expr *Size,
169                                        const Expr *Buf,
170                                        const char *message = NULL,
171                                        bool WarnAboutSize = false) const {
172    // This is a convenience override.
173    return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL,
174                             WarnAboutSize);
175  }
176  ProgramStateRef CheckOverlap(CheckerContext &C,
177                                   ProgramStateRef state,
178                                   const Expr *Size,
179                                   const Expr *First,
180                                   const Expr *Second) const;
181  void emitOverlapBug(CheckerContext &C,
182                      ProgramStateRef state,
183                      const Stmt *First,
184                      const Stmt *Second) const;
185
186  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
187                                            ProgramStateRef state,
188                                            NonLoc left,
189                                            NonLoc right) const;
190};
191
192} //end anonymous namespace
193
// Program-state map from a memory region to the SVal recorded as the C string
// length of that region (see setCStringLength / getCStringLengthForRegion).
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
195
196//===----------------------------------------------------------------------===//
197// Individual checks and utility methods.
198//===----------------------------------------------------------------------===//
199
200std::pair<ProgramStateRef , ProgramStateRef >
201CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
202                           QualType Ty) {
203  DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
204  if (!val)
205    return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
206
207  SValBuilder &svalBuilder = C.getSValBuilder();
208  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
209  return state->assume(svalBuilder.evalEQ(state, *val, zero));
210}
211
212ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
213                                            ProgramStateRef state,
214                                            const Expr *S, SVal l) const {
215  // If a previous check has failed, propagate the failure.
216  if (!state)
217    return NULL;
218
219  ProgramStateRef stateNull, stateNonNull;
220  llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
221
222  if (stateNull && !stateNonNull) {
223    if (!Filter.CheckCStringNullArg)
224      return NULL;
225
226    ExplodedNode *N = C.generateSink(stateNull);
227    if (!N)
228      return NULL;
229
230    if (!BT_Null)
231      BT_Null.reset(new BuiltinBug("Unix API",
232        "Null pointer argument in call to byte string function"));
233
234    SmallString<80> buf;
235    llvm::raw_svector_ostream os(buf);
236    assert(CurrentFunctionDescription);
237    os << "Null pointer argument in call to " << CurrentFunctionDescription;
238
239    // Generate a report for this bug.
240    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
241    BugReport *report = new BugReport(*BT, os.str(), N);
242
243    report->addRange(S->getSourceRange());
244    bugreporter::trackNullOrUndefValue(N, S, *report);
245    C.emitReport(report);
246    return NULL;
247  }
248
249  // From here on, assume that the value is non-null.
250  assert(stateNonNull);
251  return stateNonNull;
252}
253
254// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
255ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
256                                             ProgramStateRef state,
257                                             const Expr *S, SVal l,
258                                             const char *warningMsg) const {
259  // If a previous check has failed, propagate the failure.
260  if (!state)
261    return NULL;
262
263  // Check for out of bound array element access.
264  const MemRegion *R = l.getAsRegion();
265  if (!R)
266    return state;
267
268  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
269  if (!ER)
270    return state;
271
272  assert(ER->getValueType() == C.getASTContext().CharTy &&
273    "CheckLocation should only be called with char* ElementRegions");
274
275  // Get the size of the array.
276  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
277  SValBuilder &svalBuilder = C.getSValBuilder();
278  SVal Extent =
279    svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
280  DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
281
282  // Get the index of the accessed element.
283  DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
284
285  ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
286  ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
287  if (StOutBound && !StInBound) {
288    ExplodedNode *N = C.generateSink(StOutBound);
289    if (!N)
290      return NULL;
291
292    if (!BT_Bounds) {
293      BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
294        "Byte string function accesses out-of-bound array element"));
295    }
296    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
297
298    // Generate a report for this bug.
299    BugReport *report;
300    if (warningMsg) {
301      report = new BugReport(*BT, warningMsg, N);
302    } else {
303      assert(CurrentFunctionDescription);
304      assert(CurrentFunctionDescription[0] != '\0');
305
306      SmallString<80> buf;
307      llvm::raw_svector_ostream os(buf);
308      os << (char)toupper(CurrentFunctionDescription[0])
309         << &CurrentFunctionDescription[1]
310         << " accesses out-of-bound array element";
311      report = new BugReport(*BT, os.str(), N);
312    }
313
314    // FIXME: It would be nice to eventually make this diagnostic more clear,
315    // e.g., by referencing the original declaration or by saying *why* this
316    // reference is outside the range.
317
318    report->addRange(S->getSourceRange());
319    C.emitReport(report);
320    return NULL;
321  }
322
323  // Array bound check succeeded.  From this point forward the array bound
324  // should always succeed.
325  return StInBound;
326}
327
328ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
329                                                 ProgramStateRef state,
330                                                 const Expr *Size,
331                                                 const Expr *FirstBuf,
332                                                 const Expr *SecondBuf,
333                                                 const char *firstMessage,
334                                                 const char *secondMessage,
335                                                 bool WarnAboutSize) const {
336  // If a previous check has failed, propagate the failure.
337  if (!state)
338    return NULL;
339
340  SValBuilder &svalBuilder = C.getSValBuilder();
341  ASTContext &Ctx = svalBuilder.getContext();
342  const LocationContext *LCtx = C.getLocationContext();
343
344  QualType sizeTy = Size->getType();
345  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
346
347  // Check that the first buffer is non-null.
348  SVal BufVal = state->getSVal(FirstBuf, LCtx);
349  state = checkNonNull(C, state, FirstBuf, BufVal);
350  if (!state)
351    return NULL;
352
353  // If out-of-bounds checking is turned off, skip the rest.
354  if (!Filter.CheckCStringOutOfBounds)
355    return state;
356
357  // Get the access length and make sure it is known.
358  // FIXME: This assumes the caller has already checked that the access length
359  // is positive. And that it's unsigned.
360  SVal LengthVal = state->getSVal(Size, LCtx);
361  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
362  if (!Length)
363    return state;
364
365  // Compute the offset of the last element to be accessed: size-1.
366  NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
367  NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
368                                                    *Length, One, sizeTy));
369
370  // Check that the first buffer is sufficiently long.
371  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
372  if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
373    const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
374
375    SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
376                                          LastOffset, PtrTy);
377    state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
378
379    // If the buffer isn't large enough, abort.
380    if (!state)
381      return NULL;
382  }
383
384  // If there's a second buffer, check it as well.
385  if (SecondBuf) {
386    BufVal = state->getSVal(SecondBuf, LCtx);
387    state = checkNonNull(C, state, SecondBuf, BufVal);
388    if (!state)
389      return NULL;
390
391    BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
392    if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
393      const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
394
395      SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
396                                            LastOffset, PtrTy);
397      state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
398    }
399  }
400
401  // Large enough or not, return this state!
402  return state;
403}
404
405ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
406                                            ProgramStateRef state,
407                                            const Expr *Size,
408                                            const Expr *First,
409                                            const Expr *Second) const {
410  if (!Filter.CheckCStringBufferOverlap)
411    return state;
412
413  // Do a simple check for overlap: if the two arguments are from the same
414  // buffer, see if the end of the first is greater than the start of the second
415  // or vice versa.
416
417  // If a previous check has failed, propagate the failure.
418  if (!state)
419    return NULL;
420
421  ProgramStateRef stateTrue, stateFalse;
422
423  // Get the buffer values and make sure they're known locations.
424  const LocationContext *LCtx = C.getLocationContext();
425  SVal firstVal = state->getSVal(First, LCtx);
426  SVal secondVal = state->getSVal(Second, LCtx);
427
428  Loc *firstLoc = dyn_cast<Loc>(&firstVal);
429  if (!firstLoc)
430    return state;
431
432  Loc *secondLoc = dyn_cast<Loc>(&secondVal);
433  if (!secondLoc)
434    return state;
435
436  // Are the two values the same?
437  SValBuilder &svalBuilder = C.getSValBuilder();
438  llvm::tie(stateTrue, stateFalse) =
439    state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
440
441  if (stateTrue && !stateFalse) {
442    // If the values are known to be equal, that's automatically an overlap.
443    emitOverlapBug(C, stateTrue, First, Second);
444    return NULL;
445  }
446
447  // assume the two expressions are not equal.
448  assert(stateFalse);
449  state = stateFalse;
450
451  // Which value comes first?
452  QualType cmpTy = svalBuilder.getConditionType();
453  SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
454                                         *firstLoc, *secondLoc, cmpTy);
455  DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
456  if (!reverseTest)
457    return state;
458
459  llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
460  if (stateTrue) {
461    if (stateFalse) {
462      // If we don't know which one comes first, we can't perform this test.
463      return state;
464    } else {
465      // Switch the values so that firstVal is before secondVal.
466      Loc *tmpLoc = firstLoc;
467      firstLoc = secondLoc;
468      secondLoc = tmpLoc;
469
470      // Switch the Exprs as well, so that they still correspond.
471      const Expr *tmpExpr = First;
472      First = Second;
473      Second = tmpExpr;
474    }
475  }
476
477  // Get the length, and make sure it too is known.
478  SVal LengthVal = state->getSVal(Size, LCtx);
479  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
480  if (!Length)
481    return state;
482
483  // Convert the first buffer's start address to char*.
484  // Bail out if the cast fails.
485  ASTContext &Ctx = svalBuilder.getContext();
486  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
487  SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
488                                         First->getType());
489  Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
490  if (!FirstStartLoc)
491    return state;
492
493  // Compute the end of the first buffer. Bail out if THAT fails.
494  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
495                                 *FirstStartLoc, *Length, CharPtrTy);
496  Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
497  if (!FirstEndLoc)
498    return state;
499
500  // Is the end of the first buffer past the start of the second buffer?
501  SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
502                                *FirstEndLoc, *secondLoc, cmpTy);
503  DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
504  if (!OverlapTest)
505    return state;
506
507  llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
508
509  if (stateTrue && !stateFalse) {
510    // Overlap!
511    emitOverlapBug(C, stateTrue, First, Second);
512    return NULL;
513  }
514
515  // assume the two expressions don't overlap.
516  assert(stateFalse);
517  return stateFalse;
518}
519
520void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
521                                  const Stmt *First, const Stmt *Second) const {
522  ExplodedNode *N = C.generateSink(state);
523  if (!N)
524    return;
525
526  if (!BT_Overlap)
527    BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
528
529  // Generate a report for this bug.
530  BugReport *report =
531    new BugReport(*BT_Overlap,
532      "Arguments must not be overlapping buffers", N);
533  report->addRange(First->getSourceRange());
534  report->addRange(Second->getSourceRange());
535
536  C.emitReport(report);
537}
538
539ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
540                                                     ProgramStateRef state,
541                                                     NonLoc left,
542                                                     NonLoc right) const {
543  // If out-of-bounds checking is turned off, skip the rest.
544  if (!Filter.CheckCStringOutOfBounds)
545    return state;
546
547  // If a previous check has failed, propagate the failure.
548  if (!state)
549    return NULL;
550
551  SValBuilder &svalBuilder = C.getSValBuilder();
552  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
553
554  QualType sizeTy = svalBuilder.getContext().getSizeType();
555  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
556  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
557
558  SVal maxMinusRight;
559  if (isa<nonloc::ConcreteInt>(right)) {
560    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
561                                                 sizeTy);
562  } else {
563    // Try switching the operands. (The order of these two assignments is
564    // important!)
565    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
566                                            sizeTy);
567    left = right;
568  }
569
570  if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
571    QualType cmpTy = svalBuilder.getConditionType();
572    // If left > max - right, we have an overflow.
573    SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
574                                                *maxMinusRightNL, cmpTy);
575
576    ProgramStateRef stateOverflow, stateOkay;
577    llvm::tie(stateOverflow, stateOkay) =
578      state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
579
580    if (stateOverflow && !stateOkay) {
581      // We have an overflow. Emit a bug report.
582      ExplodedNode *N = C.generateSink(stateOverflow);
583      if (!N)
584        return NULL;
585
586      if (!BT_AdditionOverflow)
587        BT_AdditionOverflow.reset(new BuiltinBug("API",
588          "Sum of expressions causes overflow"));
589
590      // This isn't a great error message, but this should never occur in real
591      // code anyway -- you'd have to create a buffer longer than a size_t can
592      // represent, which is sort of a contradiction.
593      const char *warning =
594        "This expression will create a string whose length is too big to "
595        "be represented as a size_t";
596
597      // Generate a report for this bug.
598      BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
599      C.emitReport(report);
600
601      return NULL;
602    }
603
604    // From now on, assume an overflow didn't occur.
605    assert(stateOkay);
606    state = stateOkay;
607  }
608
609  return state;
610}
611
612ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
613                                                const MemRegion *MR,
614                                                SVal strLength) {
615  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
616
617  MR = MR->StripCasts();
618
619  switch (MR->getKind()) {
620  case MemRegion::StringRegionKind:
621    // FIXME: This can happen if we strcpy() into a string region. This is
622    // undefined [C99 6.4.5p6], but we should still warn about it.
623    return state;
624
625  case MemRegion::SymbolicRegionKind:
626  case MemRegion::AllocaRegionKind:
627  case MemRegion::VarRegionKind:
628  case MemRegion::FieldRegionKind:
629  case MemRegion::ObjCIvarRegionKind:
630    // These are the types we can currently track string lengths for.
631    break;
632
633  case MemRegion::ElementRegionKind:
634    // FIXME: Handle element regions by upper-bounding the parent region's
635    // string length.
636    return state;
637
638  default:
639    // Other regions (mostly non-data) can't have a reliable C string length.
640    // For now, just ignore the change.
641    // FIXME: These are rare but not impossible. We should output some kind of
642    // warning for things like strcpy((char[]){'a', 0}, "b");
643    return state;
644  }
645
646  if (strLength.isUnknown())
647    return state->remove<CStringLength>(MR);
648
649  return state->set<CStringLength>(MR, strLength);
650}
651
652SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
653                                               ProgramStateRef &state,
654                                               const Expr *Ex,
655                                               const MemRegion *MR,
656                                               bool hypothetical) {
657  if (!hypothetical) {
658    // If there's a recorded length, go ahead and return it.
659    const SVal *Recorded = state->get<CStringLength>(MR);
660    if (Recorded)
661      return *Recorded;
662  }
663
664  // Otherwise, get a new symbol and update the state.
665  SValBuilder &svalBuilder = C.getSValBuilder();
666  QualType sizeTy = svalBuilder.getContext().getSizeType();
667  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
668                                                    MR, Ex, sizeTy,
669                                                    C.blockCount());
670
671  if (!hypothetical)
672    state = state->set<CStringLength>(MR, strLength);
673
674  return strLength;
675}
676
677SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
678                                      const Expr *Ex, SVal Buf,
679                                      bool hypothetical) const {
680  const MemRegion *MR = Buf.getAsRegion();
681  if (!MR) {
682    // If we can't get a region, see if it's something we /know/ isn't a
683    // C string. In the context of locations, the only time we can issue such
684    // a warning is for labels.
685    if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
686      if (!Filter.CheckCStringNotNullTerm)
687        return UndefinedVal();
688
689      if (ExplodedNode *N = C.addTransition(state)) {
690        if (!BT_NotCString)
691          BT_NotCString.reset(new BuiltinBug("Unix API",
692            "Argument is not a null-terminated string."));
693
694        SmallString<120> buf;
695        llvm::raw_svector_ostream os(buf);
696        assert(CurrentFunctionDescription);
697        os << "Argument to " << CurrentFunctionDescription
698           << " is the address of the label '" << Label->getLabel()->getName()
699           << "', which is not a null-terminated string";
700
701        // Generate a report for this bug.
702        BugReport *report = new BugReport(*BT_NotCString,
703                                                          os.str(), N);
704
705        report->addRange(Ex->getSourceRange());
706        C.emitReport(report);
707      }
708      return UndefinedVal();
709
710    }
711
712    // If it's not a region and not a label, give up.
713    return UnknownVal();
714  }
715
716  // If we have a region, strip casts from it and see if we can figure out
717  // its length. For anything we can't figure out, just return UnknownVal.
718  MR = MR->StripCasts();
719
720  switch (MR->getKind()) {
721  case MemRegion::StringRegionKind: {
722    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
723    // so we can assume that the byte length is the correct C string length.
724    SValBuilder &svalBuilder = C.getSValBuilder();
725    QualType sizeTy = svalBuilder.getContext().getSizeType();
726    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
727    return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
728  }
729  case MemRegion::SymbolicRegionKind:
730  case MemRegion::AllocaRegionKind:
731  case MemRegion::VarRegionKind:
732  case MemRegion::FieldRegionKind:
733  case MemRegion::ObjCIvarRegionKind:
734    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
735  case MemRegion::CompoundLiteralRegionKind:
736    // FIXME: Can we track this? Is it necessary?
737    return UnknownVal();
738  case MemRegion::ElementRegionKind:
739    // FIXME: How can we handle this? It's not good enough to subtract the
740    // offset from the base string length; consider "123\x00567" and &a[5].
741    return UnknownVal();
742  default:
743    // Other regions (mostly non-data) can't have a reliable C string length.
744    // In this case, an error is emitted and UndefinedVal is returned.
745    // The caller should always be prepared to handle this case.
746    if (!Filter.CheckCStringNotNullTerm)
747      return UndefinedVal();
748
749    if (ExplodedNode *N = C.addTransition(state)) {
750      if (!BT_NotCString)
751        BT_NotCString.reset(new BuiltinBug("Unix API",
752          "Argument is not a null-terminated string."));
753
754      SmallString<120> buf;
755      llvm::raw_svector_ostream os(buf);
756
757      assert(CurrentFunctionDescription);
758      os << "Argument to " << CurrentFunctionDescription << " is ";
759
760      if (SummarizeRegion(os, C.getASTContext(), MR))
761        os << ", which is not a null-terminated string";
762      else
763        os << "not a null-terminated string";
764
765      // Generate a report for this bug.
766      BugReport *report = new BugReport(*BT_NotCString,
767                                                        os.str(), N);
768
769      report->addRange(Ex->getSourceRange());
770      C.emitReport(report);
771    }
772
773    return UndefinedVal();
774  }
775}
776
777const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
778  ProgramStateRef &state, const Expr *expr, SVal val) const {
779
780  // Get the memory region pointed to by the val.
781  const MemRegion *bufRegion = val.getAsRegion();
782  if (!bufRegion)
783    return NULL;
784
785  // Strip casts off the memory region.
786  bufRegion = bufRegion->StripCasts();
787
788  // Cast the memory region to a string region.
789  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
790  if (!strRegion)
791    return NULL;
792
793  // Return the actual string in the string region.
794  return strRegion->getStringLiteral();
795}
796
797ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
798                                                ProgramStateRef state,
799                                                const Expr *E, SVal V) {
800  Loc *L = dyn_cast<Loc>(&V);
801  if (!L)
802    return state;
803
804  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
805  // some assumptions about the value that CFRefCount can't. Even so, it should
806  // probably be refactored.
807  if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
808    const MemRegion *R = MR->getRegion()->StripCasts();
809
810    // Are we dealing with an ElementRegion?  If so, we should be invalidating
811    // the super-region.
812    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
813      R = ER->getSuperRegion();
814      // FIXME: What about layers of ElementRegions?
815    }
816
817    // Invalidate this region.
818    const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
819    return state->invalidateRegions(R, E, C.blockCount(), LCtx);
820  }
821
822  // If we have a non-region value by chance, just remove the binding.
823  // FIXME: is this necessary or correct? This handles the non-Region
824  //  cases.  Is it ever valid to store to these?
825  return state->killBinding(*L);
826}
827
828bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
829                                     const MemRegion *MR) {
830  const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
831
832  switch (MR->getKind()) {
833  case MemRegion::FunctionTextRegionKind: {
834    const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
835    if (FD)
836      os << "the address of the function '" << *FD << '\'';
837    else
838      os << "the address of a function";
839    return true;
840  }
841  case MemRegion::BlockTextRegionKind:
842    os << "block text";
843    return true;
844  case MemRegion::BlockDataRegionKind:
845    os << "a block";
846    return true;
847  case MemRegion::CXXThisRegionKind:
848  case MemRegion::CXXTempObjectRegionKind:
849    os << "a C++ temp object of type " << TVR->getValueType().getAsString();
850    return true;
851  case MemRegion::VarRegionKind:
852    os << "a variable of type" << TVR->getValueType().getAsString();
853    return true;
854  case MemRegion::FieldRegionKind:
855    os << "a field of type " << TVR->getValueType().getAsString();
856    return true;
857  case MemRegion::ObjCIvarRegionKind:
858    os << "an instance variable of type " << TVR->getValueType().getAsString();
859    return true;
860  default:
861    return false;
862  }
863}
864
865//===----------------------------------------------------------------------===//
866// evaluation of individual function calls.
867//===----------------------------------------------------------------------===//
868
869void CStringChecker::evalCopyCommon(CheckerContext &C,
870                                    const CallExpr *CE,
871                                    ProgramStateRef state,
872                                    const Expr *Size, const Expr *Dest,
873                                    const Expr *Source, bool Restricted,
874                                    bool IsMempcpy) const {
875  CurrentFunctionDescription = "memory copy function";
876
877  // See if the size argument is zero.
878  const LocationContext *LCtx = C.getLocationContext();
879  SVal sizeVal = state->getSVal(Size, LCtx);
880  QualType sizeTy = Size->getType();
881
882  ProgramStateRef stateZeroSize, stateNonZeroSize;
883  llvm::tie(stateZeroSize, stateNonZeroSize) =
884    assumeZero(C, state, sizeVal, sizeTy);
885
886  // Get the value of the Dest.
887  SVal destVal = state->getSVal(Dest, LCtx);
888
889  // If the size is zero, there won't be any actual memory access, so
890  // just bind the return value to the destination buffer and return.
891  if (stateZeroSize && !stateNonZeroSize) {
892    stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
893    C.addTransition(stateZeroSize);
894    return;
895  }
896
897  // If the size can be nonzero, we have to check the other arguments.
898  if (stateNonZeroSize) {
899    state = stateNonZeroSize;
900
901    // Ensure the destination is not null. If it is NULL there will be a
902    // NULL pointer dereference.
903    state = checkNonNull(C, state, Dest, destVal);
904    if (!state)
905      return;
906
907    // Get the value of the Src.
908    SVal srcVal = state->getSVal(Source, LCtx);
909
910    // Ensure the source is not null. If it is NULL there will be a
911    // NULL pointer dereference.
912    state = checkNonNull(C, state, Source, srcVal);
913    if (!state)
914      return;
915
916    // Ensure the accesses are valid and that the buffers do not overlap.
917    const char * const writeWarning =
918      "Memory copy function overflows destination buffer";
919    state = CheckBufferAccess(C, state, Size, Dest, Source,
920                              writeWarning, /* sourceWarning = */ NULL);
921    if (Restricted)
922      state = CheckOverlap(C, state, Size, Dest, Source);
923
924    if (!state)
925      return;
926
927    // If this is mempcpy, get the byte after the last byte copied and
928    // bind the expr.
929    if (IsMempcpy) {
930      loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
931      assert(destRegVal && "Destination should be a known MemRegionVal here");
932
933      // Get the length to copy.
934      NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
935
936      if (lenValNonLoc) {
937        // Get the byte after the last byte copied.
938        SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
939                                                          *destRegVal,
940                                                          *lenValNonLoc,
941                                                          Dest->getType());
942
943        // The byte after the last byte copied is the return value.
944        state = state->BindExpr(CE, LCtx, lastElement);
945      } else {
946        // If we don't know how much we copied, we can at least
947        // conjure a return value for later.
948        SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx,
949                                                          C.blockCount());
950        state = state->BindExpr(CE, LCtx, result);
951      }
952
953    } else {
954      // All other copies return the destination buffer.
955      // (Well, bcopy() has a void return type, but this won't hurt.)
956      state = state->BindExpr(CE, LCtx, destVal);
957    }
958
959    // Invalidate the destination.
960    // FIXME: Even if we can't perfectly model the copy, we should see if we
961    // can use LazyCompoundVals to copy the source values into the destination.
962    // This would probably remove any existing bindings past the end of the
963    // copied region, but that's still an improvement over blank invalidation.
964    state = InvalidateBuffer(C, state, Dest,
965                             state->getSVal(Dest, C.getLocationContext()));
966    C.addTransition(state);
967  }
968}
969
970
971void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
972  if (CE->getNumArgs() < 3)
973    return;
974
975  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
976  // The return value is the address of the destination buffer.
977  const Expr *Dest = CE->getArg(0);
978  ProgramStateRef state = C.getState();
979
980  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
981}
982
983void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
984  if (CE->getNumArgs() < 3)
985    return;
986
987  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
988  // The return value is a pointer to the byte following the last written byte.
989  const Expr *Dest = CE->getArg(0);
990  ProgramStateRef state = C.getState();
991
992  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
993}
994
995void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
996  if (CE->getNumArgs() < 3)
997    return;
998
999  // void *memmove(void *dst, const void *src, size_t n);
1000  // The return value is the address of the destination buffer.
1001  const Expr *Dest = CE->getArg(0);
1002  ProgramStateRef state = C.getState();
1003
1004  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1005}
1006
1007void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1008  if (CE->getNumArgs() < 3)
1009    return;
1010
1011  // void bcopy(const void *src, void *dst, size_t n);
1012  evalCopyCommon(C, CE, C.getState(),
1013                 CE->getArg(2), CE->getArg(1), CE->getArg(0));
1014}
1015
1016void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1017  if (CE->getNumArgs() < 3)
1018    return;
1019
1020  // int memcmp(const void *s1, const void *s2, size_t n);
1021  CurrentFunctionDescription = "memory comparison function";
1022
1023  const Expr *Left = CE->getArg(0);
1024  const Expr *Right = CE->getArg(1);
1025  const Expr *Size = CE->getArg(2);
1026
1027  ProgramStateRef state = C.getState();
1028  SValBuilder &svalBuilder = C.getSValBuilder();
1029
1030  // See if the size argument is zero.
1031  const LocationContext *LCtx = C.getLocationContext();
1032  SVal sizeVal = state->getSVal(Size, LCtx);
1033  QualType sizeTy = Size->getType();
1034
1035  ProgramStateRef stateZeroSize, stateNonZeroSize;
1036  llvm::tie(stateZeroSize, stateNonZeroSize) =
1037    assumeZero(C, state, sizeVal, sizeTy);
1038
1039  // If the size can be zero, the result will be 0 in that case, and we don't
1040  // have to check either of the buffers.
1041  if (stateZeroSize) {
1042    state = stateZeroSize;
1043    state = state->BindExpr(CE, LCtx,
1044                            svalBuilder.makeZeroVal(CE->getType()));
1045    C.addTransition(state);
1046  }
1047
1048  // If the size can be nonzero, we have to check the other arguments.
1049  if (stateNonZeroSize) {
1050    state = stateNonZeroSize;
1051    // If we know the two buffers are the same, we know the result is 0.
1052    // First, get the two buffers' addresses. Another checker will have already
1053    // made sure they're not undefined.
1054    DefinedOrUnknownSVal LV =
1055      cast<DefinedOrUnknownSVal>(state->getSVal(Left, LCtx));
1056    DefinedOrUnknownSVal RV =
1057      cast<DefinedOrUnknownSVal>(state->getSVal(Right, LCtx));
1058
1059    // See if they are the same.
1060    DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1061    ProgramStateRef StSameBuf, StNotSameBuf;
1062    llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1063
1064    // If the two arguments might be the same buffer, we know the result is 0,
1065    // and we only need to check one size.
1066    if (StSameBuf) {
1067      state = StSameBuf;
1068      state = CheckBufferAccess(C, state, Size, Left);
1069      if (state) {
1070        state = StSameBuf->BindExpr(CE, LCtx,
1071                                    svalBuilder.makeZeroVal(CE->getType()));
1072        C.addTransition(state);
1073      }
1074    }
1075
1076    // If the two arguments might be different buffers, we have to check the
1077    // size of both of them.
1078    if (StNotSameBuf) {
1079      state = StNotSameBuf;
1080      state = CheckBufferAccess(C, state, Size, Left, Right);
1081      if (state) {
1082        // The return value is the comparison result, which we don't know.
1083        SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1084        state = state->BindExpr(CE, LCtx, CmpV);
1085        C.addTransition(state);
1086      }
1087    }
1088  }
1089}
1090
1091void CStringChecker::evalstrLength(CheckerContext &C,
1092                                   const CallExpr *CE) const {
1093  if (CE->getNumArgs() < 1)
1094    return;
1095
1096  // size_t strlen(const char *s);
1097  evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1098}
1099
1100void CStringChecker::evalstrnLength(CheckerContext &C,
1101                                    const CallExpr *CE) const {
1102  if (CE->getNumArgs() < 2)
1103    return;
1104
1105  // size_t strnlen(const char *s, size_t maxlen);
1106  evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1107}
1108
1109void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1110                                         bool IsStrnlen) const {
1111  CurrentFunctionDescription = "string length function";
1112  ProgramStateRef state = C.getState();
1113  const LocationContext *LCtx = C.getLocationContext();
1114
1115  if (IsStrnlen) {
1116    const Expr *maxlenExpr = CE->getArg(1);
1117    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1118
1119    ProgramStateRef stateZeroSize, stateNonZeroSize;
1120    llvm::tie(stateZeroSize, stateNonZeroSize) =
1121      assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1122
1123    // If the size can be zero, the result will be 0 in that case, and we don't
1124    // have to check the string itself.
1125    if (stateZeroSize) {
1126      SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1127      stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1128      C.addTransition(stateZeroSize);
1129    }
1130
1131    // If the size is GUARANTEED to be zero, we're done!
1132    if (!stateNonZeroSize)
1133      return;
1134
1135    // Otherwise, record the assumption that the size is nonzero.
1136    state = stateNonZeroSize;
1137  }
1138
1139  // Check that the string argument is non-null.
1140  const Expr *Arg = CE->getArg(0);
1141  SVal ArgVal = state->getSVal(Arg, LCtx);
1142
1143  state = checkNonNull(C, state, Arg, ArgVal);
1144
1145  if (!state)
1146    return;
1147
1148  SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1149
1150  // If the argument isn't a valid C string, there's no valid state to
1151  // transition to.
1152  if (strLength.isUndef())
1153    return;
1154
1155  DefinedOrUnknownSVal result = UnknownVal();
1156
1157  // If the check is for strnlen() then bind the return value to no more than
1158  // the maxlen value.
1159  if (IsStrnlen) {
1160    QualType cmpTy = C.getSValBuilder().getConditionType();
1161
1162    // It's a little unfortunate to be getting this again,
1163    // but it's not that expensive...
1164    const Expr *maxlenExpr = CE->getArg(1);
1165    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1166
1167    NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1168    NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
1169
1170    if (strLengthNL && maxlenValNL) {
1171      ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1172
1173      // Check if the strLength is greater than the maxlen.
1174      llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1175        state->assume(cast<DefinedOrUnknownSVal>
1176                      (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1177                                                      *strLengthNL,
1178                                                      *maxlenValNL,
1179                                                      cmpTy)));
1180
1181      if (stateStringTooLong && !stateStringNotTooLong) {
1182        // If the string is longer than maxlen, return maxlen.
1183        result = *maxlenValNL;
1184      } else if (stateStringNotTooLong && !stateStringTooLong) {
1185        // If the string is shorter than maxlen, return its length.
1186        result = *strLengthNL;
1187      }
1188    }
1189
1190    if (result.isUnknown()) {
1191      // If we don't have enough information for a comparison, there's
1192      // no guarantee the full string length will actually be returned.
1193      // All we know is the return value is the min of the string length
1194      // and the limit. This is better than nothing.
1195      result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1196      NonLoc *resultNL = cast<NonLoc>(&result);
1197
1198      if (strLengthNL) {
1199        state = state->assume(cast<DefinedOrUnknownSVal>
1200                              (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1201                                                              *resultNL,
1202                                                              *strLengthNL,
1203                                                              cmpTy)), true);
1204      }
1205
1206      if (maxlenValNL) {
1207        state = state->assume(cast<DefinedOrUnknownSVal>
1208                              (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1209                                                              *resultNL,
1210                                                              *maxlenValNL,
1211                                                              cmpTy)), true);
1212      }
1213    }
1214
1215  } else {
1216    // This is a plain strlen(), not strnlen().
1217    result = cast<DefinedOrUnknownSVal>(strLength);
1218
1219    // If we don't know the length of the string, conjure a return
1220    // value, so it can be used in constraints, at least.
1221    if (result.isUnknown()) {
1222      result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1223    }
1224  }
1225
1226  // Bind the return value.
1227  assert(!result.isUnknown() && "Should have conjured a value by now");
1228  state = state->BindExpr(CE, LCtx, result);
1229  C.addTransition(state);
1230}
1231
1232void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1233  if (CE->getNumArgs() < 2)
1234    return;
1235
1236  // char *strcpy(char *restrict dst, const char *restrict src);
1237  evalStrcpyCommon(C, CE,
1238                   /* returnEnd = */ false,
1239                   /* isBounded = */ false,
1240                   /* isAppending = */ false);
1241}
1242
1243void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1244  if (CE->getNumArgs() < 3)
1245    return;
1246
1247  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1248  evalStrcpyCommon(C, CE,
1249                   /* returnEnd = */ false,
1250                   /* isBounded = */ true,
1251                   /* isAppending = */ false);
1252}
1253
1254void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1255  if (CE->getNumArgs() < 2)
1256    return;
1257
1258  // char *stpcpy(char *restrict dst, const char *restrict src);
1259  evalStrcpyCommon(C, CE,
1260                   /* returnEnd = */ true,
1261                   /* isBounded = */ false,
1262                   /* isAppending = */ false);
1263}
1264
1265void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1266  if (CE->getNumArgs() < 2)
1267    return;
1268
1269  //char *strcat(char *restrict s1, const char *restrict s2);
1270  evalStrcpyCommon(C, CE,
1271                   /* returnEnd = */ false,
1272                   /* isBounded = */ false,
1273                   /* isAppending = */ true);
1274}
1275
1276void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1277  if (CE->getNumArgs() < 3)
1278    return;
1279
1280  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1281  evalStrcpyCommon(C, CE,
1282                   /* returnEnd = */ false,
1283                   /* isBounded = */ true,
1284                   /* isAppending = */ true);
1285}
1286
1287void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1288                                      bool returnEnd, bool isBounded,
1289                                      bool isAppending) const {
1290  CurrentFunctionDescription = "string copy function";
1291  ProgramStateRef state = C.getState();
1292  const LocationContext *LCtx = C.getLocationContext();
1293
1294  // Check that the destination is non-null.
1295  const Expr *Dst = CE->getArg(0);
1296  SVal DstVal = state->getSVal(Dst, LCtx);
1297
1298  state = checkNonNull(C, state, Dst, DstVal);
1299  if (!state)
1300    return;
1301
1302  // Check that the source is non-null.
1303  const Expr *srcExpr = CE->getArg(1);
1304  SVal srcVal = state->getSVal(srcExpr, LCtx);
1305  state = checkNonNull(C, state, srcExpr, srcVal);
1306  if (!state)
1307    return;
1308
1309  // Get the string length of the source.
1310  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1311
1312  // If the source isn't a valid C string, give up.
1313  if (strLength.isUndef())
1314    return;
1315
1316  SValBuilder &svalBuilder = C.getSValBuilder();
1317  QualType cmpTy = svalBuilder.getConditionType();
1318  QualType sizeTy = svalBuilder.getContext().getSizeType();
1319
1320  // These two values allow checking two kinds of errors:
1321  // - actual overflows caused by a source that doesn't fit in the destination
1322  // - potential overflows caused by a bound that could exceed the destination
1323  SVal amountCopied = UnknownVal();
1324  SVal maxLastElementIndex = UnknownVal();
1325  const char *boundWarning = NULL;
1326
1327  // If the function is strncpy, strncat, etc... it is bounded.
1328  if (isBounded) {
1329    // Get the max number of characters to copy.
1330    const Expr *lenExpr = CE->getArg(2);
1331    SVal lenVal = state->getSVal(lenExpr, LCtx);
1332
1333    // Protect against misdeclared strncpy().
1334    lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1335
1336    NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1337    NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1338
1339    // If we know both values, we might be able to figure out how much
1340    // we're copying.
1341    if (strLengthNL && lenValNL) {
1342      ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1343
1344      // Check if the max number to copy is less than the length of the src.
1345      // If the bound is equal to the source length, strncpy won't null-
1346      // terminate the result!
1347      llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
1348        state->assume(cast<DefinedOrUnknownSVal>
1349                      (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL,
1350                                               *lenValNL, cmpTy)));
1351
1352      if (stateSourceTooLong && !stateSourceNotTooLong) {
1353        // Max number to copy is less than the length of the src, so the actual
1354        // strLength copied is the max number arg.
1355        state = stateSourceTooLong;
1356        amountCopied = lenVal;
1357
1358      } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1359        // The source buffer entirely fits in the bound.
1360        state = stateSourceNotTooLong;
1361        amountCopied = strLength;
1362      }
1363    }
1364
1365    // We still want to know if the bound is known to be too large.
1366    if (lenValNL) {
1367      if (isAppending) {
1368        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1369
1370        // Get the string length of the destination. If the destination is
1371        // memory that can't have a string length, we shouldn't be copying
1372        // into it anyway.
1373        SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1374        if (dstStrLength.isUndef())
1375          return;
1376
1377        if (NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength)) {
1378          maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1379                                                        *lenValNL,
1380                                                        *dstStrLengthNL,
1381                                                        sizeTy);
1382          boundWarning = "Size argument is greater than the free space in the "
1383                         "destination buffer";
1384        }
1385
1386      } else {
1387        // For strncpy, this is just checking that lenVal <= sizeof(dst)
1388        // (Yes, strncpy and strncat differ in how they treat termination.
1389        // strncat ALWAYS terminates, but strncpy doesn't.)
1390
1391        // We need a special case for when the copy size is zero, in which
1392        // case strncpy will do no work at all. Our bounds check uses n-1
1393        // as the last element accessed, so n == 0 is problematic.
1394        ProgramStateRef StateZeroSize, StateNonZeroSize;
1395        llvm::tie(StateZeroSize, StateNonZeroSize) =
1396          assumeZero(C, state, *lenValNL, sizeTy);
1397
1398        // If the size is known to be zero, we're done.
1399        if (StateZeroSize && !StateNonZeroSize) {
1400          StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1401          C.addTransition(StateZeroSize);
1402          return;
1403        }
1404
1405        // Otherwise, go ahead and figure out the last element we'll touch.
1406        // We don't record the non-zero assumption here because we can't
1407        // be sure. We won't warn on a possible zero.
1408        NonLoc one = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
1409        maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1410                                                      one, sizeTy);
1411        boundWarning = "Size argument is greater than the length of the "
1412                       "destination buffer";
1413      }
1414    }
1415
1416    // If we couldn't pin down the copy length, at least bound it.
1417    // FIXME: We should actually run this code path for append as well, but
1418    // right now it creates problems with constraints (since we can end up
1419    // trying to pass constraints from symbol to symbol).
1420    if (amountCopied.isUnknown() && !isAppending) {
1421      // Try to get a "hypothetical" string length symbol, which we can later
1422      // set as a real value if that turns out to be the case.
1423      amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1424      assert(!amountCopied.isUndef());
1425
1426      if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
1427        if (lenValNL) {
1428          // amountCopied <= lenVal
1429          SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1430                                                             *amountCopiedNL,
1431                                                             *lenValNL,
1432                                                             cmpTy);
1433          state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
1434                                true);
1435          if (!state)
1436            return;
1437        }
1438
1439        if (strLengthNL) {
1440          // amountCopied <= strlen(source)
1441          SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1442                                                           *amountCopiedNL,
1443                                                           *strLengthNL,
1444                                                           cmpTy);
1445          state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
1446                                true);
1447          if (!state)
1448            return;
1449        }
1450      }
1451    }
1452
1453  } else {
1454    // The function isn't bounded. The amount copied should match the length
1455    // of the source buffer.
1456    amountCopied = strLength;
1457  }
1458
1459  assert(state);
1460
1461  // This represents the number of characters copied into the destination
1462  // buffer. (It may not actually be the strlen if the destination buffer
1463  // is not terminated.)
1464  SVal finalStrLength = UnknownVal();
1465
1466  // If this is an appending function (strcat, strncat...) then set the
1467  // string length to strlen(src) + strlen(dst) since the buffer will
1468  // ultimately contain both.
1469  if (isAppending) {
1470    // Get the string length of the destination. If the destination is memory
1471    // that can't have a string length, we shouldn't be copying into it anyway.
1472    SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1473    if (dstStrLength.isUndef())
1474      return;
1475
1476    NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
1477    NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1478
1479    // If we know both string lengths, we might know the final string length.
1480    if (srcStrLengthNL && dstStrLengthNL) {
1481      // Make sure the two lengths together don't overflow a size_t.
1482      state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1483      if (!state)
1484        return;
1485
1486      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1487                                               *dstStrLengthNL, sizeTy);
1488    }
1489
1490    // If we couldn't get a single value for the final string length,
1491    // we can at least bound it by the individual lengths.
1492    if (finalStrLength.isUnknown()) {
1493      // Try to get a "hypothetical" string length symbol, which we can later
1494      // set as a real value if that turns out to be the case.
1495      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1496      assert(!finalStrLength.isUndef());
1497
1498      if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
1499        if (srcStrLengthNL) {
1500          // finalStrLength >= srcStrLength
1501          SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1502                                                        *finalStrLengthNL,
1503                                                        *srcStrLengthNL,
1504                                                        cmpTy);
1505          state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
1506                                true);
1507          if (!state)
1508            return;
1509        }
1510
1511        if (dstStrLengthNL) {
1512          // finalStrLength >= dstStrLength
1513          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1514                                                      *finalStrLengthNL,
1515                                                      *dstStrLengthNL,
1516                                                      cmpTy);
1517          state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
1518                                true);
1519          if (!state)
1520            return;
1521        }
1522      }
1523    }
1524
1525  } else {
1526    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1527    // the final string length will match the input string length.
1528    finalStrLength = amountCopied;
1529  }
1530
1531  // The final result of the function will either be a pointer past the last
1532  // copied element, or a pointer to the start of the destination buffer.
1533  SVal Result = (returnEnd ? UnknownVal() : DstVal);
1534
1535  assert(state);
1536
1537  // If the destination is a MemRegion, try to check for a buffer overflow and
1538  // record the new string length.
1539  if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1540    QualType ptrTy = Dst->getType();
1541
1542    // If we have an exact value on a bounded copy, use that to check for
1543    // overflows, rather than our estimate about how much is actually copied.
1544    if (boundWarning) {
1545      if (NonLoc *maxLastNL = dyn_cast<NonLoc>(&maxLastElementIndex)) {
1546        SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1547                                                      *maxLastNL, ptrTy);
1548        state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1549                              boundWarning);
1550        if (!state)
1551          return;
1552      }
1553    }
1554
1555    // Then, if the final length is known...
1556    if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
1557      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1558                                                 *knownStrLength, ptrTy);
1559
1560      // ...and we haven't checked the bound, we'll check the actual copy.
1561      if (!boundWarning) {
1562        const char * const warningMsg =
1563          "String copy function overflows destination buffer";
1564        state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1565        if (!state)
1566          return;
1567      }
1568
1569      // If this is a stpcpy-style copy, the last element is the return value.
1570      if (returnEnd)
1571        Result = lastElement;
1572    }
1573
1574    // Invalidate the destination. This must happen before we set the C string
1575    // length because invalidation will clear the length.
1576    // FIXME: Even if we can't perfectly model the copy, we should see if we
1577    // can use LazyCompoundVals to copy the source values into the destination.
1578    // This would probably remove any existing bindings past the end of the
1579    // string, but that's still an improvement over blank invalidation.
1580    state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1581
1582    // Set the C string length of the destination, if we know it.
1583    if (isBounded && !isAppending) {
1584      // strncpy is annoying in that it doesn't guarantee to null-terminate
1585      // the result string. If the original string didn't fit entirely inside
1586      // the bound (including the null-terminator), we don't know how long the
1587      // result is.
1588      if (amountCopied != strLength)
1589        finalStrLength = UnknownVal();
1590    }
1591    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1592  }
1593
1594  assert(state);
1595
1596  // If this is a stpcpy-style copy, but we were unable to check for a buffer
1597  // overflow, we still need a result. Conjure a return value.
1598  if (returnEnd && Result.isUnknown()) {
1599    Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1600  }
1601
1602  // Set the return value.
1603  state = state->BindExpr(CE, LCtx, Result);
1604  C.addTransition(state);
1605}
1606
1607void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1608  if (CE->getNumArgs() < 2)
1609    return;
1610
1611  //int strcmp(const char *s1, const char *s2);
1612  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1613}
1614
1615void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1616  if (CE->getNumArgs() < 3)
1617    return;
1618
1619  //int strncmp(const char *s1, const char *s2, size_t n);
1620  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1621}
1622
1623void CStringChecker::evalStrcasecmp(CheckerContext &C,
1624                                    const CallExpr *CE) const {
1625  if (CE->getNumArgs() < 2)
1626    return;
1627
1628  //int strcasecmp(const char *s1, const char *s2);
1629  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1630}
1631
1632void CStringChecker::evalStrncasecmp(CheckerContext &C,
1633                                     const CallExpr *CE) const {
1634  if (CE->getNumArgs() < 3)
1635    return;
1636
1637  //int strncasecmp(const char *s1, const char *s2, size_t n);
1638  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1639}
1640
1641void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1642                                      bool isBounded, bool ignoreCase) const {
1643  CurrentFunctionDescription = "string comparison function";
1644  ProgramStateRef state = C.getState();
1645  const LocationContext *LCtx = C.getLocationContext();
1646
1647  // Check that the first string is non-null
1648  const Expr *s1 = CE->getArg(0);
1649  SVal s1Val = state->getSVal(s1, LCtx);
1650  state = checkNonNull(C, state, s1, s1Val);
1651  if (!state)
1652    return;
1653
1654  // Check that the second string is non-null.
1655  const Expr *s2 = CE->getArg(1);
1656  SVal s2Val = state->getSVal(s2, LCtx);
1657  state = checkNonNull(C, state, s2, s2Val);
1658  if (!state)
1659    return;
1660
1661  // Get the string length of the first string or give up.
1662  SVal s1Length = getCStringLength(C, state, s1, s1Val);
1663  if (s1Length.isUndef())
1664    return;
1665
1666  // Get the string length of the second string or give up.
1667  SVal s2Length = getCStringLength(C, state, s2, s2Val);
1668  if (s2Length.isUndef())
1669    return;
1670
1671  // If we know the two buffers are the same, we know the result is 0.
1672  // First, get the two buffers' addresses. Another checker will have already
1673  // made sure they're not undefined.
1674  DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val);
1675  DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val);
1676
1677  // See if they are the same.
1678  SValBuilder &svalBuilder = C.getSValBuilder();
1679  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1680  ProgramStateRef StSameBuf, StNotSameBuf;
1681  llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1682
1683  // If the two arguments might be the same buffer, we know the result is 0,
1684  // and we only need to check one size.
1685  if (StSameBuf) {
1686    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1687                                    svalBuilder.makeZeroVal(CE->getType()));
1688    C.addTransition(StSameBuf);
1689
1690    // If the two arguments are GUARANTEED to be the same, we're done!
1691    if (!StNotSameBuf)
1692      return;
1693  }
1694
1695  assert(StNotSameBuf);
1696  state = StNotSameBuf;
1697
1698  // At this point we can go about comparing the two buffers.
1699  // For now, we only do this if they're both known string literals.
1700
1701  // Attempt to extract string literals from both expressions.
1702  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1703  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1704  bool canComputeResult = false;
1705
1706  if (s1StrLiteral && s2StrLiteral) {
1707    StringRef s1StrRef = s1StrLiteral->getString();
1708    StringRef s2StrRef = s2StrLiteral->getString();
1709
1710    if (isBounded) {
1711      // Get the max number of characters to compare.
1712      const Expr *lenExpr = CE->getArg(2);
1713      SVal lenVal = state->getSVal(lenExpr, LCtx);
1714
1715      // If the length is known, we can get the right substrings.
1716      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1717        // Create substrings of each to compare the prefix.
1718        s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1719        s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1720        canComputeResult = true;
1721      }
1722    } else {
1723      // This is a normal, unbounded strcmp.
1724      canComputeResult = true;
1725    }
1726
1727    if (canComputeResult) {
1728      // Real strcmp stops at null characters.
1729      size_t s1Term = s1StrRef.find('\0');
1730      if (s1Term != StringRef::npos)
1731        s1StrRef = s1StrRef.substr(0, s1Term);
1732
1733      size_t s2Term = s2StrRef.find('\0');
1734      if (s2Term != StringRef::npos)
1735        s2StrRef = s2StrRef.substr(0, s2Term);
1736
1737      // Use StringRef's comparison methods to compute the actual result.
1738      int result;
1739
1740      if (ignoreCase) {
1741        // Compare string 1 to string 2 the same way strcasecmp() does.
1742        result = s1StrRef.compare_lower(s2StrRef);
1743      } else {
1744        // Compare string 1 to string 2 the same way strcmp() does.
1745        result = s1StrRef.compare(s2StrRef);
1746      }
1747
1748      // Build the SVal of the comparison and bind the return value.
1749      SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1750      state = state->BindExpr(CE, LCtx, resultVal);
1751    }
1752  }
1753
1754  if (!canComputeResult) {
1755    // Conjure a symbolic value. It's the best we can do.
1756    SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1757    state = state->BindExpr(CE, LCtx, resultVal);
1758  }
1759
1760  // Record this as a possible path.
1761  C.addTransition(state);
1762}
1763
1764//===----------------------------------------------------------------------===//
1765// The driver method, and other Checker callbacks.
1766//===----------------------------------------------------------------------===//
1767
1768bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1769  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1770
1771  if (!FDecl)
1772    return false;
1773
1774  FnCheck evalFunction = 0;
1775  if (C.isCLibraryFunction(FDecl, "memcpy"))
1776    evalFunction =  &CStringChecker::evalMemcpy;
1777  else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1778    evalFunction =  &CStringChecker::evalMempcpy;
1779  else if (C.isCLibraryFunction(FDecl, "memcmp"))
1780    evalFunction =  &CStringChecker::evalMemcmp;
1781  else if (C.isCLibraryFunction(FDecl, "memmove"))
1782    evalFunction =  &CStringChecker::evalMemmove;
1783  else if (C.isCLibraryFunction(FDecl, "strcpy"))
1784    evalFunction =  &CStringChecker::evalStrcpy;
1785  else if (C.isCLibraryFunction(FDecl, "strncpy"))
1786    evalFunction =  &CStringChecker::evalStrncpy;
1787  else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1788    evalFunction =  &CStringChecker::evalStpcpy;
1789  else if (C.isCLibraryFunction(FDecl, "strcat"))
1790    evalFunction =  &CStringChecker::evalStrcat;
1791  else if (C.isCLibraryFunction(FDecl, "strncat"))
1792    evalFunction =  &CStringChecker::evalStrncat;
1793  else if (C.isCLibraryFunction(FDecl, "strlen"))
1794    evalFunction =  &CStringChecker::evalstrLength;
1795  else if (C.isCLibraryFunction(FDecl, "strnlen"))
1796    evalFunction =  &CStringChecker::evalstrnLength;
1797  else if (C.isCLibraryFunction(FDecl, "strcmp"))
1798    evalFunction =  &CStringChecker::evalStrcmp;
1799  else if (C.isCLibraryFunction(FDecl, "strncmp"))
1800    evalFunction =  &CStringChecker::evalStrncmp;
1801  else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1802    evalFunction =  &CStringChecker::evalStrcasecmp;
1803  else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1804    evalFunction =  &CStringChecker::evalStrncasecmp;
1805  else if (C.isCLibraryFunction(FDecl, "bcopy"))
1806    evalFunction =  &CStringChecker::evalBcopy;
1807  else if (C.isCLibraryFunction(FDecl, "bcmp"))
1808    evalFunction =  &CStringChecker::evalMemcmp;
1809
1810  // If the callee isn't a string function, let another checker handle it.
1811  if (!evalFunction)
1812    return false;
1813
1814  // Make sure each function sets its own description.
1815  // (But don't bother in a release build.)
1816  assert(!(CurrentFunctionDescription = NULL));
1817
1818  // Check and evaluate the call.
1819  (this->*evalFunction)(C, CE);
1820
1821  // If the evaluate call resulted in no change, chain to the next eval call
1822  // handler.
1823  // Note, the custom CString evaluation calls assume that basic safety
1824  // properties are held. However, if the user chooses to turn off some of these
1825  // checks, we ignore the issues and leave the call evaluation to a generic
1826  // handler.
1827  if (!C.isDifferent())
1828    return false;
1829
1830  return true;
1831}
1832
1833void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1834  // Record string length for char a[] = "abc";
1835  ProgramStateRef state = C.getState();
1836
1837  for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1838       I != E; ++I) {
1839    const VarDecl *D = dyn_cast<VarDecl>(*I);
1840    if (!D)
1841      continue;
1842
1843    // FIXME: Handle array fields of structs.
1844    if (!D->getType()->isArrayType())
1845      continue;
1846
1847    const Expr *Init = D->getInit();
1848    if (!Init)
1849      continue;
1850    if (!isa<StringLiteral>(Init))
1851      continue;
1852
1853    Loc VarLoc = state->getLValue(D, C.getLocationContext());
1854    const MemRegion *MR = VarLoc.getAsRegion();
1855    if (!MR)
1856      continue;
1857
1858    SVal StrVal = state->getSVal(Init, C.getLocationContext());
1859    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1860    DefinedOrUnknownSVal strLength
1861      = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1862
1863    state = state->set<CStringLength>(MR, strLength);
1864  }
1865
1866  C.addTransition(state);
1867}
1868
1869bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1870  CStringLengthTy Entries = state->get<CStringLength>();
1871  return !Entries.isEmpty();
1872}
1873
1874ProgramStateRef
1875CStringChecker::checkRegionChanges(ProgramStateRef state,
1876                                   const StoreManager::InvalidatedSymbols *,
1877                                   ArrayRef<const MemRegion *> ExplicitRegions,
1878                                   ArrayRef<const MemRegion *> Regions,
1879                                   const CallEvent *Call) const {
1880  CStringLengthTy Entries = state->get<CStringLength>();
1881  if (Entries.isEmpty())
1882    return state;
1883
1884  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1885  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1886
1887  // First build sets for the changed regions and their super-regions.
1888  for (ArrayRef<const MemRegion *>::iterator
1889       I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1890    const MemRegion *MR = *I;
1891    Invalidated.insert(MR);
1892
1893    SuperRegions.insert(MR);
1894    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1895      MR = SR->getSuperRegion();
1896      SuperRegions.insert(MR);
1897    }
1898  }
1899
1900  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
1901
1902  // Then loop over the entries in the current state.
1903  for (CStringLengthTy::iterator I = Entries.begin(),
1904       E = Entries.end(); I != E; ++I) {
1905    const MemRegion *MR = I.getKey();
1906
1907    // Is this entry for a super-region of a changed region?
1908    if (SuperRegions.count(MR)) {
1909      Entries = F.remove(Entries, MR);
1910      continue;
1911    }
1912
1913    // Is this entry for a sub-region of a changed region?
1914    const MemRegion *Super = MR;
1915    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1916      Super = SR->getSuperRegion();
1917      if (Invalidated.count(Super)) {
1918        Entries = F.remove(Entries, MR);
1919        break;
1920      }
1921    }
1922  }
1923
1924  return state->set<CStringLength>(Entries);
1925}
1926
1927void CStringChecker::checkLiveSymbols(ProgramStateRef state,
1928                                      SymbolReaper &SR) const {
1929  // Mark all symbols in our string length map as valid.
1930  CStringLengthTy Entries = state->get<CStringLength>();
1931
1932  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
1933       I != E; ++I) {
1934    SVal Len = I.getData();
1935
1936    for (SymExpr::symbol_iterator si = Len.symbol_begin(),
1937                                  se = Len.symbol_end(); si != se; ++si)
1938      SR.markInUse(*si);
1939  }
1940}
1941
1942void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1943                                      CheckerContext &C) const {
1944  if (!SR.hasDeadSymbols())
1945    return;
1946
1947  ProgramStateRef state = C.getState();
1948  CStringLengthTy Entries = state->get<CStringLength>();
1949  if (Entries.isEmpty())
1950    return;
1951
1952  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
1953  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
1954       I != E; ++I) {
1955    SVal Len = I.getData();
1956    if (SymbolRef Sym = Len.getAsSymbol()) {
1957      if (SR.isDead(Sym))
1958        Entries = F.remove(Entries, I.getKey());
1959    }
1960  }
1961
1962  state = state->set<CStringLength>(Entries);
1963  C.addTransition(state);
1964}
1965
1966#define REGISTER_CHECKER(name) \
1967void ento::register##name(CheckerManager &mgr) {\
1968  static CStringChecker *TheChecker = 0; \
1969  if (TheChecker == 0) \
1970    TheChecker = mgr.registerChecker<CStringChecker>(); \
1971  TheChecker->Filter.Check##name = true; \
1972}
1973
1974REGISTER_CHECKER(CStringNullArg)
1975REGISTER_CHECKER(CStringOutOfBounds)
1976REGISTER_CHECKER(CStringBufferOverlap)
1977REGISTER_CHECKER(CStringNotNullTerm)
1978
1979void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
1980  registerCStringNullArg(Mgr);
1981}
1982