CStringChecker.cpp revision 651f13cea278ec967336033dd032faef0e9fc2ec
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This defines CStringChecker, which is an assortment of checks on calls
11// to functions in <string.h>.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ClangSACheckers.h"
16#include "InterCheckerAPI.h"
17#include "clang/Basic/CharInfo.h"
18#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19#include "clang/StaticAnalyzer/Core/Checker.h"
20#include "clang/StaticAnalyzer/Core/CheckerManager.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringSwitch.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace clang;
29using namespace ento;
30
31namespace {
32class CStringChecker : public Checker< eval::Call,
33                                         check::PreStmt<DeclStmt>,
34                                         check::LiveSymbols,
35                                         check::DeadSymbols,
36                                         check::RegionChanges
37                                         > {
38  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
39      BT_NotCString, BT_AdditionOverflow;
40
41  mutable const char *CurrentFunctionDescription;
42
43public:
44  /// The filter is used to filter out the diagnostics which are not enabled by
45  /// the user.
46  struct CStringChecksFilter {
47    DefaultBool CheckCStringNullArg;
48    DefaultBool CheckCStringOutOfBounds;
49    DefaultBool CheckCStringBufferOverlap;
50    DefaultBool CheckCStringNotNullTerm;
51
52    CheckName CheckNameCStringNullArg;
53    CheckName CheckNameCStringOutOfBounds;
54    CheckName CheckNameCStringBufferOverlap;
55    CheckName CheckNameCStringNotNullTerm;
56  };
57
58  CStringChecksFilter Filter;
59
60  static void *getTag() { static int tag; return &tag; }
61
62  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
63  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
64  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
65  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
66  bool wantsRegionChangeUpdate(ProgramStateRef state) const;
67
68  ProgramStateRef
69    checkRegionChanges(ProgramStateRef state,
70                       const InvalidatedSymbols *,
71                       ArrayRef<const MemRegion *> ExplicitRegions,
72                       ArrayRef<const MemRegion *> Regions,
73                       const CallEvent *Call) const;
74
75  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
76                                          const CallExpr *) const;
77
78  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
79  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
80  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
81  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
82  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
83                      ProgramStateRef state,
84                      const Expr *Size,
85                      const Expr *Source,
86                      const Expr *Dest,
87                      bool Restricted = false,
88                      bool IsMempcpy = false) const;
89
90  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
91
92  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
93  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
94  void evalstrLengthCommon(CheckerContext &C,
95                           const CallExpr *CE,
96                           bool IsStrnlen = false) const;
97
98  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
99  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
100  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
101  void evalStrcpyCommon(CheckerContext &C,
102                        const CallExpr *CE,
103                        bool returnEnd,
104                        bool isBounded,
105                        bool isAppending) const;
106
107  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
108  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
109
110  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
111  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
112  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
113  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
114  void evalStrcmpCommon(CheckerContext &C,
115                        const CallExpr *CE,
116                        bool isBounded = false,
117                        bool ignoreCase = false) const;
118
119  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
120
121  // Utility methods
122  std::pair<ProgramStateRef , ProgramStateRef >
123  static assumeZero(CheckerContext &C,
124                    ProgramStateRef state, SVal V, QualType Ty);
125
126  static ProgramStateRef setCStringLength(ProgramStateRef state,
127                                              const MemRegion *MR,
128                                              SVal strLength);
129  static SVal getCStringLengthForRegion(CheckerContext &C,
130                                        ProgramStateRef &state,
131                                        const Expr *Ex,
132                                        const MemRegion *MR,
133                                        bool hypothetical);
134  SVal getCStringLength(CheckerContext &C,
135                        ProgramStateRef &state,
136                        const Expr *Ex,
137                        SVal Buf,
138                        bool hypothetical = false) const;
139
140  const StringLiteral *getCStringLiteral(CheckerContext &C,
141                                         ProgramStateRef &state,
142                                         const Expr *expr,
143                                         SVal val) const;
144
145  static ProgramStateRef InvalidateBuffer(CheckerContext &C,
146                                          ProgramStateRef state,
147                                          const Expr *Ex, SVal V,
148                                          bool IsSourceBuffer);
149
150  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
151                              const MemRegion *MR);
152
153  // Re-usable checks
154  ProgramStateRef checkNonNull(CheckerContext &C,
155                                   ProgramStateRef state,
156                                   const Expr *S,
157                                   SVal l) const;
158  ProgramStateRef CheckLocation(CheckerContext &C,
159                                    ProgramStateRef state,
160                                    const Expr *S,
161                                    SVal l,
162                                    const char *message = NULL) const;
163  ProgramStateRef CheckBufferAccess(CheckerContext &C,
164                                        ProgramStateRef state,
165                                        const Expr *Size,
166                                        const Expr *FirstBuf,
167                                        const Expr *SecondBuf,
168                                        const char *firstMessage = NULL,
169                                        const char *secondMessage = NULL,
170                                        bool WarnAboutSize = false) const;
171
172  ProgramStateRef CheckBufferAccess(CheckerContext &C,
173                                        ProgramStateRef state,
174                                        const Expr *Size,
175                                        const Expr *Buf,
176                                        const char *message = NULL,
177                                        bool WarnAboutSize = false) const {
178    // This is a convenience override.
179    return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL,
180                             WarnAboutSize);
181  }
182  ProgramStateRef CheckOverlap(CheckerContext &C,
183                                   ProgramStateRef state,
184                                   const Expr *Size,
185                                   const Expr *First,
186                                   const Expr *Second) const;
187  void emitOverlapBug(CheckerContext &C,
188                      ProgramStateRef state,
189                      const Stmt *First,
190                      const Stmt *Second) const;
191
192  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
193                                            ProgramStateRef state,
194                                            NonLoc left,
195                                            NonLoc right) const;
196};
197
198} //end anonymous namespace
199
// Program-state map from a memory region to the SVal recorded as the length
// of the C string stored in that region.
200REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
201
202//===----------------------------------------------------------------------===//
203// Individual checks and utility methods.
204//===----------------------------------------------------------------------===//
205
206std::pair<ProgramStateRef , ProgramStateRef >
207CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
208                           QualType Ty) {
209  Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
210  if (!val)
211    return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
212
213  SValBuilder &svalBuilder = C.getSValBuilder();
214  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
215  return state->assume(svalBuilder.evalEQ(state, *val, zero));
216}
217
218ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
219                                            ProgramStateRef state,
220                                            const Expr *S, SVal l) const {
221  // If a previous check has failed, propagate the failure.
222  if (!state)
223    return NULL;
224
225  ProgramStateRef stateNull, stateNonNull;
226  std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
227
228  if (stateNull && !stateNonNull) {
229    if (!Filter.CheckCStringNullArg)
230      return NULL;
231
232    ExplodedNode *N = C.generateSink(stateNull);
233    if (!N)
234      return NULL;
235
236    if (!BT_Null)
237      BT_Null.reset(new BuiltinBug(
238          Filter.CheckNameCStringNullArg, categories::UnixAPI,
239          "Null pointer argument in call to byte string function"));
240
241    SmallString<80> buf;
242    llvm::raw_svector_ostream os(buf);
243    assert(CurrentFunctionDescription);
244    os << "Null pointer argument in call to " << CurrentFunctionDescription;
245
246    // Generate a report for this bug.
247    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
248    BugReport *report = new BugReport(*BT, os.str(), N);
249
250    report->addRange(S->getSourceRange());
251    bugreporter::trackNullOrUndefValue(N, S, *report);
252    C.emitReport(report);
253    return NULL;
254  }
255
256  // From here on, assume that the value is non-null.
257  assert(stateNonNull);
258  return stateNonNull;
259}
260
261// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
262ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
263                                             ProgramStateRef state,
264                                             const Expr *S, SVal l,
265                                             const char *warningMsg) const {
266  // If a previous check has failed, propagate the failure.
267  if (!state)
268    return NULL;
269
270  // Check for out of bound array element access.
271  const MemRegion *R = l.getAsRegion();
272  if (!R)
273    return state;
274
275  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
276  if (!ER)
277    return state;
278
279  assert(ER->getValueType() == C.getASTContext().CharTy &&
280    "CheckLocation should only be called with char* ElementRegions");
281
282  // Get the size of the array.
283  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
284  SValBuilder &svalBuilder = C.getSValBuilder();
285  SVal Extent =
286    svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
287  DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
288
289  // Get the index of the accessed element.
290  DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
291
292  ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
293  ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
294  if (StOutBound && !StInBound) {
295    ExplodedNode *N = C.generateSink(StOutBound);
296    if (!N)
297      return NULL;
298
299    if (!BT_Bounds) {
300      BT_Bounds.reset(new BuiltinBug(
301          Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
302          "Byte string function accesses out-of-bound array element"));
303    }
304    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
305
306    // Generate a report for this bug.
307    BugReport *report;
308    if (warningMsg) {
309      report = new BugReport(*BT, warningMsg, N);
310    } else {
311      assert(CurrentFunctionDescription);
312      assert(CurrentFunctionDescription[0] != '\0');
313
314      SmallString<80> buf;
315      llvm::raw_svector_ostream os(buf);
316      os << toUppercase(CurrentFunctionDescription[0])
317         << &CurrentFunctionDescription[1]
318         << " accesses out-of-bound array element";
319      report = new BugReport(*BT, os.str(), N);
320    }
321
322    // FIXME: It would be nice to eventually make this diagnostic more clear,
323    // e.g., by referencing the original declaration or by saying *why* this
324    // reference is outside the range.
325
326    report->addRange(S->getSourceRange());
327    C.emitReport(report);
328    return NULL;
329  }
330
331  // Array bound check succeeded.  From this point forward the array bound
332  // should always succeed.
333  return StInBound;
334}
335
336ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
337                                                 ProgramStateRef state,
338                                                 const Expr *Size,
339                                                 const Expr *FirstBuf,
340                                                 const Expr *SecondBuf,
341                                                 const char *firstMessage,
342                                                 const char *secondMessage,
343                                                 bool WarnAboutSize) const {
344  // If a previous check has failed, propagate the failure.
345  if (!state)
346    return NULL;
347
348  SValBuilder &svalBuilder = C.getSValBuilder();
349  ASTContext &Ctx = svalBuilder.getContext();
350  const LocationContext *LCtx = C.getLocationContext();
351
352  QualType sizeTy = Size->getType();
353  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
354
355  // Check that the first buffer is non-null.
356  SVal BufVal = state->getSVal(FirstBuf, LCtx);
357  state = checkNonNull(C, state, FirstBuf, BufVal);
358  if (!state)
359    return NULL;
360
361  // If out-of-bounds checking is turned off, skip the rest.
362  if (!Filter.CheckCStringOutOfBounds)
363    return state;
364
365  // Get the access length and make sure it is known.
366  // FIXME: This assumes the caller has already checked that the access length
367  // is positive. And that it's unsigned.
368  SVal LengthVal = state->getSVal(Size, LCtx);
369  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
370  if (!Length)
371    return state;
372
373  // Compute the offset of the last element to be accessed: size-1.
374  NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
375  NonLoc LastOffset = svalBuilder
376      .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
377
378  // Check that the first buffer is sufficiently long.
379  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
380  if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
381    const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
382
383    SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
384                                          LastOffset, PtrTy);
385    state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
386
387    // If the buffer isn't large enough, abort.
388    if (!state)
389      return NULL;
390  }
391
392  // If there's a second buffer, check it as well.
393  if (SecondBuf) {
394    BufVal = state->getSVal(SecondBuf, LCtx);
395    state = checkNonNull(C, state, SecondBuf, BufVal);
396    if (!state)
397      return NULL;
398
399    BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
400    if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
401      const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
402
403      SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
404                                            LastOffset, PtrTy);
405      state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
406    }
407  }
408
409  // Large enough or not, return this state!
410  return state;
411}
412
413ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
414                                            ProgramStateRef state,
415                                            const Expr *Size,
416                                            const Expr *First,
417                                            const Expr *Second) const {
418  if (!Filter.CheckCStringBufferOverlap)
419    return state;
420
421  // Do a simple check for overlap: if the two arguments are from the same
422  // buffer, see if the end of the first is greater than the start of the second
423  // or vice versa.
424
425  // If a previous check has failed, propagate the failure.
426  if (!state)
427    return NULL;
428
429  ProgramStateRef stateTrue, stateFalse;
430
431  // Get the buffer values and make sure they're known locations.
432  const LocationContext *LCtx = C.getLocationContext();
433  SVal firstVal = state->getSVal(First, LCtx);
434  SVal secondVal = state->getSVal(Second, LCtx);
435
436  Optional<Loc> firstLoc = firstVal.getAs<Loc>();
437  if (!firstLoc)
438    return state;
439
440  Optional<Loc> secondLoc = secondVal.getAs<Loc>();
441  if (!secondLoc)
442    return state;
443
444  // Are the two values the same?
445  SValBuilder &svalBuilder = C.getSValBuilder();
446  std::tie(stateTrue, stateFalse) =
447    state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
448
449  if (stateTrue && !stateFalse) {
450    // If the values are known to be equal, that's automatically an overlap.
451    emitOverlapBug(C, stateTrue, First, Second);
452    return NULL;
453  }
454
455  // assume the two expressions are not equal.
456  assert(stateFalse);
457  state = stateFalse;
458
459  // Which value comes first?
460  QualType cmpTy = svalBuilder.getConditionType();
461  SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
462                                         *firstLoc, *secondLoc, cmpTy);
463  Optional<DefinedOrUnknownSVal> reverseTest =
464      reverse.getAs<DefinedOrUnknownSVal>();
465  if (!reverseTest)
466    return state;
467
468  std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
469  if (stateTrue) {
470    if (stateFalse) {
471      // If we don't know which one comes first, we can't perform this test.
472      return state;
473    } else {
474      // Switch the values so that firstVal is before secondVal.
475      std::swap(firstLoc, secondLoc);
476
477      // Switch the Exprs as well, so that they still correspond.
478      std::swap(First, Second);
479    }
480  }
481
482  // Get the length, and make sure it too is known.
483  SVal LengthVal = state->getSVal(Size, LCtx);
484  Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
485  if (!Length)
486    return state;
487
488  // Convert the first buffer's start address to char*.
489  // Bail out if the cast fails.
490  ASTContext &Ctx = svalBuilder.getContext();
491  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
492  SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
493                                         First->getType());
494  Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
495  if (!FirstStartLoc)
496    return state;
497
498  // Compute the end of the first buffer. Bail out if THAT fails.
499  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
500                                 *FirstStartLoc, *Length, CharPtrTy);
501  Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
502  if (!FirstEndLoc)
503    return state;
504
505  // Is the end of the first buffer past the start of the second buffer?
506  SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
507                                *FirstEndLoc, *secondLoc, cmpTy);
508  Optional<DefinedOrUnknownSVal> OverlapTest =
509      Overlap.getAs<DefinedOrUnknownSVal>();
510  if (!OverlapTest)
511    return state;
512
513  std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
514
515  if (stateTrue && !stateFalse) {
516    // Overlap!
517    emitOverlapBug(C, stateTrue, First, Second);
518    return NULL;
519  }
520
521  // assume the two expressions don't overlap.
522  assert(stateFalse);
523  return stateFalse;
524}
525
526void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
527                                  const Stmt *First, const Stmt *Second) const {
528  ExplodedNode *N = C.generateSink(state);
529  if (!N)
530    return;
531
532  if (!BT_Overlap)
533    BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
534                                 categories::UnixAPI, "Improper arguments"));
535
536  // Generate a report for this bug.
537  BugReport *report =
538    new BugReport(*BT_Overlap,
539      "Arguments must not be overlapping buffers", N);
540  report->addRange(First->getSourceRange());
541  report->addRange(Second->getSourceRange());
542
543  C.emitReport(report);
544}
545
546ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
547                                                     ProgramStateRef state,
548                                                     NonLoc left,
549                                                     NonLoc right) const {
550  // If out-of-bounds checking is turned off, skip the rest.
551  if (!Filter.CheckCStringOutOfBounds)
552    return state;
553
554  // If a previous check has failed, propagate the failure.
555  if (!state)
556    return NULL;
557
558  SValBuilder &svalBuilder = C.getSValBuilder();
559  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
560
561  QualType sizeTy = svalBuilder.getContext().getSizeType();
562  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
563  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
564
565  SVal maxMinusRight;
566  if (right.getAs<nonloc::ConcreteInt>()) {
567    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
568                                                 sizeTy);
569  } else {
570    // Try switching the operands. (The order of these two assignments is
571    // important!)
572    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
573                                            sizeTy);
574    left = right;
575  }
576
577  if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
578    QualType cmpTy = svalBuilder.getConditionType();
579    // If left > max - right, we have an overflow.
580    SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
581                                                *maxMinusRightNL, cmpTy);
582
583    ProgramStateRef stateOverflow, stateOkay;
584    std::tie(stateOverflow, stateOkay) =
585      state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
586
587    if (stateOverflow && !stateOkay) {
588      // We have an overflow. Emit a bug report.
589      ExplodedNode *N = C.generateSink(stateOverflow);
590      if (!N)
591        return NULL;
592
593      if (!BT_AdditionOverflow)
594        BT_AdditionOverflow.reset(
595            new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
596                           "Sum of expressions causes overflow"));
597
598      // This isn't a great error message, but this should never occur in real
599      // code anyway -- you'd have to create a buffer longer than a size_t can
600      // represent, which is sort of a contradiction.
601      const char *warning =
602        "This expression will create a string whose length is too big to "
603        "be represented as a size_t";
604
605      // Generate a report for this bug.
606      BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
607      C.emitReport(report);
608
609      return NULL;
610    }
611
612    // From now on, assume an overflow didn't occur.
613    assert(stateOkay);
614    state = stateOkay;
615  }
616
617  return state;
618}
619
620ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
621                                                const MemRegion *MR,
622                                                SVal strLength) {
623  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
624
625  MR = MR->StripCasts();
626
627  switch (MR->getKind()) {
628  case MemRegion::StringRegionKind:
629    // FIXME: This can happen if we strcpy() into a string region. This is
630    // undefined [C99 6.4.5p6], but we should still warn about it.
631    return state;
632
633  case MemRegion::SymbolicRegionKind:
634  case MemRegion::AllocaRegionKind:
635  case MemRegion::VarRegionKind:
636  case MemRegion::FieldRegionKind:
637  case MemRegion::ObjCIvarRegionKind:
638    // These are the types we can currently track string lengths for.
639    break;
640
641  case MemRegion::ElementRegionKind:
642    // FIXME: Handle element regions by upper-bounding the parent region's
643    // string length.
644    return state;
645
646  default:
647    // Other regions (mostly non-data) can't have a reliable C string length.
648    // For now, just ignore the change.
649    // FIXME: These are rare but not impossible. We should output some kind of
650    // warning for things like strcpy((char[]){'a', 0}, "b");
651    return state;
652  }
653
654  if (strLength.isUnknown())
655    return state->remove<CStringLength>(MR);
656
657  return state->set<CStringLength>(MR, strLength);
658}
659
660SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
661                                               ProgramStateRef &state,
662                                               const Expr *Ex,
663                                               const MemRegion *MR,
664                                               bool hypothetical) {
665  if (!hypothetical) {
666    // If there's a recorded length, go ahead and return it.
667    const SVal *Recorded = state->get<CStringLength>(MR);
668    if (Recorded)
669      return *Recorded;
670  }
671
672  // Otherwise, get a new symbol and update the state.
673  SValBuilder &svalBuilder = C.getSValBuilder();
674  QualType sizeTy = svalBuilder.getContext().getSizeType();
675  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
676                                                    MR, Ex, sizeTy,
677                                                    C.blockCount());
678
679  if (!hypothetical) {
680    if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
681      // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
682      BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
683      const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
684      llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
685      const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
686                                                        fourInt);
687      NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
688      SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
689                                                maxLength, sizeTy);
690      state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
691    }
692    state = state->set<CStringLength>(MR, strLength);
693  }
694
695  return strLength;
696}
697
698SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
699                                      const Expr *Ex, SVal Buf,
700                                      bool hypothetical) const {
701  const MemRegion *MR = Buf.getAsRegion();
702  if (!MR) {
703    // If we can't get a region, see if it's something we /know/ isn't a
704    // C string. In the context of locations, the only time we can issue such
705    // a warning is for labels.
706    if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
707      if (!Filter.CheckCStringNotNullTerm)
708        return UndefinedVal();
709
710      if (ExplodedNode *N = C.addTransition(state)) {
711        if (!BT_NotCString)
712          BT_NotCString.reset(new BuiltinBug(
713              Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
714              "Argument is not a null-terminated string."));
715
716        SmallString<120> buf;
717        llvm::raw_svector_ostream os(buf);
718        assert(CurrentFunctionDescription);
719        os << "Argument to " << CurrentFunctionDescription
720           << " is the address of the label '" << Label->getLabel()->getName()
721           << "', which is not a null-terminated string";
722
723        // Generate a report for this bug.
724        BugReport *report = new BugReport(*BT_NotCString, os.str(), N);
725
726        report->addRange(Ex->getSourceRange());
727        C.emitReport(report);
728      }
729      return UndefinedVal();
730
731    }
732
733    // If it's not a region and not a label, give up.
734    return UnknownVal();
735  }
736
737  // If we have a region, strip casts from it and see if we can figure out
738  // its length. For anything we can't figure out, just return UnknownVal.
739  MR = MR->StripCasts();
740
741  switch (MR->getKind()) {
742  case MemRegion::StringRegionKind: {
743    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
744    // so we can assume that the byte length is the correct C string length.
745    SValBuilder &svalBuilder = C.getSValBuilder();
746    QualType sizeTy = svalBuilder.getContext().getSizeType();
747    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
748    return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
749  }
750  case MemRegion::SymbolicRegionKind:
751  case MemRegion::AllocaRegionKind:
752  case MemRegion::VarRegionKind:
753  case MemRegion::FieldRegionKind:
754  case MemRegion::ObjCIvarRegionKind:
755    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
756  case MemRegion::CompoundLiteralRegionKind:
757    // FIXME: Can we track this? Is it necessary?
758    return UnknownVal();
759  case MemRegion::ElementRegionKind:
760    // FIXME: How can we handle this? It's not good enough to subtract the
761    // offset from the base string length; consider "123\x00567" and &a[5].
762    return UnknownVal();
763  default:
764    // Other regions (mostly non-data) can't have a reliable C string length.
765    // In this case, an error is emitted and UndefinedVal is returned.
766    // The caller should always be prepared to handle this case.
767    if (!Filter.CheckCStringNotNullTerm)
768      return UndefinedVal();
769
770    if (ExplodedNode *N = C.addTransition(state)) {
771      if (!BT_NotCString)
772        BT_NotCString.reset(new BuiltinBug(
773            Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
774            "Argument is not a null-terminated string."));
775
776      SmallString<120> buf;
777      llvm::raw_svector_ostream os(buf);
778
779      assert(CurrentFunctionDescription);
780      os << "Argument to " << CurrentFunctionDescription << " is ";
781
782      if (SummarizeRegion(os, C.getASTContext(), MR))
783        os << ", which is not a null-terminated string";
784      else
785        os << "not a null-terminated string";
786
787      // Generate a report for this bug.
788      BugReport *report = new BugReport(*BT_NotCString,
789                                                        os.str(), N);
790
791      report->addRange(Ex->getSourceRange());
792      C.emitReport(report);
793    }
794
795    return UndefinedVal();
796  }
797}
798
799const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
800  ProgramStateRef &state, const Expr *expr, SVal val) const {
801
802  // Get the memory region pointed to by the val.
803  const MemRegion *bufRegion = val.getAsRegion();
804  if (!bufRegion)
805    return NULL;
806
807  // Strip casts off the memory region.
808  bufRegion = bufRegion->StripCasts();
809
810  // Cast the memory region to a string region.
811  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
812  if (!strRegion)
813    return NULL;
814
815  // Return the actual string in the string region.
816  return strRegion->getStringLiteral();
817}
818
819ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
820                                                 ProgramStateRef state,
821                                                 const Expr *E, SVal V,
822                                                 bool IsSourceBuffer) {
823  Optional<Loc> L = V.getAs<Loc>();
824  if (!L)
825    return state;
826
827  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
828  // some assumptions about the value that CFRefCount can't. Even so, it should
829  // probably be refactored.
830  if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
831    const MemRegion *R = MR->getRegion()->StripCasts();
832
833    // Are we dealing with an ElementRegion?  If so, we should be invalidating
834    // the super-region.
835    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
836      R = ER->getSuperRegion();
837      // FIXME: What about layers of ElementRegions?
838    }
839
840    // Invalidate this region.
841    const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
842
843    bool CausesPointerEscape = false;
844    RegionAndSymbolInvalidationTraits ITraits;
845    // Invalidate and escape only indirect regions accessible through the source
846    // buffer.
847    if (IsSourceBuffer) {
848      ITraits.setTrait(R,
849                       RegionAndSymbolInvalidationTraits::TK_PreserveContents);
850      ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
851      CausesPointerEscape = true;
852    }
853
854    return state->invalidateRegions(R, E, C.blockCount(), LCtx,
855                                    CausesPointerEscape, 0, 0, &ITraits);
856  }
857
858  // If we have a non-region value by chance, just remove the binding.
859  // FIXME: is this necessary or correct? This handles the non-Region
860  //  cases.  Is it ever valid to store to these?
861  return state->killBinding(*L);
862}
863
864bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
865                                     const MemRegion *MR) {
866  const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
867
868  switch (MR->getKind()) {
869  case MemRegion::FunctionTextRegionKind: {
870    const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
871    if (FD)
872      os << "the address of the function '" << *FD << '\'';
873    else
874      os << "the address of a function";
875    return true;
876  }
877  case MemRegion::BlockTextRegionKind:
878    os << "block text";
879    return true;
880  case MemRegion::BlockDataRegionKind:
881    os << "a block";
882    return true;
883  case MemRegion::CXXThisRegionKind:
884  case MemRegion::CXXTempObjectRegionKind:
885    os << "a C++ temp object of type " << TVR->getValueType().getAsString();
886    return true;
887  case MemRegion::VarRegionKind:
888    os << "a variable of type" << TVR->getValueType().getAsString();
889    return true;
890  case MemRegion::FieldRegionKind:
891    os << "a field of type " << TVR->getValueType().getAsString();
892    return true;
893  case MemRegion::ObjCIvarRegionKind:
894    os << "an instance variable of type " << TVR->getValueType().getAsString();
895    return true;
896  default:
897    return false;
898  }
899}
900
901//===----------------------------------------------------------------------===//
902// evaluation of individual function calls.
903//===----------------------------------------------------------------------===//
904
905void CStringChecker::evalCopyCommon(CheckerContext &C,
906                                    const CallExpr *CE,
907                                    ProgramStateRef state,
908                                    const Expr *Size, const Expr *Dest,
909                                    const Expr *Source, bool Restricted,
910                                    bool IsMempcpy) const {
911  CurrentFunctionDescription = "memory copy function";
912
913  // See if the size argument is zero.
914  const LocationContext *LCtx = C.getLocationContext();
915  SVal sizeVal = state->getSVal(Size, LCtx);
916  QualType sizeTy = Size->getType();
917
918  ProgramStateRef stateZeroSize, stateNonZeroSize;
919  std::tie(stateZeroSize, stateNonZeroSize) =
920    assumeZero(C, state, sizeVal, sizeTy);
921
922  // Get the value of the Dest.
923  SVal destVal = state->getSVal(Dest, LCtx);
924
925  // If the size is zero, there won't be any actual memory access, so
926  // just bind the return value to the destination buffer and return.
927  if (stateZeroSize && !stateNonZeroSize) {
928    stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
929    C.addTransition(stateZeroSize);
930    return;
931  }
932
933  // If the size can be nonzero, we have to check the other arguments.
934  if (stateNonZeroSize) {
935    state = stateNonZeroSize;
936
937    // Ensure the destination is not null. If it is NULL there will be a
938    // NULL pointer dereference.
939    state = checkNonNull(C, state, Dest, destVal);
940    if (!state)
941      return;
942
943    // Get the value of the Src.
944    SVal srcVal = state->getSVal(Source, LCtx);
945
946    // Ensure the source is not null. If it is NULL there will be a
947    // NULL pointer dereference.
948    state = checkNonNull(C, state, Source, srcVal);
949    if (!state)
950      return;
951
952    // Ensure the accesses are valid and that the buffers do not overlap.
953    const char * const writeWarning =
954      "Memory copy function overflows destination buffer";
955    state = CheckBufferAccess(C, state, Size, Dest, Source,
956                              writeWarning, /* sourceWarning = */ NULL);
957    if (Restricted)
958      state = CheckOverlap(C, state, Size, Dest, Source);
959
960    if (!state)
961      return;
962
963    // If this is mempcpy, get the byte after the last byte copied and
964    // bind the expr.
965    if (IsMempcpy) {
966      loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
967
968      // Get the length to copy.
969      if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
970        // Get the byte after the last byte copied.
971        SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
972                                                          destRegVal,
973                                                          *lenValNonLoc,
974                                                          Dest->getType());
975
976        // The byte after the last byte copied is the return value.
977        state = state->BindExpr(CE, LCtx, lastElement);
978      } else {
979        // If we don't know how much we copied, we can at least
980        // conjure a return value for later.
981        SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx,
982                                                          C.blockCount());
983        state = state->BindExpr(CE, LCtx, result);
984      }
985
986    } else {
987      // All other copies return the destination buffer.
988      // (Well, bcopy() has a void return type, but this won't hurt.)
989      state = state->BindExpr(CE, LCtx, destVal);
990    }
991
992    // Invalidate the destination (regular invalidation without pointer-escaping
993    // the address of the top-level region).
994    // FIXME: Even if we can't perfectly model the copy, we should see if we
995    // can use LazyCompoundVals to copy the source values into the destination.
996    // This would probably remove any existing bindings past the end of the
997    // copied region, but that's still an improvement over blank invalidation.
998    state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
999                             /*IsSourceBuffer*/false);
1000
1001    // Invalidate the source (const-invalidation without const-pointer-escaping
1002    // the address of the top-level region).
1003    state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1004                             /*IsSourceBuffer*/true);
1005
1006    C.addTransition(state);
1007  }
1008}
1009
1010
1011void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1012  if (CE->getNumArgs() < 3)
1013    return;
1014
1015  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1016  // The return value is the address of the destination buffer.
1017  const Expr *Dest = CE->getArg(0);
1018  ProgramStateRef state = C.getState();
1019
1020  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1021}
1022
1023void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1024  if (CE->getNumArgs() < 3)
1025    return;
1026
1027  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1028  // The return value is a pointer to the byte following the last written byte.
1029  const Expr *Dest = CE->getArg(0);
1030  ProgramStateRef state = C.getState();
1031
1032  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1033}
1034
1035void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1036  if (CE->getNumArgs() < 3)
1037    return;
1038
1039  // void *memmove(void *dst, const void *src, size_t n);
1040  // The return value is the address of the destination buffer.
1041  const Expr *Dest = CE->getArg(0);
1042  ProgramStateRef state = C.getState();
1043
1044  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1045}
1046
1047void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1048  if (CE->getNumArgs() < 3)
1049    return;
1050
1051  // void bcopy(const void *src, void *dst, size_t n);
1052  evalCopyCommon(C, CE, C.getState(),
1053                 CE->getArg(2), CE->getArg(1), CE->getArg(0));
1054}
1055
1056void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1057  if (CE->getNumArgs() < 3)
1058    return;
1059
1060  // int memcmp(const void *s1, const void *s2, size_t n);
1061  CurrentFunctionDescription = "memory comparison function";
1062
1063  const Expr *Left = CE->getArg(0);
1064  const Expr *Right = CE->getArg(1);
1065  const Expr *Size = CE->getArg(2);
1066
1067  ProgramStateRef state = C.getState();
1068  SValBuilder &svalBuilder = C.getSValBuilder();
1069
1070  // See if the size argument is zero.
1071  const LocationContext *LCtx = C.getLocationContext();
1072  SVal sizeVal = state->getSVal(Size, LCtx);
1073  QualType sizeTy = Size->getType();
1074
1075  ProgramStateRef stateZeroSize, stateNonZeroSize;
1076  std::tie(stateZeroSize, stateNonZeroSize) =
1077    assumeZero(C, state, sizeVal, sizeTy);
1078
1079  // If the size can be zero, the result will be 0 in that case, and we don't
1080  // have to check either of the buffers.
1081  if (stateZeroSize) {
1082    state = stateZeroSize;
1083    state = state->BindExpr(CE, LCtx,
1084                            svalBuilder.makeZeroVal(CE->getType()));
1085    C.addTransition(state);
1086  }
1087
1088  // If the size can be nonzero, we have to check the other arguments.
1089  if (stateNonZeroSize) {
1090    state = stateNonZeroSize;
1091    // If we know the two buffers are the same, we know the result is 0.
1092    // First, get the two buffers' addresses. Another checker will have already
1093    // made sure they're not undefined.
1094    DefinedOrUnknownSVal LV =
1095        state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1096    DefinedOrUnknownSVal RV =
1097        state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1098
1099    // See if they are the same.
1100    DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1101    ProgramStateRef StSameBuf, StNotSameBuf;
1102    std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1103
1104    // If the two arguments might be the same buffer, we know the result is 0,
1105    // and we only need to check one size.
1106    if (StSameBuf) {
1107      state = StSameBuf;
1108      state = CheckBufferAccess(C, state, Size, Left);
1109      if (state) {
1110        state = StSameBuf->BindExpr(CE, LCtx,
1111                                    svalBuilder.makeZeroVal(CE->getType()));
1112        C.addTransition(state);
1113      }
1114    }
1115
1116    // If the two arguments might be different buffers, we have to check the
1117    // size of both of them.
1118    if (StNotSameBuf) {
1119      state = StNotSameBuf;
1120      state = CheckBufferAccess(C, state, Size, Left, Right);
1121      if (state) {
1122        // The return value is the comparison result, which we don't know.
1123        SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1124        state = state->BindExpr(CE, LCtx, CmpV);
1125        C.addTransition(state);
1126      }
1127    }
1128  }
1129}
1130
1131void CStringChecker::evalstrLength(CheckerContext &C,
1132                                   const CallExpr *CE) const {
1133  if (CE->getNumArgs() < 1)
1134    return;
1135
1136  // size_t strlen(const char *s);
1137  evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1138}
1139
1140void CStringChecker::evalstrnLength(CheckerContext &C,
1141                                    const CallExpr *CE) const {
1142  if (CE->getNumArgs() < 2)
1143    return;
1144
1145  // size_t strnlen(const char *s, size_t maxlen);
1146  evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1147}
1148
1149void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1150                                         bool IsStrnlen) const {
1151  CurrentFunctionDescription = "string length function";
1152  ProgramStateRef state = C.getState();
1153  const LocationContext *LCtx = C.getLocationContext();
1154
1155  if (IsStrnlen) {
1156    const Expr *maxlenExpr = CE->getArg(1);
1157    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1158
1159    ProgramStateRef stateZeroSize, stateNonZeroSize;
1160    std::tie(stateZeroSize, stateNonZeroSize) =
1161      assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1162
1163    // If the size can be zero, the result will be 0 in that case, and we don't
1164    // have to check the string itself.
1165    if (stateZeroSize) {
1166      SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1167      stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1168      C.addTransition(stateZeroSize);
1169    }
1170
1171    // If the size is GUARANTEED to be zero, we're done!
1172    if (!stateNonZeroSize)
1173      return;
1174
1175    // Otherwise, record the assumption that the size is nonzero.
1176    state = stateNonZeroSize;
1177  }
1178
1179  // Check that the string argument is non-null.
1180  const Expr *Arg = CE->getArg(0);
1181  SVal ArgVal = state->getSVal(Arg, LCtx);
1182
1183  state = checkNonNull(C, state, Arg, ArgVal);
1184
1185  if (!state)
1186    return;
1187
1188  SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1189
1190  // If the argument isn't a valid C string, there's no valid state to
1191  // transition to.
1192  if (strLength.isUndef())
1193    return;
1194
1195  DefinedOrUnknownSVal result = UnknownVal();
1196
1197  // If the check is for strnlen() then bind the return value to no more than
1198  // the maxlen value.
1199  if (IsStrnlen) {
1200    QualType cmpTy = C.getSValBuilder().getConditionType();
1201
1202    // It's a little unfortunate to be getting this again,
1203    // but it's not that expensive...
1204    const Expr *maxlenExpr = CE->getArg(1);
1205    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1206
1207    Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1208    Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1209
1210    if (strLengthNL && maxlenValNL) {
1211      ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1212
1213      // Check if the strLength is greater than the maxlen.
1214      std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1215          C.getSValBuilder()
1216              .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1217              .castAs<DefinedOrUnknownSVal>());
1218
1219      if (stateStringTooLong && !stateStringNotTooLong) {
1220        // If the string is longer than maxlen, return maxlen.
1221        result = *maxlenValNL;
1222      } else if (stateStringNotTooLong && !stateStringTooLong) {
1223        // If the string is shorter than maxlen, return its length.
1224        result = *strLengthNL;
1225      }
1226    }
1227
1228    if (result.isUnknown()) {
1229      // If we don't have enough information for a comparison, there's
1230      // no guarantee the full string length will actually be returned.
1231      // All we know is the return value is the min of the string length
1232      // and the limit. This is better than nothing.
1233      result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1234      NonLoc resultNL = result.castAs<NonLoc>();
1235
1236      if (strLengthNL) {
1237        state = state->assume(C.getSValBuilder().evalBinOpNN(
1238                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
1239                                  .castAs<DefinedOrUnknownSVal>(), true);
1240      }
1241
1242      if (maxlenValNL) {
1243        state = state->assume(C.getSValBuilder().evalBinOpNN(
1244                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1245                                  .castAs<DefinedOrUnknownSVal>(), true);
1246      }
1247    }
1248
1249  } else {
1250    // This is a plain strlen(), not strnlen().
1251    result = strLength.castAs<DefinedOrUnknownSVal>();
1252
1253    // If we don't know the length of the string, conjure a return
1254    // value, so it can be used in constraints, at least.
1255    if (result.isUnknown()) {
1256      result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1257    }
1258  }
1259
1260  // Bind the return value.
1261  assert(!result.isUnknown() && "Should have conjured a value by now");
1262  state = state->BindExpr(CE, LCtx, result);
1263  C.addTransition(state);
1264}
1265
1266void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1267  if (CE->getNumArgs() < 2)
1268    return;
1269
1270  // char *strcpy(char *restrict dst, const char *restrict src);
1271  evalStrcpyCommon(C, CE,
1272                   /* returnEnd = */ false,
1273                   /* isBounded = */ false,
1274                   /* isAppending = */ false);
1275}
1276
1277void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1278  if (CE->getNumArgs() < 3)
1279    return;
1280
1281  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1282  evalStrcpyCommon(C, CE,
1283                   /* returnEnd = */ false,
1284                   /* isBounded = */ true,
1285                   /* isAppending = */ false);
1286}
1287
1288void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1289  if (CE->getNumArgs() < 2)
1290    return;
1291
1292  // char *stpcpy(char *restrict dst, const char *restrict src);
1293  evalStrcpyCommon(C, CE,
1294                   /* returnEnd = */ true,
1295                   /* isBounded = */ false,
1296                   /* isAppending = */ false);
1297}
1298
1299void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1300  if (CE->getNumArgs() < 2)
1301    return;
1302
1303  //char *strcat(char *restrict s1, const char *restrict s2);
1304  evalStrcpyCommon(C, CE,
1305                   /* returnEnd = */ false,
1306                   /* isBounded = */ false,
1307                   /* isAppending = */ true);
1308}
1309
1310void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1311  if (CE->getNumArgs() < 3)
1312    return;
1313
1314  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1315  evalStrcpyCommon(C, CE,
1316                   /* returnEnd = */ false,
1317                   /* isBounded = */ true,
1318                   /* isAppending = */ true);
1319}
1320
1321void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1322                                      bool returnEnd, bool isBounded,
1323                                      bool isAppending) const {
1324  CurrentFunctionDescription = "string copy function";
1325  ProgramStateRef state = C.getState();
1326  const LocationContext *LCtx = C.getLocationContext();
1327
1328  // Check that the destination is non-null.
1329  const Expr *Dst = CE->getArg(0);
1330  SVal DstVal = state->getSVal(Dst, LCtx);
1331
1332  state = checkNonNull(C, state, Dst, DstVal);
1333  if (!state)
1334    return;
1335
1336  // Check that the source is non-null.
1337  const Expr *srcExpr = CE->getArg(1);
1338  SVal srcVal = state->getSVal(srcExpr, LCtx);
1339  state = checkNonNull(C, state, srcExpr, srcVal);
1340  if (!state)
1341    return;
1342
1343  // Get the string length of the source.
1344  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1345
1346  // If the source isn't a valid C string, give up.
1347  if (strLength.isUndef())
1348    return;
1349
1350  SValBuilder &svalBuilder = C.getSValBuilder();
1351  QualType cmpTy = svalBuilder.getConditionType();
1352  QualType sizeTy = svalBuilder.getContext().getSizeType();
1353
1354  // These two values allow checking two kinds of errors:
1355  // - actual overflows caused by a source that doesn't fit in the destination
1356  // - potential overflows caused by a bound that could exceed the destination
1357  SVal amountCopied = UnknownVal();
1358  SVal maxLastElementIndex = UnknownVal();
1359  const char *boundWarning = NULL;
1360
1361  // If the function is strncpy, strncat, etc... it is bounded.
1362  if (isBounded) {
1363    // Get the max number of characters to copy.
1364    const Expr *lenExpr = CE->getArg(2);
1365    SVal lenVal = state->getSVal(lenExpr, LCtx);
1366
1367    // Protect against misdeclared strncpy().
1368    lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1369
1370    Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1371    Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1372
1373    // If we know both values, we might be able to figure out how much
1374    // we're copying.
1375    if (strLengthNL && lenValNL) {
1376      ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1377
1378      // Check if the max number to copy is less than the length of the src.
1379      // If the bound is equal to the source length, strncpy won't null-
1380      // terminate the result!
1381      std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1382          svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1383              .castAs<DefinedOrUnknownSVal>());
1384
1385      if (stateSourceTooLong && !stateSourceNotTooLong) {
1386        // Max number to copy is less than the length of the src, so the actual
1387        // strLength copied is the max number arg.
1388        state = stateSourceTooLong;
1389        amountCopied = lenVal;
1390
1391      } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1392        // The source buffer entirely fits in the bound.
1393        state = stateSourceNotTooLong;
1394        amountCopied = strLength;
1395      }
1396    }
1397
1398    // We still want to know if the bound is known to be too large.
1399    if (lenValNL) {
1400      if (isAppending) {
1401        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1402
1403        // Get the string length of the destination. If the destination is
1404        // memory that can't have a string length, we shouldn't be copying
1405        // into it anyway.
1406        SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1407        if (dstStrLength.isUndef())
1408          return;
1409
1410        if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1411          maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1412                                                        *lenValNL,
1413                                                        *dstStrLengthNL,
1414                                                        sizeTy);
1415          boundWarning = "Size argument is greater than the free space in the "
1416                         "destination buffer";
1417        }
1418
1419      } else {
1420        // For strncpy, this is just checking that lenVal <= sizeof(dst)
1421        // (Yes, strncpy and strncat differ in how they treat termination.
1422        // strncat ALWAYS terminates, but strncpy doesn't.)
1423
1424        // We need a special case for when the copy size is zero, in which
1425        // case strncpy will do no work at all. Our bounds check uses n-1
1426        // as the last element accessed, so n == 0 is problematic.
1427        ProgramStateRef StateZeroSize, StateNonZeroSize;
1428        std::tie(StateZeroSize, StateNonZeroSize) =
1429          assumeZero(C, state, *lenValNL, sizeTy);
1430
1431        // If the size is known to be zero, we're done.
1432        if (StateZeroSize && !StateNonZeroSize) {
1433          StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1434          C.addTransition(StateZeroSize);
1435          return;
1436        }
1437
1438        // Otherwise, go ahead and figure out the last element we'll touch.
1439        // We don't record the non-zero assumption here because we can't
1440        // be sure. We won't warn on a possible zero.
1441        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1442        maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1443                                                      one, sizeTy);
1444        boundWarning = "Size argument is greater than the length of the "
1445                       "destination buffer";
1446      }
1447    }
1448
1449    // If we couldn't pin down the copy length, at least bound it.
1450    // FIXME: We should actually run this code path for append as well, but
1451    // right now it creates problems with constraints (since we can end up
1452    // trying to pass constraints from symbol to symbol).
1453    if (amountCopied.isUnknown() && !isAppending) {
1454      // Try to get a "hypothetical" string length symbol, which we can later
1455      // set as a real value if that turns out to be the case.
1456      amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1457      assert(!amountCopied.isUndef());
1458
1459      if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1460        if (lenValNL) {
1461          // amountCopied <= lenVal
1462          SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1463                                                             *amountCopiedNL,
1464                                                             *lenValNL,
1465                                                             cmpTy);
1466          state = state->assume(
1467              copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1468          if (!state)
1469            return;
1470        }
1471
1472        if (strLengthNL) {
1473          // amountCopied <= strlen(source)
1474          SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1475                                                           *amountCopiedNL,
1476                                                           *strLengthNL,
1477                                                           cmpTy);
1478          state = state->assume(
1479              copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1480          if (!state)
1481            return;
1482        }
1483      }
1484    }
1485
1486  } else {
1487    // The function isn't bounded. The amount copied should match the length
1488    // of the source buffer.
1489    amountCopied = strLength;
1490  }
1491
1492  assert(state);
1493
1494  // This represents the number of characters copied into the destination
1495  // buffer. (It may not actually be the strlen if the destination buffer
1496  // is not terminated.)
1497  SVal finalStrLength = UnknownVal();
1498
1499  // If this is an appending function (strcat, strncat...) then set the
1500  // string length to strlen(src) + strlen(dst) since the buffer will
1501  // ultimately contain both.
1502  if (isAppending) {
1503    // Get the string length of the destination. If the destination is memory
1504    // that can't have a string length, we shouldn't be copying into it anyway.
1505    SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1506    if (dstStrLength.isUndef())
1507      return;
1508
1509    Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1510    Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1511
1512    // If we know both string lengths, we might know the final string length.
1513    if (srcStrLengthNL && dstStrLengthNL) {
1514      // Make sure the two lengths together don't overflow a size_t.
1515      state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1516      if (!state)
1517        return;
1518
1519      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1520                                               *dstStrLengthNL, sizeTy);
1521    }
1522
1523    // If we couldn't get a single value for the final string length,
1524    // we can at least bound it by the individual lengths.
1525    if (finalStrLength.isUnknown()) {
1526      // Try to get a "hypothetical" string length symbol, which we can later
1527      // set as a real value if that turns out to be the case.
1528      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1529      assert(!finalStrLength.isUndef());
1530
1531      if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1532        if (srcStrLengthNL) {
1533          // finalStrLength >= srcStrLength
1534          SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1535                                                        *finalStrLengthNL,
1536                                                        *srcStrLengthNL,
1537                                                        cmpTy);
1538          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1539                                true);
1540          if (!state)
1541            return;
1542        }
1543
1544        if (dstStrLengthNL) {
1545          // finalStrLength >= dstStrLength
1546          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1547                                                      *finalStrLengthNL,
1548                                                      *dstStrLengthNL,
1549                                                      cmpTy);
1550          state =
1551              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1552          if (!state)
1553            return;
1554        }
1555      }
1556    }
1557
1558  } else {
1559    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1560    // the final string length will match the input string length.
1561    finalStrLength = amountCopied;
1562  }
1563
1564  // The final result of the function will either be a pointer past the last
1565  // copied element, or a pointer to the start of the destination buffer.
1566  SVal Result = (returnEnd ? UnknownVal() : DstVal);
1567
1568  assert(state);
1569
1570  // If the destination is a MemRegion, try to check for a buffer overflow and
1571  // record the new string length.
1572  if (Optional<loc::MemRegionVal> dstRegVal =
1573          DstVal.getAs<loc::MemRegionVal>()) {
1574    QualType ptrTy = Dst->getType();
1575
1576    // If we have an exact value on a bounded copy, use that to check for
1577    // overflows, rather than our estimate about how much is actually copied.
1578    if (boundWarning) {
1579      if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1580        SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1581                                                      *maxLastNL, ptrTy);
1582        state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1583                              boundWarning);
1584        if (!state)
1585          return;
1586      }
1587    }
1588
1589    // Then, if the final length is known...
1590    if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1591      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1592                                                 *knownStrLength, ptrTy);
1593
1594      // ...and we haven't checked the bound, we'll check the actual copy.
1595      if (!boundWarning) {
1596        const char * const warningMsg =
1597          "String copy function overflows destination buffer";
1598        state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1599        if (!state)
1600          return;
1601      }
1602
1603      // If this is a stpcpy-style copy, the last element is the return value.
1604      if (returnEnd)
1605        Result = lastElement;
1606    }
1607
1608    // Invalidate the destination (regular invalidation without pointer-escaping
1609    // the address of the top-level region). This must happen before we set the
1610    // C string length because invalidation will clear the length.
1611    // FIXME: Even if we can't perfectly model the copy, we should see if we
1612    // can use LazyCompoundVals to copy the source values into the destination.
1613    // This would probably remove any existing bindings past the end of the
1614    // string, but that's still an improvement over blank invalidation.
1615    state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1616                             /*IsSourceBuffer*/false);
1617
1618    // Invalidate the source (const-invalidation without const-pointer-escaping
1619    // the address of the top-level region).
1620    state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true);
1621
1622    // Set the C string length of the destination, if we know it.
1623    if (isBounded && !isAppending) {
1624      // strncpy is annoying in that it doesn't guarantee to null-terminate
1625      // the result string. If the original string didn't fit entirely inside
1626      // the bound (including the null-terminator), we don't know how long the
1627      // result is.
1628      if (amountCopied != strLength)
1629        finalStrLength = UnknownVal();
1630    }
1631    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1632  }
1633
1634  assert(state);
1635
1636  // If this is a stpcpy-style copy, but we were unable to check for a buffer
1637  // overflow, we still need a result. Conjure a return value.
1638  if (returnEnd && Result.isUnknown()) {
1639    Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1640  }
1641
1642  // Set the return value.
1643  state = state->BindExpr(CE, LCtx, Result);
1644  C.addTransition(state);
1645}
1646
1647void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1648  if (CE->getNumArgs() < 2)
1649    return;
1650
1651  //int strcmp(const char *s1, const char *s2);
1652  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1653}
1654
1655void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1656  if (CE->getNumArgs() < 3)
1657    return;
1658
1659  //int strncmp(const char *s1, const char *s2, size_t n);
1660  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1661}
1662
1663void CStringChecker::evalStrcasecmp(CheckerContext &C,
1664                                    const CallExpr *CE) const {
1665  if (CE->getNumArgs() < 2)
1666    return;
1667
1668  //int strcasecmp(const char *s1, const char *s2);
1669  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1670}
1671
1672void CStringChecker::evalStrncasecmp(CheckerContext &C,
1673                                     const CallExpr *CE) const {
1674  if (CE->getNumArgs() < 3)
1675    return;
1676
1677  //int strncasecmp(const char *s1, const char *s2, size_t n);
1678  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1679}
1680
1681void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1682                                      bool isBounded, bool ignoreCase) const {
1683  CurrentFunctionDescription = "string comparison function";
1684  ProgramStateRef state = C.getState();
1685  const LocationContext *LCtx = C.getLocationContext();
1686
1687  // Check that the first string is non-null
1688  const Expr *s1 = CE->getArg(0);
1689  SVal s1Val = state->getSVal(s1, LCtx);
1690  state = checkNonNull(C, state, s1, s1Val);
1691  if (!state)
1692    return;
1693
1694  // Check that the second string is non-null.
1695  const Expr *s2 = CE->getArg(1);
1696  SVal s2Val = state->getSVal(s2, LCtx);
1697  state = checkNonNull(C, state, s2, s2Val);
1698  if (!state)
1699    return;
1700
1701  // Get the string length of the first string or give up.
1702  SVal s1Length = getCStringLength(C, state, s1, s1Val);
1703  if (s1Length.isUndef())
1704    return;
1705
1706  // Get the string length of the second string or give up.
1707  SVal s2Length = getCStringLength(C, state, s2, s2Val);
1708  if (s2Length.isUndef())
1709    return;
1710
1711  // If we know the two buffers are the same, we know the result is 0.
1712  // First, get the two buffers' addresses. Another checker will have already
1713  // made sure they're not undefined.
1714  DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
1715  DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
1716
1717  // See if they are the same.
1718  SValBuilder &svalBuilder = C.getSValBuilder();
1719  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1720  ProgramStateRef StSameBuf, StNotSameBuf;
1721  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1722
1723  // If the two arguments might be the same buffer, we know the result is 0,
1724  // and we only need to check one size.
1725  if (StSameBuf) {
1726    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1727                                    svalBuilder.makeZeroVal(CE->getType()));
1728    C.addTransition(StSameBuf);
1729
1730    // If the two arguments are GUARANTEED to be the same, we're done!
1731    if (!StNotSameBuf)
1732      return;
1733  }
1734
1735  assert(StNotSameBuf);
1736  state = StNotSameBuf;
1737
1738  // At this point we can go about comparing the two buffers.
1739  // For now, we only do this if they're both known string literals.
1740
1741  // Attempt to extract string literals from both expressions.
1742  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1743  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1744  bool canComputeResult = false;
1745
1746  if (s1StrLiteral && s2StrLiteral) {
1747    StringRef s1StrRef = s1StrLiteral->getString();
1748    StringRef s2StrRef = s2StrLiteral->getString();
1749
1750    if (isBounded) {
1751      // Get the max number of characters to compare.
1752      const Expr *lenExpr = CE->getArg(2);
1753      SVal lenVal = state->getSVal(lenExpr, LCtx);
1754
1755      // If the length is known, we can get the right substrings.
1756      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1757        // Create substrings of each to compare the prefix.
1758        s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1759        s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1760        canComputeResult = true;
1761      }
1762    } else {
1763      // This is a normal, unbounded strcmp.
1764      canComputeResult = true;
1765    }
1766
1767    if (canComputeResult) {
1768      // Real strcmp stops at null characters.
1769      size_t s1Term = s1StrRef.find('\0');
1770      if (s1Term != StringRef::npos)
1771        s1StrRef = s1StrRef.substr(0, s1Term);
1772
1773      size_t s2Term = s2StrRef.find('\0');
1774      if (s2Term != StringRef::npos)
1775        s2StrRef = s2StrRef.substr(0, s2Term);
1776
1777      // Use StringRef's comparison methods to compute the actual result.
1778      int result;
1779
1780      if (ignoreCase) {
1781        // Compare string 1 to string 2 the same way strcasecmp() does.
1782        result = s1StrRef.compare_lower(s2StrRef);
1783      } else {
1784        // Compare string 1 to string 2 the same way strcmp() does.
1785        result = s1StrRef.compare(s2StrRef);
1786      }
1787
1788      // Build the SVal of the comparison and bind the return value.
1789      SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1790      state = state->BindExpr(CE, LCtx, resultVal);
1791    }
1792  }
1793
1794  if (!canComputeResult) {
1795    // Conjure a symbolic value. It's the best we can do.
1796    SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1797    state = state->BindExpr(CE, LCtx, resultVal);
1798  }
1799
1800  // Record this as a possible path.
1801  C.addTransition(state);
1802}
1803
1804void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1805  //char *strsep(char **stringp, const char *delim);
1806  if (CE->getNumArgs() < 2)
1807    return;
1808
1809  // Sanity: does the search string parameter match the return type?
1810  const Expr *SearchStrPtr = CE->getArg(0);
1811  QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1812  if (CharPtrTy.isNull() ||
1813      CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1814    return;
1815
1816  CurrentFunctionDescription = "strsep()";
1817  ProgramStateRef State = C.getState();
1818  const LocationContext *LCtx = C.getLocationContext();
1819
1820  // Check that the search string pointer is non-null (though it may point to
1821  // a null string).
1822  SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1823  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1824  if (!State)
1825    return;
1826
1827  // Check that the delimiter string is non-null.
1828  const Expr *DelimStr = CE->getArg(1);
1829  SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1830  State = checkNonNull(C, State, DelimStr, DelimStrVal);
1831  if (!State)
1832    return;
1833
1834  SValBuilder &SVB = C.getSValBuilder();
1835  SVal Result;
1836  if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1837    // Get the current value of the search string pointer, as a char*.
1838    Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1839
1840    // Invalidate the search string, representing the change of one delimiter
1841    // character to NUL.
1842    State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1843                             /*IsSourceBuffer*/false);
1844
1845    // Overwrite the search string pointer. The new value is either an address
1846    // further along in the same string, or NULL if there are no more tokens.
1847    State = State->bindLoc(*SearchStrLoc,
1848                           SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
1849                                                C.blockCount()));
1850  } else {
1851    assert(SearchStrVal.isUnknown());
1852    // Conjure a symbolic value. It's the best we can do.
1853    Result = SVB.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1854  }
1855
1856  // Set the return value, and finish.
1857  State = State->BindExpr(CE, LCtx, Result);
1858  C.addTransition(State);
1859}
1860
1861
1862//===----------------------------------------------------------------------===//
1863// The driver method, and other Checker callbacks.
1864//===----------------------------------------------------------------------===//
1865
1866bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1867  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1868
1869  if (!FDecl)
1870    return false;
1871
1872  // FIXME: Poorly-factored string switches are slow.
1873  FnCheck evalFunction = 0;
1874  if (C.isCLibraryFunction(FDecl, "memcpy"))
1875    evalFunction =  &CStringChecker::evalMemcpy;
1876  else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1877    evalFunction =  &CStringChecker::evalMempcpy;
1878  else if (C.isCLibraryFunction(FDecl, "memcmp"))
1879    evalFunction =  &CStringChecker::evalMemcmp;
1880  else if (C.isCLibraryFunction(FDecl, "memmove"))
1881    evalFunction =  &CStringChecker::evalMemmove;
1882  else if (C.isCLibraryFunction(FDecl, "strcpy"))
1883    evalFunction =  &CStringChecker::evalStrcpy;
1884  else if (C.isCLibraryFunction(FDecl, "strncpy"))
1885    evalFunction =  &CStringChecker::evalStrncpy;
1886  else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1887    evalFunction =  &CStringChecker::evalStpcpy;
1888  else if (C.isCLibraryFunction(FDecl, "strcat"))
1889    evalFunction =  &CStringChecker::evalStrcat;
1890  else if (C.isCLibraryFunction(FDecl, "strncat"))
1891    evalFunction =  &CStringChecker::evalStrncat;
1892  else if (C.isCLibraryFunction(FDecl, "strlen"))
1893    evalFunction =  &CStringChecker::evalstrLength;
1894  else if (C.isCLibraryFunction(FDecl, "strnlen"))
1895    evalFunction =  &CStringChecker::evalstrnLength;
1896  else if (C.isCLibraryFunction(FDecl, "strcmp"))
1897    evalFunction =  &CStringChecker::evalStrcmp;
1898  else if (C.isCLibraryFunction(FDecl, "strncmp"))
1899    evalFunction =  &CStringChecker::evalStrncmp;
1900  else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1901    evalFunction =  &CStringChecker::evalStrcasecmp;
1902  else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1903    evalFunction =  &CStringChecker::evalStrncasecmp;
1904  else if (C.isCLibraryFunction(FDecl, "strsep"))
1905    evalFunction =  &CStringChecker::evalStrsep;
1906  else if (C.isCLibraryFunction(FDecl, "bcopy"))
1907    evalFunction =  &CStringChecker::evalBcopy;
1908  else if (C.isCLibraryFunction(FDecl, "bcmp"))
1909    evalFunction =  &CStringChecker::evalMemcmp;
1910
1911  // If the callee isn't a string function, let another checker handle it.
1912  if (!evalFunction)
1913    return false;
1914
1915  // Make sure each function sets its own description.
1916  // (But don't bother in a release build.)
1917  assert(!(CurrentFunctionDescription = NULL));
1918
1919  // Check and evaluate the call.
1920  (this->*evalFunction)(C, CE);
1921
1922  // If the evaluate call resulted in no change, chain to the next eval call
1923  // handler.
1924  // Note, the custom CString evaluation calls assume that basic safety
1925  // properties are held. However, if the user chooses to turn off some of these
1926  // checks, we ignore the issues and leave the call evaluation to a generic
1927  // handler.
1928  if (!C.isDifferent())
1929    return false;
1930
1931  return true;
1932}
1933
1934void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1935  // Record string length for char a[] = "abc";
1936  ProgramStateRef state = C.getState();
1937
1938  for (const auto *I : DS->decls()) {
1939    const VarDecl *D = dyn_cast<VarDecl>(I);
1940    if (!D)
1941      continue;
1942
1943    // FIXME: Handle array fields of structs.
1944    if (!D->getType()->isArrayType())
1945      continue;
1946
1947    const Expr *Init = D->getInit();
1948    if (!Init)
1949      continue;
1950    if (!isa<StringLiteral>(Init))
1951      continue;
1952
1953    Loc VarLoc = state->getLValue(D, C.getLocationContext());
1954    const MemRegion *MR = VarLoc.getAsRegion();
1955    if (!MR)
1956      continue;
1957
1958    SVal StrVal = state->getSVal(Init, C.getLocationContext());
1959    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1960    DefinedOrUnknownSVal strLength =
1961        getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
1962
1963    state = state->set<CStringLength>(MR, strLength);
1964  }
1965
1966  C.addTransition(state);
1967}
1968
1969bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1970  CStringLengthTy Entries = state->get<CStringLength>();
1971  return !Entries.isEmpty();
1972}
1973
1974ProgramStateRef
1975CStringChecker::checkRegionChanges(ProgramStateRef state,
1976                                   const InvalidatedSymbols *,
1977                                   ArrayRef<const MemRegion *> ExplicitRegions,
1978                                   ArrayRef<const MemRegion *> Regions,
1979                                   const CallEvent *Call) const {
1980  CStringLengthTy Entries = state->get<CStringLength>();
1981  if (Entries.isEmpty())
1982    return state;
1983
1984  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1985  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1986
1987  // First build sets for the changed regions and their super-regions.
1988  for (ArrayRef<const MemRegion *>::iterator
1989       I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1990    const MemRegion *MR = *I;
1991    Invalidated.insert(MR);
1992
1993    SuperRegions.insert(MR);
1994    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1995      MR = SR->getSuperRegion();
1996      SuperRegions.insert(MR);
1997    }
1998  }
1999
2000  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2001
2002  // Then loop over the entries in the current state.
2003  for (CStringLengthTy::iterator I = Entries.begin(),
2004       E = Entries.end(); I != E; ++I) {
2005    const MemRegion *MR = I.getKey();
2006
2007    // Is this entry for a super-region of a changed region?
2008    if (SuperRegions.count(MR)) {
2009      Entries = F.remove(Entries, MR);
2010      continue;
2011    }
2012
2013    // Is this entry for a sub-region of a changed region?
2014    const MemRegion *Super = MR;
2015    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2016      Super = SR->getSuperRegion();
2017      if (Invalidated.count(Super)) {
2018        Entries = F.remove(Entries, MR);
2019        break;
2020      }
2021    }
2022  }
2023
2024  return state->set<CStringLength>(Entries);
2025}
2026
2027void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2028                                      SymbolReaper &SR) const {
2029  // Mark all symbols in our string length map as valid.
2030  CStringLengthTy Entries = state->get<CStringLength>();
2031
2032  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2033       I != E; ++I) {
2034    SVal Len = I.getData();
2035
2036    for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2037                                  se = Len.symbol_end(); si != se; ++si)
2038      SR.markInUse(*si);
2039  }
2040}
2041
2042void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2043                                      CheckerContext &C) const {
2044  if (!SR.hasDeadSymbols())
2045    return;
2046
2047  ProgramStateRef state = C.getState();
2048  CStringLengthTy Entries = state->get<CStringLength>();
2049  if (Entries.isEmpty())
2050    return;
2051
2052  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2053  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2054       I != E; ++I) {
2055    SVal Len = I.getData();
2056    if (SymbolRef Sym = Len.getAsSymbol()) {
2057      if (SR.isDead(Sym))
2058        Entries = F.remove(Entries, I.getKey());
2059    }
2060  }
2061
2062  state = state->set<CStringLength>(Entries);
2063  C.addTransition(state);
2064}
2065
2066#define REGISTER_CHECKER(name)                                                 \
2067  void ento::register##name(CheckerManager &mgr) {                             \
2068    CStringChecker *checker = mgr.registerChecker<CStringChecker>();           \
2069    checker->Filter.Check##name = true;                                        \
2070    checker->Filter.CheckName##name = mgr.getCurrentCheckName();               \
2071  }
2072
2073REGISTER_CHECKER(CStringNullArg)
2074REGISTER_CHECKER(CStringOutOfBounds)
2075REGISTER_CHECKER(CStringBufferOverlap)
2076REGISTER_CHECKER(CStringNotNullTerm)
2077
2078void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
2079  registerCStringNullArg(Mgr);
2080}
2081