CStringChecker.cpp revision d5af0e17b00ab2ee6a8c1f352bb9eeb1cc5b2d07
1//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This defines CStringChecker, which is an assortment of checks on calls
11// to functions in <string.h>.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ClangSACheckers.h"
16#include "clang/StaticAnalyzer/Core/Checker.h"
17#include "clang/StaticAnalyzer/Core/CheckerManager.h"
18#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
19#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h"
21#include "llvm/ADT/StringSwitch.h"
22
23using namespace clang;
24using namespace ento;
25
26namespace {
27class CStringChecker : public Checker< eval::Call,
28                                         check::PreStmt<DeclStmt>,
29                                         check::LiveSymbols,
30                                         check::DeadSymbols,
31                                         check::RegionChanges
32                                         > {
33  mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite,
34                                   BT_Overlap, BT_NotCString,
35                                   BT_AdditionOverflow;
36public:
37  static void *getTag() { static int tag; return &tag; }
38
39  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
40  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
41  void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const;
42  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
43  bool wantsRegionChangeUpdate(const GRState *state) const;
44
45  const GRState *checkRegionChanges(const GRState *state,
46                                    const StoreManager::InvalidatedSymbols *,
47                                    const MemRegion * const *Begin,
48                                    const MemRegion * const *End) const;
49
50  typedef void (CStringChecker::*FnCheck)(CheckerContext &,
51                                          const CallExpr *) const;
52
53  void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
54  void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
55  void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
56  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
57  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
58                      const GRState *state,
59                      const Expr *Size, const Expr *Source, const Expr *Dest,
60                      bool Restricted = false,
61                      bool IsMempcpy = false) const;
62
63  void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
64
65  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
66  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
67  void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
68                           bool IsStrnlen = false) const;
69
70  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
71  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
72  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
73  void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd,
74                        bool isBounded, bool isAppending) const;
75
76  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
77  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
78
79  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
80  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
81  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
82  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
83  void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
84                        bool isBounded = false, bool ignoreCase = false) const;
85
86  // Utility methods
87  std::pair<const GRState*, const GRState*>
88  static assumeZero(CheckerContext &C,
89                    const GRState *state, SVal V, QualType Ty);
90
91  static const GRState *setCStringLength(const GRState *state,
92                                         const MemRegion *MR, SVal strLength);
93  static SVal getCStringLengthForRegion(CheckerContext &C,
94                                        const GRState *&state,
95                                        const Expr *Ex, const MemRegion *MR,
96                                        bool hypothetical);
97  SVal getCStringLength(CheckerContext &C, const GRState *&state,
98                        const Expr *Ex, SVal Buf,
99                        bool hypothetical = false) const;
100
101  const StringLiteral *getCStringLiteral(CheckerContext &C,
102                                         const GRState *&state,
103                                         const Expr *expr,
104                                         SVal val) const;
105
106  static const GRState *InvalidateBuffer(CheckerContext &C,
107                                         const GRState *state,
108                                         const Expr *Ex, SVal V);
109
110  static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
111                              const MemRegion *MR);
112
113  // Re-usable checks
114  const GRState *checkNonNull(CheckerContext &C, const GRState *state,
115                               const Expr *S, SVal l) const;
116  const GRState *CheckLocation(CheckerContext &C, const GRState *state,
117                               const Expr *S, SVal l,
118                               bool IsDestination = false) const;
119  const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
120                                   const Expr *Size,
121                                   const Expr *FirstBuf,
122                                   const Expr *SecondBuf = NULL,
123                                   bool FirstIsDestination = false) const;
124  const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
125                              const Expr *Size, const Expr *First,
126                              const Expr *Second) const;
127  void emitOverlapBug(CheckerContext &C, const GRState *state,
128                      const Stmt *First, const Stmt *Second) const;
129  const GRState *checkAdditionOverflow(CheckerContext &C, const GRState *state,
130                                       NonLoc left, NonLoc right) const;
131};
132
133class CStringLength {
134public:
135  typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap;
136};
137} //end anonymous namespace
138
139namespace clang {
140namespace ento {
141  template <>
142  struct GRStateTrait<CStringLength>
143    : public GRStatePartialTrait<CStringLength::EntryMap> {
144    static void *GDMIndex() { return CStringChecker::getTag(); }
145  };
146}
147}
148
149//===----------------------------------------------------------------------===//
150// Individual checks and utility methods.
151//===----------------------------------------------------------------------===//
152
153std::pair<const GRState*, const GRState*>
154CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V,
155                           QualType Ty) {
156  DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
157  if (!val)
158    return std::pair<const GRState*, const GRState *>(state, state);
159
160  SValBuilder &svalBuilder = C.getSValBuilder();
161  DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
162  return state->assume(svalBuilder.evalEQ(state, *val, zero));
163}
164
165const GRState *CStringChecker::checkNonNull(CheckerContext &C,
166                                            const GRState *state,
167                                            const Expr *S, SVal l) const {
168  // If a previous check has failed, propagate the failure.
169  if (!state)
170    return NULL;
171
172  const GRState *stateNull, *stateNonNull;
173  llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
174
175  if (stateNull && !stateNonNull) {
176    ExplodedNode *N = C.generateSink(stateNull);
177    if (!N)
178      return NULL;
179
180    if (!BT_Null)
181      BT_Null.reset(new BuiltinBug("API",
182        "Null pointer argument in call to byte string function"));
183
184    // Generate a report for this bug.
185    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
186    EnhancedBugReport *report = new EnhancedBugReport(*BT,
187                                                      BT->getDescription(), N);
188
189    report->addRange(S->getSourceRange());
190    report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S);
191    C.EmitReport(report);
192    return NULL;
193  }
194
195  // From here on, assume that the value is non-null.
196  assert(stateNonNull);
197  return stateNonNull;
198}
199
200// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
201const GRState *CStringChecker::CheckLocation(CheckerContext &C,
202                                             const GRState *state,
203                                             const Expr *S, SVal l,
204                                             bool IsDestination) const {
205  // If a previous check has failed, propagate the failure.
206  if (!state)
207    return NULL;
208
209  // Check for out of bound array element access.
210  const MemRegion *R = l.getAsRegion();
211  if (!R)
212    return state;
213
214  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
215  if (!ER)
216    return state;
217
218  assert(ER->getValueType() == C.getASTContext().CharTy &&
219    "CheckLocation should only be called with char* ElementRegions");
220
221  // Get the size of the array.
222  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
223  SValBuilder &svalBuilder = C.getSValBuilder();
224  SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
225  DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
226
227  // Get the index of the accessed element.
228  DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
229
230  const GRState *StInBound = state->assumeInBound(Idx, Size, true);
231  const GRState *StOutBound = state->assumeInBound(Idx, Size, false);
232  if (StOutBound && !StInBound) {
233    ExplodedNode *N = C.generateSink(StOutBound);
234    if (!N)
235      return NULL;
236
237    BuiltinBug *BT;
238    if (IsDestination) {
239      if (!BT_BoundsWrite) {
240        BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access",
241          "Byte string function overflows destination buffer"));
242      }
243      BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get());
244    } else {
245      if (!BT_Bounds) {
246        BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
247          "Byte string function accesses out-of-bound array element"));
248      }
249      BT = static_cast<BuiltinBug*>(BT_Bounds.get());
250    }
251
252    // FIXME: It would be nice to eventually make this diagnostic more clear,
253    // e.g., by referencing the original declaration or by saying *why* this
254    // reference is outside the range.
255
256    // Generate a report for this bug.
257    RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);
258
259    report->addRange(S->getSourceRange());
260    C.EmitReport(report);
261    return NULL;
262  }
263
264  // Array bound check succeeded.  From this point forward the array bound
265  // should always succeed.
266  return StInBound;
267}
268
269const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C,
270                                                 const GRState *state,
271                                                 const Expr *Size,
272                                                 const Expr *FirstBuf,
273                                                 const Expr *SecondBuf,
274                                                bool FirstIsDestination) const {
275  // If a previous check has failed, propagate the failure.
276  if (!state)
277    return NULL;
278
279  SValBuilder &svalBuilder = C.getSValBuilder();
280  ASTContext &Ctx = C.getASTContext();
281
282  QualType sizeTy = Size->getType();
283  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
284
285  // Check that the first buffer is non-null.
286  SVal BufVal = state->getSVal(FirstBuf);
287  state = checkNonNull(C, state, FirstBuf, BufVal);
288  if (!state)
289    return NULL;
290
291  // Get the access length and make sure it is known.
292  SVal LengthVal = state->getSVal(Size);
293  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
294  if (!Length)
295    return state;
296
297  // Compute the offset of the last element to be accessed: size-1.
298  NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
299  NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
300                                                    *Length, One, sizeTy));
301
302  // Check that the first buffer is sufficiently long.
303  SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
304  if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
305    SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
306                                          LastOffset, PtrTy);
307    state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination);
308
309    // If the buffer isn't large enough, abort.
310    if (!state)
311      return NULL;
312  }
313
314  // If there's a second buffer, check it as well.
315  if (SecondBuf) {
316    BufVal = state->getSVal(SecondBuf);
317    state = checkNonNull(C, state, SecondBuf, BufVal);
318    if (!state)
319      return NULL;
320
321    BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
322    if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
323      SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
324                                            LastOffset, PtrTy);
325      state = CheckLocation(C, state, SecondBuf, BufEnd);
326    }
327  }
328
329  // Large enough or not, return this state!
330  return state;
331}
332
333const GRState *CStringChecker::CheckOverlap(CheckerContext &C,
334                                            const GRState *state,
335                                            const Expr *Size,
336                                            const Expr *First,
337                                            const Expr *Second) const {
338  // Do a simple check for overlap: if the two arguments are from the same
339  // buffer, see if the end of the first is greater than the start of the second
340  // or vice versa.
341
342  // If a previous check has failed, propagate the failure.
343  if (!state)
344    return NULL;
345
346  const GRState *stateTrue, *stateFalse;
347
348  // Get the buffer values and make sure they're known locations.
349  SVal firstVal = state->getSVal(First);
350  SVal secondVal = state->getSVal(Second);
351
352  Loc *firstLoc = dyn_cast<Loc>(&firstVal);
353  if (!firstLoc)
354    return state;
355
356  Loc *secondLoc = dyn_cast<Loc>(&secondVal);
357  if (!secondLoc)
358    return state;
359
360  // Are the two values the same?
361  SValBuilder &svalBuilder = C.getSValBuilder();
362  llvm::tie(stateTrue, stateFalse) =
363    state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
364
365  if (stateTrue && !stateFalse) {
366    // If the values are known to be equal, that's automatically an overlap.
367    emitOverlapBug(C, stateTrue, First, Second);
368    return NULL;
369  }
370
371  // assume the two expressions are not equal.
372  assert(stateFalse);
373  state = stateFalse;
374
375  // Which value comes first?
376  ASTContext &Ctx = svalBuilder.getContext();
377  QualType cmpTy = Ctx.IntTy;
378  SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
379                                         *firstLoc, *secondLoc, cmpTy);
380  DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
381  if (!reverseTest)
382    return state;
383
384  llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
385  if (stateTrue) {
386    if (stateFalse) {
387      // If we don't know which one comes first, we can't perform this test.
388      return state;
389    } else {
390      // Switch the values so that firstVal is before secondVal.
391      Loc *tmpLoc = firstLoc;
392      firstLoc = secondLoc;
393      secondLoc = tmpLoc;
394
395      // Switch the Exprs as well, so that they still correspond.
396      const Expr *tmpExpr = First;
397      First = Second;
398      Second = tmpExpr;
399    }
400  }
401
402  // Get the length, and make sure it too is known.
403  SVal LengthVal = state->getSVal(Size);
404  NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
405  if (!Length)
406    return state;
407
408  // Convert the first buffer's start address to char*.
409  // Bail out if the cast fails.
410  QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
411  SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType());
412  Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
413  if (!FirstStartLoc)
414    return state;
415
416  // Compute the end of the first buffer. Bail out if THAT fails.
417  SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
418                                 *FirstStartLoc, *Length, CharPtrTy);
419  Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
420  if (!FirstEndLoc)
421    return state;
422
423  // Is the end of the first buffer past the start of the second buffer?
424  SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
425                                *FirstEndLoc, *secondLoc, cmpTy);
426  DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
427  if (!OverlapTest)
428    return state;
429
430  llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
431
432  if (stateTrue && !stateFalse) {
433    // Overlap!
434    emitOverlapBug(C, stateTrue, First, Second);
435    return NULL;
436  }
437
438  // assume the two expressions don't overlap.
439  assert(stateFalse);
440  return stateFalse;
441}
442
443void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state,
444                                  const Stmt *First, const Stmt *Second) const {
445  ExplodedNode *N = C.generateSink(state);
446  if (!N)
447    return;
448
449  if (!BT_Overlap)
450    BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
451
452  // Generate a report for this bug.
453  RangedBugReport *report =
454    new RangedBugReport(*BT_Overlap,
455      "Arguments must not be overlapping buffers", N);
456  report->addRange(First->getSourceRange());
457  report->addRange(Second->getSourceRange());
458
459  C.EmitReport(report);
460}
461
462const GRState *CStringChecker::checkAdditionOverflow(CheckerContext &C,
463                                                     const GRState *state,
464                                                     NonLoc left,
465                                                     NonLoc right) const {
466  // If a previous check has failed, propagate the failure.
467  if (!state)
468    return NULL;
469
470  SValBuilder &svalBuilder = C.getSValBuilder();
471  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
472
473  QualType sizeTy = svalBuilder.getContext().getSizeType();
474  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
475  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
476
477  SVal maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
478                                               sizeTy);
479
480  if (maxMinusRight.isUnknownOrUndef()) {
481    // Try switching the operands. (The order of these two assignments is
482    // important!)
483    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
484                                            sizeTy);
485    left = right;
486  }
487
488  if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
489    QualType cmpTy = svalBuilder.getConditionType();
490    // If left > max - right, we have an overflow.
491    SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
492                                                *maxMinusRightNL, cmpTy);
493
494    const GRState *stateOverflow, *stateOkay;
495    llvm::tie(stateOverflow, stateOkay) =
496      state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
497
498    if (stateOverflow && !stateOkay) {
499      // We have an overflow. Emit a bug report.
500      ExplodedNode *N = C.generateSink(stateOverflow);
501      if (!N)
502        return NULL;
503
504      if (!BT_AdditionOverflow)
505        BT_AdditionOverflow.reset(new BuiltinBug("API",
506          "Sum of expressions causes overflow"));
507
508      llvm::SmallString<120> buf;
509      llvm::raw_svector_ostream os(buf);
510      // This isn't a great error message, but this should never occur in real
511      // code anyway -- you'd have to create a buffer longer than a size_t can
512      // represent, which is sort of a contradiction.
513      os << "This expression will create a string whose length is too big to "
514         << "be represented as a size_t";
515
516      // Generate a report for this bug.
517      BugReport *report = new BugReport(*BT_AdditionOverflow, os.str(), N);
518      C.EmitReport(report);
519
520      return NULL;
521    }
522
523    // From now on, assume an overflow didn't occur.
524    assert(stateOkay);
525    state = stateOkay;
526  }
527
528  return state;
529}
530
531const GRState *CStringChecker::setCStringLength(const GRState *state,
532                                                const MemRegion *MR,
533                                                SVal strLength) {
534  assert(!strLength.isUndef() && "Attempt to set an undefined string length");
535
536  MR = MR->StripCasts();
537
538  switch (MR->getKind()) {
539  case MemRegion::StringRegionKind:
540    // FIXME: This can happen if we strcpy() into a string region. This is
541    // undefined [C99 6.4.5p6], but we should still warn about it.
542    return state;
543
544  case MemRegion::SymbolicRegionKind:
545  case MemRegion::AllocaRegionKind:
546  case MemRegion::VarRegionKind:
547  case MemRegion::FieldRegionKind:
548  case MemRegion::ObjCIvarRegionKind:
549    // These are the types we can currently track string lengths for.
550    break;
551
552  case MemRegion::ElementRegionKind:
553    // FIXME: Handle element regions by upper-bounding the parent region's
554    // string length.
555    return state;
556
557  default:
558    // Other regions (mostly non-data) can't have a reliable C string length.
559    // For now, just ignore the change.
560    // FIXME: These are rare but not impossible. We should output some kind of
561    // warning for things like strcpy((char[]){'a', 0}, "b");
562    return state;
563  }
564
565  if (strLength.isUnknown())
566    return state->remove<CStringLength>(MR);
567
568  return state->set<CStringLength>(MR, strLength);
569}
570
571SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
572                                               const GRState *&state,
573                                               const Expr *Ex,
574                                               const MemRegion *MR,
575                                               bool hypothetical) {
576  if (!hypothetical) {
577    // If there's a recorded length, go ahead and return it.
578    const SVal *Recorded = state->get<CStringLength>(MR);
579    if (Recorded)
580      return *Recorded;
581  }
582
583  // Otherwise, get a new symbol and update the state.
584  unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
585  SValBuilder &svalBuilder = C.getSValBuilder();
586  QualType sizeTy = svalBuilder.getContext().getSizeType();
587  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
588                                                    MR, Ex, sizeTy, Count);
589
590  if (!hypothetical)
591    state = state->set<CStringLength>(MR, strLength);
592
593  return strLength;
594}
595
596SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state,
597                                      const Expr *Ex, SVal Buf,
598                                      bool hypothetical) const {
599  const MemRegion *MR = Buf.getAsRegion();
600  if (!MR) {
601    // If we can't get a region, see if it's something we /know/ isn't a
602    // C string. In the context of locations, the only time we can issue such
603    // a warning is for labels.
604    if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
605      if (ExplodedNode *N = C.generateNode(state)) {
606        if (!BT_NotCString)
607          BT_NotCString.reset(new BuiltinBug("API",
608            "Argument is not a null-terminated string."));
609
610        llvm::SmallString<120> buf;
611        llvm::raw_svector_ostream os(buf);
612        os << "Argument to byte string function is the address of the label '"
613           << Label->getLabel()->getName()
614           << "', which is not a null-terminated string";
615
616        // Generate a report for this bug.
617        EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
618                                                          os.str(), N);
619
620        report->addRange(Ex->getSourceRange());
621        C.EmitReport(report);
622      }
623
624      return UndefinedVal();
625    }
626
627    // If it's not a region and not a label, give up.
628    return UnknownVal();
629  }
630
631  // If we have a region, strip casts from it and see if we can figure out
632  // its length. For anything we can't figure out, just return UnknownVal.
633  MR = MR->StripCasts();
634
635  switch (MR->getKind()) {
636  case MemRegion::StringRegionKind: {
637    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
638    // so we can assume that the byte length is the correct C string length.
639    SValBuilder &svalBuilder = C.getSValBuilder();
640    QualType sizeTy = svalBuilder.getContext().getSizeType();
641    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
642    return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
643  }
644  case MemRegion::SymbolicRegionKind:
645  case MemRegion::AllocaRegionKind:
646  case MemRegion::VarRegionKind:
647  case MemRegion::FieldRegionKind:
648  case MemRegion::ObjCIvarRegionKind:
649    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
650  case MemRegion::CompoundLiteralRegionKind:
651    // FIXME: Can we track this? Is it necessary?
652    return UnknownVal();
653  case MemRegion::ElementRegionKind:
654    // FIXME: How can we handle this? It's not good enough to subtract the
655    // offset from the base string length; consider "123\x00567" and &a[5].
656    return UnknownVal();
657  default:
658    // Other regions (mostly non-data) can't have a reliable C string length.
659    // In this case, an error is emitted and UndefinedVal is returned.
660    // The caller should always be prepared to handle this case.
661    if (ExplodedNode *N = C.generateNode(state)) {
662      if (!BT_NotCString)
663        BT_NotCString.reset(new BuiltinBug("API",
664          "Argument is not a null-terminated string."));
665
666      llvm::SmallString<120> buf;
667      llvm::raw_svector_ostream os(buf);
668
669      os << "Argument to byte string function is ";
670
671      if (SummarizeRegion(os, C.getASTContext(), MR))
672        os << ", which is not a null-terminated string";
673      else
674        os << "not a null-terminated string";
675
676      // Generate a report for this bug.
677      EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
678                                                        os.str(), N);
679
680      report->addRange(Ex->getSourceRange());
681      C.EmitReport(report);
682    }
683
684    return UndefinedVal();
685  }
686}
687
688const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
689  const GRState *&state, const Expr *expr, SVal val) const {
690
691  // Get the memory region pointed to by the val.
692  const MemRegion *bufRegion = val.getAsRegion();
693  if (!bufRegion)
694    return NULL;
695
696  // Strip casts off the memory region.
697  bufRegion = bufRegion->StripCasts();
698
699  // Cast the memory region to a string region.
700  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
701  if (!strRegion)
702    return NULL;
703
704  // Return the actual string in the string region.
705  return strRegion->getStringLiteral();
706}
707
708const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C,
709                                                const GRState *state,
710                                                const Expr *E, SVal V) {
711  Loc *L = dyn_cast<Loc>(&V);
712  if (!L)
713    return state;
714
715  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
716  // some assumptions about the value that CFRefCount can't. Even so, it should
717  // probably be refactored.
718  if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
719    const MemRegion *R = MR->getRegion()->StripCasts();
720
721    // Are we dealing with an ElementRegion?  If so, we should be invalidating
722    // the super-region.
723    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
724      R = ER->getSuperRegion();
725      // FIXME: What about layers of ElementRegions?
726    }
727
728    // Invalidate this region.
729    unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
730    return state->invalidateRegion(R, E, Count, NULL);
731  }
732
733  // If we have a non-region value by chance, just remove the binding.
734  // FIXME: is this necessary or correct? This handles the non-Region
735  //  cases.  Is it ever valid to store to these?
736  return state->unbindLoc(*L);
737}
738
739bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
740                                     const MemRegion *MR) {
741  const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
742  if (!TR)
743    return false;
744
745  switch (TR->getKind()) {
746  case MemRegion::FunctionTextRegionKind: {
747    const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl();
748    if (FD)
749      os << "the address of the function '" << FD << "'";
750    else
751      os << "the address of a function";
752    return true;
753  }
754  case MemRegion::BlockTextRegionKind:
755    os << "block text";
756    return true;
757  case MemRegion::BlockDataRegionKind:
758    os << "a block";
759    return true;
760  case MemRegion::CXXThisRegionKind:
761  case MemRegion::CXXTempObjectRegionKind:
762    os << "a C++ temp object of type " << TR->getValueType().getAsString();
763    return true;
764  case MemRegion::VarRegionKind:
765    os << "a variable of type" << TR->getValueType().getAsString();
766    return true;
767  case MemRegion::FieldRegionKind:
768    os << "a field of type " << TR->getValueType().getAsString();
769    return true;
770  case MemRegion::ObjCIvarRegionKind:
771    os << "an instance variable of type " << TR->getValueType().getAsString();
772    return true;
773  default:
774    return false;
775  }
776}
777
778//===----------------------------------------------------------------------===//
779// evaluation of individual function calls.
780//===----------------------------------------------------------------------===//
781
782void CStringChecker::evalCopyCommon(CheckerContext &C,
783                                    const CallExpr *CE,
784                                    const GRState *state,
785                                    const Expr *Size, const Expr *Dest,
786                                    const Expr *Source, bool Restricted,
787                                    bool IsMempcpy) const {
788  // See if the size argument is zero.
789  SVal sizeVal = state->getSVal(Size);
790  QualType sizeTy = Size->getType();
791
792  const GRState *stateZeroSize, *stateNonZeroSize;
793  llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy);
794
795  // Get the value of the Dest.
796  SVal destVal = state->getSVal(Dest);
797
798  // If the size is zero, there won't be any actual memory access, so
799  // just bind the return value to the destination buffer and return.
800  if (stateZeroSize) {
801    stateZeroSize = stateZeroSize->BindExpr(CE, destVal);
802    C.addTransition(stateZeroSize);
803  }
804
805  // If the size can be nonzero, we have to check the other arguments.
806  if (stateNonZeroSize) {
807    state = stateNonZeroSize;
808
809    // Ensure the destination is not null. If it is NULL there will be a
810    // NULL pointer dereference.
811    state = checkNonNull(C, state, Dest, destVal);
812    if (!state)
813      return;
814
815    // Get the value of the Src.
816    SVal srcVal = state->getSVal(Source);
817
818    // Ensure the source is not null. If it is NULL there will be a
819    // NULL pointer dereference.
820    state = checkNonNull(C, state, Source, srcVal);
821    if (!state)
822      return;
823
824    // Ensure the accesses are valid and that the buffers do not overlap.
825    state = CheckBufferAccess(C, state, Size, Dest, Source,
826                              /* FirstIsDst = */ true);
827    if (Restricted)
828      state = CheckOverlap(C, state, Size, Dest, Source);
829
830    if (!state)
831      return;
832
833    // If this is mempcpy, get the byte after the last byte copied and
834    // bind the expr.
835    if (IsMempcpy) {
836      loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
837      assert(destRegVal && "Destination should be a known MemRegionVal here");
838
839      // Get the length to copy.
840      NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
841
842      if (lenValNonLoc) {
843        // Get the byte after the last byte copied.
844        SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
845                                                          *destRegVal,
846                                                          *lenValNonLoc,
847                                                          Dest->getType());
848
849        // The byte after the last byte copied is the return value.
850        state = state->BindExpr(CE, lastElement);
851      } else {
852        // If we don't know how much we copied, we can at least
853        // conjure a return value for later.
854        unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
855        SVal result =
856          C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
857        state = state->BindExpr(CE, result);
858      }
859
860    } else {
861      // All other copies return the destination buffer.
862      // (Well, bcopy() has a void return type, but this won't hurt.)
863      state = state->BindExpr(CE, destVal);
864    }
865
866    // Invalidate the destination.
867    // FIXME: Even if we can't perfectly model the copy, we should see if we
868    // can use LazyCompoundVals to copy the source values into the destination.
869    // This would probably remove any existing bindings past the end of the
870    // copied region, but that's still an improvement over blank invalidation.
871    state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest));
872    C.addTransition(state);
873  }
874}
875
876
877void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
878  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
879  // The return value is the address of the destination buffer.
880  const Expr *Dest = CE->getArg(0);
881  const GRState *state = C.getState();
882
883  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
884}
885
886void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
887  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
888  // The return value is a pointer to the byte following the last written byte.
889  const Expr *Dest = CE->getArg(0);
890  const GRState *state = C.getState();
891
892  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
893}
894
895void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
896  // void *memmove(void *dst, const void *src, size_t n);
897  // The return value is the address of the destination buffer.
898  const Expr *Dest = CE->getArg(0);
899  const GRState *state = C.getState();
900
901  evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
902}
903
904void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
905  // void bcopy(const void *src, void *dst, size_t n);
906  evalCopyCommon(C, CE, C.getState(),
907                 CE->getArg(2), CE->getArg(1), CE->getArg(0));
908}
909
910void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
911  // int memcmp(const void *s1, const void *s2, size_t n);
912  const Expr *Left = CE->getArg(0);
913  const Expr *Right = CE->getArg(1);
914  const Expr *Size = CE->getArg(2);
915
916  const GRState *state = C.getState();
917  SValBuilder &svalBuilder = C.getSValBuilder();
918
919  // See if the size argument is zero.
920  SVal sizeVal = state->getSVal(Size);
921  QualType sizeTy = Size->getType();
922
923  const GRState *stateZeroSize, *stateNonZeroSize;
924  llvm::tie(stateZeroSize, stateNonZeroSize) =
925    assumeZero(C, state, sizeVal, sizeTy);
926
927  // If the size can be zero, the result will be 0 in that case, and we don't
928  // have to check either of the buffers.
929  if (stateZeroSize) {
930    state = stateZeroSize;
931    state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
932    C.addTransition(state);
933  }
934
935  // If the size can be nonzero, we have to check the other arguments.
936  if (stateNonZeroSize) {
937    state = stateNonZeroSize;
938    // If we know the two buffers are the same, we know the result is 0.
939    // First, get the two buffers' addresses. Another checker will have already
940    // made sure they're not undefined.
941    DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left));
942    DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right));
943
944    // See if they are the same.
945    DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
946    const GRState *StSameBuf, *StNotSameBuf;
947    llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
948
949    // If the two arguments might be the same buffer, we know the result is zero,
950    // and we only need to check one size.
951    if (StSameBuf) {
952      state = StSameBuf;
953      state = CheckBufferAccess(C, state, Size, Left);
954      if (state) {
955        state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
956        C.addTransition(state);
957      }
958    }
959
960    // If the two arguments might be different buffers, we have to check the
961    // size of both of them.
962    if (StNotSameBuf) {
963      state = StNotSameBuf;
964      state = CheckBufferAccess(C, state, Size, Left, Right);
965      if (state) {
966        // The return value is the comparison result, which we don't know.
967        unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
968        SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
969        state = state->BindExpr(CE, CmpV);
970        C.addTransition(state);
971      }
972    }
973  }
974}
975
976void CStringChecker::evalstrLength(CheckerContext &C,
977                                   const CallExpr *CE) const {
978  // size_t strlen(const char *s);
979  evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
980}
981
982void CStringChecker::evalstrnLength(CheckerContext &C,
983                                    const CallExpr *CE) const {
984  // size_t strnlen(const char *s, size_t maxlen);
985  evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
986}
987
988void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
989                                         bool IsStrnlen) const {
990  const GRState *state = C.getState();
991
992  if (IsStrnlen) {
993    const Expr *maxlenExpr = CE->getArg(1);
994    SVal maxlenVal = state->getSVal(maxlenExpr);
995
996    const GRState *stateZeroSize, *stateNonZeroSize;
997    llvm::tie(stateZeroSize, stateNonZeroSize) =
998      assumeZero(C, state, maxlenVal, maxlenExpr->getType());
999
1000    // If the size can be zero, the result will be 0 in that case, and we don't
1001    // have to check the string itself.
1002    if (stateZeroSize) {
1003      SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1004      stateZeroSize = stateZeroSize->BindExpr(CE, zero);
1005      C.addTransition(stateZeroSize);
1006    }
1007
1008    // If the size is GUARANTEED to be zero, we're done!
1009    if (!stateNonZeroSize)
1010      return;
1011
1012    // Otherwise, record the assumption that the size is nonzero.
1013    state = stateNonZeroSize;
1014  }
1015
1016  // Check that the string argument is non-null.
1017  const Expr *Arg = CE->getArg(0);
1018  SVal ArgVal = state->getSVal(Arg);
1019
1020  state = checkNonNull(C, state, Arg, ArgVal);
1021
1022  if (!state)
1023    return;
1024
1025  SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1026
1027  // If the argument isn't a valid C string, there's no valid state to
1028  // transition to.
1029  if (strLength.isUndef())
1030    return;
1031
1032  DefinedOrUnknownSVal result = UnknownVal();
1033
1034  // If the check is for strnlen() then bind the return value to no more than
1035  // the maxlen value.
1036  if (IsStrnlen) {
1037    QualType cmpTy = C.getSValBuilder().getContext().IntTy;
1038
1039    // It's a little unfortunate to be getting this again,
1040    // but it's not that expensive...
1041    const Expr *maxlenExpr = CE->getArg(1);
1042    SVal maxlenVal = state->getSVal(maxlenExpr);
1043
1044    NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1045    NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
1046
1047    if (strLengthNL && maxlenValNL) {
1048      const GRState *stateStringTooLong, *stateStringNotTooLong;
1049
1050      // Check if the strLength is greater than the maxlen.
1051      llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1052        state->assume(cast<DefinedOrUnknownSVal>
1053                      (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1054                                                      *strLengthNL,
1055                                                      *maxlenValNL,
1056                                                      cmpTy)));
1057
1058      if (stateStringTooLong && !stateStringNotTooLong) {
1059        // If the string is longer than maxlen, return maxlen.
1060        result = *maxlenValNL;
1061      } else if (stateStringNotTooLong && !stateStringTooLong) {
1062        // If the string is shorter than maxlen, return its length.
1063        result = *strLengthNL;
1064      }
1065    }
1066
1067    if (result.isUnknown()) {
1068      // If we don't have enough information for a comparison, there's
1069      // no guarantee the full string length will actually be returned.
1070      // All we know is the return value is the min of the string length
1071      // and the limit. This is better than nothing.
1072      unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1073      result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
1074      NonLoc *resultNL = cast<NonLoc>(&result);
1075
1076      if (strLengthNL) {
1077        state = state->assume(cast<DefinedOrUnknownSVal>
1078                              (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1079                                                              *resultNL,
1080                                                              *strLengthNL,
1081                                                              cmpTy)), true);
1082      }
1083
1084      if (maxlenValNL) {
1085        state = state->assume(cast<DefinedOrUnknownSVal>
1086                              (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1087                                                              *resultNL,
1088                                                              *maxlenValNL,
1089                                                              cmpTy)), true);
1090      }
1091    }
1092
1093  } else {
1094    // This is a plain strlen(), not strnlen().
1095    result = cast<DefinedOrUnknownSVal>(strLength);
1096
1097    // If we don't know the length of the string, conjure a return
1098    // value, so it can be used in constraints, at least.
1099    if (result.isUnknown()) {
1100      unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1101      result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count);
1102    }
1103  }
1104
1105  // Bind the return value.
1106  assert(!result.isUnknown() && "Should have conjured a value by now");
1107  state = state->BindExpr(CE, result);
1108  C.addTransition(state);
1109}
1110
1111void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1112  // char *strcpy(char *restrict dst, const char *restrict src);
1113  evalStrcpyCommon(C, CE,
1114                   /* returnEnd = */ false,
1115                   /* isBounded = */ false,
1116                   /* isAppending = */ false);
1117}
1118
1119void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1120  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1121  evalStrcpyCommon(C, CE,
1122                   /* returnEnd = */ false,
1123                   /* isBounded = */ true,
1124                   /* isAppending = */ false);
1125}
1126
1127void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1128  // char *stpcpy(char *restrict dst, const char *restrict src);
1129  evalStrcpyCommon(C, CE,
1130                   /* returnEnd = */ true,
1131                   /* isBounded = */ false,
1132                   /* isAppending = */ false);
1133}
1134
1135void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1136  //char *strcat(char *restrict s1, const char *restrict s2);
1137  evalStrcpyCommon(C, CE,
1138                   /* returnEnd = */ false,
1139                   /* isBounded = */ false,
1140                   /* isAppending = */ true);
1141}
1142
1143void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1144  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1145  evalStrcpyCommon(C, CE,
1146                   /* returnEnd = */ false,
1147                   /* isBounded = */ true,
1148                   /* isAppending = */ true);
1149}
1150
1151void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1152                                      bool returnEnd, bool isBounded,
1153                                      bool isAppending) const {
1154  const GRState *state = C.getState();
1155
1156  // Check that the destination is non-null.
1157  const Expr *Dst = CE->getArg(0);
1158  SVal DstVal = state->getSVal(Dst);
1159
1160  state = checkNonNull(C, state, Dst, DstVal);
1161  if (!state)
1162    return;
1163
1164  // Check that the source is non-null.
1165  const Expr *srcExpr = CE->getArg(1);
1166  SVal srcVal = state->getSVal(srcExpr);
1167  state = checkNonNull(C, state, srcExpr, srcVal);
1168  if (!state)
1169    return;
1170
1171  // Get the string length of the source.
1172  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1173
1174  // If the source isn't a valid C string, give up.
1175  if (strLength.isUndef())
1176    return;
1177
1178  SValBuilder &svalBuilder = C.getSValBuilder();
1179  QualType cmpTy = svalBuilder.getConditionType();
1180
1181  SVal amountCopied = UnknownVal();
1182
1183  // If the function is strncpy, strncat, etc... it is bounded.
1184  if (isBounded) {
1185    // Get the max number of characters to copy.
1186    const Expr *lenExpr = CE->getArg(2);
1187    SVal lenVal = state->getSVal(lenExpr);
1188
1189    // Protect against misdeclared strncpy().
1190    lenVal = svalBuilder.evalCast(lenVal,
1191                                  svalBuilder.getContext().getSizeType(),
1192                                  lenExpr->getType());
1193
1194    NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1195    NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1196
1197    // If we know both values, we might be able to figure out how much
1198    // we're copying.
1199    if (strLengthNL && lenValNL) {
1200      const GRState *stateSourceTooLong, *stateSourceNotTooLong;
1201
1202      // Check if the max number to copy is less than the length of the src.
1203      llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
1204        state->assume(cast<DefinedOrUnknownSVal>
1205                      (svalBuilder.evalBinOpNN(state, BO_GT, *strLengthNL,
1206                                               *lenValNL, cmpTy)));
1207
1208      if (stateSourceTooLong && !stateSourceNotTooLong) {
1209        // Max number to copy is less than the length of the src, so the actual
1210        // strLength copied is the max number arg.
1211        state = stateSourceTooLong;
1212        amountCopied = lenVal;
1213
1214      } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1215        // The source buffer entirely fits in the bound.
1216        state = stateSourceNotTooLong;
1217        amountCopied = strLength;
1218      }
1219    }
1220
1221    // If we couldn't pin down the copy length, at least bound it.
1222    if (amountCopied.isUnknown()) {
1223      // Try to get a "hypothetical" string length symbol, which we can later
1224      // set as a real value if that turns out to be the case.
1225      amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1226      assert(!amountCopied.isUndef());
1227
1228      if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
1229        if (lenValNL) {
1230          // amountCopied <= lenVal
1231          SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1232                                                             *amountCopiedNL,
1233                                                             *lenValNL,
1234                                                             cmpTy);
1235          state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
1236                                true);
1237          if (!state)
1238            return;
1239        }
1240
1241        if (strLengthNL) {
1242          // amountCopied <= strlen(source)
1243          SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1244                                                           *amountCopiedNL,
1245                                                           *strLengthNL,
1246                                                           cmpTy);
1247          state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
1248                                true);
1249          if (!state)
1250            return;
1251        }
1252      }
1253    }
1254
1255  } else {
1256    // The function isn't bounded. The amount copied should match the length
1257    // of the source buffer.
1258    amountCopied = strLength;
1259  }
1260
1261  assert(state);
1262
1263  // This represents the number of characters copied into the destination
1264  // buffer. (It may not actually be the strlen if the destination buffer
1265  // is not terminated.)
1266  SVal finalStrLength = UnknownVal();
1267
1268  // If this is an appending function (strcat, strncat...) then set the
1269  // string length to strlen(src) + strlen(dst) since the buffer will
1270  // ultimately contain both.
1271  if (isAppending) {
1272    // Get the string length of the destination, or give up.
1273    SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1274    if (dstStrLength.isUndef())
1275      return;
1276
1277    QualType sizeTy = svalBuilder.getContext().getSizeType();
1278
1279    NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
1280    NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1281
1282    // If we know both string lengths, we might know the final string length.
1283    if (srcStrLengthNL && dstStrLengthNL) {
1284      // Make sure the two lengths together don't overflow a size_t.
1285      state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1286      if (!state)
1287        return;
1288
1289      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1290                                               *dstStrLengthNL, sizeTy);
1291    }
1292
1293    // If we couldn't get a single value for the final string length,
1294    // we can at least bound it by the individual lengths.
1295    if (finalStrLength.isUnknown()) {
1296      // Try to get a "hypothetical" string length symbol, which we can later
1297      // set as a real value if that turns out to be the case.
1298      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1299      assert(!finalStrLength.isUndef());
1300
1301      if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
1302        if (srcStrLengthNL) {
1303          // finalStrLength >= srcStrLength
1304          SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1305                                                        *finalStrLengthNL,
1306                                                        *srcStrLengthNL,
1307                                                        cmpTy);
1308          state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
1309                                true);
1310          if (!state)
1311            return;
1312        }
1313
1314        if (dstStrLengthNL) {
1315          // finalStrLength >= dstStrLength
1316          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1317                                                      *finalStrLengthNL,
1318                                                      *dstStrLengthNL,
1319                                                      cmpTy);
1320          state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
1321                                true);
1322          if (!state)
1323            return;
1324        }
1325      }
1326    }
1327
1328  } else {
1329    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1330    // the final string length will match the input string length.
1331    finalStrLength = amountCopied;
1332  }
1333
1334  // The final result of the function will either be a pointer past the last
1335  // copied element, or a pointer to the start of the destination buffer.
1336  SVal Result = (returnEnd ? UnknownVal() : DstVal);
1337
1338  assert(state);
1339
1340  // If the destination is a MemRegion, try to check for a buffer overflow and
1341  // record the new string length.
1342  if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1343    // If the final length is known, we can check for an overflow.
1344    if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
1345      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1346                                                 *knownStrLength,
1347                                                 Dst->getType());
1348
1349      state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true);
1350      if (!state)
1351        return;
1352
1353      // If this is a stpcpy-style copy, the last element is the return value.
1354      if (returnEnd)
1355        Result = lastElement;
1356    }
1357
1358    // Invalidate the destination. This must happen before we set the C string
1359    // length because invalidation will clear the length.
1360    // FIXME: Even if we can't perfectly model the copy, we should see if we
1361    // can use LazyCompoundVals to copy the source values into the destination.
1362    // This would probably remove any existing bindings past the end of the
1363    // string, but that's still an improvement over blank invalidation.
1364    state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1365
1366    // Set the C string length of the destination.
1367    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1368  }
1369
1370  assert(state);
1371
1372  // If this is a stpcpy-style copy, but we were unable to check for a buffer
1373  // overflow, we still need a result. Conjure a return value.
1374  if (returnEnd && Result.isUnknown()) {
1375    unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
1376    Result = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
1377  }
1378
1379  // Set the return value.
1380  state = state->BindExpr(CE, Result);
1381  C.addTransition(state);
1382}
1383
1384void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1385  //int strcmp(const char *restrict s1, const char *restrict s2);
1386  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1387}
1388
1389void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1390  //int strncmp(const char *restrict s1, const char *restrict s2, size_t n);
1391  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1392}
1393
1394void CStringChecker::evalStrcasecmp(CheckerContext &C,
1395                                    const CallExpr *CE) const {
1396  //int strcasecmp(const char *restrict s1, const char *restrict s2);
1397  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1398}
1399
1400void CStringChecker::evalStrncasecmp(CheckerContext &C,
1401                                     const CallExpr *CE) const {
1402  //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n);
1403  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1404}
1405
1406void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1407                                      bool isBounded, bool ignoreCase) const {
1408  const GRState *state = C.getState();
1409
1410  // Check that the first string is non-null
1411  const Expr *s1 = CE->getArg(0);
1412  SVal s1Val = state->getSVal(s1);
1413  state = checkNonNull(C, state, s1, s1Val);
1414  if (!state)
1415    return;
1416
1417  // Check that the second string is non-null.
1418  const Expr *s2 = CE->getArg(1);
1419  SVal s2Val = state->getSVal(s2);
1420  state = checkNonNull(C, state, s2, s2Val);
1421  if (!state)
1422    return;
1423
1424  // Get the string length of the first string or give up.
1425  SVal s1Length = getCStringLength(C, state, s1, s1Val);
1426  if (s1Length.isUndef())
1427    return;
1428
1429  // Get the string length of the second string or give up.
1430  SVal s2Length = getCStringLength(C, state, s2, s2Val);
1431  if (s2Length.isUndef())
1432    return;
1433
1434  // Get the string literal of the first string.
1435  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1436  if (!s1StrLiteral)
1437    return;
1438  llvm::StringRef s1StrRef = s1StrLiteral->getString();
1439
1440  // Get the string literal of the second string.
1441  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1442  if (!s2StrLiteral)
1443    return;
1444  llvm::StringRef s2StrRef = s2StrLiteral->getString();
1445
1446  int result;
1447  if (isBounded) {
1448    // Get the max number of characters to compare.
1449    const Expr *lenExpr = CE->getArg(2);
1450    SVal lenVal = state->getSVal(lenExpr);
1451
1452    // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt
1453    // then give up, otherwise get the value and use it as the bounds.
1454    nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal);
1455    if (!CI)
1456      return;
1457    llvm::APSInt lenInt(CI->getValue());
1458
1459    // Create substrings of each to compare the prefix.
1460    s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue());
1461    s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue());
1462  }
1463
1464  if (ignoreCase) {
1465    // Compare string 1 to string 2 the same way strcasecmp() does.
1466    result = s1StrRef.compare_lower(s2StrRef);
1467  } else {
1468    // Compare string 1 to string 2 the same way strcmp() does.
1469    result = s1StrRef.compare(s2StrRef);
1470  }
1471
1472  // Build the SVal of the comparison to bind the return value.
1473  SValBuilder &svalBuilder = C.getSValBuilder();
1474  QualType intTy = svalBuilder.getContext().IntTy;
1475  SVal resultVal = svalBuilder.makeIntVal(result, intTy);
1476
1477  // Bind the return value of the expression.
1478  // Set the return value.
1479  state = state->BindExpr(CE, resultVal);
1480  C.addTransition(state);
1481}
1482
1483//===----------------------------------------------------------------------===//
1484// The driver method, and other Checker callbacks.
1485//===----------------------------------------------------------------------===//
1486
1487bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1488  // Get the callee.  All the functions we care about are C functions
1489  // with simple identifiers.
1490  const GRState *state = C.getState();
1491  const Expr *Callee = CE->getCallee();
1492  const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl();
1493
1494  if (!FD)
1495    return false;
1496
1497  // Get the name of the callee. If it's a builtin, strip off the prefix.
1498  IdentifierInfo *II = FD->getIdentifier();
1499  if (!II)   // if no identifier, not a simple C function
1500    return false;
1501  llvm::StringRef Name = II->getName();
1502  if (Name.startswith("__builtin_"))
1503    Name = Name.substr(10);
1504
1505  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
1506    .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy)
1507    .Cases("mempcpy", "__mempcpy_chk", &CStringChecker::evalMempcpy)
1508    .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp)
1509    .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove)
1510    .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy)
1511    //.Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy)
1512    .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy)
1513    .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat)
1514    .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat)
1515    .Case("strlen", &CStringChecker::evalstrLength)
1516    .Case("strnlen", &CStringChecker::evalstrnLength)
1517    .Case("strcmp", &CStringChecker::evalStrcmp)
1518    .Case("strncmp", &CStringChecker::evalStrncmp)
1519    .Case("strcasecmp", &CStringChecker::evalStrcasecmp)
1520    .Case("strncasecmp", &CStringChecker::evalStrncasecmp)
1521    .Case("bcopy", &CStringChecker::evalBcopy)
1522    .Default(NULL);
1523
1524  // If the callee isn't a string function, let another checker handle it.
1525  if (!evalFunction)
1526    return false;
1527
1528  // Check and evaluate the call.
1529  (this->*evalFunction)(C, CE);
1530  return true;
1531}
1532
1533void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1534  // Record string length for char a[] = "abc";
1535  const GRState *state = C.getState();
1536
1537  for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1538       I != E; ++I) {
1539    const VarDecl *D = dyn_cast<VarDecl>(*I);
1540    if (!D)
1541      continue;
1542
1543    // FIXME: Handle array fields of structs.
1544    if (!D->getType()->isArrayType())
1545      continue;
1546
1547    const Expr *Init = D->getInit();
1548    if (!Init)
1549      continue;
1550    if (!isa<StringLiteral>(Init))
1551      continue;
1552
1553    Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext());
1554    const MemRegion *MR = VarLoc.getAsRegion();
1555    if (!MR)
1556      continue;
1557
1558    SVal StrVal = state->getSVal(Init);
1559    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1560    DefinedOrUnknownSVal strLength
1561      = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1562
1563    state = state->set<CStringLength>(MR, strLength);
1564  }
1565
1566  C.addTransition(state);
1567}
1568
1569bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const {
1570  CStringLength::EntryMap Entries = state->get<CStringLength>();
1571  return !Entries.isEmpty();
1572}
1573
1574const GRState *
1575CStringChecker::checkRegionChanges(const GRState *state,
1576                                   const StoreManager::InvalidatedSymbols *,
1577                                   const MemRegion * const *Begin,
1578                                   const MemRegion * const *End) const {
1579  CStringLength::EntryMap Entries = state->get<CStringLength>();
1580  if (Entries.isEmpty())
1581    return state;
1582
1583  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1584  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1585
1586  // First build sets for the changed regions and their super-regions.
1587  for ( ; Begin != End; ++Begin) {
1588    const MemRegion *MR = *Begin;
1589    Invalidated.insert(MR);
1590
1591    SuperRegions.insert(MR);
1592    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1593      MR = SR->getSuperRegion();
1594      SuperRegions.insert(MR);
1595    }
1596  }
1597
1598  CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1599
1600  // Then loop over the entries in the current state.
1601  for (CStringLength::EntryMap::iterator I = Entries.begin(),
1602       E = Entries.end(); I != E; ++I) {
1603    const MemRegion *MR = I.getKey();
1604
1605    // Is this entry for a super-region of a changed region?
1606    if (SuperRegions.count(MR)) {
1607      Entries = F.remove(Entries, MR);
1608      continue;
1609    }
1610
1611    // Is this entry for a sub-region of a changed region?
1612    const MemRegion *Super = MR;
1613    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1614      Super = SR->getSuperRegion();
1615      if (Invalidated.count(Super)) {
1616        Entries = F.remove(Entries, MR);
1617        break;
1618      }
1619    }
1620  }
1621
1622  return state->set<CStringLength>(Entries);
1623}
1624
1625void CStringChecker::checkLiveSymbols(const GRState *state,
1626                                      SymbolReaper &SR) const {
1627  // Mark all symbols in our string length map as valid.
1628  CStringLength::EntryMap Entries = state->get<CStringLength>();
1629
1630  for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1631       I != E; ++I) {
1632    SVal Len = I.getData();
1633
1634    for (SVal::symbol_iterator si = Len.symbol_begin(), se = Len.symbol_end();
1635         si != se; ++si)
1636      SR.markInUse(*si);
1637  }
1638}
1639
1640void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1641                                      CheckerContext &C) const {
1642  if (!SR.hasDeadSymbols())
1643    return;
1644
1645  const GRState *state = C.getState();
1646  CStringLength::EntryMap Entries = state->get<CStringLength>();
1647  if (Entries.isEmpty())
1648    return;
1649
1650  CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
1651  for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
1652       I != E; ++I) {
1653    SVal Len = I.getData();
1654    if (SymbolRef Sym = Len.getAsSymbol()) {
1655      if (SR.isDead(Sym))
1656        Entries = F.remove(Entries, I.getKey());
1657    }
1658  }
1659
1660  state = state->set<CStringLength>(Entries);
1661  C.generateNode(state);
1662}
1663
1664void ento::registerCStringChecker(CheckerManager &mgr) {
1665  mgr.registerChecker<CStringChecker>();
1666}
1667