GenericTaintChecker.cpp revision 3026348bd4c13a0f83b59839f64065e0fcbea253
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include "clang/Basic/Builtins.h"
24#include <climits>
25
26using namespace clang;
27using namespace ento;
28
29namespace {
30class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
31                                            check::PreStmt<CallExpr> > {
32public:
33  static void *getTag() { static int Tag; return &Tag; }
34
35  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39
40private:
41  static const unsigned ReturnValueIndex = UINT_MAX;
42  static const unsigned InvalidArgIndex = UINT_MAX - 1;
43
44  mutable llvm::OwningPtr<BugType> BT;
45  inline void initBugType() const {
46    if (!BT)
47      BT.reset(new BugType("Taint Analysis", "General"));
48  }
49
50  /// \brief Catch taint related bugs. Check if tainted data is passed to a
51  /// system call etc.
52  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
53
54  /// \brief Add taint sources on a pre-visit.
55  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
56
57  /// \brief Propagate taint generated at pre-visit.
58  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
59
60  /// \brief Add taint sources on a post visit.
61  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
62
63  /// \brief Given a pointer argument, get the symbol of the value it contains
64  /// (points to).
65  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
66
67  static inline bool isTaintedOrPointsToTainted(const Expr *E,
68                                                const ProgramState *State,
69                                                CheckerContext &C) {
70    return (State->isTainted(E, C.getLocationContext()) ||
71            (E->getType().getTypePtr()->isPointerType() &&
72             State->isTainted(getPointedToSymbol(C, E))));
73  }
74
75  /// Functions defining the attack surface.
76  typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
77                                                       CheckerContext &C) const;
78  const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
79  const ProgramState *postSocket(const CallExpr *CE, CheckerContext &C) const;
80  const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
81
82  /// Taint the scanned input if the file is tainted.
83  const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
84
85  /// Check if the region the expression evaluates to is the standard input,
86  /// and thus, is tainted.
87  bool isStdin(const Expr *E, CheckerContext &C) const;
88
89  /// Check for CWE-134: Uncontrolled Format String.
90  static const char MsgUncontrolledFormatString[];
91  bool checkUncontrolledFormatString(const CallExpr *CE,
92                                     CheckerContext &C) const;
93
94  /// Check for:
95  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
96  /// CWE-78, "Failure to Sanitize Data into an OS Command"
97  static const char MsgSanitizeSystemArgs[];
98  bool checkSystemCall(const CallExpr *CE, StringRef Name,
99                       CheckerContext &C) const;
100
101  /// Check if tainted data is used as a buffer size ins strn.. functions,
102  /// and allocators.
103  static const char MsgTaintedBufferSize[];
104  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
105                              CheckerContext &C) const;
106
107  /// Generate a report if the expression is tainted or points to tainted data.
108  bool generateReportIfTainted(const Expr *E, const char Msg[],
109                               CheckerContext &C) const;
110
111
112  typedef llvm::SmallVector<unsigned, 2> ArgVector;
113
114  /// \brief A struct used to specify taint propagation rules for a function.
115  ///
116  /// If any of the possible taint source arguments is tainted, all of the
117  /// destination arguments should also be tainted. Use InvalidArgIndex in the
118  /// src list to specify that all of the arguments can introduce taint. Use
119  /// InvalidArgIndex in the dst arguments to signify that all the non-const
120  /// pointer and reference arguments might be tainted on return. If
121  /// ReturnValueIndex is added to the dst list, the return value will be
122  /// tainted.
123  struct TaintPropagationRule {
124    /// List of arguments which can be taint sources and should be checked.
125    ArgVector SrcArgs;
126    /// List of arguments which should be tainted on function return.
127    ArgVector DstArgs;
128    // TODO: Check if using other data structures would be more optimal.
129
130    TaintPropagationRule() {}
131
132    TaintPropagationRule(unsigned SArg,
133                         unsigned DArg, bool TaintRet = false) {
134      SrcArgs.push_back(SArg);
135      DstArgs.push_back(DArg);
136      if (TaintRet)
137        DstArgs.push_back(ReturnValueIndex);
138    }
139
140    TaintPropagationRule(unsigned SArg1, unsigned SArg2,
141                         unsigned DArg, bool TaintRet = false) {
142      SrcArgs.push_back(SArg1);
143      SrcArgs.push_back(SArg2);
144      DstArgs.push_back(DArg);
145      if (TaintRet)
146        DstArgs.push_back(ReturnValueIndex);
147    }
148
149    /// Get the propagation rule for a given function.
150    static TaintPropagationRule
151      getTaintPropagationRule(const FunctionDecl *FDecl,
152                              StringRef Name,
153                              CheckerContext &C);
154
155    inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
156    inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
157
158    inline bool isNull() const { return SrcArgs.empty(); }
159
160    inline bool isDestinationArgument(unsigned ArgNum) const {
161      return (std::find(DstArgs.begin(),
162                        DstArgs.end(), ArgNum) != DstArgs.end());
163    }
164
165    /// \brief Pre-process a function which propagates taint according to the
166    /// taint rule.
167    const ProgramState *process(const CallExpr *CE, CheckerContext &C) const;
168
169  };
170};
171
172const unsigned GenericTaintChecker::ReturnValueIndex;
173const unsigned GenericTaintChecker::InvalidArgIndex;
174
175const char GenericTaintChecker::MsgUncontrolledFormatString[] =
176  "Tainted format string (CWE-134: Uncontrolled Format String)";
177
178const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
179  "Tainted data passed to a system call "
180  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
181
182const char GenericTaintChecker::MsgTaintedBufferSize[] =
183  "Tainted data is used to specify the buffer size "
184  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
185  "character data and the null terminator)";
186
187} // end of anonymous namespace
188
189/// A set which is used to pass information from call pre-visit instruction
190/// to the call post-visit. The values are unsigned integers, which are either
191/// ReturnValueIndex, or indexes of the pointer/reference argument, which
192/// points to data, which should be tainted on return.
193namespace { struct TaintArgsOnPostVisit{}; }
194namespace clang { namespace ento {
195template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
196    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
197  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
198};
199}}
200
201GenericTaintChecker::TaintPropagationRule
202GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
203                                                     const FunctionDecl *FDecl,
204                                                     StringRef Name,
205                                                     CheckerContext &C) {
206  // Check for exact name match for functions without builtin substitutes.
207  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
208    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
209    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
210    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
211    .Case("read", TaintPropagationRule(0, 2, 1, true))
212    .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
213    .Default(TaintPropagationRule());
214
215  if (!Rule.isNull())
216    return Rule;
217
218  // Check if it's one of the memory setting/copying functions.
219  // This check is specialized but faster then calling isCLibraryFunction.
220  unsigned BId = 0;
221  if ( (BId = FDecl->getMemoryFunctionKind()) )
222    switch(BId) {
223    case Builtin::BImemcpy:
224    case Builtin::BImemmove:
225    case Builtin::BIstrncpy:
226    case Builtin::BIstrncat:
227      return TaintPropagationRule(1, 2, 0, true);
228    case Builtin::BIstrlcpy:
229    case Builtin::BIstrlcat:
230      return TaintPropagationRule(1, 2, 0, false);
231    case Builtin::BIstrndup:
232      return TaintPropagationRule(0, 1, ReturnValueIndex);
233
234    default:
235      break;
236    };
237
238  // Process all other functions which could be defined as builtins.
239  if (Rule.isNull()) {
240    if (C.isCLibraryFunction(FDecl, "snprintf") ||
241        C.isCLibraryFunction(FDecl, "sprintf"))
242      return TaintPropagationRule(InvalidArgIndex, 0, true);
243    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
244             C.isCLibraryFunction(FDecl, "stpcpy") ||
245             C.isCLibraryFunction(FDecl, "strcat"))
246      return TaintPropagationRule(1, 0, true);
247    else if (C.isCLibraryFunction(FDecl, "bcopy"))
248      return TaintPropagationRule(0, 2, 1, false);
249    else if (C.isCLibraryFunction(FDecl, "strdup") ||
250             C.isCLibraryFunction(FDecl, "strdupa"))
251      return TaintPropagationRule(0, ReturnValueIndex);
252    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
253      return TaintPropagationRule(0, ReturnValueIndex);
254  }
255
256  // Skipping the following functions, since they might be used for cleansing
257  // or smart memory copy:
258  // - memccpy - copying untill hitting a special character.
259
260  return TaintPropagationRule();
261}
262
263void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
264                                       CheckerContext &C) const {
265  // Check for errors first.
266  if (checkPre(CE, C))
267    return;
268
269  // Add taint second.
270  addSourcesPre(CE, C);
271}
272
273void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
274                                        CheckerContext &C) const {
275  if (propagateFromPre(CE, C))
276    return;
277  addSourcesPost(CE, C);
278}
279
280void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
281                                        CheckerContext &C) const {
282  const ProgramState *State = 0;
283  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
284  StringRef Name = C.getCalleeName(FDecl);
285  if (Name.empty())
286    return;
287
288  // First, try generating a propagation rule for this function.
289  TaintPropagationRule Rule =
290    TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
291  if (!Rule.isNull()) {
292    State = Rule.process(CE, C);
293    if (!State)
294      return;
295    C.addTransition(State);
296    return;
297  }
298
299  // Otherwise, check if we have custom pre-processing implemented.
300  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
301    .Case("fscanf", &GenericTaintChecker::preFscanf)
302    .Default(0);
303  // Check and evaluate the call.
304  if (evalFunction)
305    State = (this->*evalFunction)(CE, C);
306  if (!State)
307    return;
308  C.addTransition(State);
309
310}
311
312bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
313                                           CheckerContext &C) const {
314  const ProgramState *State = C.getState();
315
316  // Depending on what was tainted at pre-visit, we determined a set of
317  // arguments which should be tainted after the function returns. These are
318  // stored in the state as TaintArgsOnPostVisit set.
319  llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
320  for (llvm::ImmutableSet<unsigned>::iterator
321         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
322    unsigned ArgNum  = *I;
323
324    // Special handling for the tainted return value.
325    if (ArgNum == ReturnValueIndex) {
326      State = State->addTaint(CE, C.getLocationContext());
327      continue;
328    }
329
330    // The arguments are pointer arguments. The data they are pointing at is
331    // tainted after the call.
332    const Expr* Arg = CE->getArg(ArgNum);
333    SymbolRef Sym = getPointedToSymbol(C, Arg);
334    if (Sym)
335      State = State->addTaint(Sym);
336  }
337
338  // Clear up the taint info from the state.
339  State = State->remove<TaintArgsOnPostVisit>();
340
341  if (State != C.getState()) {
342    C.addTransition(State);
343    return true;
344  }
345  return false;
346}
347
348void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
349                                         CheckerContext &C) const {
350  // Define the attack surface.
351  // Set the evaluation function by switching on the callee name.
352  StringRef Name = C.getCalleeName(CE);
353  if (Name.empty())
354    return;
355  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
356    .Case("scanf", &GenericTaintChecker::postScanf)
357    // TODO: Add support for vfscanf & family.
358    .Case("getchar", &GenericTaintChecker::postRetTaint)
359    .Case("getenv", &GenericTaintChecker::postRetTaint)
360    .Case("fopen", &GenericTaintChecker::postRetTaint)
361    .Case("fdopen", &GenericTaintChecker::postRetTaint)
362    .Case("freopen", &GenericTaintChecker::postRetTaint)
363    .Case("socket", &GenericTaintChecker::postSocket)
364    .Default(0);
365
366  // If the callee isn't defined, it is not of security concern.
367  // Check and evaluate the call.
368  const ProgramState *State = 0;
369  if (evalFunction)
370    State = (this->*evalFunction)(CE, C);
371  if (!State)
372    return;
373
374  C.addTransition(State);
375}
376
377bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
378
379  if (checkUncontrolledFormatString(CE, C))
380    return true;
381
382  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
383  StringRef Name = C.getCalleeName(FDecl);
384  if (Name.empty())
385    return false;
386
387  if (checkSystemCall(CE, Name, C))
388    return true;
389
390  if (checkTaintedBufferSize(CE, FDecl, C))
391    return true;
392
393  return false;
394}
395
396SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
397                                                  const Expr* Arg) {
398  const ProgramState *State = C.getState();
399  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
400  if (AddrVal.isUnknownOrUndef())
401    return 0;
402
403  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
404  if (!AddrLoc)
405    return 0;
406
407  const PointerType *ArgTy =
408    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
409  assert(ArgTy);
410  SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType());
411  return Val.getAsSymbol();
412}
413
414const ProgramState *
415GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
416                                                   CheckerContext &C) const {
417  const ProgramState *State = C.getState();
418
419  // Check for taint in arguments.
420  bool IsTainted = false;
421  for (ArgVector::const_iterator I = SrcArgs.begin(),
422                                 E = SrcArgs.end(); I != E; ++I) {
423    unsigned ArgNum = *I;
424
425    if (ArgNum == InvalidArgIndex) {
426      // Check if any of the arguments is tainted, but skip the
427      // destination arguments.
428      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
429        if (isDestinationArgument(i))
430          continue;
431        if ((IsTainted =
432               GenericTaintChecker::isTaintedOrPointsToTainted(CE->getArg(i),
433                                                               State, C)))
434          break;
435      }
436      break;
437    }
438
439    assert(ArgNum < CE->getNumArgs());
440    if ((IsTainted =
441           GenericTaintChecker::isTaintedOrPointsToTainted(CE->getArg(ArgNum),
442                                                           State, C)))
443      break;
444  }
445  if (!IsTainted)
446    return State;
447
448  // Mark the arguments which should be tainted after the function returns.
449  for (ArgVector::const_iterator I = DstArgs.begin(),
450                                 E = DstArgs.end(); I != E; ++I) {
451    unsigned ArgNum = *I;
452
453    // Should we mark all arguments as tainted?
454    if (ArgNum == InvalidArgIndex) {
455      // For all pointer and references that were passed in:
456      //   If they are not pointing to const data, mark data as tainted.
457      //   TODO: So far we are just going one level down; ideally we'd need to
458      //         recurse here.
459      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
460        const Expr *Arg = CE->getArg(i);
461        // Process pointer argument.
462        const Type *ArgTy = Arg->getType().getTypePtr();
463        QualType PType = ArgTy->getPointeeType();
464        if ((!PType.isNull() && !PType.isConstQualified())
465            || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
466          State = State->add<TaintArgsOnPostVisit>(i);
467      }
468      continue;
469    }
470
471    // Should mark the return value?
472    if (ArgNum == ReturnValueIndex) {
473      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
474      continue;
475    }
476
477    // Mark the given argument.
478    assert(ArgNum < CE->getNumArgs());
479    State = State->add<TaintArgsOnPostVisit>(ArgNum);
480  }
481
482  return State;
483}
484
485
486// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
487// and arg 1 should get taint.
488const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
489                                                   CheckerContext &C) const {
490  assert(CE->getNumArgs() >= 2);
491  const ProgramState *State = C.getState();
492
493  // Check is the file descriptor is tainted.
494  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
495      isStdin(CE->getArg(0), C)) {
496    // All arguments except for the first two should get taint.
497    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
498        State = State->add<TaintArgsOnPostVisit>(i);
499    return State;
500  }
501
502  return 0;
503}
504
505
506// If argument 0(protocol domain) is network, the return value should get taint.
507const ProgramState *GenericTaintChecker::postSocket(const CallExpr *CE,
508                                                    CheckerContext &C) const {
509  assert(CE->getNumArgs() >= 3);
510  const ProgramState *State = C.getState();
511
512  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
513  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
514  // White list the internal communication protocols.
515  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
516      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
517    return State;
518  State = State->addTaint(CE, C.getLocationContext());
519  return State;
520}
521
522const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
523                                                   CheckerContext &C) const {
524  const ProgramState *State = C.getState();
525  assert(CE->getNumArgs() >= 2);
526  SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
527  // All arguments except for the very first one should get taint.
528  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
529    // The arguments are pointer arguments. The data they are pointing at is
530    // tainted after the call.
531    const Expr* Arg = CE->getArg(i);
532        SymbolRef Sym = getPointedToSymbol(C, Arg);
533    if (Sym)
534      State = State->addTaint(Sym);
535  }
536  return State;
537}
538
539const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
540                                                      CheckerContext &C) const {
541  return C.getState()->addTaint(CE, C.getLocationContext());
542}
543
544bool GenericTaintChecker::isStdin(const Expr *E,
545                                  CheckerContext &C) const {
546  const ProgramState *State = C.getState();
547  SVal Val = State->getSVal(E, C.getLocationContext());
548
549  // stdin is a pointer, so it would be a region.
550  const MemRegion *MemReg = Val.getAsRegion();
551
552  // The region should be symbolic, we do not know it's value.
553  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
554  if (!SymReg)
555    return false;
556
557  // Get it's symbol and find the declaration region it's pointing to.
558  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
559  if (!Sm)
560    return false;
561  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
562  if (!DeclReg)
563    return false;
564
565  // This region corresponds to a declaration, find out if it's a global/extern
566  // variable named stdin with the proper type.
567  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
568    D = D->getCanonicalDecl();
569    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
570        if (const PointerType * PtrTy =
571              dyn_cast<PointerType>(D->getType().getTypePtr()))
572          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
573            return true;
574  }
575  return false;
576}
577
578static bool getPrintfFormatArgumentNum(const CallExpr *CE,
579                                       const CheckerContext &C,
580                                       unsigned int &ArgNum) {
581  // Find if the function contains a format string argument.
582  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
583  // vsnprintf, syslog, custom annotated functions.
584  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
585  if (!FDecl)
586    return false;
587  for (specific_attr_iterator<FormatAttr>
588         i = FDecl->specific_attr_begin<FormatAttr>(),
589         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
590
591    const FormatAttr *Format = *i;
592    ArgNum = Format->getFormatIdx() - 1;
593    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
594      return true;
595  }
596
597  // Or if a function is named setproctitle (this is a heuristic).
598  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
599    ArgNum = 0;
600    return true;
601  }
602
603  return false;
604}
605
606bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
607                                                  const char Msg[],
608                                                  CheckerContext &C) const {
609  assert(E);
610
611  // Check for taint.
612  const ProgramState *State = C.getState();
613  if (!State->isTainted(getPointedToSymbol(C, E)) &&
614      !State->isTainted(E, C.getLocationContext()))
615    return false;
616
617  // Generate diagnostic.
618  if (ExplodedNode *N = C.addTransition()) {
619    initBugType();
620    BugReport *report = new BugReport(*BT, Msg, N);
621    report->addRange(E->getSourceRange());
622    C.EmitReport(report);
623    return true;
624  }
625  return false;
626}
627
628bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
629                                                        CheckerContext &C) const{
630  // Check if the function contains a format string argument.
631  unsigned int ArgNum = 0;
632  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
633    return false;
634
635  // If either the format string content or the pointer itself are tainted, warn.
636  if (generateReportIfTainted(CE->getArg(ArgNum),
637                              MsgUncontrolledFormatString, C))
638    return true;
639  return false;
640}
641
642bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
643                                          StringRef Name,
644                                          CheckerContext &C) const {
645  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
646    .Case("system", 0)
647    .Case("popen", 0)
648    .Case("execl", 0)
649    .Case("execle", 0)
650    .Case("execlp", 0)
651    .Case("execv", 0)
652    .Case("execvp", 0)
653    .Case("execvP", 0)
654    .Default(UINT_MAX);
655
656  if (ArgNum == UINT_MAX)
657    return false;
658
659  if (generateReportIfTainted(CE->getArg(ArgNum),
660                              MsgSanitizeSystemArgs, C))
661    return true;
662
663  return false;
664}
665
666// TODO: Should this check be a part of the CString checker?
667// If yes, should taint be a global setting?
668bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
669                                                 const FunctionDecl *FDecl,
670                                                 CheckerContext &C) const {
671  // If the function has a buffer size argument, set ArgNum.
672  unsigned ArgNum = InvalidArgIndex;
673  unsigned BId = 0;
674  if ( (BId = FDecl->getMemoryFunctionKind()) )
675    switch(BId) {
676    case Builtin::BImemcpy:
677    case Builtin::BImemmove:
678    case Builtin::BIstrncpy:
679      ArgNum = 2;
680      break;
681    case Builtin::BIstrndup:
682      ArgNum = 1;
683      break;
684    default:
685      break;
686    };
687
688  if (ArgNum == InvalidArgIndex) {
689    if (C.isCLibraryFunction(FDecl, "malloc") ||
690        C.isCLibraryFunction(FDecl, "calloc") ||
691        C.isCLibraryFunction(FDecl, "alloca"))
692      ArgNum = 0;
693    else if (C.isCLibraryFunction(FDecl, "memccpy"))
694      ArgNum = 3;
695    else if (C.isCLibraryFunction(FDecl, "realloc"))
696      ArgNum = 1;
697    else if (C.isCLibraryFunction(FDecl, "bcopy"))
698      ArgNum = 2;
699  }
700
701  if (ArgNum != InvalidArgIndex &&
702      generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
703    return true;
704
705  return false;
706}
707
708void ento::registerGenericTaintChecker(CheckerManager &mgr) {
709  mgr.registerChecker<GenericTaintChecker>();
710}
711