GenericTaintChecker.cpp revision 2fa67efeaf66a9332c30a026dc1c21bef6c33a6c
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are underconstrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include "clang/AST/Attr.h"
24#include "clang/Basic/Builtins.h"
25#include <climits>
26
27using namespace clang;
28using namespace ento;
29
30namespace {
31class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32                                            check::PreStmt<CallExpr> > {
33public:
34  static void *getTag() { static int Tag; return &Tag; }
35
36  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
38
39  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
40
41private:
42  static const unsigned InvalidArgIndex = UINT_MAX;
43  /// Denotes the return vale.
44  static const unsigned ReturnValueIndex = UINT_MAX - 1;
45
46  mutable OwningPtr<BugType> BT;
47  inline void initBugType() const {
48    if (!BT)
49      BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
50  }
51
52  /// \brief Catch taint related bugs. Check if tainted data is passed to a
53  /// system call etc.
54  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
55
56  /// \brief Add taint sources on a pre-visit.
57  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
58
59  /// \brief Propagate taint generated at pre-visit.
60  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
61
62  /// \brief Add taint sources on a post visit.
63  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
64
65  /// Check if the region the expression evaluates to is the standard input,
66  /// and thus, is tainted.
67  static bool isStdin(const Expr *E, CheckerContext &C);
68
69  /// \brief Given a pointer argument, get the symbol of the value it contains
70  /// (points to).
71  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
72
73  /// Functions defining the attack surface.
74  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
75                                                       CheckerContext &C) const;
76  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
77  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
78  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
79
80  /// Taint the scanned input if the file is tainted.
81  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
82
83  /// Check for CWE-134: Uncontrolled Format String.
84  static const char MsgUncontrolledFormatString[];
85  bool checkUncontrolledFormatString(const CallExpr *CE,
86                                     CheckerContext &C) const;
87
88  /// Check for:
89  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
90  /// CWE-78, "Failure to Sanitize Data into an OS Command"
91  static const char MsgSanitizeSystemArgs[];
92  bool checkSystemCall(const CallExpr *CE, StringRef Name,
93                       CheckerContext &C) const;
94
95  /// Check if tainted data is used as a buffer size ins strn.. functions,
96  /// and allocators.
97  static const char MsgTaintedBufferSize[];
98  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
99                              CheckerContext &C) const;
100
101  /// Generate a report if the expression is tainted or points to tainted data.
102  bool generateReportIfTainted(const Expr *E, const char Msg[],
103                               CheckerContext &C) const;
104
105
106  typedef llvm::SmallVector<unsigned, 2> ArgVector;
107
108  /// \brief A struct used to specify taint propagation rules for a function.
109  ///
110  /// If any of the possible taint source arguments is tainted, all of the
111  /// destination arguments should also be tainted. Use InvalidArgIndex in the
112  /// src list to specify that all of the arguments can introduce taint. Use
113  /// InvalidArgIndex in the dst arguments to signify that all the non-const
114  /// pointer and reference arguments might be tainted on return. If
115  /// ReturnValueIndex is added to the dst list, the return value will be
116  /// tainted.
117  struct TaintPropagationRule {
118    /// List of arguments which can be taint sources and should be checked.
119    ArgVector SrcArgs;
120    /// List of arguments which should be tainted on function return.
121    ArgVector DstArgs;
122    // TODO: Check if using other data structures would be more optimal.
123
124    TaintPropagationRule() {}
125
126    TaintPropagationRule(unsigned SArg,
127                         unsigned DArg, bool TaintRet = false) {
128      SrcArgs.push_back(SArg);
129      DstArgs.push_back(DArg);
130      if (TaintRet)
131        DstArgs.push_back(ReturnValueIndex);
132    }
133
134    TaintPropagationRule(unsigned SArg1, unsigned SArg2,
135                         unsigned DArg, bool TaintRet = false) {
136      SrcArgs.push_back(SArg1);
137      SrcArgs.push_back(SArg2);
138      DstArgs.push_back(DArg);
139      if (TaintRet)
140        DstArgs.push_back(ReturnValueIndex);
141    }
142
143    /// Get the propagation rule for a given function.
144    static TaintPropagationRule
145      getTaintPropagationRule(const FunctionDecl *FDecl,
146                              StringRef Name,
147                              CheckerContext &C);
148
149    inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
150    inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
151
152    inline bool isNull() const { return SrcArgs.empty(); }
153
154    inline bool isDestinationArgument(unsigned ArgNum) const {
155      return (std::find(DstArgs.begin(),
156                        DstArgs.end(), ArgNum) != DstArgs.end());
157    }
158
159    static inline bool isTaintedOrPointsToTainted(const Expr *E,
160                                                  ProgramStateRef State,
161                                                  CheckerContext &C) {
162      return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
163              (E->getType().getTypePtr()->isPointerType() &&
164               State->isTainted(getPointedToSymbol(C, E))));
165    }
166
167    /// \brief Pre-process a function which propagates taint according to the
168    /// taint rule.
169    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
170
171  };
172};
173
174const unsigned GenericTaintChecker::ReturnValueIndex;
175const unsigned GenericTaintChecker::InvalidArgIndex;
176
177const char GenericTaintChecker::MsgUncontrolledFormatString[] =
178  "Untrusted data is used as a format string "
179  "(CWE-134: Uncontrolled Format String)";
180
181const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
182  "Untrusted data is passed to a system call "
183  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
184
185const char GenericTaintChecker::MsgTaintedBufferSize[] =
186  "Untrusted data is used to specify the buffer size "
187  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
188  "character data and the null terminator)";
189
190} // end of anonymous namespace
191
/// A set used to pass information from the pre-visit of a call to its
/// post-visit. The values are unsigned integers: either ReturnValueIndex, or
/// the index of a pointer/reference argument whose pointed-to data should be
/// tainted on return.
196REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
197
198GenericTaintChecker::TaintPropagationRule
199GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
200                                                     const FunctionDecl *FDecl,
201                                                     StringRef Name,
202                                                     CheckerContext &C) {
203  // TODO: Currently, we might loose precision here: we always mark a return
204  // value as tainted even if it's just a pointer, pointing to tainted data.
205
206  // Check for exact name match for functions without builtin substitutes.
207  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
208    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
209    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
210    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
211    .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
212    .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
213    .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
214    .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
215    .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
216    .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
217    .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
218    .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
219    .Case("read", TaintPropagationRule(0, 2, 1, true))
220    .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
221    .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
222    .Case("fgets", TaintPropagationRule(2, 0, true))
223    .Case("getline", TaintPropagationRule(2, 0))
224    .Case("getdelim", TaintPropagationRule(3, 0))
225    .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
226    .Default(TaintPropagationRule());
227
228  if (!Rule.isNull())
229    return Rule;
230
231  // Check if it's one of the memory setting/copying functions.
232  // This check is specialized but faster then calling isCLibraryFunction.
233  unsigned BId = 0;
234  if ( (BId = FDecl->getMemoryFunctionKind()) )
235    switch(BId) {
236    case Builtin::BImemcpy:
237    case Builtin::BImemmove:
238    case Builtin::BIstrncpy:
239    case Builtin::BIstrncat:
240      return TaintPropagationRule(1, 2, 0, true);
241    case Builtin::BIstrlcpy:
242    case Builtin::BIstrlcat:
243      return TaintPropagationRule(1, 2, 0, false);
244    case Builtin::BIstrndup:
245      return TaintPropagationRule(0, 1, ReturnValueIndex);
246
247    default:
248      break;
249    };
250
251  // Process all other functions which could be defined as builtins.
252  if (Rule.isNull()) {
253    if (C.isCLibraryFunction(FDecl, "snprintf") ||
254        C.isCLibraryFunction(FDecl, "sprintf"))
255      return TaintPropagationRule(InvalidArgIndex, 0, true);
256    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
257             C.isCLibraryFunction(FDecl, "stpcpy") ||
258             C.isCLibraryFunction(FDecl, "strcat"))
259      return TaintPropagationRule(1, 0, true);
260    else if (C.isCLibraryFunction(FDecl, "bcopy"))
261      return TaintPropagationRule(0, 2, 1, false);
262    else if (C.isCLibraryFunction(FDecl, "strdup") ||
263             C.isCLibraryFunction(FDecl, "strdupa"))
264      return TaintPropagationRule(0, ReturnValueIndex);
265    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
266      return TaintPropagationRule(0, ReturnValueIndex);
267  }
268
269  // Skipping the following functions, since they might be used for cleansing
270  // or smart memory copy:
271  // - memccpy - copying until hitting a special character.
272
273  return TaintPropagationRule();
274}
275
276void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
277                                       CheckerContext &C) const {
278  // Check for errors first.
279  if (checkPre(CE, C))
280    return;
281
282  // Add taint second.
283  addSourcesPre(CE, C);
284}
285
286void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
287                                        CheckerContext &C) const {
288  if (propagateFromPre(CE, C))
289    return;
290  addSourcesPost(CE, C);
291}
292
293void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
294                                        CheckerContext &C) const {
295  ProgramStateRef State = 0;
296  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
297  if (!FDecl || FDecl->getKind() != Decl::Function)
298    return;
299
300  StringRef Name = C.getCalleeName(FDecl);
301  if (Name.empty())
302    return;
303
304  // First, try generating a propagation rule for this function.
305  TaintPropagationRule Rule =
306    TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
307  if (!Rule.isNull()) {
308    State = Rule.process(CE, C);
309    if (!State)
310      return;
311    C.addTransition(State);
312    return;
313  }
314
315  // Otherwise, check if we have custom pre-processing implemented.
316  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
317    .Case("fscanf", &GenericTaintChecker::preFscanf)
318    .Default(0);
319  // Check and evaluate the call.
320  if (evalFunction)
321    State = (this->*evalFunction)(CE, C);
322  if (!State)
323    return;
324  C.addTransition(State);
325
326}
327
328bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
329                                           CheckerContext &C) const {
330  ProgramStateRef State = C.getState();
331
332  // Depending on what was tainted at pre-visit, we determined a set of
333  // arguments which should be tainted after the function returns. These are
334  // stored in the state as TaintArgsOnPostVisit set.
335  TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
336  if (TaintArgs.isEmpty())
337    return false;
338
339  for (llvm::ImmutableSet<unsigned>::iterator
340         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
341    unsigned ArgNum  = *I;
342
343    // Special handling for the tainted return value.
344    if (ArgNum == ReturnValueIndex) {
345      State = State->addTaint(CE, C.getLocationContext());
346      continue;
347    }
348
349    // The arguments are pointer arguments. The data they are pointing at is
350    // tainted after the call.
351    if (CE->getNumArgs() < (ArgNum + 1))
352      return false;
353    const Expr* Arg = CE->getArg(ArgNum);
354    SymbolRef Sym = getPointedToSymbol(C, Arg);
355    if (Sym)
356      State = State->addTaint(Sym);
357  }
358
359  // Clear up the taint info from the state.
360  State = State->remove<TaintArgsOnPostVisit>();
361
362  if (State != C.getState()) {
363    C.addTransition(State);
364    return true;
365  }
366  return false;
367}
368
369void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
370                                         CheckerContext &C) const {
371  // Define the attack surface.
372  // Set the evaluation function by switching on the callee name.
373  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
374  if (!FDecl || FDecl->getKind() != Decl::Function)
375    return;
376
377  StringRef Name = C.getCalleeName(FDecl);
378  if (Name.empty())
379    return;
380  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
381    .Case("scanf", &GenericTaintChecker::postScanf)
382    // TODO: Add support for vfscanf & family.
383    .Case("getchar", &GenericTaintChecker::postRetTaint)
384    .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
385    .Case("getenv", &GenericTaintChecker::postRetTaint)
386    .Case("fopen", &GenericTaintChecker::postRetTaint)
387    .Case("fdopen", &GenericTaintChecker::postRetTaint)
388    .Case("freopen", &GenericTaintChecker::postRetTaint)
389    .Case("getch", &GenericTaintChecker::postRetTaint)
390    .Case("wgetch", &GenericTaintChecker::postRetTaint)
391    .Case("socket", &GenericTaintChecker::postSocket)
392    .Default(0);
393
394  // If the callee isn't defined, it is not of security concern.
395  // Check and evaluate the call.
396  ProgramStateRef State = 0;
397  if (evalFunction)
398    State = (this->*evalFunction)(CE, C);
399  if (!State)
400    return;
401
402  C.addTransition(State);
403}
404
405bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
406
407  if (checkUncontrolledFormatString(CE, C))
408    return true;
409
410  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
411  if (!FDecl || FDecl->getKind() != Decl::Function)
412    return false;
413
414  StringRef Name = C.getCalleeName(FDecl);
415  if (Name.empty())
416    return false;
417
418  if (checkSystemCall(CE, Name, C))
419    return true;
420
421  if (checkTaintedBufferSize(CE, FDecl, C))
422    return true;
423
424  return false;
425}
426
427SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
428                                                  const Expr* Arg) {
429  ProgramStateRef State = C.getState();
430  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
431  if (AddrVal.isUnknownOrUndef())
432    return 0;
433
434  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
435  if (!AddrLoc)
436    return 0;
437
438  const PointerType *ArgTy =
439    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
440  SVal Val = State->getSVal(*AddrLoc,
441                            ArgTy ? ArgTy->getPointeeType(): QualType());
442  return Val.getAsSymbol();
443}
444
445ProgramStateRef
446GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
447                                                   CheckerContext &C) const {
448  ProgramStateRef State = C.getState();
449
450  // Check for taint in arguments.
451  bool IsTainted = false;
452  for (ArgVector::const_iterator I = SrcArgs.begin(),
453                                 E = SrcArgs.end(); I != E; ++I) {
454    unsigned ArgNum = *I;
455
456    if (ArgNum == InvalidArgIndex) {
457      // Check if any of the arguments is tainted, but skip the
458      // destination arguments.
459      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
460        if (isDestinationArgument(i))
461          continue;
462        if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
463          break;
464      }
465      break;
466    }
467
468    if (CE->getNumArgs() < (ArgNum + 1))
469      return State;
470    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
471      break;
472  }
473  if (!IsTainted)
474    return State;
475
476  // Mark the arguments which should be tainted after the function returns.
477  for (ArgVector::const_iterator I = DstArgs.begin(),
478                                 E = DstArgs.end(); I != E; ++I) {
479    unsigned ArgNum = *I;
480
481    // Should we mark all arguments as tainted?
482    if (ArgNum == InvalidArgIndex) {
483      // For all pointer and references that were passed in:
484      //   If they are not pointing to const data, mark data as tainted.
485      //   TODO: So far we are just going one level down; ideally we'd need to
486      //         recurse here.
487      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
488        const Expr *Arg = CE->getArg(i);
489        // Process pointer argument.
490        const Type *ArgTy = Arg->getType().getTypePtr();
491        QualType PType = ArgTy->getPointeeType();
492        if ((!PType.isNull() && !PType.isConstQualified())
493            || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
494          State = State->add<TaintArgsOnPostVisit>(i);
495      }
496      continue;
497    }
498
499    // Should mark the return value?
500    if (ArgNum == ReturnValueIndex) {
501      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
502      continue;
503    }
504
505    // Mark the given argument.
506    assert(ArgNum < CE->getNumArgs());
507    State = State->add<TaintArgsOnPostVisit>(ArgNum);
508  }
509
510  return State;
511}
512
513
514// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
515// and arg 1 should get taint.
516ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
517                                                   CheckerContext &C) const {
518  assert(CE->getNumArgs() >= 2);
519  ProgramStateRef State = C.getState();
520
521  // Check is the file descriptor is tainted.
522  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
523      isStdin(CE->getArg(0), C)) {
524    // All arguments except for the first two should get taint.
525    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
526        State = State->add<TaintArgsOnPostVisit>(i);
527    return State;
528  }
529
530  return 0;
531}
532
533
534// If argument 0(protocol domain) is network, the return value should get taint.
535ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
536                                                CheckerContext &C) const {
537  ProgramStateRef State = C.getState();
538  if (CE->getNumArgs() < 3)
539    return State;
540
541  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
542  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
543  // White list the internal communication protocols.
544  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
545      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
546    return State;
547  State = State->addTaint(CE, C.getLocationContext());
548  return State;
549}
550
551ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
552                                                   CheckerContext &C) const {
553  ProgramStateRef State = C.getState();
554  if (CE->getNumArgs() < 2)
555    return State;
556
557  // All arguments except for the very first one should get taint.
558  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
559    // The arguments are pointer arguments. The data they are pointing at is
560    // tainted after the call.
561    const Expr* Arg = CE->getArg(i);
562        SymbolRef Sym = getPointedToSymbol(C, Arg);
563    if (Sym)
564      State = State->addTaint(Sym);
565  }
566  return State;
567}
568
569ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
570                                                  CheckerContext &C) const {
571  return C.getState()->addTaint(CE, C.getLocationContext());
572}
573
574bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
575  ProgramStateRef State = C.getState();
576  SVal Val = State->getSVal(E, C.getLocationContext());
577
578  // stdin is a pointer, so it would be a region.
579  const MemRegion *MemReg = Val.getAsRegion();
580
581  // The region should be symbolic, we do not know it's value.
582  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
583  if (!SymReg)
584    return false;
585
586  // Get it's symbol and find the declaration region it's pointing to.
587  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
588  if (!Sm)
589    return false;
590  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
591  if (!DeclReg)
592    return false;
593
594  // This region corresponds to a declaration, find out if it's a global/extern
595  // variable named stdin with the proper type.
596  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
597    D = D->getCanonicalDecl();
598    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
599        if (const PointerType * PtrTy =
600              dyn_cast<PointerType>(D->getType().getTypePtr()))
601          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
602            return true;
603  }
604  return false;
605}
606
607static bool getPrintfFormatArgumentNum(const CallExpr *CE,
608                                       const CheckerContext &C,
609                                       unsigned int &ArgNum) {
610  // Find if the function contains a format string argument.
611  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
612  // vsnprintf, syslog, custom annotated functions.
613  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
614  if (!FDecl)
615    return false;
616  for (specific_attr_iterator<FormatAttr>
617         i = FDecl->specific_attr_begin<FormatAttr>(),
618         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
619
620    const FormatAttr *Format = *i;
621    ArgNum = Format->getFormatIdx() - 1;
622    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
623      return true;
624  }
625
626  // Or if a function is named setproctitle (this is a heuristic).
627  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
628    ArgNum = 0;
629    return true;
630  }
631
632  return false;
633}
634
635bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
636                                                  const char Msg[],
637                                                  CheckerContext &C) const {
638  assert(E);
639
640  // Check for taint.
641  ProgramStateRef State = C.getState();
642  if (!State->isTainted(getPointedToSymbol(C, E)) &&
643      !State->isTainted(E, C.getLocationContext()))
644    return false;
645
646  // Generate diagnostic.
647  if (ExplodedNode *N = C.addTransition()) {
648    initBugType();
649    BugReport *report = new BugReport(*BT, Msg, N);
650    report->addRange(E->getSourceRange());
651    C.emitReport(report);
652    return true;
653  }
654  return false;
655}
656
657bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
658                                                        CheckerContext &C) const{
659  // Check if the function contains a format string argument.
660  unsigned int ArgNum = 0;
661  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
662    return false;
663
664  // If either the format string content or the pointer itself are tainted, warn.
665  if (generateReportIfTainted(CE->getArg(ArgNum),
666                              MsgUncontrolledFormatString, C))
667    return true;
668  return false;
669}
670
671bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
672                                          StringRef Name,
673                                          CheckerContext &C) const {
674  // TODO: It might make sense to run this check on demand. In some cases,
675  // we should check if the environment has been cleansed here. We also might
676  // need to know if the user was reset before these calls(seteuid).
677  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
678    .Case("system", 0)
679    .Case("popen", 0)
680    .Case("execl", 0)
681    .Case("execle", 0)
682    .Case("execlp", 0)
683    .Case("execv", 0)
684    .Case("execvp", 0)
685    .Case("execvP", 0)
686    .Case("execve", 0)
687    .Case("dlopen", 0)
688    .Default(UINT_MAX);
689
690  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
691    return false;
692
693  if (generateReportIfTainted(CE->getArg(ArgNum),
694                              MsgSanitizeSystemArgs, C))
695    return true;
696
697  return false;
698}
699
700// TODO: Should this check be a part of the CString checker?
701// If yes, should taint be a global setting?
702bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
703                                                 const FunctionDecl *FDecl,
704                                                 CheckerContext &C) const {
705  // If the function has a buffer size argument, set ArgNum.
706  unsigned ArgNum = InvalidArgIndex;
707  unsigned BId = 0;
708  if ( (BId = FDecl->getMemoryFunctionKind()) )
709    switch(BId) {
710    case Builtin::BImemcpy:
711    case Builtin::BImemmove:
712    case Builtin::BIstrncpy:
713      ArgNum = 2;
714      break;
715    case Builtin::BIstrndup:
716      ArgNum = 1;
717      break;
718    default:
719      break;
720    };
721
722  if (ArgNum == InvalidArgIndex) {
723    if (C.isCLibraryFunction(FDecl, "malloc") ||
724        C.isCLibraryFunction(FDecl, "calloc") ||
725        C.isCLibraryFunction(FDecl, "alloca"))
726      ArgNum = 0;
727    else if (C.isCLibraryFunction(FDecl, "memccpy"))
728      ArgNum = 3;
729    else if (C.isCLibraryFunction(FDecl, "realloc"))
730      ArgNum = 1;
731    else if (C.isCLibraryFunction(FDecl, "bcopy"))
732      ArgNum = 2;
733  }
734
735  if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
736      generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
737    return true;
738
739  return false;
740}
741
742void ento::registerGenericTaintChecker(CheckerManager &mgr) {
743  mgr.registerChecker<GenericTaintChecker>();
744}
745