GenericTaintChecker.cpp revision 8bef8238181a30e52dea380789a7e2d760eac532
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include "clang/Basic/Builtins.h"
24#include <climits>
25
26using namespace clang;
27using namespace ento;
28
29namespace {
30class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
31                                            check::PreStmt<CallExpr> > {
32public:
33  static void *getTag() { static int Tag; return &Tag; }
34
35  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39
40private:
41  static const unsigned InvalidArgIndex = UINT_MAX;
42  /// Denotes the return vale.
43  static const unsigned ReturnValueIndex = UINT_MAX - 1;
44
45  mutable llvm::OwningPtr<BugType> BT;
46  inline void initBugType() const {
47    if (!BT)
48      BT.reset(new BugType("Taint Analysis", "General"));
49  }
50
51  /// \brief Catch taint related bugs. Check if tainted data is passed to a
52  /// system call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
55  /// \brief Add taint sources on a pre-visit.
56  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57
58  /// \brief Propagate taint generated at pre-visit.
59  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60
61  /// \brief Add taint sources on a post visit.
62  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63
64  /// Check if the region the expression evaluates to is the standard input,
65  /// and thus, is tainted.
66  static bool isStdin(const Expr *E, CheckerContext &C);
67
68  /// \brief Given a pointer argument, get the symbol of the value it contains
69  /// (points to).
70  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71
72  /// Functions defining the attack surface.
73  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74                                                       CheckerContext &C) const;
75  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78
79  /// Taint the scanned input if the file is tainted.
80  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81
82  /// Check for CWE-134: Uncontrolled Format String.
83  static const char MsgUncontrolledFormatString[];
84  bool checkUncontrolledFormatString(const CallExpr *CE,
85                                     CheckerContext &C) const;
86
87  /// Check for:
88  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89  /// CWE-78, "Failure to Sanitize Data into an OS Command"
90  static const char MsgSanitizeSystemArgs[];
91  bool checkSystemCall(const CallExpr *CE, StringRef Name,
92                       CheckerContext &C) const;
93
94  /// Check if tainted data is used as a buffer size ins strn.. functions,
95  /// and allocators.
96  static const char MsgTaintedBufferSize[];
97  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98                              CheckerContext &C) const;
99
100  /// Generate a report if the expression is tainted or points to tainted data.
101  bool generateReportIfTainted(const Expr *E, const char Msg[],
102                               CheckerContext &C) const;
103
104
105  typedef llvm::SmallVector<unsigned, 2> ArgVector;
106
107  /// \brief A struct used to specify taint propagation rules for a function.
108  ///
109  /// If any of the possible taint source arguments is tainted, all of the
110  /// destination arguments should also be tainted. Use InvalidArgIndex in the
111  /// src list to specify that all of the arguments can introduce taint. Use
112  /// InvalidArgIndex in the dst arguments to signify that all the non-const
113  /// pointer and reference arguments might be tainted on return. If
114  /// ReturnValueIndex is added to the dst list, the return value will be
115  /// tainted.
116  struct TaintPropagationRule {
117    /// List of arguments which can be taint sources and should be checked.
118    ArgVector SrcArgs;
119    /// List of arguments which should be tainted on function return.
120    ArgVector DstArgs;
121    // TODO: Check if using other data structures would be more optimal.
122
123    TaintPropagationRule() {}
124
125    TaintPropagationRule(unsigned SArg,
126                         unsigned DArg, bool TaintRet = false) {
127      SrcArgs.push_back(SArg);
128      DstArgs.push_back(DArg);
129      if (TaintRet)
130        DstArgs.push_back(ReturnValueIndex);
131    }
132
133    TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134                         unsigned DArg, bool TaintRet = false) {
135      SrcArgs.push_back(SArg1);
136      SrcArgs.push_back(SArg2);
137      DstArgs.push_back(DArg);
138      if (TaintRet)
139        DstArgs.push_back(ReturnValueIndex);
140    }
141
142    /// Get the propagation rule for a given function.
143    static TaintPropagationRule
144      getTaintPropagationRule(const FunctionDecl *FDecl,
145                              StringRef Name,
146                              CheckerContext &C);
147
148    inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
149    inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
150
151    inline bool isNull() const { return SrcArgs.empty(); }
152
153    inline bool isDestinationArgument(unsigned ArgNum) const {
154      return (std::find(DstArgs.begin(),
155                        DstArgs.end(), ArgNum) != DstArgs.end());
156    }
157
158    static inline bool isTaintedOrPointsToTainted(const Expr *E,
159                                                  ProgramStateRef State,
160                                                  CheckerContext &C) {
161      return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162              (E->getType().getTypePtr()->isPointerType() &&
163               State->isTainted(getPointedToSymbol(C, E))));
164    }
165
166    /// \brief Pre-process a function which propagates taint according to the
167    /// taint rule.
168    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169
170  };
171};
172
173const unsigned GenericTaintChecker::ReturnValueIndex;
174const unsigned GenericTaintChecker::InvalidArgIndex;
175
176const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177  "Tainted format string (CWE-134: Uncontrolled Format String)";
178
179const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
180  "Tainted data passed to a system call "
181  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
182
183const char GenericTaintChecker::MsgTaintedBufferSize[] =
184  "Tainted data is used to specify the buffer size "
185  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
186  "character data and the null terminator)";
187
188} // end of anonymous namespace
189
190/// A set which is used to pass information from call pre-visit instruction
191/// to the call post-visit. The values are unsigned integers, which are either
192/// ReturnValueIndex, or indexes of the pointer/reference argument, which
193/// points to data, which should be tainted on return.
194namespace { struct TaintArgsOnPostVisit{}; }
195namespace clang { namespace ento {
196template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
197    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
198  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
199};
200}}
201
202GenericTaintChecker::TaintPropagationRule
203GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
204                                                     const FunctionDecl *FDecl,
205                                                     StringRef Name,
206                                                     CheckerContext &C) {
207  // TODO: Currently, we might loose precision here: we always mark a return
208  // value as tainted even if it's just a pointer, pointing to tainted data.
209
210  // Check for exact name match for functions without builtin substitutes.
211  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
212    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
213    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
214    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
215    .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
216    .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
217    .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
218    .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
219    .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
220    .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
221    .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
222    .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
223    .Case("read", TaintPropagationRule(0, 2, 1, true))
224    .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
225    .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
226    .Case("fgets", TaintPropagationRule(2, 0, true))
227    .Case("getline", TaintPropagationRule(2, 0))
228    .Case("getdelim", TaintPropagationRule(3, 0))
229    .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
230    .Default(TaintPropagationRule());
231
232  if (!Rule.isNull())
233    return Rule;
234
235  // Check if it's one of the memory setting/copying functions.
236  // This check is specialized but faster then calling isCLibraryFunction.
237  unsigned BId = 0;
238  if ( (BId = FDecl->getMemoryFunctionKind()) )
239    switch(BId) {
240    case Builtin::BImemcpy:
241    case Builtin::BImemmove:
242    case Builtin::BIstrncpy:
243    case Builtin::BIstrncat:
244      return TaintPropagationRule(1, 2, 0, true);
245    case Builtin::BIstrlcpy:
246    case Builtin::BIstrlcat:
247      return TaintPropagationRule(1, 2, 0, false);
248    case Builtin::BIstrndup:
249      return TaintPropagationRule(0, 1, ReturnValueIndex);
250
251    default:
252      break;
253    };
254
255  // Process all other functions which could be defined as builtins.
256  if (Rule.isNull()) {
257    if (C.isCLibraryFunction(FDecl, "snprintf") ||
258        C.isCLibraryFunction(FDecl, "sprintf"))
259      return TaintPropagationRule(InvalidArgIndex, 0, true);
260    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
261             C.isCLibraryFunction(FDecl, "stpcpy") ||
262             C.isCLibraryFunction(FDecl, "strcat"))
263      return TaintPropagationRule(1, 0, true);
264    else if (C.isCLibraryFunction(FDecl, "bcopy"))
265      return TaintPropagationRule(0, 2, 1, false);
266    else if (C.isCLibraryFunction(FDecl, "strdup") ||
267             C.isCLibraryFunction(FDecl, "strdupa"))
268      return TaintPropagationRule(0, ReturnValueIndex);
269    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
270      return TaintPropagationRule(0, ReturnValueIndex);
271  }
272
273  // Skipping the following functions, since they might be used for cleansing
274  // or smart memory copy:
275  // - memccpy - copying untill hitting a special character.
276
277  return TaintPropagationRule();
278}
279
280void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
281                                       CheckerContext &C) const {
282  // Check for errors first.
283  if (checkPre(CE, C))
284    return;
285
286  // Add taint second.
287  addSourcesPre(CE, C);
288}
289
290void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
291                                        CheckerContext &C) const {
292  if (propagateFromPre(CE, C))
293    return;
294  addSourcesPost(CE, C);
295}
296
297void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
298                                        CheckerContext &C) const {
299  ProgramStateRef State = 0;
300  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
301  StringRef Name = C.getCalleeName(FDecl);
302  if (Name.empty())
303    return;
304
305  // First, try generating a propagation rule for this function.
306  TaintPropagationRule Rule =
307    TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
308  if (!Rule.isNull()) {
309    State = Rule.process(CE, C);
310    if (!State)
311      return;
312    C.addTransition(State);
313    return;
314  }
315
316  // Otherwise, check if we have custom pre-processing implemented.
317  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
318    .Case("fscanf", &GenericTaintChecker::preFscanf)
319    .Default(0);
320  // Check and evaluate the call.
321  if (evalFunction)
322    State = (this->*evalFunction)(CE, C);
323  if (!State)
324    return;
325  C.addTransition(State);
326
327}
328
329bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
330                                           CheckerContext &C) const {
331  ProgramStateRef State = C.getState();
332
333  // Depending on what was tainted at pre-visit, we determined a set of
334  // arguments which should be tainted after the function returns. These are
335  // stored in the state as TaintArgsOnPostVisit set.
336  llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
337  if (TaintArgs.isEmpty())
338    return false;
339
340  for (llvm::ImmutableSet<unsigned>::iterator
341         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
342    unsigned ArgNum  = *I;
343
344    // Special handling for the tainted return value.
345    if (ArgNum == ReturnValueIndex) {
346      State = State->addTaint(CE, C.getLocationContext());
347      continue;
348    }
349
350    // The arguments are pointer arguments. The data they are pointing at is
351    // tainted after the call.
352    const Expr* Arg = CE->getArg(ArgNum);
353    SymbolRef Sym = getPointedToSymbol(C, Arg);
354    if (Sym)
355      State = State->addTaint(Sym);
356  }
357
358  // Clear up the taint info from the state.
359  State = State->remove<TaintArgsOnPostVisit>();
360
361  if (State != C.getState()) {
362    C.addTransition(State);
363    return true;
364  }
365  return false;
366}
367
368void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369                                         CheckerContext &C) const {
370  // Define the attack surface.
371  // Set the evaluation function by switching on the callee name.
372  StringRef Name = C.getCalleeName(CE);
373  if (Name.empty())
374    return;
375  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
376    .Case("scanf", &GenericTaintChecker::postScanf)
377    // TODO: Add support for vfscanf & family.
378    .Case("getchar", &GenericTaintChecker::postRetTaint)
379    .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
380    .Case("getenv", &GenericTaintChecker::postRetTaint)
381    .Case("fopen", &GenericTaintChecker::postRetTaint)
382    .Case("fdopen", &GenericTaintChecker::postRetTaint)
383    .Case("freopen", &GenericTaintChecker::postRetTaint)
384    .Case("getch", &GenericTaintChecker::postRetTaint)
385    .Case("wgetch", &GenericTaintChecker::postRetTaint)
386    .Case("socket", &GenericTaintChecker::postSocket)
387    .Default(0);
388
389  // If the callee isn't defined, it is not of security concern.
390  // Check and evaluate the call.
391  ProgramStateRef State = 0;
392  if (evalFunction)
393    State = (this->*evalFunction)(CE, C);
394  if (!State)
395    return;
396
397  C.addTransition(State);
398}
399
400bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
401
402  if (checkUncontrolledFormatString(CE, C))
403    return true;
404
405  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
406  StringRef Name = C.getCalleeName(FDecl);
407  if (Name.empty())
408    return false;
409
410  if (checkSystemCall(CE, Name, C))
411    return true;
412
413  if (checkTaintedBufferSize(CE, FDecl, C))
414    return true;
415
416  return false;
417}
418
419SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
420                                                  const Expr* Arg) {
421  ProgramStateRef State = C.getState();
422  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
423  if (AddrVal.isUnknownOrUndef())
424    return 0;
425
426  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
427  if (!AddrLoc)
428    return 0;
429
430  const PointerType *ArgTy =
431    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
432  SVal Val = State->getSVal(*AddrLoc,
433                            ArgTy ? ArgTy->getPointeeType(): QualType());
434  return Val.getAsSymbol();
435}
436
437ProgramStateRef
438GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
439                                                   CheckerContext &C) const {
440  ProgramStateRef State = C.getState();
441
442  // Check for taint in arguments.
443  bool IsTainted = false;
444  for (ArgVector::const_iterator I = SrcArgs.begin(),
445                                 E = SrcArgs.end(); I != E; ++I) {
446    unsigned ArgNum = *I;
447
448    if (ArgNum == InvalidArgIndex) {
449      // Check if any of the arguments is tainted, but skip the
450      // destination arguments.
451      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
452        if (isDestinationArgument(i))
453          continue;
454        if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
455          break;
456      }
457      break;
458    }
459
460    assert(ArgNum < CE->getNumArgs());
461    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
462      break;
463  }
464  if (!IsTainted)
465    return State;
466
467  // Mark the arguments which should be tainted after the function returns.
468  for (ArgVector::const_iterator I = DstArgs.begin(),
469                                 E = DstArgs.end(); I != E; ++I) {
470    unsigned ArgNum = *I;
471
472    // Should we mark all arguments as tainted?
473    if (ArgNum == InvalidArgIndex) {
474      // For all pointer and references that were passed in:
475      //   If they are not pointing to const data, mark data as tainted.
476      //   TODO: So far we are just going one level down; ideally we'd need to
477      //         recurse here.
478      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
479        const Expr *Arg = CE->getArg(i);
480        // Process pointer argument.
481        const Type *ArgTy = Arg->getType().getTypePtr();
482        QualType PType = ArgTy->getPointeeType();
483        if ((!PType.isNull() && !PType.isConstQualified())
484            || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
485          State = State->add<TaintArgsOnPostVisit>(i);
486      }
487      continue;
488    }
489
490    // Should mark the return value?
491    if (ArgNum == ReturnValueIndex) {
492      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
493      continue;
494    }
495
496    // Mark the given argument.
497    assert(ArgNum < CE->getNumArgs());
498    State = State->add<TaintArgsOnPostVisit>(ArgNum);
499  }
500
501  return State;
502}
503
504
505// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
506// and arg 1 should get taint.
507ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
508                                                   CheckerContext &C) const {
509  assert(CE->getNumArgs() >= 2);
510  ProgramStateRef State = C.getState();
511
512  // Check is the file descriptor is tainted.
513  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
514      isStdin(CE->getArg(0), C)) {
515    // All arguments except for the first two should get taint.
516    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
517        State = State->add<TaintArgsOnPostVisit>(i);
518    return State;
519  }
520
521  return 0;
522}
523
524
525// If argument 0(protocol domain) is network, the return value should get taint.
526ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
527                                                    CheckerContext &C) const {
528  assert(CE->getNumArgs() >= 3);
529  ProgramStateRef State = C.getState();
530
531  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
532  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
533  // White list the internal communication protocols.
534  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
535      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
536    return State;
537  State = State->addTaint(CE, C.getLocationContext());
538  return State;
539}
540
541ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
542                                                   CheckerContext &C) const {
543  ProgramStateRef State = C.getState();
544  assert(CE->getNumArgs() >= 2);
545  SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
546  // All arguments except for the very first one should get taint.
547  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
548    // The arguments are pointer arguments. The data they are pointing at is
549    // tainted after the call.
550    const Expr* Arg = CE->getArg(i);
551        SymbolRef Sym = getPointedToSymbol(C, Arg);
552    if (Sym)
553      State = State->addTaint(Sym);
554  }
555  return State;
556}
557
558ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
559                                                      CheckerContext &C) const {
560  return C.getState()->addTaint(CE, C.getLocationContext());
561}
562
563bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
564  ProgramStateRef State = C.getState();
565  SVal Val = State->getSVal(E, C.getLocationContext());
566
567  // stdin is a pointer, so it would be a region.
568  const MemRegion *MemReg = Val.getAsRegion();
569
570  // The region should be symbolic, we do not know it's value.
571  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
572  if (!SymReg)
573    return false;
574
575  // Get it's symbol and find the declaration region it's pointing to.
576  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
577  if (!Sm)
578    return false;
579  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
580  if (!DeclReg)
581    return false;
582
583  // This region corresponds to a declaration, find out if it's a global/extern
584  // variable named stdin with the proper type.
585  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
586    D = D->getCanonicalDecl();
587    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
588        if (const PointerType * PtrTy =
589              dyn_cast<PointerType>(D->getType().getTypePtr()))
590          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
591            return true;
592  }
593  return false;
594}
595
596static bool getPrintfFormatArgumentNum(const CallExpr *CE,
597                                       const CheckerContext &C,
598                                       unsigned int &ArgNum) {
599  // Find if the function contains a format string argument.
600  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
601  // vsnprintf, syslog, custom annotated functions.
602  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
603  if (!FDecl)
604    return false;
605  for (specific_attr_iterator<FormatAttr>
606         i = FDecl->specific_attr_begin<FormatAttr>(),
607         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
608
609    const FormatAttr *Format = *i;
610    ArgNum = Format->getFormatIdx() - 1;
611    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
612      return true;
613  }
614
615  // Or if a function is named setproctitle (this is a heuristic).
616  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
617    ArgNum = 0;
618    return true;
619  }
620
621  return false;
622}
623
624bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
625                                                  const char Msg[],
626                                                  CheckerContext &C) const {
627  assert(E);
628
629  // Check for taint.
630  ProgramStateRef State = C.getState();
631  if (!State->isTainted(getPointedToSymbol(C, E)) &&
632      !State->isTainted(E, C.getLocationContext()))
633    return false;
634
635  // Generate diagnostic.
636  if (ExplodedNode *N = C.addTransition()) {
637    initBugType();
638    BugReport *report = new BugReport(*BT, Msg, N);
639    report->addRange(E->getSourceRange());
640    C.EmitReport(report);
641    return true;
642  }
643  return false;
644}
645
646bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
647                                                        CheckerContext &C) const{
648  // Check if the function contains a format string argument.
649  unsigned int ArgNum = 0;
650  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
651    return false;
652
653  // If either the format string content or the pointer itself are tainted, warn.
654  if (generateReportIfTainted(CE->getArg(ArgNum),
655                              MsgUncontrolledFormatString, C))
656    return true;
657  return false;
658}
659
660bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
661                                          StringRef Name,
662                                          CheckerContext &C) const {
663  // TODO: It might make sense to run this check on demand. In some cases,
664  // we should check if the environment has been cleansed here. We also might
665  // need to know if the user was reset before these calls(seteuid).
666  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
667    .Case("system", 0)
668    .Case("popen", 0)
669    .Case("execl", 0)
670    .Case("execle", 0)
671    .Case("execlp", 0)
672    .Case("execv", 0)
673    .Case("execvp", 0)
674    .Case("execvP", 0)
675    .Case("execve", 0)
676    .Case("dlopen", 0)
677    .Default(UINT_MAX);
678
679  if (ArgNum == UINT_MAX)
680    return false;
681
682  if (generateReportIfTainted(CE->getArg(ArgNum),
683                              MsgSanitizeSystemArgs, C))
684    return true;
685
686  return false;
687}
688
689// TODO: Should this check be a part of the CString checker?
690// If yes, should taint be a global setting?
691bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
692                                                 const FunctionDecl *FDecl,
693                                                 CheckerContext &C) const {
694  // If the function has a buffer size argument, set ArgNum.
695  unsigned ArgNum = InvalidArgIndex;
696  unsigned BId = 0;
697  if ( (BId = FDecl->getMemoryFunctionKind()) )
698    switch(BId) {
699    case Builtin::BImemcpy:
700    case Builtin::BImemmove:
701    case Builtin::BIstrncpy:
702      ArgNum = 2;
703      break;
704    case Builtin::BIstrndup:
705      ArgNum = 1;
706      break;
707    default:
708      break;
709    };
710
711  if (ArgNum == InvalidArgIndex) {
712    if (C.isCLibraryFunction(FDecl, "malloc") ||
713        C.isCLibraryFunction(FDecl, "calloc") ||
714        C.isCLibraryFunction(FDecl, "alloca"))
715      ArgNum = 0;
716    else if (C.isCLibraryFunction(FDecl, "memccpy"))
717      ArgNum = 3;
718    else if (C.isCLibraryFunction(FDecl, "realloc"))
719      ArgNum = 1;
720    else if (C.isCLibraryFunction(FDecl, "bcopy"))
721      ArgNum = 2;
722  }
723
724  if (ArgNum != InvalidArgIndex &&
725      generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
726    return true;
727
728  return false;
729}
730
731void ento::registerGenericTaintChecker(CheckerManager &mgr) {
732  mgr.registerChecker<GenericTaintChecker>();
733}
734