GenericTaintChecker.cpp revision 785950e59424dca7ce0081bebf13c0acd2c4fff6
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include "clang/Basic/Builtins.h"
24#include <climits>
25
26using namespace clang;
27using namespace ento;
28
29namespace {
30class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
31                                            check::PreStmt<CallExpr> > {
32public:
33  static void *getTag() { static int Tag; return &Tag; }
34
35  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
36  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
37
38  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39
40private:
41  static const unsigned InvalidArgIndex = UINT_MAX;
42  /// Denotes the return vale.
43  static const unsigned ReturnValueIndex = UINT_MAX - 1;
44
45  mutable OwningPtr<BugType> BT;
46  inline void initBugType() const {
47    if (!BT)
48      BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
49  }
50
51  /// \brief Catch taint related bugs. Check if tainted data is passed to a
52  /// system call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
55  /// \brief Add taint sources on a pre-visit.
56  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57
58  /// \brief Propagate taint generated at pre-visit.
59  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60
61  /// \brief Add taint sources on a post visit.
62  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63
64  /// Check if the region the expression evaluates to is the standard input,
65  /// and thus, is tainted.
66  static bool isStdin(const Expr *E, CheckerContext &C);
67
68  /// \brief Given a pointer argument, get the symbol of the value it contains
69  /// (points to).
70  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71
72  /// Functions defining the attack surface.
73  typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74                                                       CheckerContext &C) const;
75  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78
79  /// Taint the scanned input if the file is tainted.
80  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81
82  /// Check for CWE-134: Uncontrolled Format String.
83  static const char MsgUncontrolledFormatString[];
84  bool checkUncontrolledFormatString(const CallExpr *CE,
85                                     CheckerContext &C) const;
86
87  /// Check for:
88  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89  /// CWE-78, "Failure to Sanitize Data into an OS Command"
90  static const char MsgSanitizeSystemArgs[];
91  bool checkSystemCall(const CallExpr *CE, StringRef Name,
92                       CheckerContext &C) const;
93
94  /// Check if tainted data is used as a buffer size ins strn.. functions,
95  /// and allocators.
96  static const char MsgTaintedBufferSize[];
97  bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98                              CheckerContext &C) const;
99
100  /// Generate a report if the expression is tainted or points to tainted data.
101  bool generateReportIfTainted(const Expr *E, const char Msg[],
102                               CheckerContext &C) const;
103
104
105  typedef llvm::SmallVector<unsigned, 2> ArgVector;
106
107  /// \brief A struct used to specify taint propagation rules for a function.
108  ///
109  /// If any of the possible taint source arguments is tainted, all of the
110  /// destination arguments should also be tainted. Use InvalidArgIndex in the
111  /// src list to specify that all of the arguments can introduce taint. Use
112  /// InvalidArgIndex in the dst arguments to signify that all the non-const
113  /// pointer and reference arguments might be tainted on return. If
114  /// ReturnValueIndex is added to the dst list, the return value will be
115  /// tainted.
116  struct TaintPropagationRule {
117    /// List of arguments which can be taint sources and should be checked.
118    ArgVector SrcArgs;
119    /// List of arguments which should be tainted on function return.
120    ArgVector DstArgs;
121    // TODO: Check if using other data structures would be more optimal.
122
123    TaintPropagationRule() {}
124
125    TaintPropagationRule(unsigned SArg,
126                         unsigned DArg, bool TaintRet = false) {
127      SrcArgs.push_back(SArg);
128      DstArgs.push_back(DArg);
129      if (TaintRet)
130        DstArgs.push_back(ReturnValueIndex);
131    }
132
133    TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134                         unsigned DArg, bool TaintRet = false) {
135      SrcArgs.push_back(SArg1);
136      SrcArgs.push_back(SArg2);
137      DstArgs.push_back(DArg);
138      if (TaintRet)
139        DstArgs.push_back(ReturnValueIndex);
140    }
141
142    /// Get the propagation rule for a given function.
143    static TaintPropagationRule
144      getTaintPropagationRule(const FunctionDecl *FDecl,
145                              StringRef Name,
146                              CheckerContext &C);
147
148    inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
149    inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
150
151    inline bool isNull() const { return SrcArgs.empty(); }
152
153    inline bool isDestinationArgument(unsigned ArgNum) const {
154      return (std::find(DstArgs.begin(),
155                        DstArgs.end(), ArgNum) != DstArgs.end());
156    }
157
158    static inline bool isTaintedOrPointsToTainted(const Expr *E,
159                                                  ProgramStateRef State,
160                                                  CheckerContext &C) {
161      return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162              (E->getType().getTypePtr()->isPointerType() &&
163               State->isTainted(getPointedToSymbol(C, E))));
164    }
165
166    /// \brief Pre-process a function which propagates taint according to the
167    /// taint rule.
168    ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169
170  };
171};
172
173const unsigned GenericTaintChecker::ReturnValueIndex;
174const unsigned GenericTaintChecker::InvalidArgIndex;
175
176const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177  "Untrusted data is used as a format string "
178  "(CWE-134: Uncontrolled Format String)";
179
180const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
181  "Untrusted data is passed to a system call "
182  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
183
184const char GenericTaintChecker::MsgTaintedBufferSize[] =
185  "Untrusted data is used to specify the buffer size "
186  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187  "character data and the null terminator)";
188
189} // end of anonymous namespace
190
191/// A set which is used to pass information from call pre-visit instruction
192/// to the call post-visit. The values are unsigned integers, which are either
193/// ReturnValueIndex, or indexes of the pointer/reference argument, which
194/// points to data, which should be tainted on return.
195namespace { struct TaintArgsOnPostVisit{}; }
196namespace clang { namespace ento {
197template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
198    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
199  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
200};
201}}
202
203GenericTaintChecker::TaintPropagationRule
204GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
205                                                     const FunctionDecl *FDecl,
206                                                     StringRef Name,
207                                                     CheckerContext &C) {
208  // TODO: Currently, we might loose precision here: we always mark a return
209  // value as tainted even if it's just a pointer, pointing to tainted data.
210
211  // Check for exact name match for functions without builtin substitutes.
212  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
213    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
214    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
215    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
216    .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
217    .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
218    .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
219    .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
220    .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
221    .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
222    .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
223    .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
224    .Case("read", TaintPropagationRule(0, 2, 1, true))
225    .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
226    .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
227    .Case("fgets", TaintPropagationRule(2, 0, true))
228    .Case("getline", TaintPropagationRule(2, 0))
229    .Case("getdelim", TaintPropagationRule(3, 0))
230    .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
231    .Default(TaintPropagationRule());
232
233  if (!Rule.isNull())
234    return Rule;
235
236  // Check if it's one of the memory setting/copying functions.
237  // This check is specialized but faster then calling isCLibraryFunction.
238  unsigned BId = 0;
239  if ( (BId = FDecl->getMemoryFunctionKind()) )
240    switch(BId) {
241    case Builtin::BImemcpy:
242    case Builtin::BImemmove:
243    case Builtin::BIstrncpy:
244    case Builtin::BIstrncat:
245      return TaintPropagationRule(1, 2, 0, true);
246    case Builtin::BIstrlcpy:
247    case Builtin::BIstrlcat:
248      return TaintPropagationRule(1, 2, 0, false);
249    case Builtin::BIstrndup:
250      return TaintPropagationRule(0, 1, ReturnValueIndex);
251
252    default:
253      break;
254    };
255
256  // Process all other functions which could be defined as builtins.
257  if (Rule.isNull()) {
258    if (C.isCLibraryFunction(FDecl, "snprintf") ||
259        C.isCLibraryFunction(FDecl, "sprintf"))
260      return TaintPropagationRule(InvalidArgIndex, 0, true);
261    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
262             C.isCLibraryFunction(FDecl, "stpcpy") ||
263             C.isCLibraryFunction(FDecl, "strcat"))
264      return TaintPropagationRule(1, 0, true);
265    else if (C.isCLibraryFunction(FDecl, "bcopy"))
266      return TaintPropagationRule(0, 2, 1, false);
267    else if (C.isCLibraryFunction(FDecl, "strdup") ||
268             C.isCLibraryFunction(FDecl, "strdupa"))
269      return TaintPropagationRule(0, ReturnValueIndex);
270    else if (C.isCLibraryFunction(FDecl, "wcsdup"))
271      return TaintPropagationRule(0, ReturnValueIndex);
272  }
273
274  // Skipping the following functions, since they might be used for cleansing
275  // or smart memory copy:
276  // - memccpy - copying until hitting a special character.
277
278  return TaintPropagationRule();
279}
280
281void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
282                                       CheckerContext &C) const {
283  // Check for errors first.
284  if (checkPre(CE, C))
285    return;
286
287  // Add taint second.
288  addSourcesPre(CE, C);
289}
290
291void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
292                                        CheckerContext &C) const {
293  if (propagateFromPre(CE, C))
294    return;
295  addSourcesPost(CE, C);
296}
297
298void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
299                                        CheckerContext &C) const {
300  ProgramStateRef State = 0;
301  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
302  if (!FDecl || FDecl->getKind() != Decl::Function)
303    return;
304
305  StringRef Name = C.getCalleeName(FDecl);
306  if (Name.empty())
307    return;
308
309  // First, try generating a propagation rule for this function.
310  TaintPropagationRule Rule =
311    TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
312  if (!Rule.isNull()) {
313    State = Rule.process(CE, C);
314    if (!State)
315      return;
316    C.addTransition(State);
317    return;
318  }
319
320  // Otherwise, check if we have custom pre-processing implemented.
321  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
322    .Case("fscanf", &GenericTaintChecker::preFscanf)
323    .Default(0);
324  // Check and evaluate the call.
325  if (evalFunction)
326    State = (this->*evalFunction)(CE, C);
327  if (!State)
328    return;
329  C.addTransition(State);
330
331}
332
333bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
334                                           CheckerContext &C) const {
335  ProgramStateRef State = C.getState();
336
337  // Depending on what was tainted at pre-visit, we determined a set of
338  // arguments which should be tainted after the function returns. These are
339  // stored in the state as TaintArgsOnPostVisit set.
340  llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
341  if (TaintArgs.isEmpty())
342    return false;
343
344  for (llvm::ImmutableSet<unsigned>::iterator
345         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
346    unsigned ArgNum  = *I;
347
348    // Special handling for the tainted return value.
349    if (ArgNum == ReturnValueIndex) {
350      State = State->addTaint(CE, C.getLocationContext());
351      continue;
352    }
353
354    // The arguments are pointer arguments. The data they are pointing at is
355    // tainted after the call.
356    if (CE->getNumArgs() < (ArgNum + 1))
357      return false;
358    const Expr* Arg = CE->getArg(ArgNum);
359    SymbolRef Sym = getPointedToSymbol(C, Arg);
360    if (Sym)
361      State = State->addTaint(Sym);
362  }
363
364  // Clear up the taint info from the state.
365  State = State->remove<TaintArgsOnPostVisit>();
366
367  if (State != C.getState()) {
368    C.addTransition(State);
369    return true;
370  }
371  return false;
372}
373
374void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
375                                         CheckerContext &C) const {
376  // Define the attack surface.
377  // Set the evaluation function by switching on the callee name.
378  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
379  if (!FDecl || FDecl->getKind() != Decl::Function)
380    return;
381
382  StringRef Name = C.getCalleeName(FDecl);
383  if (Name.empty())
384    return;
385  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
386    .Case("scanf", &GenericTaintChecker::postScanf)
387    // TODO: Add support for vfscanf & family.
388    .Case("getchar", &GenericTaintChecker::postRetTaint)
389    .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
390    .Case("getenv", &GenericTaintChecker::postRetTaint)
391    .Case("fopen", &GenericTaintChecker::postRetTaint)
392    .Case("fdopen", &GenericTaintChecker::postRetTaint)
393    .Case("freopen", &GenericTaintChecker::postRetTaint)
394    .Case("getch", &GenericTaintChecker::postRetTaint)
395    .Case("wgetch", &GenericTaintChecker::postRetTaint)
396    .Case("socket", &GenericTaintChecker::postSocket)
397    .Default(0);
398
399  // If the callee isn't defined, it is not of security concern.
400  // Check and evaluate the call.
401  ProgramStateRef State = 0;
402  if (evalFunction)
403    State = (this->*evalFunction)(CE, C);
404  if (!State)
405    return;
406
407  C.addTransition(State);
408}
409
410bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
411
412  if (checkUncontrolledFormatString(CE, C))
413    return true;
414
415  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
416  if (!FDecl || FDecl->getKind() != Decl::Function)
417    return false;
418
419  StringRef Name = C.getCalleeName(FDecl);
420  if (Name.empty())
421    return false;
422
423  if (checkSystemCall(CE, Name, C))
424    return true;
425
426  if (checkTaintedBufferSize(CE, FDecl, C))
427    return true;
428
429  return false;
430}
431
432SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
433                                                  const Expr* Arg) {
434  ProgramStateRef State = C.getState();
435  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
436  if (AddrVal.isUnknownOrUndef())
437    return 0;
438
439  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
440  if (!AddrLoc)
441    return 0;
442
443  const PointerType *ArgTy =
444    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
445  SVal Val = State->getSVal(*AddrLoc,
446                            ArgTy ? ArgTy->getPointeeType(): QualType());
447  return Val.getAsSymbol();
448}
449
450ProgramStateRef
451GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
452                                                   CheckerContext &C) const {
453  ProgramStateRef State = C.getState();
454
455  // Check for taint in arguments.
456  bool IsTainted = false;
457  for (ArgVector::const_iterator I = SrcArgs.begin(),
458                                 E = SrcArgs.end(); I != E; ++I) {
459    unsigned ArgNum = *I;
460
461    if (ArgNum == InvalidArgIndex) {
462      // Check if any of the arguments is tainted, but skip the
463      // destination arguments.
464      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
465        if (isDestinationArgument(i))
466          continue;
467        if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
468          break;
469      }
470      break;
471    }
472
473    if (CE->getNumArgs() < (ArgNum + 1))
474      return State;
475    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
476      break;
477  }
478  if (!IsTainted)
479    return State;
480
481  // Mark the arguments which should be tainted after the function returns.
482  for (ArgVector::const_iterator I = DstArgs.begin(),
483                                 E = DstArgs.end(); I != E; ++I) {
484    unsigned ArgNum = *I;
485
486    // Should we mark all arguments as tainted?
487    if (ArgNum == InvalidArgIndex) {
488      // For all pointer and references that were passed in:
489      //   If they are not pointing to const data, mark data as tainted.
490      //   TODO: So far we are just going one level down; ideally we'd need to
491      //         recurse here.
492      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
493        const Expr *Arg = CE->getArg(i);
494        // Process pointer argument.
495        const Type *ArgTy = Arg->getType().getTypePtr();
496        QualType PType = ArgTy->getPointeeType();
497        if ((!PType.isNull() && !PType.isConstQualified())
498            || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
499          State = State->add<TaintArgsOnPostVisit>(i);
500      }
501      continue;
502    }
503
504    // Should mark the return value?
505    if (ArgNum == ReturnValueIndex) {
506      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
507      continue;
508    }
509
510    // Mark the given argument.
511    assert(ArgNum < CE->getNumArgs());
512    State = State->add<TaintArgsOnPostVisit>(ArgNum);
513  }
514
515  return State;
516}
517
518
519// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
520// and arg 1 should get taint.
521ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
522                                                   CheckerContext &C) const {
523  assert(CE->getNumArgs() >= 2);
524  ProgramStateRef State = C.getState();
525
526  // Check is the file descriptor is tainted.
527  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
528      isStdin(CE->getArg(0), C)) {
529    // All arguments except for the first two should get taint.
530    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
531        State = State->add<TaintArgsOnPostVisit>(i);
532    return State;
533  }
534
535  return 0;
536}
537
538
539// If argument 0(protocol domain) is network, the return value should get taint.
540ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
541                                                CheckerContext &C) const {
542  ProgramStateRef State = C.getState();
543  if (CE->getNumArgs() < 3)
544    return State;
545
546  SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
547  StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
548  // White list the internal communication protocols.
549  if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
550      DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
551    return State;
552  State = State->addTaint(CE, C.getLocationContext());
553  return State;
554}
555
556ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
557                                                   CheckerContext &C) const {
558  ProgramStateRef State = C.getState();
559  if (CE->getNumArgs() < 2)
560    return State;
561
562  // All arguments except for the very first one should get taint.
563  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
564    // The arguments are pointer arguments. The data they are pointing at is
565    // tainted after the call.
566    const Expr* Arg = CE->getArg(i);
567        SymbolRef Sym = getPointedToSymbol(C, Arg);
568    if (Sym)
569      State = State->addTaint(Sym);
570  }
571  return State;
572}
573
574ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
575                                                  CheckerContext &C) const {
576  return C.getState()->addTaint(CE, C.getLocationContext());
577}
578
579bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
580  ProgramStateRef State = C.getState();
581  SVal Val = State->getSVal(E, C.getLocationContext());
582
583  // stdin is a pointer, so it would be a region.
584  const MemRegion *MemReg = Val.getAsRegion();
585
586  // The region should be symbolic, we do not know it's value.
587  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
588  if (!SymReg)
589    return false;
590
591  // Get it's symbol and find the declaration region it's pointing to.
592  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
593  if (!Sm)
594    return false;
595  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
596  if (!DeclReg)
597    return false;
598
599  // This region corresponds to a declaration, find out if it's a global/extern
600  // variable named stdin with the proper type.
601  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
602    D = D->getCanonicalDecl();
603    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
604        if (const PointerType * PtrTy =
605              dyn_cast<PointerType>(D->getType().getTypePtr()))
606          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
607            return true;
608  }
609  return false;
610}
611
612static bool getPrintfFormatArgumentNum(const CallExpr *CE,
613                                       const CheckerContext &C,
614                                       unsigned int &ArgNum) {
615  // Find if the function contains a format string argument.
616  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
617  // vsnprintf, syslog, custom annotated functions.
618  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
619  if (!FDecl)
620    return false;
621  for (specific_attr_iterator<FormatAttr>
622         i = FDecl->specific_attr_begin<FormatAttr>(),
623         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
624
625    const FormatAttr *Format = *i;
626    ArgNum = Format->getFormatIdx() - 1;
627    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
628      return true;
629  }
630
631  // Or if a function is named setproctitle (this is a heuristic).
632  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
633    ArgNum = 0;
634    return true;
635  }
636
637  return false;
638}
639
640bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
641                                                  const char Msg[],
642                                                  CheckerContext &C) const {
643  assert(E);
644
645  // Check for taint.
646  ProgramStateRef State = C.getState();
647  if (!State->isTainted(getPointedToSymbol(C, E)) &&
648      !State->isTainted(E, C.getLocationContext()))
649    return false;
650
651  // Generate diagnostic.
652  if (ExplodedNode *N = C.addTransition()) {
653    initBugType();
654    BugReport *report = new BugReport(*BT, Msg, N);
655    report->addRange(E->getSourceRange());
656    C.emitReport(report);
657    return true;
658  }
659  return false;
660}
661
662bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
663                                                        CheckerContext &C) const{
664  // Check if the function contains a format string argument.
665  unsigned int ArgNum = 0;
666  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
667    return false;
668
669  // If either the format string content or the pointer itself are tainted, warn.
670  if (generateReportIfTainted(CE->getArg(ArgNum),
671                              MsgUncontrolledFormatString, C))
672    return true;
673  return false;
674}
675
676bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
677                                          StringRef Name,
678                                          CheckerContext &C) const {
679  // TODO: It might make sense to run this check on demand. In some cases,
680  // we should check if the environment has been cleansed here. We also might
681  // need to know if the user was reset before these calls(seteuid).
682  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
683    .Case("system", 0)
684    .Case("popen", 0)
685    .Case("execl", 0)
686    .Case("execle", 0)
687    .Case("execlp", 0)
688    .Case("execv", 0)
689    .Case("execvp", 0)
690    .Case("execvP", 0)
691    .Case("execve", 0)
692    .Case("dlopen", 0)
693    .Default(UINT_MAX);
694
695  if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
696    return false;
697
698  if (generateReportIfTainted(CE->getArg(ArgNum),
699                              MsgSanitizeSystemArgs, C))
700    return true;
701
702  return false;
703}
704
705// TODO: Should this check be a part of the CString checker?
706// If yes, should taint be a global setting?
707bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
708                                                 const FunctionDecl *FDecl,
709                                                 CheckerContext &C) const {
710  // If the function has a buffer size argument, set ArgNum.
711  unsigned ArgNum = InvalidArgIndex;
712  unsigned BId = 0;
713  if ( (BId = FDecl->getMemoryFunctionKind()) )
714    switch(BId) {
715    case Builtin::BImemcpy:
716    case Builtin::BImemmove:
717    case Builtin::BIstrncpy:
718      ArgNum = 2;
719      break;
720    case Builtin::BIstrndup:
721      ArgNum = 1;
722      break;
723    default:
724      break;
725    };
726
727  if (ArgNum == InvalidArgIndex) {
728    if (C.isCLibraryFunction(FDecl, "malloc") ||
729        C.isCLibraryFunction(FDecl, "calloc") ||
730        C.isCLibraryFunction(FDecl, "alloca"))
731      ArgNum = 0;
732    else if (C.isCLibraryFunction(FDecl, "memccpy"))
733      ArgNum = 3;
734    else if (C.isCLibraryFunction(FDecl, "realloc"))
735      ArgNum = 1;
736    else if (C.isCLibraryFunction(FDecl, "bcopy"))
737      ArgNum = 2;
738  }
739
740  if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
741      generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
742    return true;
743
744  return false;
745}
746
747void ento::registerGenericTaintChecker(CheckerManager &mgr) {
748  mgr.registerChecker<GenericTaintChecker>();
749}
750