GenericTaintChecker.cpp revision 022b3f4490bbdcde7b3f18ce0498f9a73b6cbf53
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include <climits>
24
25using namespace clang;
26using namespace ento;
27
28namespace {
29class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
30                                            check::PreStmt<CallExpr> > {
31public:
32  static void *getTag() { static int Tag; return &Tag; }
33
34  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
35  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
36
37  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
38
39private:
40  static const unsigned ReturnValueIndex = UINT_MAX;
41  static const unsigned InvalidArgIndex = UINT_MAX - 1;
42
43  mutable llvm::OwningPtr<BugType> BT;
44  void initBugType() const;
45
46  /// \brief Catch taint related bugs. Check if tainted data is passed to a
47  /// system call etc.
48  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
49
50  /// \brief Add taint sources on a pre-visit.
51  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
52
53  /// \brief Propagate taint generated at pre-visit.
54  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
55
56  /// \brief Add taint sources on a post visit.
57  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
58
59  /// \brief Given a pointer argument, get the symbol of the value it contains
60  /// (points to).
61  SymbolRef getPointedToSymbol(CheckerContext &C,
62                               const Expr *Arg,
63                               bool IssueWarning = false) const;
64
65  inline bool isTaintedOrPointsToTainted(const Expr *E,
66                                         const ProgramState *State,
67                                         CheckerContext &C) const {
68    return (State->isTainted(E, C.getLocationContext()) ||
69            (E->getType().getTypePtr()->isPointerType() &&
70             State->isTainted(getPointedToSymbol(C, E))));
71  }
72
73  /// Functions defining the attack surface.
74  typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
75                                                       CheckerContext &C) const;
76  const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
77  const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78
79  /// Taint the scanned input if the file is tainted.
80  const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
81  /// Taint if any of the arguments are tainted.
82  const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
83  const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
84
85  /// Check if the region the expression evaluates to is the standard input,
86  /// and thus, is tainted.
87  bool isStdin(const Expr *E, CheckerContext &C) const;
88
89  /// Check for CWE-134: Uncontrolled Format String.
90  static const char MsgUncontrolledFormatString[];
91  bool checkUncontrolledFormatString(const CallExpr *CE,
92                                     CheckerContext &C) const;
93
94  /// Check for:
95  /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
96  /// CWE-78, "Failure to Sanitize Data into an OS Command"
97  static const char MsgSanitizeSystemArgs[];
98  bool checkSystemCall(const CallExpr *CE, StringRef Name,
99                       CheckerContext &C) const;
100
101  /// Generate a report if the expression is tainted or points to tainted data.
102  bool generateReportIfTainted(const Expr *E, const char Msg[],
103                               CheckerContext &C) const;
104
105
106  typedef llvm::SmallVector<unsigned, 2> ArgVector;
107
108  /// \brief A struct used to specify taint propagation rules for a function.
109  ///
110  /// If any of the possible taint source arguments is tainted, all of the
111  /// destination arguments should also be tainted. Use InvalidArgIndex in the
112  /// src list to specify that all of the arguments can introduce taint. Use
113  /// InvalidArgIndex in the dst arguments to signify that all the non-const
114  /// pointer and reference arguments might be tainted on return. If
115  /// ReturnValueIndex is added to the dst list, the return value will be
116  /// tainted.
117  struct TaintPropagationRule {
118    /// List of arguments which can be taint sources and should be checked.
119    ArgVector SrcArgs;
120    /// List of arguments which should be tainted on function return.
121    ArgVector DstArgs;
122
123    TaintPropagationRule() {}
124
125    TaintPropagationRule(unsigned SArg, unsigned DArg) {
126      SrcArgs.push_back(SArg);
127      DstArgs.push_back(DArg);
128    }
129
130    inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
131    inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
132
133    inline bool isNull() { return SrcArgs.empty(); }
134  };
135
136  /// \brief Pre-process a function which propagates taint according to the
137  /// given taint rule.
138  const ProgramState *prePropagateTaint(const CallExpr *CE,
139                                        CheckerContext &C,
140                                        const TaintPropagationRule PR) const;
141
142
143};
144// TODO: We probably could use TableGen here.
145const char GenericTaintChecker::MsgUncontrolledFormatString[] =
146  "Tainted format string (CWE-134: Uncontrolled Format String)";
147
148const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
149  "Tainted data passed to a system call "
150  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
151
152}
153
154/// A set which is used to pass information from call pre-visit instruction
155/// to the call post-visit. The values are unsigned integers, which are either
156/// ReturnValueIndex, or indexes of the pointer/reference argument, which
157/// points to data, which should be tainted on return.
158namespace { struct TaintArgsOnPostVisit{}; }
159namespace clang { namespace ento {
160template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
161    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
162  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
163};
164}}
165
166inline void GenericTaintChecker::initBugType() const {
167  if (!BT)
168    BT.reset(new BugType("Taint Analysis", "General"));
169}
170
171void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
172                                       CheckerContext &C) const {
173  // Check for errors first.
174  if (checkPre(CE, C))
175    return;
176
177  // Add taint second.
178  addSourcesPre(CE, C);
179}
180
181void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
182                                        CheckerContext &C) const {
183  if (propagateFromPre(CE, C))
184    return;
185  addSourcesPost(CE, C);
186}
187
188void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
189                                        CheckerContext &C) const {
190  // Set the evaluation function by switching on the callee name.
191  StringRef Name = C.getCalleeName(CE);
192  if (Name.empty())
193    return;
194
195  const ProgramState *State = 0;
196
197  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
198    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
199    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
200    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
201    .Default(TaintPropagationRule());
202
203  if (!Rule.isNull()) {
204    State = prePropagateTaint(CE, C, Rule);
205    if (!State)
206      return;
207    C.addTransition(State);
208  }
209
210  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
211    .Case("fscanf", &GenericTaintChecker::preFscanf)
212    .Cases("strcpy", "__builtin___strcpy_chk",
213           "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
214    .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
215    .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
216    .Default(0);
217
218  // Check and evaluate the call.
219  if (evalFunction)
220    State = (this->*evalFunction)(CE, C);
221  if (!State)
222    return;
223
224  C.addTransition(State);
225}
226
227bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
228                                           CheckerContext &C) const {
229  const ProgramState *State = C.getState();
230
231  // Depending on what was tainted at pre-visit, we determined a set of
232  // arguments which should be tainted after the function returns. These are
233  // stored in the state as TaintArgsOnPostVisit set.
234  llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
235  for (llvm::ImmutableSet<unsigned>::iterator
236         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
237    unsigned ArgNum  = *I;
238
239    // Special handling for the tainted return value.
240    if (ArgNum == ReturnValueIndex) {
241      State = State->addTaint(CE, C.getLocationContext());
242      continue;
243    }
244
245    // The arguments are pointer arguments. The data they are pointing at is
246    // tainted after the call.
247    const Expr* Arg = CE->getArg(ArgNum);
248    SymbolRef Sym = getPointedToSymbol(C, Arg, true);
249    if (Sym)
250      State = State->addTaint(Sym);
251  }
252
253  // Clear up the taint info from the state.
254  State = State->remove<TaintArgsOnPostVisit>();
255
256  if (State != C.getState()) {
257    C.addTransition(State);
258    return true;
259  }
260  return false;
261}
262
263void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
264                                         CheckerContext &C) const {
265  // Define the attack surface.
266  // Set the evaluation function by switching on the callee name.
267  StringRef Name = C.getCalleeName(CE);
268  if (Name.empty())
269    return;
270  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
271    .Case("scanf", &GenericTaintChecker::postScanf)
272    // TODO: Add support for vfscanf & family.
273    .Case("getchar", &GenericTaintChecker::postRetTaint)
274    .Case("getenv", &GenericTaintChecker::postRetTaint)
275    .Case("fopen", &GenericTaintChecker::postRetTaint)
276    .Case("fdopen", &GenericTaintChecker::postRetTaint)
277    .Case("freopen", &GenericTaintChecker::postRetTaint)
278    .Default(0);
279
280  // If the callee isn't defined, it is not of security concern.
281  // Check and evaluate the call.
282  const ProgramState *State = 0;
283  if (evalFunction)
284    State = (this->*evalFunction)(CE, C);
285  if (!State)
286    return;
287
288  C.addTransition(State);
289}
290
291bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
292
293  if (checkUncontrolledFormatString(CE, C))
294    return true;
295
296  StringRef Name = C.getCalleeName(CE);
297  if (Name.empty())
298    return false;
299
300  if (checkSystemCall(CE, Name, C))
301    return true;
302
303  return false;
304}
305
306SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
307                                                  const Expr* Arg,
308                                                  bool IssueWarning) const {
309  const ProgramState *State = C.getState();
310  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
311  if (AddrVal.isUnknownOrUndef())
312    return 0;
313
314  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
315
316  if (!AddrLoc && !IssueWarning)
317    return 0;
318
319  // If the Expr is not a location, issue a warning.
320  if (!AddrLoc) {
321    assert(IssueWarning);
322    if (ExplodedNode *N = C.generateSink(State)) {
323      initBugType();
324      BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
325      report->addRange(Arg->getSourceRange());
326      C.EmitReport(report);
327    }
328    return 0;
329  }
330
331  const PointerType *ArgTy =
332    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
333  assert(ArgTy);
334  SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType());
335  return Val.getAsSymbol();
336}
337
338const ProgramState *
339GenericTaintChecker::prePropagateTaint(const CallExpr *CE,
340                                       CheckerContext &C,
341                                       const TaintPropagationRule PR) const {
342  const ProgramState *State = C.getState();
343
344  // Check for taint in arguments.
345  bool IsTainted = false;
346  for (ArgVector::const_iterator I = PR.SrcArgs.begin(),
347                                 E = PR.SrcArgs.end(); I != E; ++I) {
348    unsigned ArgNum = *I;
349
350    if (ArgNum == InvalidArgIndex) {
351      // Check if any of the arguments is tainted.
352      for (unsigned int i = 0; i < CE->getNumArgs(); ++i)
353        if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
354          break;
355      break;
356    }
357
358    assert(ArgNum < CE->getNumArgs());
359    if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
360      break;
361  }
362  if (!IsTainted)
363    return State;
364
365  // Mark the arguments which should be tainted after the function returns.
366  for (ArgVector::const_iterator I = PR.DstArgs.begin(),
367                                 E = PR.DstArgs.end(); I != E; ++I) {
368    unsigned ArgNum = *I;
369
370    // Should we mark all arguments as tainted?
371    if (ArgNum == InvalidArgIndex) {
372      // For all pointer and references that were passed in:
373      //   If they are not pointing to const data, mark data as tainted.
374      //   TODO: So far we are just going one level down; ideally we'd need to
375      //         recurse here.
376      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
377        const Expr *Arg = CE->getArg(i);
378        // Process pointer argument.
379        const Type *ArgTy = Arg->getType().getTypePtr();
380        QualType PType = ArgTy->getPointeeType();
381        if ((!PType.isNull() && !PType.isConstQualified())
382            || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
383          State = State->add<TaintArgsOnPostVisit>(i);
384      }
385      continue;
386    }
387
388    // Should mark the return value?
389    if (ArgNum == ReturnValueIndex) {
390      State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
391      continue;
392    }
393
394    // Mark the given argument.
395    assert(ArgNum < CE->getNumArgs());
396    State = State->add<TaintArgsOnPostVisit>(ArgNum);
397  }
398
399  return State;
400}
401
402
403// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
404// and arg 1 should get taint.
405const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
406                                                   CheckerContext &C) const {
407  assert(CE->getNumArgs() >= 2);
408  const ProgramState *State = C.getState();
409
410  // Check is the file descriptor is tainted.
411  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
412      isStdin(CE->getArg(0), C)) {
413    // All arguments except for the first two should get taint.
414    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
415        State = State->add<TaintArgsOnPostVisit>(i);
416    return State;
417  }
418
419  return 0;
420}
421
422// If any arguments are tainted, mark the return value as tainted on post-visit.
423const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
424                                                     CheckerContext &C) const {
425  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
426    const ProgramState *State = C.getState();
427    const Expr *Arg = CE->getArg(i);
428    if (State->isTainted(Arg, C.getLocationContext()) ||
429        State->isTainted(getPointedToSymbol(C, Arg)))
430      return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
431  }
432  return 0;
433}
434
435const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
436                                                    CheckerContext &C) const {
437  assert(CE->getNumArgs() >= 2);
438  const Expr *FromArg = CE->getArg(1);
439  const ProgramState *State = C.getState();
440  if (State->isTainted(FromArg, C.getLocationContext()) ||
441      State->isTainted(getPointedToSymbol(C, FromArg)))
442    return State = State->add<TaintArgsOnPostVisit>(0);
443  return 0;
444}
445
446const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
447                                                   CheckerContext &C) const {
448  const ProgramState *State = C.getState();
449  assert(CE->getNumArgs() >= 2);
450  SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
451  // All arguments except for the very first one should get taint.
452  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
453    // The arguments are pointer arguments. The data they are pointing at is
454    // tainted after the call.
455    const Expr* Arg = CE->getArg(i);
456        SymbolRef Sym = getPointedToSymbol(C, Arg, true);
457    if (Sym)
458      State = State->addTaint(Sym);
459  }
460  return State;
461}
462
463const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
464                                                      CheckerContext &C) const {
465  return C.getState()->addTaint(CE, C.getLocationContext());
466}
467
468bool GenericTaintChecker::isStdin(const Expr *E,
469                                  CheckerContext &C) const {
470  const ProgramState *State = C.getState();
471  SVal Val = State->getSVal(E, C.getLocationContext());
472
473  // stdin is a pointer, so it would be a region.
474  const MemRegion *MemReg = Val.getAsRegion();
475
476  // The region should be symbolic, we do not know it's value.
477  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
478  if (!SymReg)
479    return false;
480
481  // Get it's symbol and find the declaration region it's pointing to.
482  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
483  if (!Sm)
484    return false;
485  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
486  if (!DeclReg)
487    return false;
488
489  // This region corresponds to a declaration, find out if it's a global/extern
490  // variable named stdin with the proper type.
491  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
492    D = D->getCanonicalDecl();
493    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
494        if (const PointerType * PtrTy =
495              dyn_cast<PointerType>(D->getType().getTypePtr()))
496          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
497            return true;
498  }
499  return false;
500}
501
502static bool getPrintfFormatArgumentNum(const CallExpr *CE,
503                                       const CheckerContext &C,
504                                       unsigned int &ArgNum) {
505  // Find if the function contains a format string argument.
506  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
507  // vsnprintf, syslog, custom annotated functions.
508  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
509  if (!FDecl)
510    return false;
511  for (specific_attr_iterator<FormatAttr>
512         i = FDecl->specific_attr_begin<FormatAttr>(),
513         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
514
515    const FormatAttr *Format = *i;
516    ArgNum = Format->getFormatIdx() - 1;
517    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
518      return true;
519  }
520
521  // Or if a function is named setproctitle (this is a heuristic).
522  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
523    ArgNum = 0;
524    return true;
525  }
526
527  return false;
528}
529
530bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
531                                                  const char Msg[],
532                                                  CheckerContext &C) const {
533  assert(E);
534
535  // Check for taint.
536  const ProgramState *State = C.getState();
537  if (!State->isTainted(getPointedToSymbol(C, E)) &&
538      !State->isTainted(E, C.getLocationContext()))
539    return false;
540
541  // Generate diagnostic.
542  if (ExplodedNode *N = C.addTransition()) {
543    initBugType();
544    BugReport *report = new BugReport(*BT, Msg, N);
545    report->addRange(E->getSourceRange());
546    C.EmitReport(report);
547    return true;
548  }
549  return false;
550}
551
552bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
553                                                        CheckerContext &C) const{
554  // Check if the function contains a format string argument.
555  unsigned int ArgNum = 0;
556  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
557    return false;
558
559  // If either the format string content or the pointer itself are tainted, warn.
560  if (generateReportIfTainted(CE->getArg(ArgNum),
561                              MsgUncontrolledFormatString, C))
562    return true;
563  return false;
564}
565
566bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
567                                          StringRef Name,
568                                          CheckerContext &C) const {
569  unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
570    .Case("system", 0)
571    .Case("popen", 0)
572    .Default(UINT_MAX);
573
574  if (ArgNum == UINT_MAX)
575    return false;
576
577  if (generateReportIfTainted(CE->getArg(ArgNum),
578                              MsgSanitizeSystemArgs, C))
579    return true;
580
581  return false;
582}
583
584void ento::registerGenericTaintChecker(CheckerManager &mgr) {
585  mgr.registerChecker<GenericTaintChecker>();
586}
587