GenericTaintChecker.cpp revision 71d29095d27e94b00083259c06a45f5294501697
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include <climits>
24
25using namespace clang;
26using namespace ento;
27
28namespace {
29class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
30                                            check::PreStmt<CallExpr> > {
31public:
32  static const unsigned ReturnValueIndex = UINT_MAX;
33
34private:
35  mutable llvm::OwningPtr<BugType> BT;
36  void initBugType() const;
37
38  /// \brief Catch taint related bugs. Check if tainted data is passed to a
39  /// system call etc.
40  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
41
42  /// \brief Add taint sources on a pre-visit.
43  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
44
45  /// \brief Propagate taint generated at pre-visit.
46  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
47
48  /// \brief Add taint sources on a post visit.
49  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
50
51  /// \brief Given a pointer argument, get the symbol of the value it contains
52  /// (points to).
53  SymbolRef getPointedToSymbol(CheckerContext &C,
54                               const Expr *Arg,
55                               bool IssueWarning = false) const;
56
57  /// Functions defining the attack surface.
58  typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
59                                                       CheckerContext &C) const;
60  const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
61  const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
62
63  /// Taint the scanned input if the file is tainted.
64  const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
65  /// Taint if any of the arguments are tainted.
66  const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
67  const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
68
69  /// Check if the region the expression evaluates to is the standard input,
70  /// and thus, is tainted.
71  bool isStdin(const Expr *E, CheckerContext &C) const;
72
73  /// Check for CWE-134: Uncontrolled Format String.
74  bool checkUncontrolledFormatString(const CallExpr *CE,
75                                     CheckerContext &C) const;
76
77public:
78  static void *getTag() { static int Tag; return &Tag; }
79
80  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
81  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
82
83  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
84
85};
86}
87
88/// A set which is used to pass information from call pre-visit instruction
89/// to the call post-visit. The values are unsigned integers, which are either
90/// ReturnValueIndex, or indexes of the pointer/reference argument, which
91/// points to data, which should be tainted on return.
92namespace { struct TaintArgsOnPostVisit{}; }
93namespace clang { namespace ento {
94template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
95    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
96  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
97};
98}}
99
100inline void GenericTaintChecker::initBugType() const {
101  if (!BT)
102    BT.reset(new BugType("Taint Analysis", "General"));
103}
104
105void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
106                                       CheckerContext &C) const {
107  // Check for errors first.
108  if (checkPre(CE, C))
109    return;
110
111  // Add taint second.
112  addSourcesPre(CE, C);
113}
114
115void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
116                                        CheckerContext &C) const {
117  if (propagateFromPre(CE, C))
118    return;
119  addSourcesPost(CE, C);
120}
121
122void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
123                                        CheckerContext &C) const {
124  // Set the evaluation function by switching on the callee name.
125  StringRef Name = C.getCalleeName(CE);
126  if (Name.empty())
127    return;
128  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
129    .Case("atoi", &GenericTaintChecker::preAnyArgs)
130    .Case("atol", &GenericTaintChecker::preAnyArgs)
131    .Case("atoll", &GenericTaintChecker::preAnyArgs)
132    .Case("fscanf", &GenericTaintChecker::preFscanf)
133    .Cases("strcpy", "__builtin___strcpy_chk",
134           "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
135    .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
136    .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
137    .Default(0);
138
139  // Check and evaluate the call.
140  const ProgramState *State = 0;
141  if (evalFunction)
142    State = (this->*evalFunction)(CE, C);
143  if (!State)
144    return;
145
146  C.addTransition(State);
147}
148
149bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
150                                           CheckerContext &C) const {
151  const ProgramState *State = C.getState();
152
153  // Depending on what was tainted at pre-visit, we determined a set of
154  // arguments which should be tainted after the function returns. These are
155  // stored in the state as TaintArgsOnPostVisit set.
156  llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
157  for (llvm::ImmutableSet<unsigned>::iterator
158         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
159    unsigned ArgNum  = *I;
160
161    // Special handling for the tainted return value.
162    if (ArgNum == ReturnValueIndex) {
163      State = State->addTaint(CE, C.getLocationContext());
164      continue;
165    }
166
167    // The arguments are pointer arguments. The data they are pointing at is
168    // tainted after the call.
169    const Expr* Arg = CE->getArg(ArgNum);
170    SymbolRef Sym = getPointedToSymbol(C, Arg, true);
171    if (Sym)
172      State = State->addTaint(Sym);
173  }
174
175  // Clear up the taint info from the state.
176  State = State->remove<TaintArgsOnPostVisit>();
177
178  if (State != C.getState()) {
179    C.addTransition(State);
180    return true;
181  }
182  return false;
183}
184
185void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
186                                         CheckerContext &C) const {
187  // Define the attack surface.
188  // Set the evaluation function by switching on the callee name.
189  StringRef Name = C.getCalleeName(CE);
190  if (Name.empty())
191    return;
192  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
193    .Case("scanf", &GenericTaintChecker::postScanf)
194    // TODO: Add support for vfscanf & family.
195    .Case("getchar", &GenericTaintChecker::postRetTaint)
196    .Case("getenv", &GenericTaintChecker::postRetTaint)
197    .Case("fopen", &GenericTaintChecker::postRetTaint)
198    .Case("fdopen", &GenericTaintChecker::postRetTaint)
199    .Case("freopen", &GenericTaintChecker::postRetTaint)
200    .Default(0);
201
202  // If the callee isn't defined, it is not of security concern.
203  // Check and evaluate the call.
204  const ProgramState *State = 0;
205  if (evalFunction)
206    State = (this->*evalFunction)(CE, C);
207  if (!State)
208    return;
209
210  C.addTransition(State);
211}
212
213bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
214
215  if (checkUncontrolledFormatString(CE, C))
216    return true;
217
218  return false;
219}
220
221SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
222                                                  const Expr* Arg,
223                                                  bool IssueWarning) const {
224  const ProgramState *State = C.getState();
225  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
226  if (AddrVal.isUnknownOrUndef())
227    return 0;
228
229  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
230
231  if (!AddrLoc && !IssueWarning)
232    return 0;
233
234  // If the Expr is not a location, issue a warning.
235  if (!AddrLoc) {
236    assert(IssueWarning);
237    if (ExplodedNode *N = C.generateSink(State)) {
238      initBugType();
239      BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
240      report->addRange(Arg->getSourceRange());
241      C.EmitReport(report);
242    }
243    return 0;
244  }
245
246  const PointerType *ArgTy =
247    dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
248  assert(ArgTy);
249  SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType());
250  return Val.getAsSymbol();
251}
252
253// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
254// and arg 1 should get taint.
255const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
256                                                   CheckerContext &C) const {
257  assert(CE->getNumArgs() >= 2);
258  const ProgramState *State = C.getState();
259
260  // Check is the file descriptor is tainted.
261  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
262      isStdin(CE->getArg(0), C)) {
263    // All arguments except for the first two should get taint.
264    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
265        State = State->add<TaintArgsOnPostVisit>(i);
266    return State;
267  }
268
269  return 0;
270}
271
272// If any other arguments are tainted, mark state as tainted on pre-visit.
273const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
274                                                     CheckerContext &C) const {
275  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
276    const ProgramState *State = C.getState();
277    const Expr *Arg = CE->getArg(i);
278    if (State->isTainted(Arg, C.getLocationContext()) ||
279        State->isTainted(getPointedToSymbol(C, Arg)))
280      return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
281  }
282  return 0;
283}
284
285const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
286                                                    CheckerContext &C) const {
287  assert(CE->getNumArgs() >= 2);
288  const Expr *FromArg = CE->getArg(1);
289  const ProgramState *State = C.getState();
290  if (State->isTainted(FromArg, C.getLocationContext()) ||
291      State->isTainted(getPointedToSymbol(C, FromArg)))
292    return State = State->add<TaintArgsOnPostVisit>(0);
293  return 0;
294}
295
296const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
297                                                   CheckerContext &C) const {
298  const ProgramState *State = C.getState();
299  assert(CE->getNumArgs() >= 2);
300  SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
301  // All arguments except for the very first one should get taint.
302  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
303    // The arguments are pointer arguments. The data they are pointing at is
304    // tainted after the call.
305    const Expr* Arg = CE->getArg(i);
306        SymbolRef Sym = getPointedToSymbol(C, Arg, true);
307    if (Sym)
308      State = State->addTaint(Sym);
309  }
310  return State;
311}
312
313const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
314                                                      CheckerContext &C) const {
315  return C.getState()->addTaint(CE, C.getLocationContext());
316}
317
318bool GenericTaintChecker::isStdin(const Expr *E,
319                                  CheckerContext &C) const {
320  const ProgramState *State = C.getState();
321  SVal Val = State->getSVal(E, C.getLocationContext());
322
323  // stdin is a pointer, so it would be a region.
324  const MemRegion *MemReg = Val.getAsRegion();
325
326  // The region should be symbolic, we do not know it's value.
327  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
328  if (!SymReg)
329    return false;
330
331  // Get it's symbol and find the declaration region it's pointing to.
332  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
333  if (!Sm)
334    return false;
335  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
336  if (!DeclReg)
337    return false;
338
339  // This region corresponds to a declaration, find out if it's a global/extern
340  // variable named stdin with the proper type.
341  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
342    D = D->getCanonicalDecl();
343    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
344        if (const PointerType * PtrTy =
345              dyn_cast<PointerType>(D->getType().getTypePtr()))
346          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
347            return true;
348  }
349  return false;
350}
351
352static bool getPrintfFormatArgumentNum(const CallExpr *CE,
353                                       const CheckerContext &C,
354                                       unsigned int &ArgNum) {
355  // Find if the function contains a format string argument.
356  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
357  // vsnprintf, syslog, custom annotated functions.
358  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
359  if (!FDecl)
360    return false;
361  for (specific_attr_iterator<FormatAttr>
362         i = FDecl->specific_attr_begin<FormatAttr>(),
363         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
364
365    const FormatAttr *Format = *i;
366    ArgNum = Format->getFormatIdx() - 1;
367    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
368      return true;
369  }
370
371  // Or if a function is named setproctitle (this is a heuristic).
372  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
373    ArgNum = 0;
374    return true;
375  }
376
377  return false;
378}
379
380bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
381                                                        CheckerContext &C) const{
382  // Check if the function contains a format string argument.
383  unsigned int ArgNum = 0;
384  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
385    return false;
386
387  // If either the format string content or the pointer itself are tainted, warn.
388  const ProgramState *State = C.getState();
389  const Expr *Arg = CE->getArg(ArgNum);
390  if (State->isTainted(getPointedToSymbol(C, Arg)) ||
391      State->isTainted(Arg, C.getLocationContext()))
392    if (ExplodedNode *N = C.addTransition()) {
393      initBugType();
394      BugReport *report = new BugReport(*BT,
395        "Tainted format string (CWE-134: Uncontrolled Format String)", N);
396      report->addRange(Arg->getSourceRange());
397      C.EmitReport(report);
398      return true;
399    }
400  return false;
401}
402
403void ento::registerGenericTaintChecker(CheckerManager &mgr) {
404  mgr.registerChecker<GenericTaintChecker>();
405}
406