GenericTaintChecker.cpp revision 1fb826a6fd893234f32b0b91bb92ea4d127788ad
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23#include <climits>
24
25using namespace clang;
26using namespace ento;
27
28namespace {
29class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
30                                            check::PreStmt<CallExpr> > {
31public:
32  static const unsigned ReturnValueIndex = UINT_MAX;
33
34private:
35  mutable llvm::OwningPtr<BugType> BT;
36  void initBugType() const;
37
38  /// \brief Catch taint related bugs. Check if tainted data is passed to a
39  /// system call etc.
40  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
41
42  /// \brief Add taint sources on a pre-visit.
43  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
44
45  /// \brief Propagate taint generated at pre-visit.
46  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
47
48  /// \brief Add taint sources on a post visit.
49  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
50
51  /// \brief Given a pointer argument, get the symbol of the value it contains
52  /// (points to).
53  SymbolRef getPointedToSymbol(CheckerContext &C,
54                               const Expr *Arg,
55                               bool IssueWarning = false) const;
56
57  /// Functions defining the attack surface.
58  typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
59                                                       CheckerContext &C) const;
60  const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
61  const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
62
63  /// Taint the scanned input if the file is tainted.
64  const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
65  /// Taint if any of the arguments are tainted.
66  const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
67  const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
68
69  /// Check if the region the expression evaluates to is the standard input,
70  /// and thus, is tainted.
71  bool isStdin(const Expr *E, CheckerContext &C) const;
72
73  /// Check for CWE-134: Uncontrolled Format String.
74  bool checkUncontrolledFormatString(const CallExpr *CE,
75                                     CheckerContext &C) const;
76
77public:
78  static void *getTag() { static int Tag; return &Tag; }
79
80  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
81  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
82
83  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
84
85};
86}
87
88/// A set which is used to pass information from call pre-visit instruction
89/// to the call post-visit. The values are unsigned integers, which are either
90/// ReturnValueIndex, or indexes of the pointer/reference argument, which
91/// points to data, which should be tainted on return.
92namespace { struct TaintArgsOnPostVisit{}; }
93namespace clang { namespace ento {
94template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
95    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
96  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
97};
98}}
99
100inline void GenericTaintChecker::initBugType() const {
101  if (!BT)
102    BT.reset(new BugType("Taint Analysis", "General"));
103}
104
105void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
106                                       CheckerContext &C) const {
107  // Check for errors first.
108  if (checkPre(CE, C))
109    return;
110
111  // Add taint second.
112  addSourcesPre(CE, C);
113}
114
115void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
116                                        CheckerContext &C) const {
117  if (propagateFromPre(CE, C))
118    return;
119  addSourcesPost(CE, C);
120}
121
122void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
123                                        CheckerContext &C) const {
124  // Set the evaluation function by switching on the callee name.
125  StringRef Name = C.getCalleeName(CE);
126  if (Name.empty())
127    return;
128  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
129    .Case("atoi", &GenericTaintChecker::preAnyArgs)
130    .Case("atol", &GenericTaintChecker::preAnyArgs)
131    .Case("atoll", &GenericTaintChecker::preAnyArgs)
132    .Case("fscanf", &GenericTaintChecker::preFscanf)
133    .Cases("strcpy", "__builtin___strcpy_chk",
134           "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
135    .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
136    .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
137    .Default(0);
138
139  // Check and evaluate the call.
140  const ProgramState *State = 0;
141  if (evalFunction)
142    State = (this->*evalFunction)(CE, C);
143  if (!State)
144    return;
145
146  C.addTransition(State);
147}
148
149bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
150                                           CheckerContext &C) const {
151  const ProgramState *State = C.getState();
152
153  // Depending on what was tainted at pre-visit, we determined a set of
154  // arguments which should be tainted after the function returns. These are
155  // stored in the state as TaintArgsOnPostVisit set.
156  llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
157  for (llvm::ImmutableSet<unsigned>::iterator
158         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
159    unsigned ArgNum  = *I;
160
161    // Special handling for the tainted return value.
162    if (ArgNum == ReturnValueIndex) {
163      State = State->addTaint(CE, C.getLocationContext());
164      continue;
165    }
166
167    // The arguments are pointer arguments. The data they are pointing at is
168    // tainted after the call.
169    const Expr* Arg = CE->getArg(ArgNum);
170    SymbolRef Sym = getPointedToSymbol(C, Arg, true);
171    if (Sym)
172      State = State->addTaint(Sym);
173  }
174
175  // Clear up the taint info from the state.
176  State = State->remove<TaintArgsOnPostVisit>();
177
178  if (State != C.getState()) {
179    C.addTransition(State);
180    return true;
181  }
182  return false;
183}
184
185void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
186                                         CheckerContext &C) const {
187  // Define the attack surface.
188  // Set the evaluation function by switching on the callee name.
189  StringRef Name = C.getCalleeName(CE);
190  if (Name.empty())
191    return;
192  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
193    .Case("scanf", &GenericTaintChecker::postScanf)
194    // TODO: Add support for vfscanf & family.
195    .Case("getchar", &GenericTaintChecker::postRetTaint)
196    .Case("getenv", &GenericTaintChecker::postRetTaint)
197    .Case("fopen", &GenericTaintChecker::postRetTaint)
198    .Case("fdopen", &GenericTaintChecker::postRetTaint)
199    .Case("freopen", &GenericTaintChecker::postRetTaint)
200    .Default(0);
201
202  // If the callee isn't defined, it is not of security concern.
203  // Check and evaluate the call.
204  const ProgramState *State = 0;
205  if (evalFunction)
206    State = (this->*evalFunction)(CE, C);
207  if (!State)
208    return;
209
210  C.addTransition(State);
211}
212
213bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
214
215  if (checkUncontrolledFormatString(CE, C))
216    return true;
217
218  return false;
219}
220
221SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
222                                                  const Expr* Arg,
223                                                  bool IssueWarning) const {
224  const ProgramState *State = C.getState();
225  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
226  if (AddrVal.isUnknownOrUndef())
227    return 0;
228
229  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
230
231  if (!AddrLoc && !IssueWarning)
232    return 0;
233
234  // If the Expr is not a location, issue a warning.
235  if (!AddrLoc) {
236    assert(IssueWarning);
237    if (ExplodedNode *N = C.generateSink(State)) {
238      initBugType();
239      BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
240      report->addRange(Arg->getSourceRange());
241      C.EmitReport(report);
242    }
243    return 0;
244  }
245
246  SVal Val = State->getSVal(*AddrLoc);
247  return Val.getAsSymbol();
248}
249
250// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
251// and arg 1 should get taint.
252const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
253                                                   CheckerContext &C) const {
254  assert(CE->getNumArgs() >= 2);
255  const ProgramState *State = C.getState();
256
257  // Check is the file descriptor is tainted.
258  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
259      isStdin(CE->getArg(0), C)) {
260    // All arguments except for the first two should get taint.
261    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
262        State = State->add<TaintArgsOnPostVisit>(i);
263    return State;
264  }
265
266  return 0;
267}
268
269// If any other arguments are tainted, mark state as tainted on pre-visit.
270const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
271                                                     CheckerContext &C) const {
272  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
273    const ProgramState *State = C.getState();
274    const Expr *Arg = CE->getArg(i);
275    if (State->isTainted(Arg, C.getLocationContext()) ||
276        State->isTainted(getPointedToSymbol(C, Arg)))
277      return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
278  }
279  return 0;
280}
281
282const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
283                                                    CheckerContext &C) const {
284  assert(CE->getNumArgs() >= 2);
285  const Expr *FromArg = CE->getArg(1);
286  const ProgramState *State = C.getState();
287  if (State->isTainted(FromArg, C.getLocationContext()) ||
288      State->isTainted(getPointedToSymbol(C, FromArg)))
289    return State = State->add<TaintArgsOnPostVisit>(0);
290  return 0;
291}
292
293const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
294                                                   CheckerContext &C) const {
295  const ProgramState *State = C.getState();
296  assert(CE->getNumArgs() >= 2);
297  SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
298  // All arguments except for the very first one should get taint.
299  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
300    // The arguments are pointer arguments. The data they are pointing at is
301    // tainted after the call.
302    const Expr* Arg = CE->getArg(i);
303        SymbolRef Sym = getPointedToSymbol(C, Arg, true);
304    if (Sym)
305      State = State->addTaint(Sym);
306  }
307  return State;
308}
309
310const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
311                                                      CheckerContext &C) const {
312  return C.getState()->addTaint(CE, C.getLocationContext());
313}
314
315bool GenericTaintChecker::isStdin(const Expr *E,
316                                  CheckerContext &C) const {
317  const ProgramState *State = C.getState();
318  SVal Val = State->getSVal(E, C.getLocationContext());
319
320  // stdin is a pointer, so it would be a region.
321  const MemRegion *MemReg = Val.getAsRegion();
322
323  // The region should be symbolic, we do not know it's value.
324  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
325  if (!SymReg)
326    return false;
327
328  // Get it's symbol and find the declaration region it's pointing to.
329  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
330  if (!Sm)
331    return false;
332  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
333  if (!DeclReg)
334    return false;
335
336  // This region corresponds to a declaration, find out if it's a global/extern
337  // variable named stdin with the proper type.
338  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
339    D = D->getCanonicalDecl();
340    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
341        if (const PointerType * PtrTy =
342              dyn_cast<PointerType>(D->getType().getTypePtr()))
343          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
344            return true;
345  }
346  return false;
347}
348
349static bool getPrintfFormatArgumentNum(const CallExpr *CE,
350                                       const CheckerContext &C,
351                                       unsigned int &ArgNum) {
352  // Find if the function contains a format string argument.
353  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
354  // vsnprintf, syslog, custom annotated functions.
355  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
356  if (!FDecl)
357    return false;
358  for (specific_attr_iterator<FormatAttr>
359         i = FDecl->specific_attr_begin<FormatAttr>(),
360         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
361
362    const FormatAttr *Format = *i;
363    ArgNum = Format->getFormatIdx() - 1;
364    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
365      return true;
366  }
367
368  // Or if a function is named setproctitle (this is a heuristic).
369  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
370    ArgNum = 0;
371    return true;
372  }
373
374  return false;
375}
376
377bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
378                                                        CheckerContext &C) const{
379  // Check if the function contains a format string argument.
380  unsigned int ArgNum = 0;
381  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
382    return false;
383
384  // If either the format string content or the pointer itself are tainted, warn.
385  const ProgramState *State = C.getState();
386  const Expr *Arg = CE->getArg(ArgNum);
387  if (State->isTainted(getPointedToSymbol(C, Arg)) ||
388      State->isTainted(Arg, C.getLocationContext()))
389    if (ExplodedNode *N = C.addTransition()) {
390      initBugType();
391      BugReport *report = new BugReport(*BT,
392        "Tainted format string (CWE-134: Uncontrolled Format String)", N);
393      report->addRange(Arg->getSourceRange());
394      C.EmitReport(report);
395      return true;
396    }
397  return false;
398}
399
400void ento::registerGenericTaintChecker(CheckerManager &mgr) {
401  mgr.registerChecker<GenericTaintChecker>();
402}
403