GenericTaintChecker.cpp revision 9f03b62036a7abc0a227b17f4a49b9eefced9450
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
14// aggressively, even if the involved symbols are under constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23
24using namespace clang;
25using namespace ento;
26
27namespace {
28class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
29                                            check::PreStmt<CallExpr> > {
30public:
31  enum TaintOnPreVisitKind {
32    /// No taint propagates from pre-visit to post-visit.
33    PrevisitNone = 0,
34    /// Based on the pre-visit, the return argument of the call
35    /// should be tainted.
36    PrevisitTaintRet = 1,
37    /// Based on the pre-visit, the call can taint values through it's
38    /// pointer/reference arguments.
39    PrevisitTaintArgs = 2
40  };
41
42private:
43  mutable llvm::OwningPtr<BugType> BT;
44  void initBugType() const;
45
46  /// Add/propagate taint on a post visit.
47  void taintPost(const CallExpr *CE, CheckerContext &C) const;
48  /// Add/propagate taint on a pre visit.
49  void taintPre(const CallExpr *CE, CheckerContext &C) const;
50
51  /// Catch taint related bugs. Check if tainted data is passed to a system
52  /// call etc.
53  bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
55  /// Given a pointer argument, get the symbol of the value it contains
56  /// (points to).
57  SymbolRef getPointedToSymbol(CheckerContext &C,
58                               const Expr *Arg,
59                               bool IssueWarning = true) const;
60
61  /// Functions defining the attack surface.
62  typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
63                                                       CheckerContext &C) const;
64  const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
65  const ProgramState *postFscanf(const CallExpr *CE, CheckerContext &C) const;
66  const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
67  const ProgramState *postDefault(const CallExpr *CE, CheckerContext &C) const;
68
69  /// Taint the scanned input if the file is tainted.
70  const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
71  /// Taint if any of the arguments are tainted.
72  const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
73
74  /// Check if the region the expression evaluates to is the standard input,
75  /// and thus, is tainted.
76  bool isStdin(const Expr *E, CheckerContext &C) const;
77
78  /// Check for CWE-134: Uncontrolled Format String.
79  bool checkUncontrolledFormatString(const CallExpr *CE,
80                                     CheckerContext &C) const;
81
82public:
83  static void *getTag() { static int Tag; return &Tag; }
84
85  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
86  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
87
88  void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
89
90};
91}
92
93/// Definitions for the checker specific state.
94namespace { struct TaintOnPreVisit {};}
95namespace clang {
96namespace ento {
97  /// A flag which is used to pass information from call pre-visit instruction
98  /// to the call post-visit. The value is an unsigned, which takes on values
99  /// of the TaintOnPreVisitKind enumeration.
100  template<>
101  struct ProgramStateTrait<TaintOnPreVisit> :
102    public ProgramStatePartialTrait<unsigned> {
103    static void *GDMIndex() { return GenericTaintChecker::getTag(); }
104  };
105}
106}
107
108inline void GenericTaintChecker::initBugType() const {
109  if (!BT)
110    BT.reset(new BugType("Taint Analysis", "General"));
111}
112
113void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
114                                       CheckerContext &C) const {
115  // Check for errors first.
116  if (checkPre(CE, C))
117    return;
118
119  // Add taint second.
120  taintPre(CE, C);
121}
122
123void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
124                                        CheckerContext &C) const {
125  taintPost(CE, C);
126}
127
128void GenericTaintChecker::taintPre(const CallExpr *CE,
129                                   CheckerContext &C) const {
130  // Set the evaluation function by switching on the callee name.
131  StringRef Name = C.getCalleeName(CE);
132  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
133    .Case("fscanf", &GenericTaintChecker::preFscanf)
134    .Case("atoi", &GenericTaintChecker::preAnyArgs)
135    .Case("atol", &GenericTaintChecker::preAnyArgs)
136    .Case("atoll", &GenericTaintChecker::preAnyArgs)
137    .Default(0);
138
139  // Check and evaluate the call.
140  const ProgramState *State = 0;
141  if (evalFunction)
142    State = (this->*evalFunction)(CE, C);
143  if (!State)
144    return;
145
146  C.addTransition(State);
147}
148
149void GenericTaintChecker::taintPost(const CallExpr *CE,
150                                    CheckerContext &C) const {
151  // Define the attack surface.
152  // Set the evaluation function by switching on the callee name.
153  StringRef Name = C.getCalleeName(CE);
154  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
155    .Case("scanf", &GenericTaintChecker::postScanf)
156    .Case("fscanf", &GenericTaintChecker::postFscanf)
157    .Case("sscanf", &GenericTaintChecker::postFscanf)
158    // TODO: Add support for vfscanf & family.
159    .Case("getchar", &GenericTaintChecker::postRetTaint)
160    .Case("getenv", &GenericTaintChecker::postRetTaint)
161    .Case("fopen", &GenericTaintChecker::postRetTaint)
162    .Case("fdopen", &GenericTaintChecker::postRetTaint)
163    .Case("freopen", &GenericTaintChecker::postRetTaint)
164    .Default(&GenericTaintChecker::postDefault);
165
166  // If the callee isn't defined, it is not of security concern.
167  // Check and evaluate the call.
168  const ProgramState *State = 0;
169  if (evalFunction)
170    State = (this->*evalFunction)(CE, C);
171  if (!State)
172    return;
173
174  assert(State->get<TaintOnPreVisit>() == PrevisitNone &&
175         "State has to be cleared.");
176  C.addTransition(State);
177}
178
179bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
180
181  if (checkUncontrolledFormatString(CE, C))
182    return true;
183
184  StringRef Name = C.getCalleeName(CE);
185  return false;
186}
187
188SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
189                                                  const Expr* Arg,
190                                                  bool IssueWarning) const {
191  const ProgramState *State = C.getState();
192  SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
193  if (AddrVal.isUnknownOrUndef())
194    return 0;
195
196  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
197
198  if (!AddrLoc && !IssueWarning)
199    return 0;
200
201  // If the Expr is not a location, issue a warning.
202  if (!AddrLoc) {
203    assert(IssueWarning);
204    if (ExplodedNode *N = C.generateSink(State)) {
205      initBugType();
206      BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
207      report->addRange(Arg->getSourceRange());
208      C.EmitReport(report);
209    }
210    return 0;
211  }
212
213  SVal Val = State->getSVal(*AddrLoc);
214  return Val.getAsSymbol();
215}
216
217const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
218                                                   CheckerContext &C) const {
219  assert(CE->getNumArgs() >= 2);
220  const ProgramState *State = C.getState();
221
222  // Check is the file descriptor is tainted.
223  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
224      isStdin(CE->getArg(0), C))
225    return State->set<TaintOnPreVisit>(PrevisitTaintArgs);
226  return 0;
227}
228
229// If any other arguments are tainted, mark state as tainted on pre-visit.
230const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
231                                                     CheckerContext &C) const {
232  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
233    const ProgramState *State = C.getState();
234    const Expr *Arg = CE->getArg(i);
235    if (State->isTainted(Arg, C.getLocationContext()) ||
236        State->isTainted(getPointedToSymbol(C, Arg)))
237      return State = State->set<TaintOnPreVisit>(PrevisitTaintRet);
238  }
239  return 0;
240}
241
242const ProgramState *GenericTaintChecker::postDefault(const CallExpr *CE,
243                                                     CheckerContext &C) const {
244  const ProgramState *State = C.getState();
245
246  // Check if we know that the result needs to be tainted based on the
247  // pre-visit analysis.
248  if (State->get<TaintOnPreVisit>() == PrevisitTaintRet) {
249    State = State->addTaint(CE, C.getLocationContext());
250    return State->set<TaintOnPreVisit>(PrevisitNone);
251  }
252
253  return 0;
254}
255
256const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
257                                                   CheckerContext &C) const {
258  const ProgramState *State = C.getState();
259  assert(CE->getNumArgs() >= 2);
260  SVal x = State->getSVal(CE->getArg(1), C.getLocationContext());
261  // All arguments except for the very first one should get taint.
262  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
263    // The arguments are pointer arguments. The data they are pointing at is
264    // tainted after the call.
265    const Expr* Arg = CE->getArg(i);
266        SymbolRef Sym = getPointedToSymbol(C, Arg);
267    if (Sym)
268      State = State->addTaint(Sym);
269  }
270  return State;
271}
272
273/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
274/// and arg 1 should get taint.
275const ProgramState *GenericTaintChecker::postFscanf(const CallExpr *CE,
276                                                    CheckerContext &C) const {
277  const ProgramState *State = C.getState();
278  assert(CE->getNumArgs() >= 2);
279
280  // Fscanf is only tainted if the input file is tainted at pre visit, so
281  // check for that first.
282  if (State->get<TaintOnPreVisit>() == PrevisitNone)
283    return 0;
284
285  // Reset the taint state.
286  State = State->set<TaintOnPreVisit>(PrevisitNone);
287
288  // All arguments except for the first two should get taint.
289  for (unsigned int i = 2; i < CE->getNumArgs(); ++i) {
290    // The arguments are pointer arguments. The data they are pointing at is
291    // tainted after the call.
292    const Expr* Arg = CE->getArg(i);
293    SymbolRef Sym = getPointedToSymbol(C, Arg);
294    if (Sym)
295      State = State->addTaint(Sym);
296  }
297  return State;
298}
299
300const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE,
301                                                      CheckerContext &C) const {
302  return C.getState()->addTaint(CE, C.getLocationContext());
303}
304
305bool GenericTaintChecker::isStdin(const Expr *E,
306                                  CheckerContext &C) const {
307  const ProgramState *State = C.getState();
308  SVal Val = State->getSVal(E, C.getLocationContext());
309
310  // stdin is a pointer, so it would be a region.
311  const MemRegion *MemReg = Val.getAsRegion();
312
313  // The region should be symbolic, we do not know it's value.
314  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
315  if (!SymReg)
316    return false;
317
318  // Get it's symbol and find the declaration region it's pointing to.
319  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
320  if (!Sm)
321    return false;
322  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
323  if (!DeclReg)
324    return false;
325
326  // This region corresponds to a declaration, find out if it's a global/extern
327  // variable named stdin with the proper type.
328  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
329    D = D->getCanonicalDecl();
330    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
331        if (const PointerType * PtrTy =
332              dyn_cast<PointerType>(D->getType().getTypePtr()))
333          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
334            return true;
335  }
336  return false;
337}
338
339static bool getPrintfFormatArgumentNum(const CallExpr *CE,
340                                       const CheckerContext &C,
341                                       unsigned int &ArgNum) {
342  // Find if the function contains a format string argument.
343  // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
344  // vsnprintf, syslog, custom annotated functions.
345  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
346  if (!FDecl)
347    return false;
348  for (specific_attr_iterator<FormatAttr>
349         i = FDecl->specific_attr_begin<FormatAttr>(),
350         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
351
352    const FormatAttr *Format = *i;
353    ArgNum = Format->getFormatIdx() - 1;
354    if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
355      return true;
356  }
357
358  // Or if a function is named setproctitle (this is a heuristic).
359  if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
360    ArgNum = 0;
361    return true;
362  }
363
364  return false;
365}
366
367bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
368                                                        CheckerContext &C) const{
369  // Check if the function contains a format string argument.
370  unsigned int ArgNum = 0;
371  if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
372    return false;
373
374  // If either the format string content or the pointer itself are tainted, warn.
375  const ProgramState *State = C.getState();
376  const Expr *Arg = CE->getArg(ArgNum);
377  if (State->isTainted(getPointedToSymbol(C, Arg, false)) ||
378      State->isTainted(Arg, C.getLocationContext()))
379    if (ExplodedNode *N = C.addTransition()) {
380      initBugType();
381      BugReport *report = new BugReport(*BT,
382        "Tainted format string (CWE-134: Uncontrolled Format String)", N);
383      report->addRange(Arg->getSourceRange());
384      C.EmitReport(report);
385      return true;
386    }
387  return false;
388}
389
390void ento::registerGenericTaintChecker(CheckerManager &mgr) {
391  mgr.registerChecker<GenericTaintChecker>();
392}
393