GenericTaintChecker.cpp revision d3d8548e75f3fb6db53ed0927c1df30d78f4ce1d
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This checker defines the attack surface for generic taint propagation.
11//
12// The taint information produced by it might be useful to other checkers. For
13// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under-constrained.
15//
16//===----------------------------------------------------------------------===//
17#include "ClangSACheckers.h"
18#include "clang/StaticAnalyzer/Core/Checker.h"
19#include "clang/StaticAnalyzer/Core/CheckerManager.h"
20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
22
23using namespace clang;
24using namespace ento;
25
26namespace {
27class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
28                                            check::PostStmt<DeclRefExpr> > {
29
30  mutable llvm::OwningPtr<BugType> BT;
31  void initBugType() const;
32
33  /// Given a pointer argument, get the symbol of the value it contains
34  /// (points to).
35  SymbolRef getPointedToSymbol(CheckerContext &C,
36                               const Expr* Arg,
37                               bool IssueWarning = true) const;
38
39  /// Functions defining the attacke surface.
40  typedef void (GenericTaintChecker::*FnCheck)(const CallExpr *,
41                                               CheckerContext &C) const;
42  void processScanf(const CallExpr *CE, CheckerContext &C) const;
43  void processFscanf(const CallExpr *CE, CheckerContext &C) const;
44  void processRetTaint(const CallExpr *CE, CheckerContext &C) const;
45
46  /// Check if the region the expression evaluates to is the standard input,
47  /// and thus, is tainted.
48  bool isStdin(const Expr *E, CheckerContext &C) const;
49
50public:
51  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
52  void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
53};
54}
55
56inline void GenericTaintChecker::initBugType() const {
57  if (!BT)
58    BT.reset(new BugType("Tainted data checking", "General"));
59}
60
61void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
62                                        CheckerContext &C) const {
63  if (!C.getState())
64    return;
65
66  StringRef Name = C.getCalleeName(CE);
67
68  // Define the attack surface.
69  // Set the evaluation function by switching on the callee name.
70  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
71    .Case("scanf", &GenericTaintChecker::processScanf)
72    .Case("fscanf", &GenericTaintChecker::processFscanf)
73    .Case("sscanf", &GenericTaintChecker::processFscanf)
74    // TODO: Add support for vfscanf & family.
75    .Case("getchar", &GenericTaintChecker::processRetTaint)
76    .Case("getenv", &GenericTaintChecker::processRetTaint)
77    .Case("fopen", &GenericTaintChecker::processRetTaint)
78    .Case("fdopen", &GenericTaintChecker::processRetTaint)
79    .Case("freopen", &GenericTaintChecker::processRetTaint)
80    .Default(NULL);
81
82  // If the callee isn't defined, it is not of security concern.
83  // Check and evaluate the call.
84  if (evalFunction)
85    (this->*evalFunction)(CE, C);
86}
87
88void GenericTaintChecker::checkPostStmt(const DeclRefExpr *DRE,
89                                       CheckerContext &C) const {
90  if (isStdin(DRE, C)) {
91    const ProgramState *NewState = C.getState()->addTaint(DRE);
92    C.addTransition(NewState);
93  }
94}
95
96SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
97                                                  const Expr* Arg,
98                                                  bool IssueWarning) const {
99  const ProgramState *State = C.getState();
100  SVal AddrVal = State->getSVal(Arg->IgnoreParens());
101  if (AddrVal.isUnknownOrUndef())
102    return 0;
103
104  Loc *AddrLoc = dyn_cast<Loc>(&AddrVal);
105
106  if (!AddrLoc && !IssueWarning)
107    return 0;
108
109  // If the Expr is not a location, issue a warning.
110  if (!AddrLoc) {
111    assert(IssueWarning);
112    if (ExplodedNode *N = C.generateSink(State)) {
113      initBugType();
114      BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N);
115      report->addRange(Arg->getSourceRange());
116      C.EmitReport(report);
117    }
118    return 0;
119  }
120
121  SVal Val = State->getSVal(*AddrLoc);
122  return Val.getAsSymbol();
123}
124
125void GenericTaintChecker::processScanf(const CallExpr *CE,
126                                       CheckerContext &C) const {
127  const ProgramState *State = C.getState();
128  assert(CE->getNumArgs() >= 2);
129  SVal x = State->getSVal(CE->getArg(1));
130  // All arguments except for the very first one should get taint.
131  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
132    // The arguments are pointer arguments. The data they are pointing at is
133    // tainted after the call.
134    const Expr* Arg = CE->getArg(i);
135    SymbolRef Sym = getPointedToSymbol(C, Arg);
136    if (Sym)
137      State = State->addTaint(Sym);
138  }
139  C.addTransition(State);
140}
141
142/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
143/// and arg 1 should get taint.
144void GenericTaintChecker::processFscanf(const CallExpr *CE,
145                                        CheckerContext &C) const {
146  const ProgramState *State = C.getState();
147  assert(CE->getNumArgs() >= 2);
148
149  // Check is the file descriptor is tainted.
150  if (!State->isTainted(CE->getArg(0)) && !isStdin(CE->getArg(0), C))
151    return;
152
153  // All arguments except for the first two should get taint.
154  for (unsigned int i = 2; i < CE->getNumArgs(); ++i) {
155    // The arguments are pointer arguments. The data they are pointing at is
156    // tainted after the call.
157    const Expr* Arg = CE->getArg(i);
158    SymbolRef Sym = getPointedToSymbol(C, Arg);
159    if (Sym)
160      State = State->addTaint(Sym);
161  }
162  C.addTransition(State);
163}
164
165void GenericTaintChecker::processRetTaint(const CallExpr *CE,
166                                          CheckerContext &C) const {
167  const ProgramState *NewState = C.getState()->addTaint(CE);
168  C.addTransition(NewState);
169}
170
171bool GenericTaintChecker::isStdin(const Expr *E,
172                                  CheckerContext &C) const {
173  const ProgramState *State = C.getState();
174  SVal Val = State->getSVal(E);
175
176  // stdin is a pointer, so it would be a region.
177  const MemRegion *MemReg = Val.getAsRegion();
178
179  // The region should be symbolic, we do not know it's value.
180  const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
181  if (!SymReg)
182    return false;
183
184  // Get it's symbol and find the declaration region it's pointing to.
185  const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
186  if (!Sm)
187    return false;
188  const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
189  if (!DeclReg)
190    return false;
191
192  // This region corresponds to a declaration, find out if it's a global/extern
193  // variable named stdin with the proper type.
194  if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
195    D = D->getCanonicalDecl();
196    if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
197        if (const PointerType * PtrTy =
198              dyn_cast<PointerType>(D->getType().getTypePtr()))
199          if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
200            return true;
201  }
202  return false;
203}
204
205void ento::registerGenericTaintChecker(CheckerManager &mgr) {
206  mgr.registerChecker<GenericTaintChecker>();
207}
208