// GenericTaintChecker.cpp revision 71d29095d27e94b00083259c06a45f5294501697
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/StaticAnalyzer/Core/Checker.h" 19#include "clang/StaticAnalyzer/Core/CheckerManager.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23#include <climits> 24 25using namespace clang; 26using namespace ento; 27 28namespace { 29class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 30 check::PreStmt<CallExpr> > { 31public: 32 static const unsigned ReturnValueIndex = UINT_MAX; 33 34private: 35 mutable llvm::OwningPtr<BugType> BT; 36 void initBugType() const; 37 38 /// \brief Catch taint related bugs. Check if tainted data is passed to a 39 /// system call etc. 40 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 41 42 /// \brief Add taint sources on a pre-visit. 43 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 44 45 /// \brief Propagate taint generated at pre-visit. 46 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 47 48 /// \brief Add taint sources on a post visit. 
49 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 50 51 /// \brief Given a pointer argument, get the symbol of the value it contains 52 /// (points to). 53 SymbolRef getPointedToSymbol(CheckerContext &C, 54 const Expr *Arg, 55 bool IssueWarning = false) const; 56 57 /// Functions defining the attack surface. 58 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *, 59 CheckerContext &C) const; 60 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const; 61 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const; 62 63 /// Taint the scanned input if the file is tainted. 64 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const; 65 /// Taint if any of the arguments are tainted. 66 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const; 67 const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const; 68 69 /// Check if the region the expression evaluates to is the standard input, 70 /// and thus, is tainted. 71 bool isStdin(const Expr *E, CheckerContext &C) const; 72 73 /// Check for CWE-134: Uncontrolled Format String. 74 bool checkUncontrolledFormatString(const CallExpr *CE, 75 CheckerContext &C) const; 76 77public: 78 static void *getTag() { static int Tag; return &Tag; } 79 80 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 81 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 82 83 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 84 85}; 86} 87 88/// A set which is used to pass information from call pre-visit instruction 89/// to the call post-visit. The values are unsigned integers, which are either 90/// ReturnValueIndex, or indexes of the pointer/reference argument, which 91/// points to data, which should be tainted on return. 
92namespace { struct TaintArgsOnPostVisit{}; } 93namespace clang { namespace ento { 94template<> struct ProgramStateTrait<TaintArgsOnPostVisit> 95 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { 96 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 97}; 98}} 99 100inline void GenericTaintChecker::initBugType() const { 101 if (!BT) 102 BT.reset(new BugType("Taint Analysis", "General")); 103} 104 105void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 106 CheckerContext &C) const { 107 // Check for errors first. 108 if (checkPre(CE, C)) 109 return; 110 111 // Add taint second. 112 addSourcesPre(CE, C); 113} 114 115void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 116 CheckerContext &C) const { 117 if (propagateFromPre(CE, C)) 118 return; 119 addSourcesPost(CE, C); 120} 121 122void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 123 CheckerContext &C) const { 124 // Set the evaluation function by switching on the callee name. 125 StringRef Name = C.getCalleeName(CE); 126 if (Name.empty()) 127 return; 128 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 129 .Case("atoi", &GenericTaintChecker::preAnyArgs) 130 .Case("atol", &GenericTaintChecker::preAnyArgs) 131 .Case("atoll", &GenericTaintChecker::preAnyArgs) 132 .Case("fscanf", &GenericTaintChecker::preFscanf) 133 .Cases("strcpy", "__builtin___strcpy_chk", 134 "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy) 135 .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy) 136 .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy) 137 .Default(0); 138 139 // Check and evaluate the call. 
140 const ProgramState *State = 0; 141 if (evalFunction) 142 State = (this->*evalFunction)(CE, C); 143 if (!State) 144 return; 145 146 C.addTransition(State); 147} 148 149bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 150 CheckerContext &C) const { 151 const ProgramState *State = C.getState(); 152 153 // Depending on what was tainted at pre-visit, we determined a set of 154 // arguments which should be tainted after the function returns. These are 155 // stored in the state as TaintArgsOnPostVisit set. 156 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); 157 for (llvm::ImmutableSet<unsigned>::iterator 158 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 159 unsigned ArgNum = *I; 160 161 // Special handling for the tainted return value. 162 if (ArgNum == ReturnValueIndex) { 163 State = State->addTaint(CE, C.getLocationContext()); 164 continue; 165 } 166 167 // The arguments are pointer arguments. The data they are pointing at is 168 // tainted after the call. 169 const Expr* Arg = CE->getArg(ArgNum); 170 SymbolRef Sym = getPointedToSymbol(C, Arg, true); 171 if (Sym) 172 State = State->addTaint(Sym); 173 } 174 175 // Clear up the taint info from the state. 176 State = State->remove<TaintArgsOnPostVisit>(); 177 178 if (State != C.getState()) { 179 C.addTransition(State); 180 return true; 181 } 182 return false; 183} 184 185void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 186 CheckerContext &C) const { 187 // Define the attack surface. 188 // Set the evaluation function by switching on the callee name. 189 StringRef Name = C.getCalleeName(CE); 190 if (Name.empty()) 191 return; 192 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 193 .Case("scanf", &GenericTaintChecker::postScanf) 194 // TODO: Add support for vfscanf & family. 
195 .Case("getchar", &GenericTaintChecker::postRetTaint) 196 .Case("getenv", &GenericTaintChecker::postRetTaint) 197 .Case("fopen", &GenericTaintChecker::postRetTaint) 198 .Case("fdopen", &GenericTaintChecker::postRetTaint) 199 .Case("freopen", &GenericTaintChecker::postRetTaint) 200 .Default(0); 201 202 // If the callee isn't defined, it is not of security concern. 203 // Check and evaluate the call. 204 const ProgramState *State = 0; 205 if (evalFunction) 206 State = (this->*evalFunction)(CE, C); 207 if (!State) 208 return; 209 210 C.addTransition(State); 211} 212 213bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 214 215 if (checkUncontrolledFormatString(CE, C)) 216 return true; 217 218 return false; 219} 220 221SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 222 const Expr* Arg, 223 bool IssueWarning) const { 224 const ProgramState *State = C.getState(); 225 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 226 if (AddrVal.isUnknownOrUndef()) 227 return 0; 228 229 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 230 231 if (!AddrLoc && !IssueWarning) 232 return 0; 233 234 // If the Expr is not a location, issue a warning. 235 if (!AddrLoc) { 236 assert(IssueWarning); 237 if (ExplodedNode *N = C.generateSink(State)) { 238 initBugType(); 239 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N); 240 report->addRange(Arg->getSourceRange()); 241 C.EmitReport(report); 242 } 243 return 0; 244 } 245 246 const PointerType *ArgTy = 247 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 248 assert(ArgTy); 249 SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType()); 250 return Val.getAsSymbol(); 251} 252 253// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 254// and arg 1 should get taint. 
255const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE, 256 CheckerContext &C) const { 257 assert(CE->getNumArgs() >= 2); 258 const ProgramState *State = C.getState(); 259 260 // Check is the file descriptor is tainted. 261 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 262 isStdin(CE->getArg(0), C)) { 263 // All arguments except for the first two should get taint. 264 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 265 State = State->add<TaintArgsOnPostVisit>(i); 266 return State; 267 } 268 269 return 0; 270} 271 272// If any other arguments are tainted, mark state as tainted on pre-visit. 273const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE, 274 CheckerContext &C) const { 275 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 276 const ProgramState *State = C.getState(); 277 const Expr *Arg = CE->getArg(i); 278 if (State->isTainted(Arg, C.getLocationContext()) || 279 State->isTainted(getPointedToSymbol(C, Arg))) 280 return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 281 } 282 return 0; 283} 284 285const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE, 286 CheckerContext &C) const { 287 assert(CE->getNumArgs() >= 2); 288 const Expr *FromArg = CE->getArg(1); 289 const ProgramState *State = C.getState(); 290 if (State->isTainted(FromArg, C.getLocationContext()) || 291 State->isTainted(getPointedToSymbol(C, FromArg))) 292 return State = State->add<TaintArgsOnPostVisit>(0); 293 return 0; 294} 295 296const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE, 297 CheckerContext &C) const { 298 const ProgramState *State = C.getState(); 299 assert(CE->getNumArgs() >= 2); 300 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); 301 // All arguments except for the very first one should get taint. 302 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 303 // The arguments are pointer arguments. 
The data they are pointing at is 304 // tainted after the call. 305 const Expr* Arg = CE->getArg(i); 306 SymbolRef Sym = getPointedToSymbol(C, Arg, true); 307 if (Sym) 308 State = State->addTaint(Sym); 309 } 310 return State; 311} 312 313const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE, 314 CheckerContext &C) const { 315 return C.getState()->addTaint(CE, C.getLocationContext()); 316} 317 318bool GenericTaintChecker::isStdin(const Expr *E, 319 CheckerContext &C) const { 320 const ProgramState *State = C.getState(); 321 SVal Val = State->getSVal(E, C.getLocationContext()); 322 323 // stdin is a pointer, so it would be a region. 324 const MemRegion *MemReg = Val.getAsRegion(); 325 326 // The region should be symbolic, we do not know it's value. 327 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 328 if (!SymReg) 329 return false; 330 331 // Get it's symbol and find the declaration region it's pointing to. 332 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 333 if (!Sm) 334 return false; 335 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 336 if (!DeclReg) 337 return false; 338 339 // This region corresponds to a declaration, find out if it's a global/extern 340 // variable named stdin with the proper type. 341 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 342 D = D->getCanonicalDecl(); 343 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 344 if (const PointerType * PtrTy = 345 dyn_cast<PointerType>(D->getType().getTypePtr())) 346 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 347 return true; 348 } 349 return false; 350} 351 352static bool getPrintfFormatArgumentNum(const CallExpr *CE, 353 const CheckerContext &C, 354 unsigned int &ArgNum) { 355 // Find if the function contains a format string argument. 
356 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 357 // vsnprintf, syslog, custom annotated functions. 358 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 359 if (!FDecl) 360 return false; 361 for (specific_attr_iterator<FormatAttr> 362 i = FDecl->specific_attr_begin<FormatAttr>(), 363 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 364 365 const FormatAttr *Format = *i; 366 ArgNum = Format->getFormatIdx() - 1; 367 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 368 return true; 369 } 370 371 // Or if a function is named setproctitle (this is a heuristic). 372 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 373 ArgNum = 0; 374 return true; 375 } 376 377 return false; 378} 379 380bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 381 CheckerContext &C) const{ 382 // Check if the function contains a format string argument. 383 unsigned int ArgNum = 0; 384 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 385 return false; 386 387 // If either the format string content or the pointer itself are tainted, warn. 388 const ProgramState *State = C.getState(); 389 const Expr *Arg = CE->getArg(ArgNum); 390 if (State->isTainted(getPointedToSymbol(C, Arg)) || 391 State->isTainted(Arg, C.getLocationContext())) 392 if (ExplodedNode *N = C.addTransition()) { 393 initBugType(); 394 BugReport *report = new BugReport(*BT, 395 "Tainted format string (CWE-134: Uncontrolled Format String)", N); 396 report->addRange(Arg->getSourceRange()); 397 C.EmitReport(report); 398 return true; 399 } 400 return false; 401} 402 403void ento::registerGenericTaintChecker(CheckerManager &mgr) { 404 mgr.registerChecker<GenericTaintChecker>(); 405} 406