GenericTaintChecker.cpp revision 1fb826a6fd893234f32b0b91bb92ea4d127788ad
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/StaticAnalyzer/Core/Checker.h" 19#include "clang/StaticAnalyzer/Core/CheckerManager.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23#include <climits> 24 25using namespace clang; 26using namespace ento; 27 28namespace { 29class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 30 check::PreStmt<CallExpr> > { 31public: 32 static const unsigned ReturnValueIndex = UINT_MAX; 33 34private: 35 mutable llvm::OwningPtr<BugType> BT; 36 void initBugType() const; 37 38 /// \brief Catch taint related bugs. Check if tainted data is passed to a 39 /// system call etc. 40 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 41 42 /// \brief Add taint sources on a pre-visit. 43 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 44 45 /// \brief Propagate taint generated at pre-visit. 46 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 47 48 /// \brief Add taint sources on a post visit. 49 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 50 51 /// \brief Given a pointer argument, get the symbol of the value it contains 52 /// (points to). 53 SymbolRef getPointedToSymbol(CheckerContext &C, 54 const Expr *Arg, 55 bool IssueWarning = false) const; 56 57 /// Functions defining the attack surface. 58 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *, 59 CheckerContext &C) const; 60 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const; 61 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const; 62 63 /// Taint the scanned input if the file is tainted. 64 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const; 65 /// Taint if any of the arguments are tainted. 66 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const; 67 const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const; 68 69 /// Check if the region the expression evaluates to is the standard input, 70 /// and thus, is tainted. 71 bool isStdin(const Expr *E, CheckerContext &C) const; 72 73 /// Check for CWE-134: Uncontrolled Format String. 74 bool checkUncontrolledFormatString(const CallExpr *CE, 75 CheckerContext &C) const; 76 77public: 78 static void *getTag() { static int Tag; return &Tag; } 79 80 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 81 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 82 83 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 84 85}; 86} 87 88/// A set which is used to pass information from call pre-visit instruction 89/// to the call post-visit. The values are unsigned integers, which are either 90/// ReturnValueIndex, or indexes of the pointer/reference argument, which 91/// points to data, which should be tainted on return. 92namespace { struct TaintArgsOnPostVisit{}; } 93namespace clang { namespace ento { 94template<> struct ProgramStateTrait<TaintArgsOnPostVisit> 95 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { 96 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 97}; 98}} 99 100inline void GenericTaintChecker::initBugType() const { 101 if (!BT) 102 BT.reset(new BugType("Taint Analysis", "General")); 103} 104 105void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 106 CheckerContext &C) const { 107 // Check for errors first. 108 if (checkPre(CE, C)) 109 return; 110 111 // Add taint second. 112 addSourcesPre(CE, C); 113} 114 115void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 116 CheckerContext &C) const { 117 if (propagateFromPre(CE, C)) 118 return; 119 addSourcesPost(CE, C); 120} 121 122void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 123 CheckerContext &C) const { 124 // Set the evaluation function by switching on the callee name. 125 StringRef Name = C.getCalleeName(CE); 126 if (Name.empty()) 127 return; 128 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 129 .Case("atoi", &GenericTaintChecker::preAnyArgs) 130 .Case("atol", &GenericTaintChecker::preAnyArgs) 131 .Case("atoll", &GenericTaintChecker::preAnyArgs) 132 .Case("fscanf", &GenericTaintChecker::preFscanf) 133 .Cases("strcpy", "__builtin___strcpy_chk", 134 "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy) 135 .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy) 136 .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy) 137 .Default(0); 138 139 // Check and evaluate the call. 140 const ProgramState *State = 0; 141 if (evalFunction) 142 State = (this->*evalFunction)(CE, C); 143 if (!State) 144 return; 145 146 C.addTransition(State); 147} 148 149bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 150 CheckerContext &C) const { 151 const ProgramState *State = C.getState(); 152 153 // Depending on what was tainted at pre-visit, we determined a set of 154 // arguments which should be tainted after the function returns. These are 155 // stored in the state as TaintArgsOnPostVisit set. 156 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); 157 for (llvm::ImmutableSet<unsigned>::iterator 158 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 159 unsigned ArgNum = *I; 160 161 // Special handling for the tainted return value. 162 if (ArgNum == ReturnValueIndex) { 163 State = State->addTaint(CE, C.getLocationContext()); 164 continue; 165 } 166 167 // The arguments are pointer arguments. The data they are pointing at is 168 // tainted after the call. 169 const Expr* Arg = CE->getArg(ArgNum); 170 SymbolRef Sym = getPointedToSymbol(C, Arg, true); 171 if (Sym) 172 State = State->addTaint(Sym); 173 } 174 175 // Clear up the taint info from the state. 176 State = State->remove<TaintArgsOnPostVisit>(); 177 178 if (State != C.getState()) { 179 C.addTransition(State); 180 return true; 181 } 182 return false; 183} 184 185void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 186 CheckerContext &C) const { 187 // Define the attack surface. 188 // Set the evaluation function by switching on the callee name. 189 StringRef Name = C.getCalleeName(CE); 190 if (Name.empty()) 191 return; 192 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 193 .Case("scanf", &GenericTaintChecker::postScanf) 194 // TODO: Add support for vfscanf & family. 195 .Case("getchar", &GenericTaintChecker::postRetTaint) 196 .Case("getenv", &GenericTaintChecker::postRetTaint) 197 .Case("fopen", &GenericTaintChecker::postRetTaint) 198 .Case("fdopen", &GenericTaintChecker::postRetTaint) 199 .Case("freopen", &GenericTaintChecker::postRetTaint) 200 .Default(0); 201 202 // If the callee isn't defined, it is not of security concern. 203 // Check and evaluate the call. 204 const ProgramState *State = 0; 205 if (evalFunction) 206 State = (this->*evalFunction)(CE, C); 207 if (!State) 208 return; 209 210 C.addTransition(State); 211} 212 213bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 214 215 if (checkUncontrolledFormatString(CE, C)) 216 return true; 217 218 return false; 219} 220 221SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 222 const Expr* Arg, 223 bool IssueWarning) const { 224 const ProgramState *State = C.getState(); 225 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 226 if (AddrVal.isUnknownOrUndef()) 227 return 0; 228 229 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 230 231 if (!AddrLoc && !IssueWarning) 232 return 0; 233 234 // If the Expr is not a location, issue a warning. 235 if (!AddrLoc) { 236 assert(IssueWarning); 237 if (ExplodedNode *N = C.generateSink(State)) { 238 initBugType(); 239 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N); 240 report->addRange(Arg->getSourceRange()); 241 C.EmitReport(report); 242 } 243 return 0; 244 } 245 246 SVal Val = State->getSVal(*AddrLoc); 247 return Val.getAsSymbol(); 248} 249 250// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 251// and arg 1 should get taint. 252const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE, 253 CheckerContext &C) const { 254 assert(CE->getNumArgs() >= 2); 255 const ProgramState *State = C.getState(); 256 257 // Check is the file descriptor is tainted. 258 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 259 isStdin(CE->getArg(0), C)) { 260 // All arguments except for the first two should get taint. 261 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 262 State = State->add<TaintArgsOnPostVisit>(i); 263 return State; 264 } 265 266 return 0; 267} 268 269// If any other arguments are tainted, mark state as tainted on pre-visit. 270const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE, 271 CheckerContext &C) const { 272 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 273 const ProgramState *State = C.getState(); 274 const Expr *Arg = CE->getArg(i); 275 if (State->isTainted(Arg, C.getLocationContext()) || 276 State->isTainted(getPointedToSymbol(C, Arg))) 277 return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 278 } 279 return 0; 280} 281 282const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE, 283 CheckerContext &C) const { 284 assert(CE->getNumArgs() >= 2); 285 const Expr *FromArg = CE->getArg(1); 286 const ProgramState *State = C.getState(); 287 if (State->isTainted(FromArg, C.getLocationContext()) || 288 State->isTainted(getPointedToSymbol(C, FromArg))) 289 return State = State->add<TaintArgsOnPostVisit>(0); 290 return 0; 291} 292 293const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE, 294 CheckerContext &C) const { 295 const ProgramState *State = C.getState(); 296 assert(CE->getNumArgs() >= 2); 297 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); 298 // All arguments except for the very first one should get taint. 299 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 300 // The arguments are pointer arguments. The data they are pointing at is 301 // tainted after the call. 302 const Expr* Arg = CE->getArg(i); 303 SymbolRef Sym = getPointedToSymbol(C, Arg, true); 304 if (Sym) 305 State = State->addTaint(Sym); 306 } 307 return State; 308} 309 310const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE, 311 CheckerContext &C) const { 312 return C.getState()->addTaint(CE, C.getLocationContext()); 313} 314 315bool GenericTaintChecker::isStdin(const Expr *E, 316 CheckerContext &C) const { 317 const ProgramState *State = C.getState(); 318 SVal Val = State->getSVal(E, C.getLocationContext()); 319 320 // stdin is a pointer, so it would be a region. 321 const MemRegion *MemReg = Val.getAsRegion(); 322 323 // The region should be symbolic, we do not know it's value. 324 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 325 if (!SymReg) 326 return false; 327 328 // Get it's symbol and find the declaration region it's pointing to. 329 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 330 if (!Sm) 331 return false; 332 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 333 if (!DeclReg) 334 return false; 335 336 // This region corresponds to a declaration, find out if it's a global/extern 337 // variable named stdin with the proper type. 338 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 339 D = D->getCanonicalDecl(); 340 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 341 if (const PointerType * PtrTy = 342 dyn_cast<PointerType>(D->getType().getTypePtr())) 343 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 344 return true; 345 } 346 return false; 347} 348 349static bool getPrintfFormatArgumentNum(const CallExpr *CE, 350 const CheckerContext &C, 351 unsigned int &ArgNum) { 352 // Find if the function contains a format string argument. 353 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 354 // vsnprintf, syslog, custom annotated functions. 355 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 356 if (!FDecl) 357 return false; 358 for (specific_attr_iterator<FormatAttr> 359 i = FDecl->specific_attr_begin<FormatAttr>(), 360 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 361 362 const FormatAttr *Format = *i; 363 ArgNum = Format->getFormatIdx() - 1; 364 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 365 return true; 366 } 367 368 // Or if a function is named setproctitle (this is a heuristic). 369 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 370 ArgNum = 0; 371 return true; 372 } 373 374 return false; 375} 376 377bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 378 CheckerContext &C) const{ 379 // Check if the function contains a format string argument. 380 unsigned int ArgNum = 0; 381 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 382 return false; 383 384 // If either the format string content or the pointer itself are tainted, warn. 385 const ProgramState *State = C.getState(); 386 const Expr *Arg = CE->getArg(ArgNum); 387 if (State->isTainted(getPointedToSymbol(C, Arg)) || 388 State->isTainted(Arg, C.getLocationContext())) 389 if (ExplodedNode *N = C.addTransition()) { 390 initBugType(); 391 BugReport *report = new BugReport(*BT, 392 "Tainted format string (CWE-134: Uncontrolled Format String)", N); 393 report->addRange(Arg->getSourceRange()); 394 C.EmitReport(report); 395 return true; 396 } 397 return false; 398} 399 400void ento::registerGenericTaintChecker(CheckerManager &mgr) { 401 mgr.registerChecker<GenericTaintChecker>(); 402} 403