// GenericTaintChecker.cpp revision 9f03b62036a7abc0a227b17f4a49b9eefced9450
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/StaticAnalyzer/Core/Checker.h" 19#include "clang/StaticAnalyzer/Core/CheckerManager.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23 24using namespace clang; 25using namespace ento; 26 27namespace { 28class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 29 check::PreStmt<CallExpr> > { 30public: 31 enum TaintOnPreVisitKind { 32 /// No taint propagates from pre-visit to post-visit. 33 PrevisitNone = 0, 34 /// Based on the pre-visit, the return argument of the call 35 /// should be tainted. 36 PrevisitTaintRet = 1, 37 /// Based on the pre-visit, the call can taint values through it's 38 /// pointer/reference arguments. 39 PrevisitTaintArgs = 2 40 }; 41 42private: 43 mutable llvm::OwningPtr<BugType> BT; 44 void initBugType() const; 45 46 /// Add/propagate taint on a post visit. 47 void taintPost(const CallExpr *CE, CheckerContext &C) const; 48 /// Add/propagate taint on a pre visit. 49 void taintPre(const CallExpr *CE, CheckerContext &C) const; 50 51 /// Catch taint related bugs. 
Check if tainted data is passed to a system 52 /// call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// Given a pointer argument, get the symbol of the value it contains 56 /// (points to). 57 SymbolRef getPointedToSymbol(CheckerContext &C, 58 const Expr *Arg, 59 bool IssueWarning = true) const; 60 61 /// Functions defining the attack surface. 62 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *, 63 CheckerContext &C) const; 64 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const; 65 const ProgramState *postFscanf(const CallExpr *CE, CheckerContext &C) const; 66 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const; 67 const ProgramState *postDefault(const CallExpr *CE, CheckerContext &C) const; 68 69 /// Taint the scanned input if the file is tainted. 70 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const; 71 /// Taint if any of the arguments are tainted. 72 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const; 73 74 /// Check if the region the expression evaluates to is the standard input, 75 /// and thus, is tainted. 76 bool isStdin(const Expr *E, CheckerContext &C) const; 77 78 /// Check for CWE-134: Uncontrolled Format String. 79 bool checkUncontrolledFormatString(const CallExpr *CE, 80 CheckerContext &C) const; 81 82public: 83 static void *getTag() { static int Tag; return &Tag; } 84 85 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 86 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 87 88 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 89 90}; 91} 92 93/// Definitions for the checker specific state. 94namespace { struct TaintOnPreVisit {};} 95namespace clang { 96namespace ento { 97 /// A flag which is used to pass information from call pre-visit instruction 98 /// to the call post-visit. 
The value is an unsigned, which takes on values 99 /// of the TaintOnPreVisitKind enumeration. 100 template<> 101 struct ProgramStateTrait<TaintOnPreVisit> : 102 public ProgramStatePartialTrait<unsigned> { 103 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 104 }; 105} 106} 107 108inline void GenericTaintChecker::initBugType() const { 109 if (!BT) 110 BT.reset(new BugType("Taint Analysis", "General")); 111} 112 113void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 114 CheckerContext &C) const { 115 // Check for errors first. 116 if (checkPre(CE, C)) 117 return; 118 119 // Add taint second. 120 taintPre(CE, C); 121} 122 123void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 124 CheckerContext &C) const { 125 taintPost(CE, C); 126} 127 128void GenericTaintChecker::taintPre(const CallExpr *CE, 129 CheckerContext &C) const { 130 // Set the evaluation function by switching on the callee name. 131 StringRef Name = C.getCalleeName(CE); 132 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 133 .Case("fscanf", &GenericTaintChecker::preFscanf) 134 .Case("atoi", &GenericTaintChecker::preAnyArgs) 135 .Case("atol", &GenericTaintChecker::preAnyArgs) 136 .Case("atoll", &GenericTaintChecker::preAnyArgs) 137 .Default(0); 138 139 // Check and evaluate the call. 140 const ProgramState *State = 0; 141 if (evalFunction) 142 State = (this->*evalFunction)(CE, C); 143 if (!State) 144 return; 145 146 C.addTransition(State); 147} 148 149void GenericTaintChecker::taintPost(const CallExpr *CE, 150 CheckerContext &C) const { 151 // Define the attack surface. 152 // Set the evaluation function by switching on the callee name. 153 StringRef Name = C.getCalleeName(CE); 154 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 155 .Case("scanf", &GenericTaintChecker::postScanf) 156 .Case("fscanf", &GenericTaintChecker::postFscanf) 157 .Case("sscanf", &GenericTaintChecker::postFscanf) 158 // TODO: Add support for vfscanf & family. 
159 .Case("getchar", &GenericTaintChecker::postRetTaint) 160 .Case("getenv", &GenericTaintChecker::postRetTaint) 161 .Case("fopen", &GenericTaintChecker::postRetTaint) 162 .Case("fdopen", &GenericTaintChecker::postRetTaint) 163 .Case("freopen", &GenericTaintChecker::postRetTaint) 164 .Default(&GenericTaintChecker::postDefault); 165 166 // If the callee isn't defined, it is not of security concern. 167 // Check and evaluate the call. 168 const ProgramState *State = 0; 169 if (evalFunction) 170 State = (this->*evalFunction)(CE, C); 171 if (!State) 172 return; 173 174 assert(State->get<TaintOnPreVisit>() == PrevisitNone && 175 "State has to be cleared."); 176 C.addTransition(State); 177} 178 179bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 180 181 if (checkUncontrolledFormatString(CE, C)) 182 return true; 183 184 StringRef Name = C.getCalleeName(CE); 185 return false; 186} 187 188SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 189 const Expr* Arg, 190 bool IssueWarning) const { 191 const ProgramState *State = C.getState(); 192 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 193 if (AddrVal.isUnknownOrUndef()) 194 return 0; 195 196 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 197 198 if (!AddrLoc && !IssueWarning) 199 return 0; 200 201 // If the Expr is not a location, issue a warning. 202 if (!AddrLoc) { 203 assert(IssueWarning); 204 if (ExplodedNode *N = C.generateSink(State)) { 205 initBugType(); 206 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N); 207 report->addRange(Arg->getSourceRange()); 208 C.EmitReport(report); 209 } 210 return 0; 211 } 212 213 SVal Val = State->getSVal(*AddrLoc); 214 return Val.getAsSymbol(); 215} 216 217const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE, 218 CheckerContext &C) const { 219 assert(CE->getNumArgs() >= 2); 220 const ProgramState *State = C.getState(); 221 222 // Check is the file descriptor is tainted. 
223 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 224 isStdin(CE->getArg(0), C)) 225 return State->set<TaintOnPreVisit>(PrevisitTaintArgs); 226 return 0; 227} 228 229// If any other arguments are tainted, mark state as tainted on pre-visit. 230const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE, 231 CheckerContext &C) const { 232 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 233 const ProgramState *State = C.getState(); 234 const Expr *Arg = CE->getArg(i); 235 if (State->isTainted(Arg, C.getLocationContext()) || 236 State->isTainted(getPointedToSymbol(C, Arg))) 237 return State = State->set<TaintOnPreVisit>(PrevisitTaintRet); 238 } 239 return 0; 240} 241 242const ProgramState *GenericTaintChecker::postDefault(const CallExpr *CE, 243 CheckerContext &C) const { 244 const ProgramState *State = C.getState(); 245 246 // Check if we know that the result needs to be tainted based on the 247 // pre-visit analysis. 248 if (State->get<TaintOnPreVisit>() == PrevisitTaintRet) { 249 State = State->addTaint(CE, C.getLocationContext()); 250 return State->set<TaintOnPreVisit>(PrevisitNone); 251 } 252 253 return 0; 254} 255 256const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE, 257 CheckerContext &C) const { 258 const ProgramState *State = C.getState(); 259 assert(CE->getNumArgs() >= 2); 260 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); 261 // All arguments except for the very first one should get taint. 262 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 263 // The arguments are pointer arguments. The data they are pointing at is 264 // tainted after the call. 265 const Expr* Arg = CE->getArg(i); 266 SymbolRef Sym = getPointedToSymbol(C, Arg); 267 if (Sym) 268 State = State->addTaint(Sym); 269 } 270 return State; 271} 272 273/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 274/// and arg 1 should get taint. 
275const ProgramState *GenericTaintChecker::postFscanf(const CallExpr *CE, 276 CheckerContext &C) const { 277 const ProgramState *State = C.getState(); 278 assert(CE->getNumArgs() >= 2); 279 280 // Fscanf is only tainted if the input file is tainted at pre visit, so 281 // check for that first. 282 if (State->get<TaintOnPreVisit>() == PrevisitNone) 283 return 0; 284 285 // Reset the taint state. 286 State = State->set<TaintOnPreVisit>(PrevisitNone); 287 288 // All arguments except for the first two should get taint. 289 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) { 290 // The arguments are pointer arguments. The data they are pointing at is 291 // tainted after the call. 292 const Expr* Arg = CE->getArg(i); 293 SymbolRef Sym = getPointedToSymbol(C, Arg); 294 if (Sym) 295 State = State->addTaint(Sym); 296 } 297 return State; 298} 299 300const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE, 301 CheckerContext &C) const { 302 return C.getState()->addTaint(CE, C.getLocationContext()); 303} 304 305bool GenericTaintChecker::isStdin(const Expr *E, 306 CheckerContext &C) const { 307 const ProgramState *State = C.getState(); 308 SVal Val = State->getSVal(E, C.getLocationContext()); 309 310 // stdin is a pointer, so it would be a region. 311 const MemRegion *MemReg = Val.getAsRegion(); 312 313 // The region should be symbolic, we do not know it's value. 314 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 315 if (!SymReg) 316 return false; 317 318 // Get it's symbol and find the declaration region it's pointing to. 319 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 320 if (!Sm) 321 return false; 322 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 323 if (!DeclReg) 324 return false; 325 326 // This region corresponds to a declaration, find out if it's a global/extern 327 // variable named stdin with the proper type. 
328 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 329 D = D->getCanonicalDecl(); 330 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 331 if (const PointerType * PtrTy = 332 dyn_cast<PointerType>(D->getType().getTypePtr())) 333 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 334 return true; 335 } 336 return false; 337} 338 339static bool getPrintfFormatArgumentNum(const CallExpr *CE, 340 const CheckerContext &C, 341 unsigned int &ArgNum) { 342 // Find if the function contains a format string argument. 343 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 344 // vsnprintf, syslog, custom annotated functions. 345 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 346 if (!FDecl) 347 return false; 348 for (specific_attr_iterator<FormatAttr> 349 i = FDecl->specific_attr_begin<FormatAttr>(), 350 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 351 352 const FormatAttr *Format = *i; 353 ArgNum = Format->getFormatIdx() - 1; 354 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 355 return true; 356 } 357 358 // Or if a function is named setproctitle (this is a heuristic). 359 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 360 ArgNum = 0; 361 return true; 362 } 363 364 return false; 365} 366 367bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 368 CheckerContext &C) const{ 369 // Check if the function contains a format string argument. 370 unsigned int ArgNum = 0; 371 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 372 return false; 373 374 // If either the format string content or the pointer itself are tainted, warn. 
375 const ProgramState *State = C.getState(); 376 const Expr *Arg = CE->getArg(ArgNum); 377 if (State->isTainted(getPointedToSymbol(C, Arg, false)) || 378 State->isTainted(Arg, C.getLocationContext())) 379 if (ExplodedNode *N = C.addTransition()) { 380 initBugType(); 381 BugReport *report = new BugReport(*BT, 382 "Tainted format string (CWE-134: Uncontrolled Format String)", N); 383 report->addRange(Arg->getSourceRange()); 384 C.EmitReport(report); 385 return true; 386 } 387 return false; 388} 389 390void ento::registerGenericTaintChecker(CheckerManager &mgr) { 391 mgr.registerChecker<GenericTaintChecker>(); 392} 393