// GenericTaintChecker.cpp revision 022b3f4490bbdcde7b3f18ce0498f9a73b6cbf53
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/StaticAnalyzer/Core/Checker.h" 19#include "clang/StaticAnalyzer/Core/CheckerManager.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23#include <climits> 24 25using namespace clang; 26using namespace ento; 27 28namespace { 29class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 30 check::PreStmt<CallExpr> > { 31public: 32 static void *getTag() { static int Tag; return &Tag; } 33 34 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 35 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 36 37 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 38 39private: 40 static const unsigned ReturnValueIndex = UINT_MAX; 41 static const unsigned InvalidArgIndex = UINT_MAX - 1; 42 43 mutable llvm::OwningPtr<BugType> BT; 44 void initBugType() const; 45 46 /// \brief Catch taint related bugs. Check if tainted data is passed to a 47 /// system call etc. 48 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 49 50 /// \brief Add taint sources on a pre-visit. 
51 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 52 53 /// \brief Propagate taint generated at pre-visit. 54 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 55 56 /// \brief Add taint sources on a post visit. 57 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 58 59 /// \brief Given a pointer argument, get the symbol of the value it contains 60 /// (points to). 61 SymbolRef getPointedToSymbol(CheckerContext &C, 62 const Expr *Arg, 63 bool IssueWarning = false) const; 64 65 inline bool isTaintedOrPointsToTainted(const Expr *E, 66 const ProgramState *State, 67 CheckerContext &C) const { 68 return (State->isTainted(E, C.getLocationContext()) || 69 (E->getType().getTypePtr()->isPointerType() && 70 State->isTainted(getPointedToSymbol(C, E)))); 71 } 72 73 /// Functions defining the attack surface. 74 typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *, 75 CheckerContext &C) const; 76 const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const; 77 const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const; 78 79 /// Taint the scanned input if the file is tainted. 80 const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const; 81 /// Taint if any of the arguments are tainted. 82 const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const; 83 const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const; 84 85 /// Check if the region the expression evaluates to is the standard input, 86 /// and thus, is tainted. 87 bool isStdin(const Expr *E, CheckerContext &C) const; 88 89 /// Check for CWE-134: Uncontrolled Format String. 90 static const char MsgUncontrolledFormatString[]; 91 bool checkUncontrolledFormatString(const CallExpr *CE, 92 CheckerContext &C) const; 93 94 /// Check for: 95 /// CERT/STR02-C. 
"Sanitize data passed to complex subsystems" 96 /// CWE-78, "Failure to Sanitize Data into an OS Command" 97 static const char MsgSanitizeSystemArgs[]; 98 bool checkSystemCall(const CallExpr *CE, StringRef Name, 99 CheckerContext &C) const; 100 101 /// Generate a report if the expression is tainted or points to tainted data. 102 bool generateReportIfTainted(const Expr *E, const char Msg[], 103 CheckerContext &C) const; 104 105 106 typedef llvm::SmallVector<unsigned, 2> ArgVector; 107 108 /// \brief A struct used to specify taint propagation rules for a function. 109 /// 110 /// If any of the possible taint source arguments is tainted, all of the 111 /// destination arguments should also be tainted. Use InvalidArgIndex in the 112 /// src list to specify that all of the arguments can introduce taint. Use 113 /// InvalidArgIndex in the dst arguments to signify that all the non-const 114 /// pointer and reference arguments might be tainted on return. If 115 /// ReturnValueIndex is added to the dst list, the return value will be 116 /// tainted. 117 struct TaintPropagationRule { 118 /// List of arguments which can be taint sources and should be checked. 119 ArgVector SrcArgs; 120 /// List of arguments which should be tainted on function return. 121 ArgVector DstArgs; 122 123 TaintPropagationRule() {} 124 125 TaintPropagationRule(unsigned SArg, unsigned DArg) { 126 SrcArgs.push_back(SArg); 127 DstArgs.push_back(DArg); 128 } 129 130 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 131 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 132 133 inline bool isNull() { return SrcArgs.empty(); } 134 }; 135 136 /// \brief Pre-process a function which propagates taint according to the 137 /// given taint rule. 138 const ProgramState *prePropagateTaint(const CallExpr *CE, 139 CheckerContext &C, 140 const TaintPropagationRule PR) const; 141 142 143}; 144// TODO: We probably could use TableGen here. 
145const char GenericTaintChecker::MsgUncontrolledFormatString[] = 146 "Tainted format string (CWE-134: Uncontrolled Format String)"; 147 148const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 149 "Tainted data passed to a system call " 150 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 151 152} 153 154/// A set which is used to pass information from call pre-visit instruction 155/// to the call post-visit. The values are unsigned integers, which are either 156/// ReturnValueIndex, or indexes of the pointer/reference argument, which 157/// points to data, which should be tainted on return. 158namespace { struct TaintArgsOnPostVisit{}; } 159namespace clang { namespace ento { 160template<> struct ProgramStateTrait<TaintArgsOnPostVisit> 161 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { 162 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 163}; 164}} 165 166inline void GenericTaintChecker::initBugType() const { 167 if (!BT) 168 BT.reset(new BugType("Taint Analysis", "General")); 169} 170 171void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 172 CheckerContext &C) const { 173 // Check for errors first. 174 if (checkPre(CE, C)) 175 return; 176 177 // Add taint second. 178 addSourcesPre(CE, C); 179} 180 181void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 182 CheckerContext &C) const { 183 if (propagateFromPre(CE, C)) 184 return; 185 addSourcesPost(CE, C); 186} 187 188void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 189 CheckerContext &C) const { 190 // Set the evaluation function by switching on the callee name. 
191 StringRef Name = C.getCalleeName(CE); 192 if (Name.empty()) 193 return; 194 195 const ProgramState *State = 0; 196 197 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 198 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 199 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 200 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 201 .Default(TaintPropagationRule()); 202 203 if (!Rule.isNull()) { 204 State = prePropagateTaint(CE, C, Rule); 205 if (!State) 206 return; 207 C.addTransition(State); 208 } 209 210 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 211 .Case("fscanf", &GenericTaintChecker::preFscanf) 212 .Cases("strcpy", "__builtin___strcpy_chk", 213 "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy) 214 .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy) 215 .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy) 216 .Default(0); 217 218 // Check and evaluate the call. 219 if (evalFunction) 220 State = (this->*evalFunction)(CE, C); 221 if (!State) 222 return; 223 224 C.addTransition(State); 225} 226 227bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 228 CheckerContext &C) const { 229 const ProgramState *State = C.getState(); 230 231 // Depending on what was tainted at pre-visit, we determined a set of 232 // arguments which should be tainted after the function returns. These are 233 // stored in the state as TaintArgsOnPostVisit set. 234 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); 235 for (llvm::ImmutableSet<unsigned>::iterator 236 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 237 unsigned ArgNum = *I; 238 239 // Special handling for the tainted return value. 240 if (ArgNum == ReturnValueIndex) { 241 State = State->addTaint(CE, C.getLocationContext()); 242 continue; 243 } 244 245 // The arguments are pointer arguments. The data they are pointing at is 246 // tainted after the call. 
247 const Expr* Arg = CE->getArg(ArgNum); 248 SymbolRef Sym = getPointedToSymbol(C, Arg, true); 249 if (Sym) 250 State = State->addTaint(Sym); 251 } 252 253 // Clear up the taint info from the state. 254 State = State->remove<TaintArgsOnPostVisit>(); 255 256 if (State != C.getState()) { 257 C.addTransition(State); 258 return true; 259 } 260 return false; 261} 262 263void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 264 CheckerContext &C) const { 265 // Define the attack surface. 266 // Set the evaluation function by switching on the callee name. 267 StringRef Name = C.getCalleeName(CE); 268 if (Name.empty()) 269 return; 270 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 271 .Case("scanf", &GenericTaintChecker::postScanf) 272 // TODO: Add support for vfscanf & family. 273 .Case("getchar", &GenericTaintChecker::postRetTaint) 274 .Case("getenv", &GenericTaintChecker::postRetTaint) 275 .Case("fopen", &GenericTaintChecker::postRetTaint) 276 .Case("fdopen", &GenericTaintChecker::postRetTaint) 277 .Case("freopen", &GenericTaintChecker::postRetTaint) 278 .Default(0); 279 280 // If the callee isn't defined, it is not of security concern. 281 // Check and evaluate the call. 
282 const ProgramState *State = 0; 283 if (evalFunction) 284 State = (this->*evalFunction)(CE, C); 285 if (!State) 286 return; 287 288 C.addTransition(State); 289} 290 291bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 292 293 if (checkUncontrolledFormatString(CE, C)) 294 return true; 295 296 StringRef Name = C.getCalleeName(CE); 297 if (Name.empty()) 298 return false; 299 300 if (checkSystemCall(CE, Name, C)) 301 return true; 302 303 return false; 304} 305 306SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 307 const Expr* Arg, 308 bool IssueWarning) const { 309 const ProgramState *State = C.getState(); 310 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 311 if (AddrVal.isUnknownOrUndef()) 312 return 0; 313 314 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 315 316 if (!AddrLoc && !IssueWarning) 317 return 0; 318 319 // If the Expr is not a location, issue a warning. 320 if (!AddrLoc) { 321 assert(IssueWarning); 322 if (ExplodedNode *N = C.generateSink(State)) { 323 initBugType(); 324 BugReport *report = new BugReport(*BT, "Pointer argument is expected.",N); 325 report->addRange(Arg->getSourceRange()); 326 C.EmitReport(report); 327 } 328 return 0; 329 } 330 331 const PointerType *ArgTy = 332 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 333 assert(ArgTy); 334 SVal Val = State->getSVal(*AddrLoc, ArgTy->getPointeeType()); 335 return Val.getAsSymbol(); 336} 337 338const ProgramState * 339GenericTaintChecker::prePropagateTaint(const CallExpr *CE, 340 CheckerContext &C, 341 const TaintPropagationRule PR) const { 342 const ProgramState *State = C.getState(); 343 344 // Check for taint in arguments. 345 bool IsTainted = false; 346 for (ArgVector::const_iterator I = PR.SrcArgs.begin(), 347 E = PR.SrcArgs.end(); I != E; ++I) { 348 unsigned ArgNum = *I; 349 350 if (ArgNum == InvalidArgIndex) { 351 // Check if any of the arguments is tainted. 
352 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) 353 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 354 break; 355 break; 356 } 357 358 assert(ArgNum < CE->getNumArgs()); 359 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 360 break; 361 } 362 if (!IsTainted) 363 return State; 364 365 // Mark the arguments which should be tainted after the function returns. 366 for (ArgVector::const_iterator I = PR.DstArgs.begin(), 367 E = PR.DstArgs.end(); I != E; ++I) { 368 unsigned ArgNum = *I; 369 370 // Should we mark all arguments as tainted? 371 if (ArgNum == InvalidArgIndex) { 372 // For all pointer and references that were passed in: 373 // If they are not pointing to const data, mark data as tainted. 374 // TODO: So far we are just going one level down; ideally we'd need to 375 // recurse here. 376 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 377 const Expr *Arg = CE->getArg(i); 378 // Process pointer argument. 379 const Type *ArgTy = Arg->getType().getTypePtr(); 380 QualType PType = ArgTy->getPointeeType(); 381 if ((!PType.isNull() && !PType.isConstQualified()) 382 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 383 State = State->add<TaintArgsOnPostVisit>(i); 384 } 385 continue; 386 } 387 388 // Should mark the return value? 389 if (ArgNum == ReturnValueIndex) { 390 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 391 continue; 392 } 393 394 // Mark the given argument. 395 assert(ArgNum < CE->getNumArgs()); 396 State = State->add<TaintArgsOnPostVisit>(ArgNum); 397 } 398 399 return State; 400} 401 402 403// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 404// and arg 1 should get taint. 405const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE, 406 CheckerContext &C) const { 407 assert(CE->getNumArgs() >= 2); 408 const ProgramState *State = C.getState(); 409 410 // Check is the file descriptor is tainted. 
411 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 412 isStdin(CE->getArg(0), C)) { 413 // All arguments except for the first two should get taint. 414 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 415 State = State->add<TaintArgsOnPostVisit>(i); 416 return State; 417 } 418 419 return 0; 420} 421 422// If any arguments are tainted, mark the return value as tainted on post-visit. 423const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE, 424 CheckerContext &C) const { 425 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 426 const ProgramState *State = C.getState(); 427 const Expr *Arg = CE->getArg(i); 428 if (State->isTainted(Arg, C.getLocationContext()) || 429 State->isTainted(getPointedToSymbol(C, Arg))) 430 return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 431 } 432 return 0; 433} 434 435const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE, 436 CheckerContext &C) const { 437 assert(CE->getNumArgs() >= 2); 438 const Expr *FromArg = CE->getArg(1); 439 const ProgramState *State = C.getState(); 440 if (State->isTainted(FromArg, C.getLocationContext()) || 441 State->isTainted(getPointedToSymbol(C, FromArg))) 442 return State = State->add<TaintArgsOnPostVisit>(0); 443 return 0; 444} 445 446const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE, 447 CheckerContext &C) const { 448 const ProgramState *State = C.getState(); 449 assert(CE->getNumArgs() >= 2); 450 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); 451 // All arguments except for the very first one should get taint. 452 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 453 // The arguments are pointer arguments. The data they are pointing at is 454 // tainted after the call. 
455 const Expr* Arg = CE->getArg(i); 456 SymbolRef Sym = getPointedToSymbol(C, Arg, true); 457 if (Sym) 458 State = State->addTaint(Sym); 459 } 460 return State; 461} 462 463const ProgramState *GenericTaintChecker::postRetTaint(const CallExpr *CE, 464 CheckerContext &C) const { 465 return C.getState()->addTaint(CE, C.getLocationContext()); 466} 467 468bool GenericTaintChecker::isStdin(const Expr *E, 469 CheckerContext &C) const { 470 const ProgramState *State = C.getState(); 471 SVal Val = State->getSVal(E, C.getLocationContext()); 472 473 // stdin is a pointer, so it would be a region. 474 const MemRegion *MemReg = Val.getAsRegion(); 475 476 // The region should be symbolic, we do not know it's value. 477 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 478 if (!SymReg) 479 return false; 480 481 // Get it's symbol and find the declaration region it's pointing to. 482 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 483 if (!Sm) 484 return false; 485 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 486 if (!DeclReg) 487 return false; 488 489 // This region corresponds to a declaration, find out if it's a global/extern 490 // variable named stdin with the proper type. 491 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 492 D = D->getCanonicalDecl(); 493 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 494 if (const PointerType * PtrTy = 495 dyn_cast<PointerType>(D->getType().getTypePtr())) 496 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 497 return true; 498 } 499 return false; 500} 501 502static bool getPrintfFormatArgumentNum(const CallExpr *CE, 503 const CheckerContext &C, 504 unsigned int &ArgNum) { 505 // Find if the function contains a format string argument. 506 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 507 // vsnprintf, syslog, custom annotated functions. 
508 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 509 if (!FDecl) 510 return false; 511 for (specific_attr_iterator<FormatAttr> 512 i = FDecl->specific_attr_begin<FormatAttr>(), 513 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 514 515 const FormatAttr *Format = *i; 516 ArgNum = Format->getFormatIdx() - 1; 517 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 518 return true; 519 } 520 521 // Or if a function is named setproctitle (this is a heuristic). 522 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 523 ArgNum = 0; 524 return true; 525 } 526 527 return false; 528} 529 530bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 531 const char Msg[], 532 CheckerContext &C) const { 533 assert(E); 534 535 // Check for taint. 536 const ProgramState *State = C.getState(); 537 if (!State->isTainted(getPointedToSymbol(C, E)) && 538 !State->isTainted(E, C.getLocationContext())) 539 return false; 540 541 // Generate diagnostic. 542 if (ExplodedNode *N = C.addTransition()) { 543 initBugType(); 544 BugReport *report = new BugReport(*BT, Msg, N); 545 report->addRange(E->getSourceRange()); 546 C.EmitReport(report); 547 return true; 548 } 549 return false; 550} 551 552bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 553 CheckerContext &C) const{ 554 // Check if the function contains a format string argument. 555 unsigned int ArgNum = 0; 556 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 557 return false; 558 559 // If either the format string content or the pointer itself are tainted, warn. 
560 if (generateReportIfTainted(CE->getArg(ArgNum), 561 MsgUncontrolledFormatString, C)) 562 return true; 563 return false; 564} 565 566bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 567 StringRef Name, 568 CheckerContext &C) const { 569 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 570 .Case("system", 0) 571 .Case("popen", 0) 572 .Default(UINT_MAX); 573 574 if (ArgNum == UINT_MAX) 575 return false; 576 577 if (generateReportIfTainted(CE->getArg(ArgNum), 578 MsgSanitizeSystemArgs, C)) 579 return true; 580 581 return false; 582} 583 584void ento::registerGenericTaintChecker(CheckerManager &mgr) { 585 mgr.registerChecker<GenericTaintChecker>(); 586} 587