GenericTaintChecker.cpp revision 8bef8238181a30e52dea380789a7e2d760eac532
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/StaticAnalyzer/Core/Checker.h" 19#include "clang/StaticAnalyzer/Core/CheckerManager.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23#include "clang/Basic/Builtins.h" 24#include <climits> 25 26using namespace clang; 27using namespace ento; 28 29namespace { 30class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 31 check::PreStmt<CallExpr> > { 32public: 33 static void *getTag() { static int Tag; return &Tag; } 34 35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable llvm::OwningPtr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType("Taint Analysis", "General")); 49 } 50 51 /// \brief Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// \brief Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// \brief Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// \brief Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// \brief Given a pointer argument, get the symbol of the value it contains 69 /// (points to). 70 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 71 72 /// Functions defining the attack surface. 73 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 74 CheckerContext &C) const; 75 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 76 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 77 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 78 79 /// Taint the scanned input if the file is tainted. 80 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 81 82 /// Check for CWE-134: Uncontrolled Format String. 83 static const char MsgUncontrolledFormatString[]; 84 bool checkUncontrolledFormatString(const CallExpr *CE, 85 CheckerContext &C) const; 86 87 /// Check for: 88 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 89 /// CWE-78, "Failure to Sanitize Data into an OS Command" 90 static const char MsgSanitizeSystemArgs[]; 91 bool checkSystemCall(const CallExpr *CE, StringRef Name, 92 CheckerContext &C) const; 93 94 /// Check if tainted data is used as a buffer size ins strn.. functions, 95 /// and allocators. 96 static const char MsgTaintedBufferSize[]; 97 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 98 CheckerContext &C) const; 99 100 /// Generate a report if the expression is tainted or points to tainted data. 101 bool generateReportIfTainted(const Expr *E, const char Msg[], 102 CheckerContext &C) const; 103 104 105 typedef llvm::SmallVector<unsigned, 2> ArgVector; 106 107 /// \brief A struct used to specify taint propagation rules for a function. 108 /// 109 /// If any of the possible taint source arguments is tainted, all of the 110 /// destination arguments should also be tainted. Use InvalidArgIndex in the 111 /// src list to specify that all of the arguments can introduce taint. Use 112 /// InvalidArgIndex in the dst arguments to signify that all the non-const 113 /// pointer and reference arguments might be tainted on return. If 114 /// ReturnValueIndex is added to the dst list, the return value will be 115 /// tainted. 116 struct TaintPropagationRule { 117 /// List of arguments which can be taint sources and should be checked. 118 ArgVector SrcArgs; 119 /// List of arguments which should be tainted on function return. 120 ArgVector DstArgs; 121 // TODO: Check if using other data structures would be more optimal. 122 123 TaintPropagationRule() {} 124 125 TaintPropagationRule(unsigned SArg, 126 unsigned DArg, bool TaintRet = false) { 127 SrcArgs.push_back(SArg); 128 DstArgs.push_back(DArg); 129 if (TaintRet) 130 DstArgs.push_back(ReturnValueIndex); 131 } 132 133 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 134 unsigned DArg, bool TaintRet = false) { 135 SrcArgs.push_back(SArg1); 136 SrcArgs.push_back(SArg2); 137 DstArgs.push_back(DArg); 138 if (TaintRet) 139 DstArgs.push_back(ReturnValueIndex); 140 } 141 142 /// Get the propagation rule for a given function. 143 static TaintPropagationRule 144 getTaintPropagationRule(const FunctionDecl *FDecl, 145 StringRef Name, 146 CheckerContext &C); 147 148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 150 151 inline bool isNull() const { return SrcArgs.empty(); } 152 153 inline bool isDestinationArgument(unsigned ArgNum) const { 154 return (std::find(DstArgs.begin(), 155 DstArgs.end(), ArgNum) != DstArgs.end()); 156 } 157 158 static inline bool isTaintedOrPointsToTainted(const Expr *E, 159 ProgramStateRef State, 160 CheckerContext &C) { 161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 162 (E->getType().getTypePtr()->isPointerType() && 163 State->isTainted(getPointedToSymbol(C, E)))); 164 } 165 166 /// \brief Pre-process a function which propagates taint according to the 167 /// taint rule. 168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 169 170 }; 171}; 172 173const unsigned GenericTaintChecker::ReturnValueIndex; 174const unsigned GenericTaintChecker::InvalidArgIndex; 175 176const char GenericTaintChecker::MsgUncontrolledFormatString[] = 177 "Tainted format string (CWE-134: Uncontrolled Format String)"; 178 179const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 180 "Tainted data passed to a system call " 181 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 182 183const char GenericTaintChecker::MsgTaintedBufferSize[] = 184 "Tainted data is used to specify the buffer size " 185 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 186 "character data and the null terminator)"; 187 188} // end of anonymous namespace 189 190/// A set which is used to pass information from call pre-visit instruction 191/// to the call post-visit. The values are unsigned integers, which are either 192/// ReturnValueIndex, or indexes of the pointer/reference argument, which 193/// points to data, which should be tainted on return. 194namespace { struct TaintArgsOnPostVisit{}; } 195namespace clang { namespace ento { 196template<> struct ProgramStateTrait<TaintArgsOnPostVisit> 197 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { 198 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 199}; 200}} 201 202GenericTaintChecker::TaintPropagationRule 203GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 204 const FunctionDecl *FDecl, 205 StringRef Name, 206 CheckerContext &C) { 207 // TODO: Currently, we might loose precision here: we always mark a return 208 // value as tainted even if it's just a pointer, pointing to tainted data. 209 210 // Check for exact name match for functions without builtin substitutes. 211 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 212 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 213 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 214 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 215 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 216 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 217 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 218 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 219 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 220 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 221 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 222 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 223 .Case("read", TaintPropagationRule(0, 2, 1, true)) 224 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 225 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 226 .Case("fgets", TaintPropagationRule(2, 0, true)) 227 .Case("getline", TaintPropagationRule(2, 0)) 228 .Case("getdelim", TaintPropagationRule(3, 0)) 229 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 230 .Default(TaintPropagationRule()); 231 232 if (!Rule.isNull()) 233 return Rule; 234 235 // Check if it's one of the memory setting/copying functions. 236 // This check is specialized but faster then calling isCLibraryFunction. 237 unsigned BId = 0; 238 if ( (BId = FDecl->getMemoryFunctionKind()) ) 239 switch(BId) { 240 case Builtin::BImemcpy: 241 case Builtin::BImemmove: 242 case Builtin::BIstrncpy: 243 case Builtin::BIstrncat: 244 return TaintPropagationRule(1, 2, 0, true); 245 case Builtin::BIstrlcpy: 246 case Builtin::BIstrlcat: 247 return TaintPropagationRule(1, 2, 0, false); 248 case Builtin::BIstrndup: 249 return TaintPropagationRule(0, 1, ReturnValueIndex); 250 251 default: 252 break; 253 }; 254 255 // Process all other functions which could be defined as builtins. 256 if (Rule.isNull()) { 257 if (C.isCLibraryFunction(FDecl, "snprintf") || 258 C.isCLibraryFunction(FDecl, "sprintf")) 259 return TaintPropagationRule(InvalidArgIndex, 0, true); 260 else if (C.isCLibraryFunction(FDecl, "strcpy") || 261 C.isCLibraryFunction(FDecl, "stpcpy") || 262 C.isCLibraryFunction(FDecl, "strcat")) 263 return TaintPropagationRule(1, 0, true); 264 else if (C.isCLibraryFunction(FDecl, "bcopy")) 265 return TaintPropagationRule(0, 2, 1, false); 266 else if (C.isCLibraryFunction(FDecl, "strdup") || 267 C.isCLibraryFunction(FDecl, "strdupa")) 268 return TaintPropagationRule(0, ReturnValueIndex); 269 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 270 return TaintPropagationRule(0, ReturnValueIndex); 271 } 272 273 // Skipping the following functions, since they might be used for cleansing 274 // or smart memory copy: 275 // - memccpy - copying untill hitting a special character. 276 277 return TaintPropagationRule(); 278} 279 280void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 281 CheckerContext &C) const { 282 // Check for errors first. 283 if (checkPre(CE, C)) 284 return; 285 286 // Add taint second. 287 addSourcesPre(CE, C); 288} 289 290void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 291 CheckerContext &C) const { 292 if (propagateFromPre(CE, C)) 293 return; 294 addSourcesPost(CE, C); 295} 296 297void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 298 CheckerContext &C) const { 299 ProgramStateRef State = 0; 300 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 301 StringRef Name = C.getCalleeName(FDecl); 302 if (Name.empty()) 303 return; 304 305 // First, try generating a propagation rule for this function. 306 TaintPropagationRule Rule = 307 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 308 if (!Rule.isNull()) { 309 State = Rule.process(CE, C); 310 if (!State) 311 return; 312 C.addTransition(State); 313 return; 314 } 315 316 // Otherwise, check if we have custom pre-processing implemented. 317 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 318 .Case("fscanf", &GenericTaintChecker::preFscanf) 319 .Default(0); 320 // Check and evaluate the call. 321 if (evalFunction) 322 State = (this->*evalFunction)(CE, C); 323 if (!State) 324 return; 325 C.addTransition(State); 326 327} 328 329bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 330 CheckerContext &C) const { 331 ProgramStateRef State = C.getState(); 332 333 // Depending on what was tainted at pre-visit, we determined a set of 334 // arguments which should be tainted after the function returns. These are 335 // stored in the state as TaintArgsOnPostVisit set. 336 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); 337 if (TaintArgs.isEmpty()) 338 return false; 339 340 for (llvm::ImmutableSet<unsigned>::iterator 341 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 342 unsigned ArgNum = *I; 343 344 // Special handling for the tainted return value. 345 if (ArgNum == ReturnValueIndex) { 346 State = State->addTaint(CE, C.getLocationContext()); 347 continue; 348 } 349 350 // The arguments are pointer arguments. The data they are pointing at is 351 // tainted after the call. 352 const Expr* Arg = CE->getArg(ArgNum); 353 SymbolRef Sym = getPointedToSymbol(C, Arg); 354 if (Sym) 355 State = State->addTaint(Sym); 356 } 357 358 // Clear up the taint info from the state. 359 State = State->remove<TaintArgsOnPostVisit>(); 360 361 if (State != C.getState()) { 362 C.addTransition(State); 363 return true; 364 } 365 return false; 366} 367 368void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 369 CheckerContext &C) const { 370 // Define the attack surface. 371 // Set the evaluation function by switching on the callee name. 372 StringRef Name = C.getCalleeName(CE); 373 if (Name.empty()) 374 return; 375 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 376 .Case("scanf", &GenericTaintChecker::postScanf) 377 // TODO: Add support for vfscanf & family. 378 .Case("getchar", &GenericTaintChecker::postRetTaint) 379 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 380 .Case("getenv", &GenericTaintChecker::postRetTaint) 381 .Case("fopen", &GenericTaintChecker::postRetTaint) 382 .Case("fdopen", &GenericTaintChecker::postRetTaint) 383 .Case("freopen", &GenericTaintChecker::postRetTaint) 384 .Case("getch", &GenericTaintChecker::postRetTaint) 385 .Case("wgetch", &GenericTaintChecker::postRetTaint) 386 .Case("socket", &GenericTaintChecker::postSocket) 387 .Default(0); 388 389 // If the callee isn't defined, it is not of security concern. 390 // Check and evaluate the call. 391 ProgramStateRef State = 0; 392 if (evalFunction) 393 State = (this->*evalFunction)(CE, C); 394 if (!State) 395 return; 396 397 C.addTransition(State); 398} 399 400bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 401 402 if (checkUncontrolledFormatString(CE, C)) 403 return true; 404 405 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 406 StringRef Name = C.getCalleeName(FDecl); 407 if (Name.empty()) 408 return false; 409 410 if (checkSystemCall(CE, Name, C)) 411 return true; 412 413 if (checkTaintedBufferSize(CE, FDecl, C)) 414 return true; 415 416 return false; 417} 418 419SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 420 const Expr* Arg) { 421 ProgramStateRef State = C.getState(); 422 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 423 if (AddrVal.isUnknownOrUndef()) 424 return 0; 425 426 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 427 if (!AddrLoc) 428 return 0; 429 430 const PointerType *ArgTy = 431 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 432 SVal Val = State->getSVal(*AddrLoc, 433 ArgTy ? ArgTy->getPointeeType(): QualType()); 434 return Val.getAsSymbol(); 435} 436 437ProgramStateRef 438GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 439 CheckerContext &C) const { 440 ProgramStateRef State = C.getState(); 441 442 // Check for taint in arguments. 443 bool IsTainted = false; 444 for (ArgVector::const_iterator I = SrcArgs.begin(), 445 E = SrcArgs.end(); I != E; ++I) { 446 unsigned ArgNum = *I; 447 448 if (ArgNum == InvalidArgIndex) { 449 // Check if any of the arguments is tainted, but skip the 450 // destination arguments. 451 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 452 if (isDestinationArgument(i)) 453 continue; 454 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 455 break; 456 } 457 break; 458 } 459 460 assert(ArgNum < CE->getNumArgs()); 461 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 462 break; 463 } 464 if (!IsTainted) 465 return State; 466 467 // Mark the arguments which should be tainted after the function returns. 468 for (ArgVector::const_iterator I = DstArgs.begin(), 469 E = DstArgs.end(); I != E; ++I) { 470 unsigned ArgNum = *I; 471 472 // Should we mark all arguments as tainted? 473 if (ArgNum == InvalidArgIndex) { 474 // For all pointer and references that were passed in: 475 // If they are not pointing to const data, mark data as tainted. 476 // TODO: So far we are just going one level down; ideally we'd need to 477 // recurse here. 478 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 479 const Expr *Arg = CE->getArg(i); 480 // Process pointer argument. 481 const Type *ArgTy = Arg->getType().getTypePtr(); 482 QualType PType = ArgTy->getPointeeType(); 483 if ((!PType.isNull() && !PType.isConstQualified()) 484 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 485 State = State->add<TaintArgsOnPostVisit>(i); 486 } 487 continue; 488 } 489 490 // Should mark the return value? 491 if (ArgNum == ReturnValueIndex) { 492 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 493 continue; 494 } 495 496 // Mark the given argument. 497 assert(ArgNum < CE->getNumArgs()); 498 State = State->add<TaintArgsOnPostVisit>(ArgNum); 499 } 500 501 return State; 502} 503 504 505// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 506// and arg 1 should get taint. 507ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 508 CheckerContext &C) const { 509 assert(CE->getNumArgs() >= 2); 510 ProgramStateRef State = C.getState(); 511 512 // Check is the file descriptor is tainted. 513 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 514 isStdin(CE->getArg(0), C)) { 515 // All arguments except for the first two should get taint. 516 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 517 State = State->add<TaintArgsOnPostVisit>(i); 518 return State; 519 } 520 521 return 0; 522} 523 524 525// If argument 0(protocol domain) is network, the return value should get taint. 526ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 527 CheckerContext &C) const { 528 assert(CE->getNumArgs() >= 3); 529 ProgramStateRef State = C.getState(); 530 531 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 532 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 533 // White list the internal communication protocols. 534 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 535 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 536 return State; 537 State = State->addTaint(CE, C.getLocationContext()); 538 return State; 539} 540 541ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 542 CheckerContext &C) const { 543 ProgramStateRef State = C.getState(); 544 assert(CE->getNumArgs() >= 2); 545 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); 546 // All arguments except for the very first one should get taint. 547 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 548 // The arguments are pointer arguments. The data they are pointing at is 549 // tainted after the call. 550 const Expr* Arg = CE->getArg(i); 551 SymbolRef Sym = getPointedToSymbol(C, Arg); 552 if (Sym) 553 State = State->addTaint(Sym); 554 } 555 return State; 556} 557 558ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 559 CheckerContext &C) const { 560 return C.getState()->addTaint(CE, C.getLocationContext()); 561} 562 563bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 564 ProgramStateRef State = C.getState(); 565 SVal Val = State->getSVal(E, C.getLocationContext()); 566 567 // stdin is a pointer, so it would be a region. 568 const MemRegion *MemReg = Val.getAsRegion(); 569 570 // The region should be symbolic, we do not know it's value. 571 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 572 if (!SymReg) 573 return false; 574 575 // Get it's symbol and find the declaration region it's pointing to. 576 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 577 if (!Sm) 578 return false; 579 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 580 if (!DeclReg) 581 return false; 582 583 // This region corresponds to a declaration, find out if it's a global/extern 584 // variable named stdin with the proper type. 585 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 586 D = D->getCanonicalDecl(); 587 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 588 if (const PointerType * PtrTy = 589 dyn_cast<PointerType>(D->getType().getTypePtr())) 590 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 591 return true; 592 } 593 return false; 594} 595 596static bool getPrintfFormatArgumentNum(const CallExpr *CE, 597 const CheckerContext &C, 598 unsigned int &ArgNum) { 599 // Find if the function contains a format string argument. 600 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 601 // vsnprintf, syslog, custom annotated functions. 602 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 603 if (!FDecl) 604 return false; 605 for (specific_attr_iterator<FormatAttr> 606 i = FDecl->specific_attr_begin<FormatAttr>(), 607 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 608 609 const FormatAttr *Format = *i; 610 ArgNum = Format->getFormatIdx() - 1; 611 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 612 return true; 613 } 614 615 // Or if a function is named setproctitle (this is a heuristic). 616 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 617 ArgNum = 0; 618 return true; 619 } 620 621 return false; 622} 623 624bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 625 const char Msg[], 626 CheckerContext &C) const { 627 assert(E); 628 629 // Check for taint. 630 ProgramStateRef State = C.getState(); 631 if (!State->isTainted(getPointedToSymbol(C, E)) && 632 !State->isTainted(E, C.getLocationContext())) 633 return false; 634 635 // Generate diagnostic. 636 if (ExplodedNode *N = C.addTransition()) { 637 initBugType(); 638 BugReport *report = new BugReport(*BT, Msg, N); 639 report->addRange(E->getSourceRange()); 640 C.EmitReport(report); 641 return true; 642 } 643 return false; 644} 645 646bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 647 CheckerContext &C) const{ 648 // Check if the function contains a format string argument. 649 unsigned int ArgNum = 0; 650 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 651 return false; 652 653 // If either the format string content or the pointer itself are tainted, warn. 654 if (generateReportIfTainted(CE->getArg(ArgNum), 655 MsgUncontrolledFormatString, C)) 656 return true; 657 return false; 658} 659 660bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 661 StringRef Name, 662 CheckerContext &C) const { 663 // TODO: It might make sense to run this check on demand. In some cases, 664 // we should check if the environment has been cleansed here. We also might 665 // need to know if the user was reset before these calls(seteuid). 666 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 667 .Case("system", 0) 668 .Case("popen", 0) 669 .Case("execl", 0) 670 .Case("execle", 0) 671 .Case("execlp", 0) 672 .Case("execv", 0) 673 .Case("execvp", 0) 674 .Case("execvP", 0) 675 .Case("execve", 0) 676 .Case("dlopen", 0) 677 .Default(UINT_MAX); 678 679 if (ArgNum == UINT_MAX) 680 return false; 681 682 if (generateReportIfTainted(CE->getArg(ArgNum), 683 MsgSanitizeSystemArgs, C)) 684 return true; 685 686 return false; 687} 688 689// TODO: Should this check be a part of the CString checker? 690// If yes, should taint be a global setting? 691bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 692 const FunctionDecl *FDecl, 693 CheckerContext &C) const { 694 // If the function has a buffer size argument, set ArgNum. 695 unsigned ArgNum = InvalidArgIndex; 696 unsigned BId = 0; 697 if ( (BId = FDecl->getMemoryFunctionKind()) ) 698 switch(BId) { 699 case Builtin::BImemcpy: 700 case Builtin::BImemmove: 701 case Builtin::BIstrncpy: 702 ArgNum = 2; 703 break; 704 case Builtin::BIstrndup: 705 ArgNum = 1; 706 break; 707 default: 708 break; 709 }; 710 711 if (ArgNum == InvalidArgIndex) { 712 if (C.isCLibraryFunction(FDecl, "malloc") || 713 C.isCLibraryFunction(FDecl, "calloc") || 714 C.isCLibraryFunction(FDecl, "alloca")) 715 ArgNum = 0; 716 else if (C.isCLibraryFunction(FDecl, "memccpy")) 717 ArgNum = 3; 718 else if (C.isCLibraryFunction(FDecl, "realloc")) 719 ArgNum = 1; 720 else if (C.isCLibraryFunction(FDecl, "bcopy")) 721 ArgNum = 2; 722 } 723 724 if (ArgNum != InvalidArgIndex && 725 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 726 return true; 727 728 return false; 729} 730 731void ento::registerGenericTaintChecker(CheckerManager &mgr) { 732 mgr.registerChecker<GenericTaintChecker>(); 733} 734