// GenericTaintChecker.cpp revision cfa88f893915ceb8ae4ce2f17c46c24a4d67502f
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/AST/Attr.h" 19#include "clang/Basic/Builtins.h" 20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21#include "clang/StaticAnalyzer/Core/Checker.h" 22#include "clang/StaticAnalyzer/Core/CheckerManager.h" 23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 25#include <climits> 26 27using namespace clang; 28using namespace ento; 29 30namespace { 31class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 32 check::PreStmt<CallExpr> > { 33public: 34 static void *getTag() { static int Tag; return &Tag; } 35 36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 37 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 38 39 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 40 41private: 42 static const unsigned InvalidArgIndex = UINT_MAX; 43 /// Denotes the return vale. 44 static const unsigned ReturnValueIndex = UINT_MAX - 1; 45 46 mutable OwningPtr<BugType> BT; 47 inline void initBugType() const { 48 if (!BT) 49 BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data")); 50 } 51 52 /// \brief Catch taint related bugs. 
Check if tainted data is passed to a 53 /// system call etc. 54 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 55 56 /// \brief Add taint sources on a pre-visit. 57 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 58 59 /// \brief Propagate taint generated at pre-visit. 60 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 61 62 /// \brief Add taint sources on a post visit. 63 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 64 65 /// Check if the region the expression evaluates to is the standard input, 66 /// and thus, is tainted. 67 static bool isStdin(const Expr *E, CheckerContext &C); 68 69 /// \brief Given a pointer argument, get the symbol of the value it contains 70 /// (points to). 71 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 72 73 /// Functions defining the attack surface. 74 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 75 CheckerContext &C) const; 76 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 77 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 78 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 79 80 /// Taint the scanned input if the file is tainted. 81 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 82 83 /// Check for CWE-134: Uncontrolled Format String. 84 static const char MsgUncontrolledFormatString[]; 85 bool checkUncontrolledFormatString(const CallExpr *CE, 86 CheckerContext &C) const; 87 88 /// Check for: 89 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 90 /// CWE-78, "Failure to Sanitize Data into an OS Command" 91 static const char MsgSanitizeSystemArgs[]; 92 bool checkSystemCall(const CallExpr *CE, StringRef Name, 93 CheckerContext &C) const; 94 95 /// Check if tainted data is used as a buffer size ins strn.. functions, 96 /// and allocators. 
97 static const char MsgTaintedBufferSize[]; 98 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 99 CheckerContext &C) const; 100 101 /// Generate a report if the expression is tainted or points to tainted data. 102 bool generateReportIfTainted(const Expr *E, const char Msg[], 103 CheckerContext &C) const; 104 105 106 typedef SmallVector<unsigned, 2> ArgVector; 107 108 /// \brief A struct used to specify taint propagation rules for a function. 109 /// 110 /// If any of the possible taint source arguments is tainted, all of the 111 /// destination arguments should also be tainted. Use InvalidArgIndex in the 112 /// src list to specify that all of the arguments can introduce taint. Use 113 /// InvalidArgIndex in the dst arguments to signify that all the non-const 114 /// pointer and reference arguments might be tainted on return. If 115 /// ReturnValueIndex is added to the dst list, the return value will be 116 /// tainted. 117 struct TaintPropagationRule { 118 /// List of arguments which can be taint sources and should be checked. 119 ArgVector SrcArgs; 120 /// List of arguments which should be tainted on function return. 121 ArgVector DstArgs; 122 // TODO: Check if using other data structures would be more optimal. 123 124 TaintPropagationRule() {} 125 126 TaintPropagationRule(unsigned SArg, 127 unsigned DArg, bool TaintRet = false) { 128 SrcArgs.push_back(SArg); 129 DstArgs.push_back(DArg); 130 if (TaintRet) 131 DstArgs.push_back(ReturnValueIndex); 132 } 133 134 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 135 unsigned DArg, bool TaintRet = false) { 136 SrcArgs.push_back(SArg1); 137 SrcArgs.push_back(SArg2); 138 DstArgs.push_back(DArg); 139 if (TaintRet) 140 DstArgs.push_back(ReturnValueIndex); 141 } 142 143 /// Get the propagation rule for a given function. 
144 static TaintPropagationRule 145 getTaintPropagationRule(const FunctionDecl *FDecl, 146 StringRef Name, 147 CheckerContext &C); 148 149 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 150 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 151 152 inline bool isNull() const { return SrcArgs.empty(); } 153 154 inline bool isDestinationArgument(unsigned ArgNum) const { 155 return (std::find(DstArgs.begin(), 156 DstArgs.end(), ArgNum) != DstArgs.end()); 157 } 158 159 static inline bool isTaintedOrPointsToTainted(const Expr *E, 160 ProgramStateRef State, 161 CheckerContext &C) { 162 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 163 (E->getType().getTypePtr()->isPointerType() && 164 State->isTainted(getPointedToSymbol(C, E)))); 165 } 166 167 /// \brief Pre-process a function which propagates taint according to the 168 /// taint rule. 169 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 170 171 }; 172}; 173 174const unsigned GenericTaintChecker::ReturnValueIndex; 175const unsigned GenericTaintChecker::InvalidArgIndex; 176 177const char GenericTaintChecker::MsgUncontrolledFormatString[] = 178 "Untrusted data is used as a format string " 179 "(CWE-134: Uncontrolled Format String)"; 180 181const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 182 "Untrusted data is passed to a system call " 183 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 184 185const char GenericTaintChecker::MsgTaintedBufferSize[] = 186 "Untrusted data is used to specify the buffer size " 187 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 188 "character data and the null terminator)"; 189 190} // end of anonymous namespace 191 192/// A set which is used to pass information from call pre-visit instruction 193/// to the call post-visit. 
The values are unsigned integers, which are either 194/// ReturnValueIndex, or indexes of the pointer/reference argument, which 195/// points to data, which should be tainted on return. 196REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 197 198GenericTaintChecker::TaintPropagationRule 199GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 200 const FunctionDecl *FDecl, 201 StringRef Name, 202 CheckerContext &C) { 203 // TODO: Currently, we might loose precision here: we always mark a return 204 // value as tainted even if it's just a pointer, pointing to tainted data. 205 206 // Check for exact name match for functions without builtin substitutes. 207 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 208 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 209 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 210 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 211 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 212 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 213 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 214 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 215 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 216 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 217 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 218 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 219 .Case("read", TaintPropagationRule(0, 2, 1, true)) 220 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 221 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 222 .Case("fgets", TaintPropagationRule(2, 0, true)) 223 .Case("getline", TaintPropagationRule(2, 0)) 224 .Case("getdelim", TaintPropagationRule(3, 0)) 225 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 226 .Default(TaintPropagationRule()); 227 228 if (!Rule.isNull()) 229 return Rule; 230 231 // Check if it's one of the memory 
setting/copying functions. 232 // This check is specialized but faster then calling isCLibraryFunction. 233 unsigned BId = 0; 234 if ( (BId = FDecl->getMemoryFunctionKind()) ) 235 switch(BId) { 236 case Builtin::BImemcpy: 237 case Builtin::BImemmove: 238 case Builtin::BIstrncpy: 239 case Builtin::BIstrncat: 240 return TaintPropagationRule(1, 2, 0, true); 241 case Builtin::BIstrlcpy: 242 case Builtin::BIstrlcat: 243 return TaintPropagationRule(1, 2, 0, false); 244 case Builtin::BIstrndup: 245 return TaintPropagationRule(0, 1, ReturnValueIndex); 246 247 default: 248 break; 249 }; 250 251 // Process all other functions which could be defined as builtins. 252 if (Rule.isNull()) { 253 if (C.isCLibraryFunction(FDecl, "snprintf") || 254 C.isCLibraryFunction(FDecl, "sprintf")) 255 return TaintPropagationRule(InvalidArgIndex, 0, true); 256 else if (C.isCLibraryFunction(FDecl, "strcpy") || 257 C.isCLibraryFunction(FDecl, "stpcpy") || 258 C.isCLibraryFunction(FDecl, "strcat")) 259 return TaintPropagationRule(1, 0, true); 260 else if (C.isCLibraryFunction(FDecl, "bcopy")) 261 return TaintPropagationRule(0, 2, 1, false); 262 else if (C.isCLibraryFunction(FDecl, "strdup") || 263 C.isCLibraryFunction(FDecl, "strdupa")) 264 return TaintPropagationRule(0, ReturnValueIndex); 265 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 266 return TaintPropagationRule(0, ReturnValueIndex); 267 } 268 269 // Skipping the following functions, since they might be used for cleansing 270 // or smart memory copy: 271 // - memccpy - copying until hitting a special character. 272 273 return TaintPropagationRule(); 274} 275 276void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 277 CheckerContext &C) const { 278 // Check for errors first. 279 if (checkPre(CE, C)) 280 return; 281 282 // Add taint second. 
283 addSourcesPre(CE, C); 284} 285 286void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 287 CheckerContext &C) const { 288 if (propagateFromPre(CE, C)) 289 return; 290 addSourcesPost(CE, C); 291} 292 293void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 294 CheckerContext &C) const { 295 ProgramStateRef State = 0; 296 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 297 if (!FDecl || FDecl->getKind() != Decl::Function) 298 return; 299 300 StringRef Name = C.getCalleeName(FDecl); 301 if (Name.empty()) 302 return; 303 304 // First, try generating a propagation rule for this function. 305 TaintPropagationRule Rule = 306 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 307 if (!Rule.isNull()) { 308 State = Rule.process(CE, C); 309 if (!State) 310 return; 311 C.addTransition(State); 312 return; 313 } 314 315 // Otherwise, check if we have custom pre-processing implemented. 316 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 317 .Case("fscanf", &GenericTaintChecker::preFscanf) 318 .Default(0); 319 // Check and evaluate the call. 320 if (evalFunction) 321 State = (this->*evalFunction)(CE, C); 322 if (!State) 323 return; 324 C.addTransition(State); 325 326} 327 328bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 329 CheckerContext &C) const { 330 ProgramStateRef State = C.getState(); 331 332 // Depending on what was tainted at pre-visit, we determined a set of 333 // arguments which should be tainted after the function returns. These are 334 // stored in the state as TaintArgsOnPostVisit set. 335 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 336 if (TaintArgs.isEmpty()) 337 return false; 338 339 for (llvm::ImmutableSet<unsigned>::iterator 340 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 341 unsigned ArgNum = *I; 342 343 // Special handling for the tainted return value. 
344 if (ArgNum == ReturnValueIndex) { 345 State = State->addTaint(CE, C.getLocationContext()); 346 continue; 347 } 348 349 // The arguments are pointer arguments. The data they are pointing at is 350 // tainted after the call. 351 if (CE->getNumArgs() < (ArgNum + 1)) 352 return false; 353 const Expr* Arg = CE->getArg(ArgNum); 354 SymbolRef Sym = getPointedToSymbol(C, Arg); 355 if (Sym) 356 State = State->addTaint(Sym); 357 } 358 359 // Clear up the taint info from the state. 360 State = State->remove<TaintArgsOnPostVisit>(); 361 362 if (State != C.getState()) { 363 C.addTransition(State); 364 return true; 365 } 366 return false; 367} 368 369void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 370 CheckerContext &C) const { 371 // Define the attack surface. 372 // Set the evaluation function by switching on the callee name. 373 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 374 if (!FDecl || FDecl->getKind() != Decl::Function) 375 return; 376 377 StringRef Name = C.getCalleeName(FDecl); 378 if (Name.empty()) 379 return; 380 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 381 .Case("scanf", &GenericTaintChecker::postScanf) 382 // TODO: Add support for vfscanf & family. 383 .Case("getchar", &GenericTaintChecker::postRetTaint) 384 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 385 .Case("getenv", &GenericTaintChecker::postRetTaint) 386 .Case("fopen", &GenericTaintChecker::postRetTaint) 387 .Case("fdopen", &GenericTaintChecker::postRetTaint) 388 .Case("freopen", &GenericTaintChecker::postRetTaint) 389 .Case("getch", &GenericTaintChecker::postRetTaint) 390 .Case("wgetch", &GenericTaintChecker::postRetTaint) 391 .Case("socket", &GenericTaintChecker::postSocket) 392 .Default(0); 393 394 // If the callee isn't defined, it is not of security concern. 395 // Check and evaluate the call. 
396 ProgramStateRef State = 0; 397 if (evalFunction) 398 State = (this->*evalFunction)(CE, C); 399 if (!State) 400 return; 401 402 C.addTransition(State); 403} 404 405bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 406 407 if (checkUncontrolledFormatString(CE, C)) 408 return true; 409 410 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 411 if (!FDecl || FDecl->getKind() != Decl::Function) 412 return false; 413 414 StringRef Name = C.getCalleeName(FDecl); 415 if (Name.empty()) 416 return false; 417 418 if (checkSystemCall(CE, Name, C)) 419 return true; 420 421 if (checkTaintedBufferSize(CE, FDecl, C)) 422 return true; 423 424 return false; 425} 426 427SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 428 const Expr* Arg) { 429 ProgramStateRef State = C.getState(); 430 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 431 if (AddrVal.isUnknownOrUndef()) 432 return 0; 433 434 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 435 if (!AddrLoc) 436 return 0; 437 438 const PointerType *ArgTy = 439 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 440 SVal Val = State->getSVal(*AddrLoc, 441 ArgTy ? ArgTy->getPointeeType(): QualType()); 442 return Val.getAsSymbol(); 443} 444 445ProgramStateRef 446GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 447 CheckerContext &C) const { 448 ProgramStateRef State = C.getState(); 449 450 // Check for taint in arguments. 451 bool IsTainted = false; 452 for (ArgVector::const_iterator I = SrcArgs.begin(), 453 E = SrcArgs.end(); I != E; ++I) { 454 unsigned ArgNum = *I; 455 456 if (ArgNum == InvalidArgIndex) { 457 // Check if any of the arguments is tainted, but skip the 458 // destination arguments. 
459 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 460 if (isDestinationArgument(i)) 461 continue; 462 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 463 break; 464 } 465 break; 466 } 467 468 if (CE->getNumArgs() < (ArgNum + 1)) 469 return State; 470 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 471 break; 472 } 473 if (!IsTainted) 474 return State; 475 476 // Mark the arguments which should be tainted after the function returns. 477 for (ArgVector::const_iterator I = DstArgs.begin(), 478 E = DstArgs.end(); I != E; ++I) { 479 unsigned ArgNum = *I; 480 481 // Should we mark all arguments as tainted? 482 if (ArgNum == InvalidArgIndex) { 483 // For all pointer and references that were passed in: 484 // If they are not pointing to const data, mark data as tainted. 485 // TODO: So far we are just going one level down; ideally we'd need to 486 // recurse here. 487 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 488 const Expr *Arg = CE->getArg(i); 489 // Process pointer argument. 490 const Type *ArgTy = Arg->getType().getTypePtr(); 491 QualType PType = ArgTy->getPointeeType(); 492 if ((!PType.isNull() && !PType.isConstQualified()) 493 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 494 State = State->add<TaintArgsOnPostVisit>(i); 495 } 496 continue; 497 } 498 499 // Should mark the return value? 500 if (ArgNum == ReturnValueIndex) { 501 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 502 continue; 503 } 504 505 // Mark the given argument. 506 assert(ArgNum < CE->getNumArgs()); 507 State = State->add<TaintArgsOnPostVisit>(ArgNum); 508 } 509 510 return State; 511} 512 513 514// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 515// and arg 1 should get taint. 
516ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 517 CheckerContext &C) const { 518 assert(CE->getNumArgs() >= 2); 519 ProgramStateRef State = C.getState(); 520 521 // Check is the file descriptor is tainted. 522 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 523 isStdin(CE->getArg(0), C)) { 524 // All arguments except for the first two should get taint. 525 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 526 State = State->add<TaintArgsOnPostVisit>(i); 527 return State; 528 } 529 530 return 0; 531} 532 533 534// If argument 0(protocol domain) is network, the return value should get taint. 535ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 536 CheckerContext &C) const { 537 ProgramStateRef State = C.getState(); 538 if (CE->getNumArgs() < 3) 539 return State; 540 541 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 542 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 543 // White list the internal communication protocols. 544 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 545 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 546 return State; 547 State = State->addTaint(CE, C.getLocationContext()); 548 return State; 549} 550 551ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 552 CheckerContext &C) const { 553 ProgramStateRef State = C.getState(); 554 if (CE->getNumArgs() < 2) 555 return State; 556 557 // All arguments except for the very first one should get taint. 558 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 559 // The arguments are pointer arguments. The data they are pointing at is 560 // tainted after the call. 
561 const Expr* Arg = CE->getArg(i); 562 SymbolRef Sym = getPointedToSymbol(C, Arg); 563 if (Sym) 564 State = State->addTaint(Sym); 565 } 566 return State; 567} 568 569ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 570 CheckerContext &C) const { 571 return C.getState()->addTaint(CE, C.getLocationContext()); 572} 573 574bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 575 ProgramStateRef State = C.getState(); 576 SVal Val = State->getSVal(E, C.getLocationContext()); 577 578 // stdin is a pointer, so it would be a region. 579 const MemRegion *MemReg = Val.getAsRegion(); 580 581 // The region should be symbolic, we do not know it's value. 582 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 583 if (!SymReg) 584 return false; 585 586 // Get it's symbol and find the declaration region it's pointing to. 587 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 588 if (!Sm) 589 return false; 590 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 591 if (!DeclReg) 592 return false; 593 594 // This region corresponds to a declaration, find out if it's a global/extern 595 // variable named stdin with the proper type. 596 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 597 D = D->getCanonicalDecl(); 598 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 599 if (const PointerType * PtrTy = 600 dyn_cast<PointerType>(D->getType().getTypePtr())) 601 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 602 return true; 603 } 604 return false; 605} 606 607static bool getPrintfFormatArgumentNum(const CallExpr *CE, 608 const CheckerContext &C, 609 unsigned int &ArgNum) { 610 // Find if the function contains a format string argument. 611 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 612 // vsnprintf, syslog, custom annotated functions. 
613 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 614 if (!FDecl) 615 return false; 616 for (specific_attr_iterator<FormatAttr> 617 i = FDecl->specific_attr_begin<FormatAttr>(), 618 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 619 620 const FormatAttr *Format = *i; 621 ArgNum = Format->getFormatIdx() - 1; 622 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 623 return true; 624 } 625 626 // Or if a function is named setproctitle (this is a heuristic). 627 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 628 ArgNum = 0; 629 return true; 630 } 631 632 return false; 633} 634 635bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 636 const char Msg[], 637 CheckerContext &C) const { 638 assert(E); 639 640 // Check for taint. 641 ProgramStateRef State = C.getState(); 642 if (!State->isTainted(getPointedToSymbol(C, E)) && 643 !State->isTainted(E, C.getLocationContext())) 644 return false; 645 646 // Generate diagnostic. 647 if (ExplodedNode *N = C.addTransition()) { 648 initBugType(); 649 BugReport *report = new BugReport(*BT, Msg, N); 650 report->addRange(E->getSourceRange()); 651 C.emitReport(report); 652 return true; 653 } 654 return false; 655} 656 657bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 658 CheckerContext &C) const{ 659 // Check if the function contains a format string argument. 660 unsigned int ArgNum = 0; 661 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 662 return false; 663 664 // If either the format string content or the pointer itself are tainted, warn. 665 if (generateReportIfTainted(CE->getArg(ArgNum), 666 MsgUncontrolledFormatString, C)) 667 return true; 668 return false; 669} 670 671bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 672 StringRef Name, 673 CheckerContext &C) const { 674 // TODO: It might make sense to run this check on demand. In some cases, 675 // we should check if the environment has been cleansed here. 
We also might 676 // need to know if the user was reset before these calls(seteuid). 677 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 678 .Case("system", 0) 679 .Case("popen", 0) 680 .Case("execl", 0) 681 .Case("execle", 0) 682 .Case("execlp", 0) 683 .Case("execv", 0) 684 .Case("execvp", 0) 685 .Case("execvP", 0) 686 .Case("execve", 0) 687 .Case("dlopen", 0) 688 .Default(UINT_MAX); 689 690 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 691 return false; 692 693 if (generateReportIfTainted(CE->getArg(ArgNum), 694 MsgSanitizeSystemArgs, C)) 695 return true; 696 697 return false; 698} 699 700// TODO: Should this check be a part of the CString checker? 701// If yes, should taint be a global setting? 702bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 703 const FunctionDecl *FDecl, 704 CheckerContext &C) const { 705 // If the function has a buffer size argument, set ArgNum. 706 unsigned ArgNum = InvalidArgIndex; 707 unsigned BId = 0; 708 if ( (BId = FDecl->getMemoryFunctionKind()) ) 709 switch(BId) { 710 case Builtin::BImemcpy: 711 case Builtin::BImemmove: 712 case Builtin::BIstrncpy: 713 ArgNum = 2; 714 break; 715 case Builtin::BIstrndup: 716 ArgNum = 1; 717 break; 718 default: 719 break; 720 }; 721 722 if (ArgNum == InvalidArgIndex) { 723 if (C.isCLibraryFunction(FDecl, "malloc") || 724 C.isCLibraryFunction(FDecl, "calloc") || 725 C.isCLibraryFunction(FDecl, "alloca")) 726 ArgNum = 0; 727 else if (C.isCLibraryFunction(FDecl, "memccpy")) 728 ArgNum = 3; 729 else if (C.isCLibraryFunction(FDecl, "realloc")) 730 ArgNum = 1; 731 else if (C.isCLibraryFunction(FDecl, "bcopy")) 732 ArgNum = 2; 733 } 734 735 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 736 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 737 return true; 738 739 return false; 740} 741 742void ento::registerGenericTaintChecker(CheckerManager &mgr) { 743 mgr.registerChecker<GenericTaintChecker>(); 744} 745