GenericTaintChecker.cpp revision 5ef6e94b294cc47750d8ab220858a36726caba59
1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/StaticAnalyzer/Core/Checker.h" 19#include "clang/StaticAnalyzer/Core/CheckerManager.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 21#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 22#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 23#include "clang/Basic/Builtins.h" 24#include <climits> 25 26using namespace clang; 27using namespace ento; 28 29namespace { 30class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 31 check::PreStmt<CallExpr> > { 32public: 33 static void *getTag() { static int Tag; return &Tag; } 34 35 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 36 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable OwningPtr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data")); 49 } 50 51 /// \brief Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// \brief Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// \brief Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// \brief Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// \brief Given a pointer argument, get the symbol of the value it contains 69 /// (points to). 70 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 71 72 /// Functions defining the attack surface. 73 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 74 CheckerContext &C) const; 75 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 76 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 77 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 78 79 /// Taint the scanned input if the file is tainted. 80 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 81 82 /// Check for CWE-134: Uncontrolled Format String. 83 static const char MsgUncontrolledFormatString[]; 84 bool checkUncontrolledFormatString(const CallExpr *CE, 85 CheckerContext &C) const; 86 87 /// Check for: 88 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 89 /// CWE-78, "Failure to Sanitize Data into an OS Command" 90 static const char MsgSanitizeSystemArgs[]; 91 bool checkSystemCall(const CallExpr *CE, StringRef Name, 92 CheckerContext &C) const; 93 94 /// Check if tainted data is used as a buffer size ins strn.. functions, 95 /// and allocators. 96 static const char MsgTaintedBufferSize[]; 97 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 98 CheckerContext &C) const; 99 100 /// Generate a report if the expression is tainted or points to tainted data. 101 bool generateReportIfTainted(const Expr *E, const char Msg[], 102 CheckerContext &C) const; 103 104 105 typedef llvm::SmallVector<unsigned, 2> ArgVector; 106 107 /// \brief A struct used to specify taint propagation rules for a function. 108 /// 109 /// If any of the possible taint source arguments is tainted, all of the 110 /// destination arguments should also be tainted. Use InvalidArgIndex in the 111 /// src list to specify that all of the arguments can introduce taint. Use 112 /// InvalidArgIndex in the dst arguments to signify that all the non-const 113 /// pointer and reference arguments might be tainted on return. If 114 /// ReturnValueIndex is added to the dst list, the return value will be 115 /// tainted. 116 struct TaintPropagationRule { 117 /// List of arguments which can be taint sources and should be checked. 118 ArgVector SrcArgs; 119 /// List of arguments which should be tainted on function return. 120 ArgVector DstArgs; 121 // TODO: Check if using other data structures would be more optimal. 122 123 TaintPropagationRule() {} 124 125 TaintPropagationRule(unsigned SArg, 126 unsigned DArg, bool TaintRet = false) { 127 SrcArgs.push_back(SArg); 128 DstArgs.push_back(DArg); 129 if (TaintRet) 130 DstArgs.push_back(ReturnValueIndex); 131 } 132 133 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 134 unsigned DArg, bool TaintRet = false) { 135 SrcArgs.push_back(SArg1); 136 SrcArgs.push_back(SArg2); 137 DstArgs.push_back(DArg); 138 if (TaintRet) 139 DstArgs.push_back(ReturnValueIndex); 140 } 141 142 /// Get the propagation rule for a given function. 143 static TaintPropagationRule 144 getTaintPropagationRule(const FunctionDecl *FDecl, 145 StringRef Name, 146 CheckerContext &C); 147 148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 150 151 inline bool isNull() const { return SrcArgs.empty(); } 152 153 inline bool isDestinationArgument(unsigned ArgNum) const { 154 return (std::find(DstArgs.begin(), 155 DstArgs.end(), ArgNum) != DstArgs.end()); 156 } 157 158 static inline bool isTaintedOrPointsToTainted(const Expr *E, 159 ProgramStateRef State, 160 CheckerContext &C) { 161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 162 (E->getType().getTypePtr()->isPointerType() && 163 State->isTainted(getPointedToSymbol(C, E)))); 164 } 165 166 /// \brief Pre-process a function which propagates taint according to the 167 /// taint rule. 168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 169 170 }; 171}; 172 173const unsigned GenericTaintChecker::ReturnValueIndex; 174const unsigned GenericTaintChecker::InvalidArgIndex; 175 176const char GenericTaintChecker::MsgUncontrolledFormatString[] = 177 "Untrusted data is used as a format string " 178 "(CWE-134: Uncontrolled Format String)"; 179 180const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 181 "Untrusted data is passed to a system call " 182 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 183 184const char GenericTaintChecker::MsgTaintedBufferSize[] = 185 "Untrusted data is used to specify the buffer size " 186 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 187 "character data and the null terminator)"; 188 189} // end of anonymous namespace 190 191/// A set which is used to pass information from call pre-visit instruction 192/// to the call post-visit. The values are unsigned integers, which are either 193/// ReturnValueIndex, or indexes of the pointer/reference argument, which 194/// points to data, which should be tainted on return. 195namespace { struct TaintArgsOnPostVisit{}; } 196namespace clang { namespace ento { 197template<> struct ProgramStateTrait<TaintArgsOnPostVisit> 198 : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > { 199 static void *GDMIndex() { return GenericTaintChecker::getTag(); } 200}; 201}} 202 203GenericTaintChecker::TaintPropagationRule 204GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 205 const FunctionDecl *FDecl, 206 StringRef Name, 207 CheckerContext &C) { 208 // TODO: Currently, we might loose precision here: we always mark a return 209 // value as tainted even if it's just a pointer, pointing to tainted data. 210 211 // Check for exact name match for functions without builtin substitutes. 212 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 213 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 214 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 215 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 216 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 217 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 218 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 219 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 220 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 221 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 222 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 223 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 224 .Case("read", TaintPropagationRule(0, 2, 1, true)) 225 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 226 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 227 .Case("fgets", TaintPropagationRule(2, 0, true)) 228 .Case("getline", TaintPropagationRule(2, 0)) 229 .Case("getdelim", TaintPropagationRule(3, 0)) 230 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 231 .Default(TaintPropagationRule()); 232 233 if (!Rule.isNull()) 234 return Rule; 235 236 // Check if it's one of the memory setting/copying functions. 237 // This check is specialized but faster then calling isCLibraryFunction. 238 unsigned BId = 0; 239 if ( (BId = FDecl->getMemoryFunctionKind()) ) 240 switch(BId) { 241 case Builtin::BImemcpy: 242 case Builtin::BImemmove: 243 case Builtin::BIstrncpy: 244 case Builtin::BIstrncat: 245 return TaintPropagationRule(1, 2, 0, true); 246 case Builtin::BIstrlcpy: 247 case Builtin::BIstrlcat: 248 return TaintPropagationRule(1, 2, 0, false); 249 case Builtin::BIstrndup: 250 return TaintPropagationRule(0, 1, ReturnValueIndex); 251 252 default: 253 break; 254 }; 255 256 // Process all other functions which could be defined as builtins. 257 if (Rule.isNull()) { 258 if (C.isCLibraryFunction(FDecl, "snprintf") || 259 C.isCLibraryFunction(FDecl, "sprintf")) 260 return TaintPropagationRule(InvalidArgIndex, 0, true); 261 else if (C.isCLibraryFunction(FDecl, "strcpy") || 262 C.isCLibraryFunction(FDecl, "stpcpy") || 263 C.isCLibraryFunction(FDecl, "strcat")) 264 return TaintPropagationRule(1, 0, true); 265 else if (C.isCLibraryFunction(FDecl, "bcopy")) 266 return TaintPropagationRule(0, 2, 1, false); 267 else if (C.isCLibraryFunction(FDecl, "strdup") || 268 C.isCLibraryFunction(FDecl, "strdupa")) 269 return TaintPropagationRule(0, ReturnValueIndex); 270 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 271 return TaintPropagationRule(0, ReturnValueIndex); 272 } 273 274 // Skipping the following functions, since they might be used for cleansing 275 // or smart memory copy: 276 // - memccpy - copying until hitting a special character. 277 278 return TaintPropagationRule(); 279} 280 281void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 282 CheckerContext &C) const { 283 // Check for errors first. 284 if (checkPre(CE, C)) 285 return; 286 287 // Add taint second. 288 addSourcesPre(CE, C); 289} 290 291void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 292 CheckerContext &C) const { 293 if (propagateFromPre(CE, C)) 294 return; 295 addSourcesPost(CE, C); 296} 297 298void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 299 CheckerContext &C) const { 300 ProgramStateRef State = 0; 301 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 302 if (!FDecl || FDecl->getKind() != Decl::Function) 303 return; 304 305 StringRef Name = C.getCalleeName(FDecl); 306 if (Name.empty()) 307 return; 308 309 // First, try generating a propagation rule for this function. 310 TaintPropagationRule Rule = 311 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 312 if (!Rule.isNull()) { 313 State = Rule.process(CE, C); 314 if (!State) 315 return; 316 C.addTransition(State); 317 return; 318 } 319 320 // Otherwise, check if we have custom pre-processing implemented. 321 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 322 .Case("fscanf", &GenericTaintChecker::preFscanf) 323 .Default(0); 324 // Check and evaluate the call. 325 if (evalFunction) 326 State = (this->*evalFunction)(CE, C); 327 if (!State) 328 return; 329 C.addTransition(State); 330 331} 332 333bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 334 CheckerContext &C) const { 335 ProgramStateRef State = C.getState(); 336 337 // Depending on what was tainted at pre-visit, we determined a set of 338 // arguments which should be tainted after the function returns. These are 339 // stored in the state as TaintArgsOnPostVisit set. 340 llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>(); 341 if (TaintArgs.isEmpty()) 342 return false; 343 344 for (llvm::ImmutableSet<unsigned>::iterator 345 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 346 unsigned ArgNum = *I; 347 348 // Special handling for the tainted return value. 349 if (ArgNum == ReturnValueIndex) { 350 State = State->addTaint(CE, C.getLocationContext()); 351 continue; 352 } 353 354 // The arguments are pointer arguments. The data they are pointing at is 355 // tainted after the call. 356 if (CE->getNumArgs() < (ArgNum + 1)) 357 return false; 358 const Expr* Arg = CE->getArg(ArgNum); 359 SymbolRef Sym = getPointedToSymbol(C, Arg); 360 if (Sym) 361 State = State->addTaint(Sym); 362 } 363 364 // Clear up the taint info from the state. 365 State = State->remove<TaintArgsOnPostVisit>(); 366 367 if (State != C.getState()) { 368 C.addTransition(State); 369 return true; 370 } 371 return false; 372} 373 374void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 375 CheckerContext &C) const { 376 // Define the attack surface. 377 // Set the evaluation function by switching on the callee name. 378 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 379 if (!FDecl || FDecl->getKind() != Decl::Function) 380 return; 381 382 StringRef Name = C.getCalleeName(FDecl); 383 if (Name.empty()) 384 return; 385 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 386 .Case("scanf", &GenericTaintChecker::postScanf) 387 // TODO: Add support for vfscanf & family. 388 .Case("getchar", &GenericTaintChecker::postRetTaint) 389 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 390 .Case("getenv", &GenericTaintChecker::postRetTaint) 391 .Case("fopen", &GenericTaintChecker::postRetTaint) 392 .Case("fdopen", &GenericTaintChecker::postRetTaint) 393 .Case("freopen", &GenericTaintChecker::postRetTaint) 394 .Case("getch", &GenericTaintChecker::postRetTaint) 395 .Case("wgetch", &GenericTaintChecker::postRetTaint) 396 .Case("socket", &GenericTaintChecker::postSocket) 397 .Default(0); 398 399 // If the callee isn't defined, it is not of security concern. 400 // Check and evaluate the call. 401 ProgramStateRef State = 0; 402 if (evalFunction) 403 State = (this->*evalFunction)(CE, C); 404 if (!State) 405 return; 406 407 C.addTransition(State); 408} 409 410bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 411 412 if (checkUncontrolledFormatString(CE, C)) 413 return true; 414 415 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 416 if (!FDecl || FDecl->getKind() != Decl::Function) 417 return false; 418 419 StringRef Name = C.getCalleeName(FDecl); 420 if (Name.empty()) 421 return false; 422 423 if (checkSystemCall(CE, Name, C)) 424 return true; 425 426 if (checkTaintedBufferSize(CE, FDecl, C)) 427 return true; 428 429 return false; 430} 431 432SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 433 const Expr* Arg) { 434 ProgramStateRef State = C.getState(); 435 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 436 if (AddrVal.isUnknownOrUndef()) 437 return 0; 438 439 Loc *AddrLoc = dyn_cast<Loc>(&AddrVal); 440 if (!AddrLoc) 441 return 0; 442 443 const PointerType *ArgTy = 444 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 445 SVal Val = State->getSVal(*AddrLoc, 446 ArgTy ? ArgTy->getPointeeType(): QualType()); 447 return Val.getAsSymbol(); 448} 449 450ProgramStateRef 451GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 452 CheckerContext &C) const { 453 ProgramStateRef State = C.getState(); 454 455 // Check for taint in arguments. 456 bool IsTainted = false; 457 for (ArgVector::const_iterator I = SrcArgs.begin(), 458 E = SrcArgs.end(); I != E; ++I) { 459 unsigned ArgNum = *I; 460 461 if (ArgNum == InvalidArgIndex) { 462 // Check if any of the arguments is tainted, but skip the 463 // destination arguments. 464 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 465 if (isDestinationArgument(i)) 466 continue; 467 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 468 break; 469 } 470 break; 471 } 472 473 if (CE->getNumArgs() < (ArgNum + 1)) 474 return State; 475 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 476 break; 477 } 478 if (!IsTainted) 479 return State; 480 481 // Mark the arguments which should be tainted after the function returns. 482 for (ArgVector::const_iterator I = DstArgs.begin(), 483 E = DstArgs.end(); I != E; ++I) { 484 unsigned ArgNum = *I; 485 486 // Should we mark all arguments as tainted? 487 if (ArgNum == InvalidArgIndex) { 488 // For all pointer and references that were passed in: 489 // If they are not pointing to const data, mark data as tainted. 490 // TODO: So far we are just going one level down; ideally we'd need to 491 // recurse here. 492 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 493 const Expr *Arg = CE->getArg(i); 494 // Process pointer argument. 495 const Type *ArgTy = Arg->getType().getTypePtr(); 496 QualType PType = ArgTy->getPointeeType(); 497 if ((!PType.isNull() && !PType.isConstQualified()) 498 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 499 State = State->add<TaintArgsOnPostVisit>(i); 500 } 501 continue; 502 } 503 504 // Should mark the return value? 505 if (ArgNum == ReturnValueIndex) { 506 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 507 continue; 508 } 509 510 // Mark the given argument. 511 assert(ArgNum < CE->getNumArgs()); 512 State = State->add<TaintArgsOnPostVisit>(ArgNum); 513 } 514 515 return State; 516} 517 518 519// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 520// and arg 1 should get taint. 521ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 522 CheckerContext &C) const { 523 assert(CE->getNumArgs() >= 2); 524 ProgramStateRef State = C.getState(); 525 526 // Check is the file descriptor is tainted. 527 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 528 isStdin(CE->getArg(0), C)) { 529 // All arguments except for the first two should get taint. 530 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 531 State = State->add<TaintArgsOnPostVisit>(i); 532 return State; 533 } 534 535 return 0; 536} 537 538 539// If argument 0(protocol domain) is network, the return value should get taint. 540ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 541 CheckerContext &C) const { 542 ProgramStateRef State = C.getState(); 543 if (CE->getNumArgs() < 3) 544 return State; 545 546 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 547 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 548 // White list the internal communication protocols. 549 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 550 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 551 return State; 552 State = State->addTaint(CE, C.getLocationContext()); 553 return State; 554} 555 556ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 557 CheckerContext &C) const { 558 ProgramStateRef State = C.getState(); 559 if (CE->getNumArgs() < 2) 560 return State; 561 562 SVal x = State->getSVal(CE->getArg(1), C.getLocationContext()); 563 // All arguments except for the very first one should get taint. 564 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 565 // The arguments are pointer arguments. The data they are pointing at is 566 // tainted after the call. 567 const Expr* Arg = CE->getArg(i); 568 SymbolRef Sym = getPointedToSymbol(C, Arg); 569 if (Sym) 570 State = State->addTaint(Sym); 571 } 572 return State; 573} 574 575ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 576 CheckerContext &C) const { 577 return C.getState()->addTaint(CE, C.getLocationContext()); 578} 579 580bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 581 ProgramStateRef State = C.getState(); 582 SVal Val = State->getSVal(E, C.getLocationContext()); 583 584 // stdin is a pointer, so it would be a region. 585 const MemRegion *MemReg = Val.getAsRegion(); 586 587 // The region should be symbolic, we do not know it's value. 588 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 589 if (!SymReg) 590 return false; 591 592 // Get it's symbol and find the declaration region it's pointing to. 593 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 594 if (!Sm) 595 return false; 596 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 597 if (!DeclReg) 598 return false; 599 600 // This region corresponds to a declaration, find out if it's a global/extern 601 // variable named stdin with the proper type. 602 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 603 D = D->getCanonicalDecl(); 604 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 605 if (const PointerType * PtrTy = 606 dyn_cast<PointerType>(D->getType().getTypePtr())) 607 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 608 return true; 609 } 610 return false; 611} 612 613static bool getPrintfFormatArgumentNum(const CallExpr *CE, 614 const CheckerContext &C, 615 unsigned int &ArgNum) { 616 // Find if the function contains a format string argument. 617 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 618 // vsnprintf, syslog, custom annotated functions. 619 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 620 if (!FDecl) 621 return false; 622 for (specific_attr_iterator<FormatAttr> 623 i = FDecl->specific_attr_begin<FormatAttr>(), 624 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) { 625 626 const FormatAttr *Format = *i; 627 ArgNum = Format->getFormatIdx() - 1; 628 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum) 629 return true; 630 } 631 632 // Or if a function is named setproctitle (this is a heuristic). 633 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 634 ArgNum = 0; 635 return true; 636 } 637 638 return false; 639} 640 641bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 642 const char Msg[], 643 CheckerContext &C) const { 644 assert(E); 645 646 // Check for taint. 647 ProgramStateRef State = C.getState(); 648 if (!State->isTainted(getPointedToSymbol(C, E)) && 649 !State->isTainted(E, C.getLocationContext())) 650 return false; 651 652 // Generate diagnostic. 653 if (ExplodedNode *N = C.addTransition()) { 654 initBugType(); 655 BugReport *report = new BugReport(*BT, Msg, N); 656 report->addRange(E->getSourceRange()); 657 C.EmitReport(report); 658 return true; 659 } 660 return false; 661} 662 663bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 664 CheckerContext &C) const{ 665 // Check if the function contains a format string argument. 666 unsigned int ArgNum = 0; 667 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 668 return false; 669 670 // If either the format string content or the pointer itself are tainted, warn. 671 if (generateReportIfTainted(CE->getArg(ArgNum), 672 MsgUncontrolledFormatString, C)) 673 return true; 674 return false; 675} 676 677bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 678 StringRef Name, 679 CheckerContext &C) const { 680 // TODO: It might make sense to run this check on demand. In some cases, 681 // we should check if the environment has been cleansed here. We also might 682 // need to know if the user was reset before these calls(seteuid). 683 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 684 .Case("system", 0) 685 .Case("popen", 0) 686 .Case("execl", 0) 687 .Case("execle", 0) 688 .Case("execlp", 0) 689 .Case("execv", 0) 690 .Case("execvp", 0) 691 .Case("execvP", 0) 692 .Case("execve", 0) 693 .Case("dlopen", 0) 694 .Default(UINT_MAX); 695 696 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 697 return false; 698 699 if (generateReportIfTainted(CE->getArg(ArgNum), 700 MsgSanitizeSystemArgs, C)) 701 return true; 702 703 return false; 704} 705 706// TODO: Should this check be a part of the CString checker? 707// If yes, should taint be a global setting? 708bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 709 const FunctionDecl *FDecl, 710 CheckerContext &C) const { 711 // If the function has a buffer size argument, set ArgNum. 712 unsigned ArgNum = InvalidArgIndex; 713 unsigned BId = 0; 714 if ( (BId = FDecl->getMemoryFunctionKind()) ) 715 switch(BId) { 716 case Builtin::BImemcpy: 717 case Builtin::BImemmove: 718 case Builtin::BIstrncpy: 719 ArgNum = 2; 720 break; 721 case Builtin::BIstrndup: 722 ArgNum = 1; 723 break; 724 default: 725 break; 726 }; 727 728 if (ArgNum == InvalidArgIndex) { 729 if (C.isCLibraryFunction(FDecl, "malloc") || 730 C.isCLibraryFunction(FDecl, "calloc") || 731 C.isCLibraryFunction(FDecl, "alloca")) 732 ArgNum = 0; 733 else if (C.isCLibraryFunction(FDecl, "memccpy")) 734 ArgNum = 3; 735 else if (C.isCLibraryFunction(FDecl, "realloc")) 736 ArgNum = 1; 737 else if (C.isCLibraryFunction(FDecl, "bcopy")) 738 ArgNum = 2; 739 } 740 741 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 742 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 743 return true; 744 745 return false; 746} 747 748void ento::registerGenericTaintChecker(CheckerManager &mgr) { 749 mgr.registerChecker<GenericTaintChecker>(); 750} 751