1//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This checker defines the attack surface for generic taint propagation. 11// 12// The taint information produced by it might be useful to other checkers. For 13// example, checkers should report errors which involve tainted data more 14// aggressively, even if the involved symbols are under constrained. 15// 16//===----------------------------------------------------------------------===// 17#include "ClangSACheckers.h" 18#include "clang/AST/Attr.h" 19#include "clang/Basic/Builtins.h" 20#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 21#include "clang/StaticAnalyzer/Core/Checker.h" 22#include "clang/StaticAnalyzer/Core/CheckerManager.h" 23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 25#include <climits> 26 27using namespace clang; 28using namespace ento; 29 30namespace { 31class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 32 check::PreStmt<CallExpr> > { 33public: 34 static void *getTag() { static int Tag; return &Tag; } 35 36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 37 38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 39 40private: 41 static const unsigned InvalidArgIndex = UINT_MAX; 42 /// Denotes the return vale. 43 static const unsigned ReturnValueIndex = UINT_MAX - 1; 44 45 mutable std::unique_ptr<BugType> BT; 46 inline void initBugType() const { 47 if (!BT) 48 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 49 } 50 51 /// \brief Catch taint related bugs. Check if tainted data is passed to a 52 /// system call etc. 53 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 54 55 /// \brief Add taint sources on a pre-visit. 56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 57 58 /// \brief Propagate taint generated at pre-visit. 59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 60 61 /// \brief Add taint sources on a post visit. 62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 63 64 /// Check if the region the expression evaluates to is the standard input, 65 /// and thus, is tainted. 66 static bool isStdin(const Expr *E, CheckerContext &C); 67 68 /// \brief Given a pointer argument, get the symbol of the value it contains 69 /// (points to). 70 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 71 72 /// Functions defining the attack surface. 73 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 74 CheckerContext &C) const; 75 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 76 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 77 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 78 79 /// Taint the scanned input if the file is tainted. 80 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 81 82 /// Check for CWE-134: Uncontrolled Format String. 83 static const char MsgUncontrolledFormatString[]; 84 bool checkUncontrolledFormatString(const CallExpr *CE, 85 CheckerContext &C) const; 86 87 /// Check for: 88 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 89 /// CWE-78, "Failure to Sanitize Data into an OS Command" 90 static const char MsgSanitizeSystemArgs[]; 91 bool checkSystemCall(const CallExpr *CE, StringRef Name, 92 CheckerContext &C) const; 93 94 /// Check if tainted data is used as a buffer size ins strn.. functions, 95 /// and allocators. 96 static const char MsgTaintedBufferSize[]; 97 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 98 CheckerContext &C) const; 99 100 /// Generate a report if the expression is tainted or points to tainted data. 101 bool generateReportIfTainted(const Expr *E, const char Msg[], 102 CheckerContext &C) const; 103 104 105 typedef SmallVector<unsigned, 2> ArgVector; 106 107 /// \brief A struct used to specify taint propagation rules for a function. 108 /// 109 /// If any of the possible taint source arguments is tainted, all of the 110 /// destination arguments should also be tainted. Use InvalidArgIndex in the 111 /// src list to specify that all of the arguments can introduce taint. Use 112 /// InvalidArgIndex in the dst arguments to signify that all the non-const 113 /// pointer and reference arguments might be tainted on return. If 114 /// ReturnValueIndex is added to the dst list, the return value will be 115 /// tainted. 116 struct TaintPropagationRule { 117 /// List of arguments which can be taint sources and should be checked. 118 ArgVector SrcArgs; 119 /// List of arguments which should be tainted on function return. 120 ArgVector DstArgs; 121 // TODO: Check if using other data structures would be more optimal. 122 123 TaintPropagationRule() {} 124 125 TaintPropagationRule(unsigned SArg, 126 unsigned DArg, bool TaintRet = false) { 127 SrcArgs.push_back(SArg); 128 DstArgs.push_back(DArg); 129 if (TaintRet) 130 DstArgs.push_back(ReturnValueIndex); 131 } 132 133 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 134 unsigned DArg, bool TaintRet = false) { 135 SrcArgs.push_back(SArg1); 136 SrcArgs.push_back(SArg2); 137 DstArgs.push_back(DArg); 138 if (TaintRet) 139 DstArgs.push_back(ReturnValueIndex); 140 } 141 142 /// Get the propagation rule for a given function. 143 static TaintPropagationRule 144 getTaintPropagationRule(const FunctionDecl *FDecl, 145 StringRef Name, 146 CheckerContext &C); 147 148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 150 151 inline bool isNull() const { return SrcArgs.empty(); } 152 153 inline bool isDestinationArgument(unsigned ArgNum) const { 154 return (std::find(DstArgs.begin(), 155 DstArgs.end(), ArgNum) != DstArgs.end()); 156 } 157 158 static inline bool isTaintedOrPointsToTainted(const Expr *E, 159 ProgramStateRef State, 160 CheckerContext &C) { 161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 162 (E->getType().getTypePtr()->isPointerType() && 163 State->isTainted(getPointedToSymbol(C, E)))); 164 } 165 166 /// \brief Pre-process a function which propagates taint according to the 167 /// taint rule. 168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 169 170 }; 171}; 172 173const unsigned GenericTaintChecker::ReturnValueIndex; 174const unsigned GenericTaintChecker::InvalidArgIndex; 175 176const char GenericTaintChecker::MsgUncontrolledFormatString[] = 177 "Untrusted data is used as a format string " 178 "(CWE-134: Uncontrolled Format String)"; 179 180const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 181 "Untrusted data is passed to a system call " 182 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 183 184const char GenericTaintChecker::MsgTaintedBufferSize[] = 185 "Untrusted data is used to specify the buffer size " 186 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 187 "character data and the null terminator)"; 188 189} // end of anonymous namespace 190 191/// A set which is used to pass information from call pre-visit instruction 192/// to the call post-visit. The values are unsigned integers, which are either 193/// ReturnValueIndex, or indexes of the pointer/reference argument, which 194/// points to data, which should be tainted on return. 195REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 196 197GenericTaintChecker::TaintPropagationRule 198GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 199 const FunctionDecl *FDecl, 200 StringRef Name, 201 CheckerContext &C) { 202 // TODO: Currently, we might loose precision here: we always mark a return 203 // value as tainted even if it's just a pointer, pointing to tainted data. 204 205 // Check for exact name match for functions without builtin substitutes. 206 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 207 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 208 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 209 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 210 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 211 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 212 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 213 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 214 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 215 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 216 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 217 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 218 .Case("read", TaintPropagationRule(0, 2, 1, true)) 219 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 220 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 221 .Case("fgets", TaintPropagationRule(2, 0, true)) 222 .Case("getline", TaintPropagationRule(2, 0)) 223 .Case("getdelim", TaintPropagationRule(3, 0)) 224 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 225 .Default(TaintPropagationRule()); 226 227 if (!Rule.isNull()) 228 return Rule; 229 230 // Check if it's one of the memory setting/copying functions. 231 // This check is specialized but faster then calling isCLibraryFunction. 232 unsigned BId = 0; 233 if ( (BId = FDecl->getMemoryFunctionKind()) ) 234 switch(BId) { 235 case Builtin::BImemcpy: 236 case Builtin::BImemmove: 237 case Builtin::BIstrncpy: 238 case Builtin::BIstrncat: 239 return TaintPropagationRule(1, 2, 0, true); 240 case Builtin::BIstrlcpy: 241 case Builtin::BIstrlcat: 242 return TaintPropagationRule(1, 2, 0, false); 243 case Builtin::BIstrndup: 244 return TaintPropagationRule(0, 1, ReturnValueIndex); 245 246 default: 247 break; 248 }; 249 250 // Process all other functions which could be defined as builtins. 251 if (Rule.isNull()) { 252 if (C.isCLibraryFunction(FDecl, "snprintf") || 253 C.isCLibraryFunction(FDecl, "sprintf")) 254 return TaintPropagationRule(InvalidArgIndex, 0, true); 255 else if (C.isCLibraryFunction(FDecl, "strcpy") || 256 C.isCLibraryFunction(FDecl, "stpcpy") || 257 C.isCLibraryFunction(FDecl, "strcat")) 258 return TaintPropagationRule(1, 0, true); 259 else if (C.isCLibraryFunction(FDecl, "bcopy")) 260 return TaintPropagationRule(0, 2, 1, false); 261 else if (C.isCLibraryFunction(FDecl, "strdup") || 262 C.isCLibraryFunction(FDecl, "strdupa")) 263 return TaintPropagationRule(0, ReturnValueIndex); 264 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 265 return TaintPropagationRule(0, ReturnValueIndex); 266 } 267 268 // Skipping the following functions, since they might be used for cleansing 269 // or smart memory copy: 270 // - memccpy - copying until hitting a special character. 271 272 return TaintPropagationRule(); 273} 274 275void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 276 CheckerContext &C) const { 277 // Check for errors first. 278 if (checkPre(CE, C)) 279 return; 280 281 // Add taint second. 282 addSourcesPre(CE, C); 283} 284 285void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 286 CheckerContext &C) const { 287 if (propagateFromPre(CE, C)) 288 return; 289 addSourcesPost(CE, C); 290} 291 292void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 293 CheckerContext &C) const { 294 ProgramStateRef State = nullptr; 295 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 296 if (!FDecl || FDecl->getKind() != Decl::Function) 297 return; 298 299 StringRef Name = C.getCalleeName(FDecl); 300 if (Name.empty()) 301 return; 302 303 // First, try generating a propagation rule for this function. 304 TaintPropagationRule Rule = 305 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 306 if (!Rule.isNull()) { 307 State = Rule.process(CE, C); 308 if (!State) 309 return; 310 C.addTransition(State); 311 return; 312 } 313 314 // Otherwise, check if we have custom pre-processing implemented. 315 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 316 .Case("fscanf", &GenericTaintChecker::preFscanf) 317 .Default(nullptr); 318 // Check and evaluate the call. 319 if (evalFunction) 320 State = (this->*evalFunction)(CE, C); 321 if (!State) 322 return; 323 C.addTransition(State); 324 325} 326 327bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 328 CheckerContext &C) const { 329 ProgramStateRef State = C.getState(); 330 331 // Depending on what was tainted at pre-visit, we determined a set of 332 // arguments which should be tainted after the function returns. These are 333 // stored in the state as TaintArgsOnPostVisit set. 334 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 335 if (TaintArgs.isEmpty()) 336 return false; 337 338 for (llvm::ImmutableSet<unsigned>::iterator 339 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 340 unsigned ArgNum = *I; 341 342 // Special handling for the tainted return value. 343 if (ArgNum == ReturnValueIndex) { 344 State = State->addTaint(CE, C.getLocationContext()); 345 continue; 346 } 347 348 // The arguments are pointer arguments. The data they are pointing at is 349 // tainted after the call. 350 if (CE->getNumArgs() < (ArgNum + 1)) 351 return false; 352 const Expr* Arg = CE->getArg(ArgNum); 353 SymbolRef Sym = getPointedToSymbol(C, Arg); 354 if (Sym) 355 State = State->addTaint(Sym); 356 } 357 358 // Clear up the taint info from the state. 359 State = State->remove<TaintArgsOnPostVisit>(); 360 361 if (State != C.getState()) { 362 C.addTransition(State); 363 return true; 364 } 365 return false; 366} 367 368void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 369 CheckerContext &C) const { 370 // Define the attack surface. 371 // Set the evaluation function by switching on the callee name. 372 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 373 if (!FDecl || FDecl->getKind() != Decl::Function) 374 return; 375 376 StringRef Name = C.getCalleeName(FDecl); 377 if (Name.empty()) 378 return; 379 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 380 .Case("scanf", &GenericTaintChecker::postScanf) 381 // TODO: Add support for vfscanf & family. 382 .Case("getchar", &GenericTaintChecker::postRetTaint) 383 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 384 .Case("getenv", &GenericTaintChecker::postRetTaint) 385 .Case("fopen", &GenericTaintChecker::postRetTaint) 386 .Case("fdopen", &GenericTaintChecker::postRetTaint) 387 .Case("freopen", &GenericTaintChecker::postRetTaint) 388 .Case("getch", &GenericTaintChecker::postRetTaint) 389 .Case("wgetch", &GenericTaintChecker::postRetTaint) 390 .Case("socket", &GenericTaintChecker::postSocket) 391 .Default(nullptr); 392 393 // If the callee isn't defined, it is not of security concern. 394 // Check and evaluate the call. 395 ProgramStateRef State = nullptr; 396 if (evalFunction) 397 State = (this->*evalFunction)(CE, C); 398 if (!State) 399 return; 400 401 C.addTransition(State); 402} 403 404bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 405 406 if (checkUncontrolledFormatString(CE, C)) 407 return true; 408 409 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 410 if (!FDecl || FDecl->getKind() != Decl::Function) 411 return false; 412 413 StringRef Name = C.getCalleeName(FDecl); 414 if (Name.empty()) 415 return false; 416 417 if (checkSystemCall(CE, Name, C)) 418 return true; 419 420 if (checkTaintedBufferSize(CE, FDecl, C)) 421 return true; 422 423 return false; 424} 425 426SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 427 const Expr* Arg) { 428 ProgramStateRef State = C.getState(); 429 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 430 if (AddrVal.isUnknownOrUndef()) 431 return nullptr; 432 433 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 434 if (!AddrLoc) 435 return nullptr; 436 437 const PointerType *ArgTy = 438 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 439 SVal Val = State->getSVal(*AddrLoc, 440 ArgTy ? ArgTy->getPointeeType(): QualType()); 441 return Val.getAsSymbol(); 442} 443 444ProgramStateRef 445GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 446 CheckerContext &C) const { 447 ProgramStateRef State = C.getState(); 448 449 // Check for taint in arguments. 450 bool IsTainted = false; 451 for (ArgVector::const_iterator I = SrcArgs.begin(), 452 E = SrcArgs.end(); I != E; ++I) { 453 unsigned ArgNum = *I; 454 455 if (ArgNum == InvalidArgIndex) { 456 // Check if any of the arguments is tainted, but skip the 457 // destination arguments. 458 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 459 if (isDestinationArgument(i)) 460 continue; 461 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 462 break; 463 } 464 break; 465 } 466 467 if (CE->getNumArgs() < (ArgNum + 1)) 468 return State; 469 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 470 break; 471 } 472 if (!IsTainted) 473 return State; 474 475 // Mark the arguments which should be tainted after the function returns. 476 for (ArgVector::const_iterator I = DstArgs.begin(), 477 E = DstArgs.end(); I != E; ++I) { 478 unsigned ArgNum = *I; 479 480 // Should we mark all arguments as tainted? 481 if (ArgNum == InvalidArgIndex) { 482 // For all pointer and references that were passed in: 483 // If they are not pointing to const data, mark data as tainted. 484 // TODO: So far we are just going one level down; ideally we'd need to 485 // recurse here. 486 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 487 const Expr *Arg = CE->getArg(i); 488 // Process pointer argument. 489 const Type *ArgTy = Arg->getType().getTypePtr(); 490 QualType PType = ArgTy->getPointeeType(); 491 if ((!PType.isNull() && !PType.isConstQualified()) 492 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 493 State = State->add<TaintArgsOnPostVisit>(i); 494 } 495 continue; 496 } 497 498 // Should mark the return value? 499 if (ArgNum == ReturnValueIndex) { 500 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 501 continue; 502 } 503 504 // Mark the given argument. 505 assert(ArgNum < CE->getNumArgs()); 506 State = State->add<TaintArgsOnPostVisit>(ArgNum); 507 } 508 509 return State; 510} 511 512 513// If argument 0 (file descriptor) is tainted, all arguments except for arg 0 514// and arg 1 should get taint. 515ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 516 CheckerContext &C) const { 517 assert(CE->getNumArgs() >= 2); 518 ProgramStateRef State = C.getState(); 519 520 // Check is the file descriptor is tainted. 521 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 522 isStdin(CE->getArg(0), C)) { 523 // All arguments except for the first two should get taint. 524 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 525 State = State->add<TaintArgsOnPostVisit>(i); 526 return State; 527 } 528 529 return nullptr; 530} 531 532 533// If argument 0(protocol domain) is network, the return value should get taint. 534ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 535 CheckerContext &C) const { 536 ProgramStateRef State = C.getState(); 537 if (CE->getNumArgs() < 3) 538 return State; 539 540 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 541 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 542 // White list the internal communication protocols. 543 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 544 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 545 return State; 546 State = State->addTaint(CE, C.getLocationContext()); 547 return State; 548} 549 550ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 551 CheckerContext &C) const { 552 ProgramStateRef State = C.getState(); 553 if (CE->getNumArgs() < 2) 554 return State; 555 556 // All arguments except for the very first one should get taint. 557 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 558 // The arguments are pointer arguments. The data they are pointing at is 559 // tainted after the call. 560 const Expr* Arg = CE->getArg(i); 561 SymbolRef Sym = getPointedToSymbol(C, Arg); 562 if (Sym) 563 State = State->addTaint(Sym); 564 } 565 return State; 566} 567 568ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 569 CheckerContext &C) const { 570 return C.getState()->addTaint(CE, C.getLocationContext()); 571} 572 573bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 574 ProgramStateRef State = C.getState(); 575 SVal Val = State->getSVal(E, C.getLocationContext()); 576 577 // stdin is a pointer, so it would be a region. 578 const MemRegion *MemReg = Val.getAsRegion(); 579 580 // The region should be symbolic, we do not know it's value. 581 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 582 if (!SymReg) 583 return false; 584 585 // Get it's symbol and find the declaration region it's pointing to. 586 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 587 if (!Sm) 588 return false; 589 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 590 if (!DeclReg) 591 return false; 592 593 // This region corresponds to a declaration, find out if it's a global/extern 594 // variable named stdin with the proper type. 595 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 596 D = D->getCanonicalDecl(); 597 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 598 if (const PointerType * PtrTy = 599 dyn_cast<PointerType>(D->getType().getTypePtr())) 600 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 601 return true; 602 } 603 return false; 604} 605 606static bool getPrintfFormatArgumentNum(const CallExpr *CE, 607 const CheckerContext &C, 608 unsigned int &ArgNum) { 609 // Find if the function contains a format string argument. 610 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 611 // vsnprintf, syslog, custom annotated functions. 612 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 613 if (!FDecl) 614 return false; 615 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 616 ArgNum = Format->getFormatIdx() - 1; 617 if ((Format->getType()->getName() == "printf") && 618 CE->getNumArgs() > ArgNum) 619 return true; 620 } 621 622 // Or if a function is named setproctitle (this is a heuristic). 623 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 624 ArgNum = 0; 625 return true; 626 } 627 628 return false; 629} 630 631bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 632 const char Msg[], 633 CheckerContext &C) const { 634 assert(E); 635 636 // Check for taint. 637 ProgramStateRef State = C.getState(); 638 if (!State->isTainted(getPointedToSymbol(C, E)) && 639 !State->isTainted(E, C.getLocationContext())) 640 return false; 641 642 // Generate diagnostic. 643 if (ExplodedNode *N = C.addTransition()) { 644 initBugType(); 645 BugReport *report = new BugReport(*BT, Msg, N); 646 report->addRange(E->getSourceRange()); 647 C.emitReport(report); 648 return true; 649 } 650 return false; 651} 652 653bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 654 CheckerContext &C) const{ 655 // Check if the function contains a format string argument. 656 unsigned int ArgNum = 0; 657 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 658 return false; 659 660 // If either the format string content or the pointer itself are tainted, warn. 661 if (generateReportIfTainted(CE->getArg(ArgNum), 662 MsgUncontrolledFormatString, C)) 663 return true; 664 return false; 665} 666 667bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 668 StringRef Name, 669 CheckerContext &C) const { 670 // TODO: It might make sense to run this check on demand. In some cases, 671 // we should check if the environment has been cleansed here. We also might 672 // need to know if the user was reset before these calls(seteuid). 673 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 674 .Case("system", 0) 675 .Case("popen", 0) 676 .Case("execl", 0) 677 .Case("execle", 0) 678 .Case("execlp", 0) 679 .Case("execv", 0) 680 .Case("execvp", 0) 681 .Case("execvP", 0) 682 .Case("execve", 0) 683 .Case("dlopen", 0) 684 .Default(UINT_MAX); 685 686 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 687 return false; 688 689 if (generateReportIfTainted(CE->getArg(ArgNum), 690 MsgSanitizeSystemArgs, C)) 691 return true; 692 693 return false; 694} 695 696// TODO: Should this check be a part of the CString checker? 697// If yes, should taint be a global setting? 698bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 699 const FunctionDecl *FDecl, 700 CheckerContext &C) const { 701 // If the function has a buffer size argument, set ArgNum. 702 unsigned ArgNum = InvalidArgIndex; 703 unsigned BId = 0; 704 if ( (BId = FDecl->getMemoryFunctionKind()) ) 705 switch(BId) { 706 case Builtin::BImemcpy: 707 case Builtin::BImemmove: 708 case Builtin::BIstrncpy: 709 ArgNum = 2; 710 break; 711 case Builtin::BIstrndup: 712 ArgNum = 1; 713 break; 714 default: 715 break; 716 }; 717 718 if (ArgNum == InvalidArgIndex) { 719 if (C.isCLibraryFunction(FDecl, "malloc") || 720 C.isCLibraryFunction(FDecl, "calloc") || 721 C.isCLibraryFunction(FDecl, "alloca")) 722 ArgNum = 0; 723 else if (C.isCLibraryFunction(FDecl, "memccpy")) 724 ArgNum = 3; 725 else if (C.isCLibraryFunction(FDecl, "realloc")) 726 ArgNum = 1; 727 else if (C.isCLibraryFunction(FDecl, "bcopy")) 728 ArgNum = 2; 729 } 730 731 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 732 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 733 return true; 734 735 return false; 736} 737 738void ento::registerGenericTaintChecker(CheckerManager &mgr) { 739 mgr.registerChecker<GenericTaintChecker>(); 740} 741