CStringChecker.cpp revision 0ef473f75426f0a95635d0a9dd567d27b07dbd5b
1//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This defines CStringChecker, which is an assortment of checks on calls 11// to functions in <string.h>. 12// 13//===----------------------------------------------------------------------===// 14 15#include "ClangSACheckers.h" 16#include "clang/StaticAnalyzer/Core/CheckerManager.h" 17#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 18#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerVisitor.h" 19#include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h" 20#include "llvm/ADT/StringSwitch.h" 21 22using namespace clang; 23using namespace ento; 24 25namespace { 26class CStringChecker : public CheckerVisitor<CStringChecker> { 27 BugType *BT_Null, *BT_Bounds, *BT_BoundsWrite, *BT_Overlap, *BT_NotCString; 28public: 29 CStringChecker() 30 : BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0) 31 {} 32 static void *getTag() { static int tag; return &tag; } 33 34 bool evalCallExpr(CheckerContext &C, const CallExpr *CE); 35 void PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS); 36 void MarkLiveSymbols(const GRState *state, SymbolReaper &SR); 37 void evalDeadSymbols(CheckerContext &C, SymbolReaper &SR); 38 bool wantsRegionChangeUpdate(const GRState *state); 39 40 const GRState *EvalRegionChanges(const GRState *state, 41 const MemRegion * const *Begin, 42 const MemRegion * const *End, 43 bool*); 44 45 typedef void (CStringChecker::*FnCheck)(CheckerContext &, const CallExpr *); 46 47 void evalMemcpy(CheckerContext &C, const CallExpr *CE); 48 void evalMemmove(CheckerContext &C, const CallExpr *CE); 49 void evalBcopy(CheckerContext &C, const CallExpr *CE); 50 void evalCopyCommon(CheckerContext &C, const GRState *state, 51 const Expr *Size, const Expr *Source, const Expr *Dest, 52 bool Restricted = false); 53 54 void evalMemcmp(CheckerContext &C, const CallExpr *CE); 55 56 void evalstrLength(CheckerContext &C, const CallExpr *CE); 57 void evalstrnLength(CheckerContext &C, const CallExpr *CE); 58 void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 59 bool IsStrnlen = false); 60 61 void evalStrcpy(CheckerContext &C, const CallExpr *CE); 62 void evalStrncpy(CheckerContext &C, const CallExpr *CE); 63 void evalStpcpy(CheckerContext &C, const CallExpr *CE); 64 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd, 65 bool isStrncpy); 66 67 // Utility methods 68 std::pair<const GRState*, const GRState*> 69 assumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty); 70 71 const GRState *setCStringLength(const GRState *state, const MemRegion *MR, 72 SVal strLength); 73 SVal getCStringLengthForRegion(CheckerContext &C, const GRState *&state, 74 const Expr *Ex, const MemRegion *MR); 75 SVal getCStringLength(CheckerContext &C, const GRState *&state, 76 const Expr *Ex, SVal Buf); 77 78 const GRState *InvalidateBuffer(CheckerContext &C, const GRState *state, 79 const Expr *Ex, SVal V); 80 81 bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 82 const MemRegion *MR); 83 84 // Re-usable checks 85 const GRState *checkNonNull(CheckerContext &C, const GRState *state, 86 const Expr *S, SVal l); 87 const GRState *CheckLocation(CheckerContext &C, const GRState *state, 88 const Expr *S, SVal l, 89 bool IsDestination = false); 90 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 91 const Expr *Size, 92 const Expr *FirstBuf, 93 const Expr *SecondBuf = NULL, 94 bool FirstIsDestination = false); 95 const GRState *CheckOverlap(CheckerContext &C, const GRState *state, 96 const Expr *Size, const Expr *First, 97 const Expr *Second); 98 void emitOverlapBug(CheckerContext &C, const GRState *state, 99 const Stmt *First, const Stmt *Second); 100}; 101 102class CStringLength { 103public: 104 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 105}; 106} //end anonymous namespace 107 108namespace clang { 109namespace ento { 110 template <> 111 struct GRStateTrait<CStringLength> 112 : public GRStatePartialTrait<CStringLength::EntryMap> { 113 static void *GDMIndex() { return CStringChecker::getTag(); } 114 }; 115} 116} 117 118static void RegisterCStringChecker(ExprEngine &Eng) { 119 Eng.registerCheck(new CStringChecker()); 120} 121 122void ento::registerCStringChecker(CheckerManager &mgr) { 123 mgr.addCheckerRegisterFunction(RegisterCStringChecker); 124} 125 126//===----------------------------------------------------------------------===// 127// Individual checks and utility methods. 128//===----------------------------------------------------------------------===// 129 130std::pair<const GRState*, const GRState*> 131CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V, 132 QualType Ty) { 133 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 134 if (!val) 135 return std::pair<const GRState*, const GRState *>(state, state); 136 137 SValBuilder &svalBuilder = C.getSValBuilder(); 138 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 139 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 140} 141 142const GRState *CStringChecker::checkNonNull(CheckerContext &C, 143 const GRState *state, 144 const Expr *S, SVal l) { 145 // If a previous check has failed, propagate the failure. 146 if (!state) 147 return NULL; 148 149 const GRState *stateNull, *stateNonNull; 150 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 151 152 if (stateNull && !stateNonNull) { 153 ExplodedNode *N = C.generateSink(stateNull); 154 if (!N) 155 return NULL; 156 157 if (!BT_Null) 158 BT_Null = new BuiltinBug("API", 159 "Null pointer argument in call to byte string function"); 160 161 // Generate a report for this bug. 162 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null); 163 EnhancedBugReport *report = new EnhancedBugReport(*BT, 164 BT->getDescription(), N); 165 166 report->addRange(S->getSourceRange()); 167 report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); 168 C.EmitReport(report); 169 return NULL; 170 } 171 172 // From here on, assume that the value is non-null. 173 assert(stateNonNull); 174 return stateNonNull; 175} 176 177// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 178const GRState *CStringChecker::CheckLocation(CheckerContext &C, 179 const GRState *state, 180 const Expr *S, SVal l, 181 bool IsDestination) { 182 // If a previous check has failed, propagate the failure. 183 if (!state) 184 return NULL; 185 186 // Check for out of bound array element access. 187 const MemRegion *R = l.getAsRegion(); 188 if (!R) 189 return state; 190 191 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 192 if (!ER) 193 return state; 194 195 assert(ER->getValueType() == C.getASTContext().CharTy && 196 "CheckLocation should only be called with char* ElementRegions"); 197 198 // Get the size of the array. 199 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 200 SValBuilder &svalBuilder = C.getSValBuilder(); 201 SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 202 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 203 204 // Get the index of the accessed element. 205 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 206 207 const GRState *StInBound = state->assumeInBound(Idx, Size, true); 208 const GRState *StOutBound = state->assumeInBound(Idx, Size, false); 209 if (StOutBound && !StInBound) { 210 ExplodedNode *N = C.generateSink(StOutBound); 211 if (!N) 212 return NULL; 213 214 BuiltinBug *BT; 215 if (IsDestination) { 216 if (!BT_BoundsWrite) { 217 BT_BoundsWrite = new BuiltinBug("Out-of-bound array access", 218 "Byte string function overflows destination buffer"); 219 } 220 BT = static_cast<BuiltinBug*>(BT_BoundsWrite); 221 } else { 222 if (!BT_Bounds) { 223 BT_Bounds = new BuiltinBug("Out-of-bound array access", 224 "Byte string function accesses out-of-bound array element"); 225 } 226 BT = static_cast<BuiltinBug*>(BT_Bounds); 227 } 228 229 // FIXME: It would be nice to eventually make this diagnostic more clear, 230 // e.g., by referencing the original declaration or by saying *why* this 231 // reference is outside the range. 232 233 // Generate a report for this bug. 234 RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); 235 236 report->addRange(S->getSourceRange()); 237 C.EmitReport(report); 238 return NULL; 239 } 240 241 // Array bound check succeeded. From this point forward the array bound 242 // should always succeed. 243 return StInBound; 244} 245 246const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, 247 const GRState *state, 248 const Expr *Size, 249 const Expr *FirstBuf, 250 const Expr *SecondBuf, 251 bool FirstIsDestination) { 252 // If a previous check has failed, propagate the failure. 253 if (!state) 254 return NULL; 255 256 SValBuilder &svalBuilder = C.getSValBuilder(); 257 ASTContext &Ctx = C.getASTContext(); 258 259 QualType sizeTy = Size->getType(); 260 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 261 262 // Check that the first buffer is non-null. 263 SVal BufVal = state->getSVal(FirstBuf); 264 state = checkNonNull(C, state, FirstBuf, BufVal); 265 if (!state) 266 return NULL; 267 268 // Get the access length and make sure it is known. 269 SVal LengthVal = state->getSVal(Size); 270 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 271 if (!Length) 272 return state; 273 274 // Compute the offset of the last element to be accessed: size-1. 275 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 276 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 277 *Length, One, sizeTy)); 278 279 // Check that the first buffer is sufficently long. 280 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 281 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 282 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 283 LastOffset, PtrTy); 284 state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); 285 286 // If the buffer isn't large enough, abort. 287 if (!state) 288 return NULL; 289 } 290 291 // If there's a second buffer, check it as well. 292 if (SecondBuf) { 293 BufVal = state->getSVal(SecondBuf); 294 state = checkNonNull(C, state, SecondBuf, BufVal); 295 if (!state) 296 return NULL; 297 298 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 299 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 300 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 301 LastOffset, PtrTy); 302 state = CheckLocation(C, state, SecondBuf, BufEnd); 303 } 304 } 305 306 // Large enough or not, return this state! 307 return state; 308} 309 310const GRState *CStringChecker::CheckOverlap(CheckerContext &C, 311 const GRState *state, 312 const Expr *Size, 313 const Expr *First, 314 const Expr *Second) { 315 // Do a simple check for overlap: if the two arguments are from the same 316 // buffer, see if the end of the first is greater than the start of the second 317 // or vice versa. 318 319 // If a previous check has failed, propagate the failure. 320 if (!state) 321 return NULL; 322 323 const GRState *stateTrue, *stateFalse; 324 325 // Get the buffer values and make sure they're known locations. 326 SVal firstVal = state->getSVal(First); 327 SVal secondVal = state->getSVal(Second); 328 329 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 330 if (!firstLoc) 331 return state; 332 333 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 334 if (!secondLoc) 335 return state; 336 337 // Are the two values the same? 338 SValBuilder &svalBuilder = C.getSValBuilder(); 339 llvm::tie(stateTrue, stateFalse) = 340 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 341 342 if (stateTrue && !stateFalse) { 343 // If the values are known to be equal, that's automatically an overlap. 344 emitOverlapBug(C, stateTrue, First, Second); 345 return NULL; 346 } 347 348 // assume the two expressions are not equal. 349 assert(stateFalse); 350 state = stateFalse; 351 352 // Which value comes first? 353 ASTContext &Ctx = svalBuilder.getContext(); 354 QualType cmpTy = Ctx.IntTy; 355 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 356 *firstLoc, *secondLoc, cmpTy); 357 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 358 if (!reverseTest) 359 return state; 360 361 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 362 if (stateTrue) { 363 if (stateFalse) { 364 // If we don't know which one comes first, we can't perform this test. 365 return state; 366 } else { 367 // Switch the values so that firstVal is before secondVal. 368 Loc *tmpLoc = firstLoc; 369 firstLoc = secondLoc; 370 secondLoc = tmpLoc; 371 372 // Switch the Exprs as well, so that they still correspond. 373 const Expr *tmpExpr = First; 374 First = Second; 375 Second = tmpExpr; 376 } 377 } 378 379 // Get the length, and make sure it too is known. 380 SVal LengthVal = state->getSVal(Size); 381 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 382 if (!Length) 383 return state; 384 385 // Convert the first buffer's start address to char*. 386 // Bail out if the cast fails. 387 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 388 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType()); 389 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 390 if (!FirstStartLoc) 391 return state; 392 393 // Compute the end of the first buffer. Bail out if THAT fails. 394 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 395 *FirstStartLoc, *Length, CharPtrTy); 396 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 397 if (!FirstEndLoc) 398 return state; 399 400 // Is the end of the first buffer past the start of the second buffer? 401 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 402 *FirstEndLoc, *secondLoc, cmpTy); 403 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 404 if (!OverlapTest) 405 return state; 406 407 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 408 409 if (stateTrue && !stateFalse) { 410 // Overlap! 411 emitOverlapBug(C, stateTrue, First, Second); 412 return NULL; 413 } 414 415 // assume the two expressions don't overlap. 416 assert(stateFalse); 417 return stateFalse; 418} 419 420void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, 421 const Stmt *First, const Stmt *Second) { 422 ExplodedNode *N = C.generateSink(state); 423 if (!N) 424 return; 425 426 if (!BT_Overlap) 427 BT_Overlap = new BugType("Unix API", "Improper arguments"); 428 429 // Generate a report for this bug. 430 RangedBugReport *report = 431 new RangedBugReport(*BT_Overlap, 432 "Arguments must not be overlapping buffers", N); 433 report->addRange(First->getSourceRange()); 434 report->addRange(Second->getSourceRange()); 435 436 C.EmitReport(report); 437} 438 439const GRState *CStringChecker::setCStringLength(const GRState *state, 440 const MemRegion *MR, 441 SVal strLength) { 442 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 443 if (strLength.isUnknown()) 444 return state; 445 446 MR = MR->StripCasts(); 447 448 switch (MR->getKind()) { 449 case MemRegion::StringRegionKind: 450 // FIXME: This can happen if we strcpy() into a string region. This is 451 // undefined [C99 6.4.5p6], but we should still warn about it. 452 return state; 453 454 case MemRegion::SymbolicRegionKind: 455 case MemRegion::AllocaRegionKind: 456 case MemRegion::VarRegionKind: 457 case MemRegion::FieldRegionKind: 458 case MemRegion::ObjCIvarRegionKind: 459 return state->set<CStringLength>(MR, strLength); 460 461 case MemRegion::ElementRegionKind: 462 // FIXME: Handle element regions by upper-bounding the parent region's 463 // string length. 464 return state; 465 466 default: 467 // Other regions (mostly non-data) can't have a reliable C string length. 468 // For now, just ignore the change. 469 // FIXME: These are rare but not impossible. We should output some kind of 470 // warning for things like strcpy((char[]){'a', 0}, "b"); 471 return state; 472 } 473} 474 475SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 476 const GRState *&state, 477 const Expr *Ex, 478 const MemRegion *MR) { 479 // If there's a recorded length, go ahead and return it. 480 const SVal *Recorded = state->get<CStringLength>(MR); 481 if (Recorded) 482 return *Recorded; 483 484 // Otherwise, get a new symbol and update the state. 485 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 486 SValBuilder &svalBuilder = C.getSValBuilder(); 487 QualType sizeTy = svalBuilder.getContext().getSizeType(); 488 SVal strLength = svalBuilder.getMetadataSymbolVal(getTag(), MR, Ex, sizeTy, Count); 489 state = state->set<CStringLength>(MR, strLength); 490 return strLength; 491} 492 493SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, 494 const Expr *Ex, SVal Buf) { 495 const MemRegion *MR = Buf.getAsRegion(); 496 if (!MR) { 497 // If we can't get a region, see if it's something we /know/ isn't a 498 // C string. In the context of locations, the only time we can issue such 499 // a warning is for labels. 500 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 501 if (ExplodedNode *N = C.generateNode(state)) { 502 if (!BT_NotCString) 503 BT_NotCString = new BuiltinBug("API", 504 "Argument is not a null-terminated string."); 505 506 llvm::SmallString<120> buf; 507 llvm::raw_svector_ostream os(buf); 508 os << "Argument to byte string function is the address of the label '" 509 << Label->getLabel()->getName() 510 << "', which is not a null-terminated string"; 511 512 // Generate a report for this bug. 513 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 514 os.str(), N); 515 516 report->addRange(Ex->getSourceRange()); 517 C.EmitReport(report); 518 } 519 520 return UndefinedVal(); 521 } 522 523 // If it's not a region and not a label, give up. 524 return UnknownVal(); 525 } 526 527 // If we have a region, strip casts from it and see if we can figure out 528 // its length. For anything we can't figure out, just return UnknownVal. 529 MR = MR->StripCasts(); 530 531 switch (MR->getKind()) { 532 case MemRegion::StringRegionKind: { 533 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 534 // so we can assume that the byte length is the correct C string length. 535 SValBuilder &svalBuilder = C.getSValBuilder(); 536 QualType sizeTy = svalBuilder.getContext().getSizeType(); 537 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 538 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 539 } 540 case MemRegion::SymbolicRegionKind: 541 case MemRegion::AllocaRegionKind: 542 case MemRegion::VarRegionKind: 543 case MemRegion::FieldRegionKind: 544 case MemRegion::ObjCIvarRegionKind: 545 return getCStringLengthForRegion(C, state, Ex, MR); 546 case MemRegion::CompoundLiteralRegionKind: 547 // FIXME: Can we track this? Is it necessary? 548 return UnknownVal(); 549 case MemRegion::ElementRegionKind: 550 // FIXME: How can we handle this? It's not good enough to subtract the 551 // offset from the base string length; consider "123\x00567" and &a[5]. 552 return UnknownVal(); 553 default: 554 // Other regions (mostly non-data) can't have a reliable C string length. 555 // In this case, an error is emitted and UndefinedVal is returned. 556 // The caller should always be prepared to handle this case. 557 if (ExplodedNode *N = C.generateNode(state)) { 558 if (!BT_NotCString) 559 BT_NotCString = new BuiltinBug("API", 560 "Argument is not a null-terminated string."); 561 562 llvm::SmallString<120> buf; 563 llvm::raw_svector_ostream os(buf); 564 565 os << "Argument to byte string function is "; 566 567 if (SummarizeRegion(os, C.getASTContext(), MR)) 568 os << ", which is not a null-terminated string"; 569 else 570 os << "not a null-terminated string"; 571 572 // Generate a report for this bug. 573 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 574 os.str(), N); 575 576 report->addRange(Ex->getSourceRange()); 577 C.EmitReport(report); 578 } 579 580 return UndefinedVal(); 581 } 582} 583 584const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, 585 const GRState *state, 586 const Expr *E, SVal V) { 587 Loc *L = dyn_cast<Loc>(&V); 588 if (!L) 589 return state; 590 591 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 592 // some assumptions about the value that CFRefCount can't. Even so, it should 593 // probably be refactored. 594 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 595 const MemRegion *R = MR->getRegion()->StripCasts(); 596 597 // Are we dealing with an ElementRegion? If so, we should be invalidating 598 // the super-region. 599 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 600 R = ER->getSuperRegion(); 601 // FIXME: What about layers of ElementRegions? 602 } 603 604 // Invalidate this region. 605 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 606 return state->invalidateRegion(R, E, Count, NULL); 607 } 608 609 // If we have a non-region value by chance, just remove the binding. 610 // FIXME: is this necessary or correct? This handles the non-Region 611 // cases. Is it ever valid to store to these? 612 return state->unbindLoc(*L); 613} 614 615bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 616 const MemRegion *MR) { 617 const TypedRegion *TR = dyn_cast<TypedRegion>(MR); 618 if (!TR) 619 return false; 620 621 switch (TR->getKind()) { 622 case MemRegion::FunctionTextRegionKind: { 623 const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl(); 624 if (FD) 625 os << "the address of the function '" << FD << "'"; 626 else 627 os << "the address of a function"; 628 return true; 629 } 630 case MemRegion::BlockTextRegionKind: 631 os << "block text"; 632 return true; 633 case MemRegion::BlockDataRegionKind: 634 os << "a block"; 635 return true; 636 case MemRegion::CXXThisRegionKind: 637 case MemRegion::CXXTempObjectRegionKind: 638 os << "a C++ temp object of type " << TR->getValueType().getAsString(); 639 return true; 640 case MemRegion::VarRegionKind: 641 os << "a variable of type" << TR->getValueType().getAsString(); 642 return true; 643 case MemRegion::FieldRegionKind: 644 os << "a field of type " << TR->getValueType().getAsString(); 645 return true; 646 case MemRegion::ObjCIvarRegionKind: 647 os << "an instance variable of type " << TR->getValueType().getAsString(); 648 return true; 649 default: 650 return false; 651 } 652} 653 654//===----------------------------------------------------------------------===// 655// evaluation of individual function calls. 656//===----------------------------------------------------------------------===// 657 658void CStringChecker::evalCopyCommon(CheckerContext &C, const GRState *state, 659 const Expr *Size, const Expr *Dest, 660 const Expr *Source, bool Restricted) { 661 // See if the size argument is zero. 662 SVal sizeVal = state->getSVal(Size); 663 QualType sizeTy = Size->getType(); 664 665 const GRState *stateZeroSize, *stateNonZeroSize; 666 llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy); 667 668 // If the size is zero, there won't be any actual memory access. 669 if (stateZeroSize) 670 C.addTransition(stateZeroSize); 671 672 // If the size can be nonzero, we have to check the other arguments. 673 if (stateNonZeroSize) { 674 state = stateNonZeroSize; 675 state = CheckBufferAccess(C, state, Size, Dest, Source, 676 /* FirstIsDst = */ true); 677 if (Restricted) 678 state = CheckOverlap(C, state, Size, Dest, Source); 679 680 if (state) { 681 // Invalidate the destination. 682 // FIXME: Even if we can't perfectly model the copy, we should see if we 683 // can use LazyCompoundVals to copy the source values into the destination. 684 // This would probably remove any existing bindings past the end of the 685 // copied region, but that's still an improvement over blank invalidation. 686 state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); 687 C.addTransition(state); 688 } 689 } 690} 691 692 693void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) { 694 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 695 // The return value is the address of the destination buffer. 696 const Expr *Dest = CE->getArg(0); 697 const GRState *state = C.getState(); 698 state = state->BindExpr(CE, state->getSVal(Dest)); 699 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1), true); 700} 701 702void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) { 703 // void *memmove(void *dst, const void *src, size_t n); 704 // The return value is the address of the destination buffer. 705 const Expr *Dest = CE->getArg(0); 706 const GRState *state = C.getState(); 707 state = state->BindExpr(CE, state->getSVal(Dest)); 708 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1)); 709} 710 711void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) { 712 // void bcopy(const void *src, void *dst, size_t n); 713 evalCopyCommon(C, C.getState(), CE->getArg(2), CE->getArg(1), CE->getArg(0)); 714} 715 716void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) { 717 // int memcmp(const void *s1, const void *s2, size_t n); 718 const Expr *Left = CE->getArg(0); 719 const Expr *Right = CE->getArg(1); 720 const Expr *Size = CE->getArg(2); 721 722 const GRState *state = C.getState(); 723 SValBuilder &svalBuilder = C.getSValBuilder(); 724 725 // See if the size argument is zero. 726 SVal sizeVal = state->getSVal(Size); 727 QualType sizeTy = Size->getType(); 728 729 const GRState *stateZeroSize, *stateNonZeroSize; 730 llvm::tie(stateZeroSize, stateNonZeroSize) = 731 assumeZero(C, state, sizeVal, sizeTy); 732 733 // If the size can be zero, the result will be 0 in that case, and we don't 734 // have to check either of the buffers. 735 if (stateZeroSize) { 736 state = stateZeroSize; 737 state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 738 C.addTransition(state); 739 } 740 741 // If the size can be nonzero, we have to check the other arguments. 742 if (stateNonZeroSize) { 743 state = stateNonZeroSize; 744 // If we know the two buffers are the same, we know the result is 0. 745 // First, get the two buffers' addresses. Another checker will have already 746 // made sure they're not undefined. 747 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left)); 748 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right)); 749 750 // See if they are the same. 751 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 752 const GRState *StSameBuf, *StNotSameBuf; 753 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 754 755 // If the two arguments might be the same buffer, we know the result is zero, 756 // and we only need to check one size. 757 if (StSameBuf) { 758 state = StSameBuf; 759 state = CheckBufferAccess(C, state, Size, Left); 760 if (state) { 761 state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 762 C.addTransition(state); 763 } 764 } 765 766 // If the two arguments might be different buffers, we have to check the 767 // size of both of them. 768 if (StNotSameBuf) { 769 state = StNotSameBuf; 770 state = CheckBufferAccess(C, state, Size, Left, Right); 771 if (state) { 772 // The return value is the comparison result, which we don't know. 773 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 774 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 775 state = state->BindExpr(CE, CmpV); 776 C.addTransition(state); 777 } 778 } 779 } 780} 781 782void CStringChecker::evalstrLength(CheckerContext &C, const CallExpr *CE) { 783 // size_t strlen(const char *s); 784 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 785} 786 787void CStringChecker::evalstrnLength(CheckerContext &C, const CallExpr *CE) { 788 // size_t strnlen(const char *s, size_t maxlen); 789 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 790} 791 792void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 793 bool IsStrnlen) { 794 const GRState *state = C.getState(); 795 const Expr *Arg = CE->getArg(0); 796 SVal ArgVal = state->getSVal(Arg); 797 798 // Check that the argument is non-null. 799 state = checkNonNull(C, state, Arg, ArgVal); 800 801 if (state) { 802 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 803 804 // If the argument isn't a valid C string, there's no valid state to 805 // transition to. 806 if (strLength.isUndef()) 807 return; 808 809 // If the check is for strnlen() then bind the return value to no more than 810 // the maxlen value. 811 if (IsStrnlen) { 812 const Expr *maxlenExpr = CE->getArg(1); 813 SVal maxlenVal = state->getSVal(maxlenExpr); 814 815 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 816 NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); 817 818 QualType cmpTy = C.getSValBuilder().getContext().IntTy; 819 const GRState *stateTrue, *stateFalse; 820 821 // Check if the strLength is greater than or equal to the maxlen 822 llvm::tie(stateTrue, stateFalse) = 823 state->assume(cast<DefinedOrUnknownSVal> 824 (C.getSValBuilder().evalBinOpNN(state, BO_GE, 825 *strLengthNL, *maxlenValNL, 826 cmpTy))); 827 828 // If the strLength is greater than or equal to the maxlen, set strLength 829 // to maxlen 830 if (stateTrue && !stateFalse) { 831 strLength = maxlenVal; 832 } 833 } 834 835 // If getCStringLength couldn't figure out the length, conjure a return 836 // value, so it can be used in constraints, at least. 837 if (strLength.isUnknown()) { 838 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 839 strLength = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 840 } 841 842 // Bind the return value. 843 state = state->BindExpr(CE, strLength); 844 C.addTransition(state); 845 } 846} 847 848void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) { 849 // char *strcpy(char *restrict dst, const char *restrict src); 850 evalStrcpyCommon(C, CE, /* returnEnd = */ false, /* isStrncpy = */ false); 851} 852 853void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) { 854 // char *strcpy(char *restrict dst, const char *restrict src); 855 evalStrcpyCommon(C, CE, /* returnEnd = */ false, /* isStrncpy = */ true); 856} 857 858void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) { 859 // char *stpcpy(char *restrict dst, const char *restrict src); 860 evalStrcpyCommon(C, CE, /* returnEnd = */ true, /* isStrncpy = */ false); 861} 862 863void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 864 bool returnEnd, bool isStrncpy) { 865 const GRState *state = C.getState(); 866 867 // Check that the destination is non-null 868 const Expr *Dst = CE->getArg(0); 869 SVal DstVal = state->getSVal(Dst); 870 871 state = checkNonNull(C, state, Dst, DstVal); 872 if (!state) 873 return; 874 875 // Check that the source is non-null. 876 const Expr *srcExpr = CE->getArg(1); 877 SVal srcVal = state->getSVal(srcExpr); 878 state = checkNonNull(C, state, srcExpr, srcVal); 879 if (!state) 880 return; 881 882 // Get the string length of the source. 883 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 884 885 // If the source isn't a valid C string, give up. 886 if (strLength.isUndef()) 887 return; 888 889 if (isStrncpy) { 890 // Get the max number of characters to copy 891 const Expr *lenExpr = CE->getArg(2); 892 SVal lenVal = state->getSVal(lenExpr); 893 894 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 895 NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal); 896 897 QualType cmpTy = C.getSValBuilder().getContext().IntTy; 898 const GRState *stateTrue, *stateFalse; 899 900 // Check if the max number to copy is less than the length of the src 901 llvm::tie(stateTrue, stateFalse) = 902 state->assume(cast<DefinedOrUnknownSVal> 903 (C.getSValBuilder().evalBinOpNN(state, BO_GT, 904 *strLengthNL, *lenValNL, 905 cmpTy))); 906 907 if (stateTrue) { 908 // Max number to copy is less than the length of the src, so the actual 909 // strLength copied is the max number arg. 910 strLength = lenVal; 911 } 912 } 913 914 SVal Result = (returnEnd ? UnknownVal() : DstVal); 915 916 // If the destination is a MemRegion, try to check for a buffer overflow and 917 // record the new string length. 918 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 919 // If the length is known, we can check for an overflow. 920 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&strLength)) { 921 SVal lastElement = 922 C.getSValBuilder().evalBinOpLN(state, BO_Add, *dstRegVal, 923 *knownStrLength, Dst->getType()); 924 925 state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true); 926 if (!state) 927 return; 928 929 // If this is a stpcpy-style copy, the last element is the return value. 930 if (returnEnd) 931 Result = lastElement; 932 } 933 934 // Invalidate the destination. This must happen before we set the C string 935 // length because invalidation will clear the length. 936 // FIXME: Even if we can't perfectly model the copy, we should see if we 937 // can use LazyCompoundVals to copy the source values into the destination. 938 // This would probably remove any existing bindings past the end of the 939 // string, but that's still an improvement over blank invalidation. 940 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 941 942 // Set the C string length of the destination. 943 state = setCStringLength(state, dstRegVal->getRegion(), strLength); 944 } 945 946 // If this is a stpcpy-style copy, but we were unable to check for a buffer 947 // overflow, we still need a result. Conjure a return value. 948 if (returnEnd && Result.isUnknown()) { 949 SValBuilder &svalBuilder = C.getSValBuilder(); 950 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 951 strLength = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 952 } 953 954 // Set the return value. 955 state = state->BindExpr(CE, Result); 956 C.addTransition(state); 957} 958 959//===----------------------------------------------------------------------===// 960// The driver method, and other Checker callbacks. 961//===----------------------------------------------------------------------===// 962 963bool CStringChecker::evalCallExpr(CheckerContext &C, const CallExpr *CE) { 964 // Get the callee. All the functions we care about are C functions 965 // with simple identifiers. 966 const GRState *state = C.getState(); 967 const Expr *Callee = CE->getCallee(); 968 const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl(); 969 970 if (!FD) 971 return false; 972 973 // Get the name of the callee. If it's a builtin, strip off the prefix. 974 IdentifierInfo *II = FD->getIdentifier(); 975 if (!II) // if no identifier, not a simple C function 976 return false; 977 llvm::StringRef Name = II->getName(); 978 if (Name.startswith("__builtin_")) 979 Name = Name.substr(10); 980 981 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 982 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy) 983 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) 984 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) 985 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) 986 .Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy) 987 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) 988 .Case("strlen", &CStringChecker::evalstrLength) 989 .Case("strnlen", &CStringChecker::evalstrnLength) 990 .Case("bcopy", &CStringChecker::evalBcopy) 991 .Default(NULL); 992 993 // If the callee isn't a string function, let another checker handle it. 994 if (!evalFunction) 995 return false; 996 997 // Check and evaluate the call. 998 (this->*evalFunction)(C, CE); 999 return true; 1000} 1001 1002void CStringChecker::PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS) { 1003 // Record string length for char a[] = "abc"; 1004 const GRState *state = C.getState(); 1005 1006 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 1007 I != E; ++I) { 1008 const VarDecl *D = dyn_cast<VarDecl>(*I); 1009 if (!D) 1010 continue; 1011 1012 // FIXME: Handle array fields of structs. 1013 if (!D->getType()->isArrayType()) 1014 continue; 1015 1016 const Expr *Init = D->getInit(); 1017 if (!Init) 1018 continue; 1019 if (!isa<StringLiteral>(Init)) 1020 continue; 1021 1022 Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext()); 1023 const MemRegion *MR = VarLoc.getAsRegion(); 1024 if (!MR) 1025 continue; 1026 1027 SVal StrVal = state->getSVal(Init); 1028 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 1029 DefinedOrUnknownSVal strLength 1030 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 1031 1032 state = state->set<CStringLength>(MR, strLength); 1033 } 1034 1035 C.addTransition(state); 1036} 1037 1038bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) { 1039 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1040 return !Entries.isEmpty(); 1041} 1042 1043const GRState *CStringChecker::EvalRegionChanges(const GRState *state, 1044 const MemRegion * const *Begin, 1045 const MemRegion * const *End, 1046 bool *) { 1047 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1048 if (Entries.isEmpty()) 1049 return state; 1050 1051 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 1052 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 1053 1054 // First build sets for the changed regions and their super-regions. 1055 for ( ; Begin != End; ++Begin) { 1056 const MemRegion *MR = *Begin; 1057 Invalidated.insert(MR); 1058 1059 SuperRegions.insert(MR); 1060 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 1061 MR = SR->getSuperRegion(); 1062 SuperRegions.insert(MR); 1063 } 1064 } 1065 1066 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1067 1068 // Then loop over the entries in the current state. 1069 for (CStringLength::EntryMap::iterator I = Entries.begin(), 1070 E = Entries.end(); I != E; ++I) { 1071 const MemRegion *MR = I.getKey(); 1072 1073 // Is this entry for a super-region of a changed region? 1074 if (SuperRegions.count(MR)) { 1075 Entries = F.remove(Entries, MR); 1076 continue; 1077 } 1078 1079 // Is this entry for a sub-region of a changed region? 1080 const MemRegion *Super = MR; 1081 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1082 Super = SR->getSuperRegion(); 1083 if (Invalidated.count(Super)) { 1084 Entries = F.remove(Entries, MR); 1085 break; 1086 } 1087 } 1088 } 1089 1090 return state->set<CStringLength>(Entries); 1091} 1092 1093void CStringChecker::MarkLiveSymbols(const GRState *state, SymbolReaper &SR) { 1094 // Mark all symbols in our string length map as valid. 1095 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1096 1097 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1098 I != E; ++I) { 1099 SVal Len = I.getData(); 1100 if (SymbolRef Sym = Len.getAsSymbol()) 1101 SR.markInUse(Sym); 1102 } 1103} 1104 1105void CStringChecker::evalDeadSymbols(CheckerContext &C, SymbolReaper &SR) { 1106 if (!SR.hasDeadSymbols()) 1107 return; 1108 1109 const GRState *state = C.getState(); 1110 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1111 if (Entries.isEmpty()) 1112 return; 1113 1114 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1115 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1116 I != E; ++I) { 1117 SVal Len = I.getData(); 1118 if (SymbolRef Sym = Len.getAsSymbol()) { 1119 if (SR.isDead(Sym)) 1120 Entries = F.remove(Entries, I.getKey()); 1121 } 1122 } 1123 1124 state = state->set<CStringLength>(Entries); 1125 C.generateNode(state); 1126} 1127