CStringChecker.cpp revision be4242ce039f0542ea0dd5f234aa0ee698f90c53
1//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This defines CStringChecker, which is an assortment of checks on calls 11// to functions in <string.h>. 12// 13//===----------------------------------------------------------------------===// 14 15#include "ClangSACheckers.h" 16#include "clang/StaticAnalyzer/Core/CheckerManager.h" 17#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 18#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerVisitor.h" 19#include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h" 20#include "llvm/ADT/StringSwitch.h" 21 22using namespace clang; 23using namespace ento; 24 25namespace { 26class CStringChecker : public CheckerVisitor<CStringChecker> { 27 BugType *BT_Null, *BT_Bounds, *BT_BoundsWrite, *BT_Overlap, *BT_NotCString; 28public: 29 CStringChecker() 30 : BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0) 31 {} 32 static void *getTag() { static int tag; return &tag; } 33 34 bool evalCallExpr(CheckerContext &C, const CallExpr *CE); 35 void PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS); 36 void MarkLiveSymbols(const GRState *state, SymbolReaper &SR); 37 void evalDeadSymbols(CheckerContext &C, SymbolReaper &SR); 38 bool wantsRegionChangeUpdate(const GRState *state); 39 40 const GRState *EvalRegionChanges(const GRState *state, 41 const MemRegion * const *Begin, 42 const MemRegion * const *End, 43 bool*); 44 45 typedef void (CStringChecker::*FnCheck)(CheckerContext &, const CallExpr *); 46 47 void evalMemcpy(CheckerContext &C, const CallExpr *CE); 48 void evalMemmove(CheckerContext &C, const CallExpr *CE); 49 void evalBcopy(CheckerContext &C, const CallExpr *CE); 50 void evalCopyCommon(CheckerContext &C, const GRState *state, 51 const Expr *Size, const Expr *Source, const Expr *Dest, 52 bool Restricted = false); 53 54 void evalMemcmp(CheckerContext &C, const CallExpr *CE); 55 56 void evalstrLength(CheckerContext &C, const CallExpr *CE); 57 void evalstrnLength(CheckerContext &C, const CallExpr *CE); 58 void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 59 bool IsStrnlen = false); 60 61 void evalStrcpy(CheckerContext &C, const CallExpr *CE); 62 void evalStpcpy(CheckerContext &C, const CallExpr *CE); 63 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd); 64 65 // Utility methods 66 std::pair<const GRState*, const GRState*> 67 assumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty); 68 69 const GRState *setCStringLength(const GRState *state, const MemRegion *MR, 70 SVal strLength); 71 SVal getCStringLengthForRegion(CheckerContext &C, const GRState *&state, 72 const Expr *Ex, const MemRegion *MR); 73 SVal getCStringLength(CheckerContext &C, const GRState *&state, 74 const Expr *Ex, SVal Buf); 75 76 const GRState *InvalidateBuffer(CheckerContext &C, const GRState *state, 77 const Expr *Ex, SVal V); 78 79 bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 80 const MemRegion *MR); 81 82 // Re-usable checks 83 const GRState *checkNonNull(CheckerContext &C, const GRState *state, 84 const Expr *S, SVal l); 85 const GRState *CheckLocation(CheckerContext &C, const GRState *state, 86 const Expr *S, SVal l, 87 bool IsDestination = false); 88 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 89 const Expr *Size, 90 const Expr *FirstBuf, 91 const Expr *SecondBuf = NULL, 92 bool FirstIsDestination = false); 93 const GRState *CheckOverlap(CheckerContext &C, const GRState *state, 94 const Expr *Size, const Expr *First, 95 const Expr *Second); 96 void emitOverlapBug(CheckerContext &C, const GRState *state, 97 const Stmt *First, const Stmt *Second); 98}; 99 100class CStringLength { 101public: 102 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 103}; 104} //end anonymous namespace 105 106namespace clang { 107namespace ento { 108 template <> 109 struct GRStateTrait<CStringLength> 110 : public GRStatePartialTrait<CStringLength::EntryMap> { 111 static void *GDMIndex() { return CStringChecker::getTag(); } 112 }; 113} 114} 115 116static void RegisterCStringChecker(ExprEngine &Eng) { 117 Eng.registerCheck(new CStringChecker()); 118} 119 120void ento::registerCStringChecker(CheckerManager &mgr) { 121 mgr.addCheckerRegisterFunction(RegisterCStringChecker); 122} 123 124//===----------------------------------------------------------------------===// 125// Individual checks and utility methods. 126//===----------------------------------------------------------------------===// 127 128std::pair<const GRState*, const GRState*> 129CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V, 130 QualType Ty) { 131 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 132 if (!val) 133 return std::pair<const GRState*, const GRState *>(state, state); 134 135 SValBuilder &svalBuilder = C.getSValBuilder(); 136 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 137 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 138} 139 140const GRState *CStringChecker::checkNonNull(CheckerContext &C, 141 const GRState *state, 142 const Expr *S, SVal l) { 143 // If a previous check has failed, propagate the failure. 144 if (!state) 145 return NULL; 146 147 const GRState *stateNull, *stateNonNull; 148 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 149 150 if (stateNull && !stateNonNull) { 151 ExplodedNode *N = C.generateSink(stateNull); 152 if (!N) 153 return NULL; 154 155 if (!BT_Null) 156 BT_Null = new BuiltinBug("API", 157 "Null pointer argument in call to byte string function"); 158 159 // Generate a report for this bug. 160 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null); 161 EnhancedBugReport *report = new EnhancedBugReport(*BT, 162 BT->getDescription(), N); 163 164 report->addRange(S->getSourceRange()); 165 report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); 166 C.EmitReport(report); 167 return NULL; 168 } 169 170 // From here on, assume that the value is non-null. 171 assert(stateNonNull); 172 return stateNonNull; 173} 174 175// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 176const GRState *CStringChecker::CheckLocation(CheckerContext &C, 177 const GRState *state, 178 const Expr *S, SVal l, 179 bool IsDestination) { 180 // If a previous check has failed, propagate the failure. 181 if (!state) 182 return NULL; 183 184 // Check for out of bound array element access. 185 const MemRegion *R = l.getAsRegion(); 186 if (!R) 187 return state; 188 189 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 190 if (!ER) 191 return state; 192 193 assert(ER->getValueType() == C.getASTContext().CharTy && 194 "CheckLocation should only be called with char* ElementRegions"); 195 196 // Get the size of the array. 197 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 198 SValBuilder &svalBuilder = C.getSValBuilder(); 199 SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 200 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 201 202 // Get the index of the accessed element. 203 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 204 205 const GRState *StInBound = state->assumeInBound(Idx, Size, true); 206 const GRState *StOutBound = state->assumeInBound(Idx, Size, false); 207 if (StOutBound && !StInBound) { 208 ExplodedNode *N = C.generateSink(StOutBound); 209 if (!N) 210 return NULL; 211 212 BuiltinBug *BT; 213 if (IsDestination) { 214 if (!BT_BoundsWrite) { 215 BT_BoundsWrite = new BuiltinBug("Out-of-bound array access", 216 "Byte string function overflows destination buffer"); 217 } 218 BT = static_cast<BuiltinBug*>(BT_BoundsWrite); 219 } else { 220 if (!BT_Bounds) { 221 BT_Bounds = new BuiltinBug("Out-of-bound array access", 222 "Byte string function accesses out-of-bound array element"); 223 } 224 BT = static_cast<BuiltinBug*>(BT_Bounds); 225 } 226 227 // FIXME: It would be nice to eventually make this diagnostic more clear, 228 // e.g., by referencing the original declaration or by saying *why* this 229 // reference is outside the range. 230 231 // Generate a report for this bug. 232 RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); 233 234 report->addRange(S->getSourceRange()); 235 C.EmitReport(report); 236 return NULL; 237 } 238 239 // Array bound check succeeded. From this point forward the array bound 240 // should always succeed. 241 return StInBound; 242} 243 244const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, 245 const GRState *state, 246 const Expr *Size, 247 const Expr *FirstBuf, 248 const Expr *SecondBuf, 249 bool FirstIsDestination) { 250 // If a previous check has failed, propagate the failure. 251 if (!state) 252 return NULL; 253 254 SValBuilder &svalBuilder = C.getSValBuilder(); 255 ASTContext &Ctx = C.getASTContext(); 256 257 QualType sizeTy = Size->getType(); 258 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 259 260 // Check that the first buffer is non-null. 261 SVal BufVal = state->getSVal(FirstBuf); 262 state = checkNonNull(C, state, FirstBuf, BufVal); 263 if (!state) 264 return NULL; 265 266 // Get the access length and make sure it is known. 267 SVal LengthVal = state->getSVal(Size); 268 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 269 if (!Length) 270 return state; 271 272 // Compute the offset of the last element to be accessed: size-1. 273 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 274 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 275 *Length, One, sizeTy)); 276 277 // Check that the first buffer is sufficently long. 278 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 279 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 280 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 281 LastOffset, PtrTy); 282 state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); 283 284 // If the buffer isn't large enough, abort. 285 if (!state) 286 return NULL; 287 } 288 289 // If there's a second buffer, check it as well. 290 if (SecondBuf) { 291 BufVal = state->getSVal(SecondBuf); 292 state = checkNonNull(C, state, SecondBuf, BufVal); 293 if (!state) 294 return NULL; 295 296 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 297 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 298 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 299 LastOffset, PtrTy); 300 state = CheckLocation(C, state, SecondBuf, BufEnd); 301 } 302 } 303 304 // Large enough or not, return this state! 305 return state; 306} 307 308const GRState *CStringChecker::CheckOverlap(CheckerContext &C, 309 const GRState *state, 310 const Expr *Size, 311 const Expr *First, 312 const Expr *Second) { 313 // Do a simple check for overlap: if the two arguments are from the same 314 // buffer, see if the end of the first is greater than the start of the second 315 // or vice versa. 316 317 // If a previous check has failed, propagate the failure. 318 if (!state) 319 return NULL; 320 321 const GRState *stateTrue, *stateFalse; 322 323 // Get the buffer values and make sure they're known locations. 324 SVal firstVal = state->getSVal(First); 325 SVal secondVal = state->getSVal(Second); 326 327 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 328 if (!firstLoc) 329 return state; 330 331 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 332 if (!secondLoc) 333 return state; 334 335 // Are the two values the same? 336 SValBuilder &svalBuilder = C.getSValBuilder(); 337 llvm::tie(stateTrue, stateFalse) = 338 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 339 340 if (stateTrue && !stateFalse) { 341 // If the values are known to be equal, that's automatically an overlap. 342 emitOverlapBug(C, stateTrue, First, Second); 343 return NULL; 344 } 345 346 // assume the two expressions are not equal. 347 assert(stateFalse); 348 state = stateFalse; 349 350 // Which value comes first? 351 ASTContext &Ctx = svalBuilder.getContext(); 352 QualType cmpTy = Ctx.IntTy; 353 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 354 *firstLoc, *secondLoc, cmpTy); 355 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 356 if (!reverseTest) 357 return state; 358 359 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 360 if (stateTrue) { 361 if (stateFalse) { 362 // If we don't know which one comes first, we can't perform this test. 363 return state; 364 } else { 365 // Switch the values so that firstVal is before secondVal. 366 Loc *tmpLoc = firstLoc; 367 firstLoc = secondLoc; 368 secondLoc = tmpLoc; 369 370 // Switch the Exprs as well, so that they still correspond. 371 const Expr *tmpExpr = First; 372 First = Second; 373 Second = tmpExpr; 374 } 375 } 376 377 // Get the length, and make sure it too is known. 378 SVal LengthVal = state->getSVal(Size); 379 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 380 if (!Length) 381 return state; 382 383 // Convert the first buffer's start address to char*. 384 // Bail out if the cast fails. 385 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 386 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType()); 387 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 388 if (!FirstStartLoc) 389 return state; 390 391 // Compute the end of the first buffer. Bail out if THAT fails. 392 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 393 *FirstStartLoc, *Length, CharPtrTy); 394 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 395 if (!FirstEndLoc) 396 return state; 397 398 // Is the end of the first buffer past the start of the second buffer? 399 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 400 *FirstEndLoc, *secondLoc, cmpTy); 401 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 402 if (!OverlapTest) 403 return state; 404 405 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 406 407 if (stateTrue && !stateFalse) { 408 // Overlap! 409 emitOverlapBug(C, stateTrue, First, Second); 410 return NULL; 411 } 412 413 // assume the two expressions don't overlap. 414 assert(stateFalse); 415 return stateFalse; 416} 417 418void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, 419 const Stmt *First, const Stmt *Second) { 420 ExplodedNode *N = C.generateSink(state); 421 if (!N) 422 return; 423 424 if (!BT_Overlap) 425 BT_Overlap = new BugType("Unix API", "Improper arguments"); 426 427 // Generate a report for this bug. 428 RangedBugReport *report = 429 new RangedBugReport(*BT_Overlap, 430 "Arguments must not be overlapping buffers", N); 431 report->addRange(First->getSourceRange()); 432 report->addRange(Second->getSourceRange()); 433 434 C.EmitReport(report); 435} 436 437const GRState *CStringChecker::setCStringLength(const GRState *state, 438 const MemRegion *MR, 439 SVal strLength) { 440 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 441 if (strLength.isUnknown()) 442 return state; 443 444 MR = MR->StripCasts(); 445 446 switch (MR->getKind()) { 447 case MemRegion::StringRegionKind: 448 // FIXME: This can happen if we strcpy() into a string region. This is 449 // undefined [C99 6.4.5p6], but we should still warn about it. 450 return state; 451 452 case MemRegion::SymbolicRegionKind: 453 case MemRegion::AllocaRegionKind: 454 case MemRegion::VarRegionKind: 455 case MemRegion::FieldRegionKind: 456 case MemRegion::ObjCIvarRegionKind: 457 return state->set<CStringLength>(MR, strLength); 458 459 case MemRegion::ElementRegionKind: 460 // FIXME: Handle element regions by upper-bounding the parent region's 461 // string length. 462 return state; 463 464 default: 465 // Other regions (mostly non-data) can't have a reliable C string length. 466 // For now, just ignore the change. 467 // FIXME: These are rare but not impossible. We should output some kind of 468 // warning for things like strcpy((char[]){'a', 0}, "b"); 469 return state; 470 } 471} 472 473SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 474 const GRState *&state, 475 const Expr *Ex, 476 const MemRegion *MR) { 477 // If there's a recorded length, go ahead and return it. 478 const SVal *Recorded = state->get<CStringLength>(MR); 479 if (Recorded) 480 return *Recorded; 481 482 // Otherwise, get a new symbol and update the state. 483 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 484 SValBuilder &svalBuilder = C.getSValBuilder(); 485 QualType sizeTy = svalBuilder.getContext().getSizeType(); 486 SVal strLength = svalBuilder.getMetadataSymbolVal(getTag(), MR, Ex, sizeTy, Count); 487 state = state->set<CStringLength>(MR, strLength); 488 return strLength; 489} 490 491SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, 492 const Expr *Ex, SVal Buf) { 493 const MemRegion *MR = Buf.getAsRegion(); 494 if (!MR) { 495 // If we can't get a region, see if it's something we /know/ isn't a 496 // C string. In the context of locations, the only time we can issue such 497 // a warning is for labels. 498 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 499 if (ExplodedNode *N = C.generateNode(state)) { 500 if (!BT_NotCString) 501 BT_NotCString = new BuiltinBug("API", 502 "Argument is not a null-terminated string."); 503 504 llvm::SmallString<120> buf; 505 llvm::raw_svector_ostream os(buf); 506 os << "Argument to byte string function is the address of the label '" 507 << Label->getLabel()->getName() 508 << "', which is not a null-terminated string"; 509 510 // Generate a report for this bug. 511 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 512 os.str(), N); 513 514 report->addRange(Ex->getSourceRange()); 515 C.EmitReport(report); 516 } 517 518 return UndefinedVal(); 519 } 520 521 // If it's not a region and not a label, give up. 522 return UnknownVal(); 523 } 524 525 // If we have a region, strip casts from it and see if we can figure out 526 // its length. For anything we can't figure out, just return UnknownVal. 527 MR = MR->StripCasts(); 528 529 switch (MR->getKind()) { 530 case MemRegion::StringRegionKind: { 531 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 532 // so we can assume that the byte length is the correct C string length. 533 SValBuilder &svalBuilder = C.getSValBuilder(); 534 QualType sizeTy = svalBuilder.getContext().getSizeType(); 535 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 536 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 537 } 538 case MemRegion::SymbolicRegionKind: 539 case MemRegion::AllocaRegionKind: 540 case MemRegion::VarRegionKind: 541 case MemRegion::FieldRegionKind: 542 case MemRegion::ObjCIvarRegionKind: 543 return getCStringLengthForRegion(C, state, Ex, MR); 544 case MemRegion::CompoundLiteralRegionKind: 545 // FIXME: Can we track this? Is it necessary? 546 return UnknownVal(); 547 case MemRegion::ElementRegionKind: 548 // FIXME: How can we handle this? It's not good enough to subtract the 549 // offset from the base string length; consider "123\x00567" and &a[5]. 550 return UnknownVal(); 551 default: 552 // Other regions (mostly non-data) can't have a reliable C string length. 553 // In this case, an error is emitted and UndefinedVal is returned. 554 // The caller should always be prepared to handle this case. 555 if (ExplodedNode *N = C.generateNode(state)) { 556 if (!BT_NotCString) 557 BT_NotCString = new BuiltinBug("API", 558 "Argument is not a null-terminated string."); 559 560 llvm::SmallString<120> buf; 561 llvm::raw_svector_ostream os(buf); 562 563 os << "Argument to byte string function is "; 564 565 if (SummarizeRegion(os, C.getASTContext(), MR)) 566 os << ", which is not a null-terminated string"; 567 else 568 os << "not a null-terminated string"; 569 570 // Generate a report for this bug. 571 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 572 os.str(), N); 573 574 report->addRange(Ex->getSourceRange()); 575 C.EmitReport(report); 576 } 577 578 return UndefinedVal(); 579 } 580} 581 582const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, 583 const GRState *state, 584 const Expr *E, SVal V) { 585 Loc *L = dyn_cast<Loc>(&V); 586 if (!L) 587 return state; 588 589 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 590 // some assumptions about the value that CFRefCount can't. Even so, it should 591 // probably be refactored. 592 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 593 const MemRegion *R = MR->getRegion()->StripCasts(); 594 595 // Are we dealing with an ElementRegion? If so, we should be invalidating 596 // the super-region. 597 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 598 R = ER->getSuperRegion(); 599 // FIXME: What about layers of ElementRegions? 600 } 601 602 // Invalidate this region. 603 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 604 return state->invalidateRegion(R, E, Count, NULL); 605 } 606 607 // If we have a non-region value by chance, just remove the binding. 608 // FIXME: is this necessary or correct? This handles the non-Region 609 // cases. Is it ever valid to store to these? 610 return state->unbindLoc(*L); 611} 612 613bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 614 const MemRegion *MR) { 615 const TypedRegion *TR = dyn_cast<TypedRegion>(MR); 616 if (!TR) 617 return false; 618 619 switch (TR->getKind()) { 620 case MemRegion::FunctionTextRegionKind: { 621 const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl(); 622 if (FD) 623 os << "the address of the function '" << FD << "'"; 624 else 625 os << "the address of a function"; 626 return true; 627 } 628 case MemRegion::BlockTextRegionKind: 629 os << "block text"; 630 return true; 631 case MemRegion::BlockDataRegionKind: 632 os << "a block"; 633 return true; 634 case MemRegion::CXXThisRegionKind: 635 case MemRegion::CXXTempObjectRegionKind: 636 os << "a C++ temp object of type " << TR->getValueType().getAsString(); 637 return true; 638 case MemRegion::VarRegionKind: 639 os << "a variable of type" << TR->getValueType().getAsString(); 640 return true; 641 case MemRegion::FieldRegionKind: 642 os << "a field of type " << TR->getValueType().getAsString(); 643 return true; 644 case MemRegion::ObjCIvarRegionKind: 645 os << "an instance variable of type " << TR->getValueType().getAsString(); 646 return true; 647 default: 648 return false; 649 } 650} 651 652//===----------------------------------------------------------------------===// 653// evaluation of individual function calls. 654//===----------------------------------------------------------------------===// 655 656void CStringChecker::evalCopyCommon(CheckerContext &C, const GRState *state, 657 const Expr *Size, const Expr *Dest, 658 const Expr *Source, bool Restricted) { 659 // See if the size argument is zero. 660 SVal sizeVal = state->getSVal(Size); 661 QualType sizeTy = Size->getType(); 662 663 const GRState *stateZeroSize, *stateNonZeroSize; 664 llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy); 665 666 // If the size is zero, there won't be any actual memory access. 667 if (stateZeroSize) 668 C.addTransition(stateZeroSize); 669 670 // If the size can be nonzero, we have to check the other arguments. 671 if (stateNonZeroSize) { 672 state = stateNonZeroSize; 673 state = CheckBufferAccess(C, state, Size, Dest, Source, 674 /* FirstIsDst = */ true); 675 if (Restricted) 676 state = CheckOverlap(C, state, Size, Dest, Source); 677 678 if (state) { 679 // Invalidate the destination. 680 // FIXME: Even if we can't perfectly model the copy, we should see if we 681 // can use LazyCompoundVals to copy the source values into the destination. 682 // This would probably remove any existing bindings past the end of the 683 // copied region, but that's still an improvement over blank invalidation. 684 state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); 685 C.addTransition(state); 686 } 687 } 688} 689 690 691void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) { 692 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 693 // The return value is the address of the destination buffer. 694 const Expr *Dest = CE->getArg(0); 695 const GRState *state = C.getState(); 696 state = state->BindExpr(CE, state->getSVal(Dest)); 697 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1), true); 698} 699 700void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) { 701 // void *memmove(void *dst, const void *src, size_t n); 702 // The return value is the address of the destination buffer. 703 const Expr *Dest = CE->getArg(0); 704 const GRState *state = C.getState(); 705 state = state->BindExpr(CE, state->getSVal(Dest)); 706 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1)); 707} 708 709void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) { 710 // void bcopy(const void *src, void *dst, size_t n); 711 evalCopyCommon(C, C.getState(), CE->getArg(2), CE->getArg(1), CE->getArg(0)); 712} 713 714void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) { 715 // int memcmp(const void *s1, const void *s2, size_t n); 716 const Expr *Left = CE->getArg(0); 717 const Expr *Right = CE->getArg(1); 718 const Expr *Size = CE->getArg(2); 719 720 const GRState *state = C.getState(); 721 SValBuilder &svalBuilder = C.getSValBuilder(); 722 723 // See if the size argument is zero. 724 SVal sizeVal = state->getSVal(Size); 725 QualType sizeTy = Size->getType(); 726 727 const GRState *stateZeroSize, *stateNonZeroSize; 728 llvm::tie(stateZeroSize, stateNonZeroSize) = 729 assumeZero(C, state, sizeVal, sizeTy); 730 731 // If the size can be zero, the result will be 0 in that case, and we don't 732 // have to check either of the buffers. 733 if (stateZeroSize) { 734 state = stateZeroSize; 735 state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 736 C.addTransition(state); 737 } 738 739 // If the size can be nonzero, we have to check the other arguments. 740 if (stateNonZeroSize) { 741 state = stateNonZeroSize; 742 // If we know the two buffers are the same, we know the result is 0. 743 // First, get the two buffers' addresses. Another checker will have already 744 // made sure they're not undefined. 745 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left)); 746 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right)); 747 748 // See if they are the same. 749 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 750 const GRState *StSameBuf, *StNotSameBuf; 751 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 752 753 // If the two arguments might be the same buffer, we know the result is zero, 754 // and we only need to check one size. 755 if (StSameBuf) { 756 state = StSameBuf; 757 state = CheckBufferAccess(C, state, Size, Left); 758 if (state) { 759 state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 760 C.addTransition(state); 761 } 762 } 763 764 // If the two arguments might be different buffers, we have to check the 765 // size of both of them. 766 if (StNotSameBuf) { 767 state = StNotSameBuf; 768 state = CheckBufferAccess(C, state, Size, Left, Right); 769 if (state) { 770 // The return value is the comparison result, which we don't know. 771 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 772 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 773 state = state->BindExpr(CE, CmpV); 774 C.addTransition(state); 775 } 776 } 777 } 778} 779 780void CStringChecker::evalstrLength(CheckerContext &C, const CallExpr *CE) { 781 // size_t strlen(const char *s); 782 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 783} 784 785void CStringChecker::evalstrnLength(CheckerContext &C, const CallExpr *CE) { 786 // size_t strnlen(const char *s, size_t maxlen); 787 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 788} 789 790void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 791 bool IsStrnlen) { 792 const GRState *state = C.getState(); 793 const Expr *Arg = CE->getArg(0); 794 SVal ArgVal = state->getSVal(Arg); 795 796 // Check that the argument is non-null. 797 state = checkNonNull(C, state, Arg, ArgVal); 798 799 if (state) { 800 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 801 802 // If the argument isn't a valid C string, there's no valid state to 803 // transition to. 804 if (strLength.isUndef()) 805 return; 806 807 // If the check is for strnlen() then bind the return value to no more than 808 // the maxlen value. 809 if (IsStrnlen) { 810 const Expr *maxlenExpr = CE->getArg(1); 811 SVal maxlenVal = state->getSVal(maxlenExpr); 812 813 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 814 NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); 815 816 QualType cmpTy = C.getSValBuilder().getContext().IntTy; 817 const GRState *stateTrue, *stateFalse; 818 819 // Check if the strLength is greater than or equal to the maxlen 820 llvm::tie(stateTrue, stateFalse) = 821 state->assume(cast<DefinedOrUnknownSVal> 822 (C.getSValBuilder().evalBinOpNN(state, BO_GE, 823 *strLengthNL, *maxlenValNL, 824 cmpTy))); 825 826 // If the strLength is greater than or equal to the maxlen, set strLength 827 // to maxlen 828 if (stateTrue && !stateFalse) { 829 strLength = maxlenVal; 830 } 831 } 832 833 // If getCStringLength couldn't figure out the length, conjure a return 834 // value, so it can be used in constraints, at least. 835 if (strLength.isUnknown()) { 836 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 837 strLength = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 838 } 839 840 // Bind the return value. 841 state = state->BindExpr(CE, strLength); 842 C.addTransition(state); 843 } 844} 845 846void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) { 847 // char *strcpy(char *restrict dst, const char *restrict src); 848 evalStrcpyCommon(C, CE, /* returnEnd = */ false); 849} 850 851void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) { 852 // char *stpcpy(char *restrict dst, const char *restrict src); 853 evalStrcpyCommon(C, CE, /* returnEnd = */ true); 854} 855 856void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 857 bool returnEnd) { 858 const GRState *state = C.getState(); 859 860 // Check that the destination is non-null 861 const Expr *Dst = CE->getArg(0); 862 SVal DstVal = state->getSVal(Dst); 863 864 state = checkNonNull(C, state, Dst, DstVal); 865 if (!state) 866 return; 867 868 // Check that the source is non-null. 869 const Expr *srcExpr = CE->getArg(1); 870 SVal srcVal = state->getSVal(srcExpr); 871 state = checkNonNull(C, state, srcExpr, srcVal); 872 if (!state) 873 return; 874 875 // Get the string length of the source. 876 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 877 878 // If the source isn't a valid C string, give up. 879 if (strLength.isUndef()) 880 return; 881 882 SVal Result = (returnEnd ? UnknownVal() : DstVal); 883 884 // If the destination is a MemRegion, try to check for a buffer overflow and 885 // record the new string length. 886 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 887 // If the length is known, we can check for an overflow. 888 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&strLength)) { 889 SVal lastElement = 890 C.getSValBuilder().evalBinOpLN(state, BO_Add, *dstRegVal, 891 *knownStrLength, Dst->getType()); 892 893 state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true); 894 if (!state) 895 return; 896 897 // If this is a stpcpy-style copy, the last element is the return value. 898 if (returnEnd) 899 Result = lastElement; 900 } 901 902 // Invalidate the destination. This must happen before we set the C string 903 // length because invalidation will clear the length. 904 // FIXME: Even if we can't perfectly model the copy, we should see if we 905 // can use LazyCompoundVals to copy the source values into the destination. 906 // This would probably remove any existing bindings past the end of the 907 // string, but that's still an improvement over blank invalidation. 908 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 909 910 // Set the C string length of the destination. 911 state = setCStringLength(state, dstRegVal->getRegion(), strLength); 912 } 913 914 // If this is a stpcpy-style copy, but we were unable to check for a buffer 915 // overflow, we still need a result. Conjure a return value. 916 if (returnEnd && Result.isUnknown()) { 917 SValBuilder &svalBuilder = C.getSValBuilder(); 918 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 919 strLength = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 920 } 921 922 // Set the return value. 923 state = state->BindExpr(CE, Result); 924 C.addTransition(state); 925} 926 927//===----------------------------------------------------------------------===// 928// The driver method, and other Checker callbacks. 929//===----------------------------------------------------------------------===// 930 931bool CStringChecker::evalCallExpr(CheckerContext &C, const CallExpr *CE) { 932 // Get the callee. All the functions we care about are C functions 933 // with simple identifiers. 934 const GRState *state = C.getState(); 935 const Expr *Callee = CE->getCallee(); 936 const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl(); 937 938 if (!FD) 939 return false; 940 941 // Get the name of the callee. If it's a builtin, strip off the prefix. 942 IdentifierInfo *II = FD->getIdentifier(); 943 if (!II) // if no identifier, not a simple C function 944 return false; 945 llvm::StringRef Name = II->getName(); 946 if (Name.startswith("__builtin_")) 947 Name = Name.substr(10); 948 949 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 950 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy) 951 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) 952 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) 953 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) 954 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) 955 .Case("strlen", &CStringChecker::evalstrLength) 956 .Case("strnlen", &CStringChecker::evalstrnLength) 957 .Case("bcopy", &CStringChecker::evalBcopy) 958 .Default(NULL); 959 960 // If the callee isn't a string function, let another checker handle it. 961 if (!evalFunction) 962 return false; 963 964 // Check and evaluate the call. 965 (this->*evalFunction)(C, CE); 966 return true; 967} 968 969void CStringChecker::PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS) { 970 // Record string length for char a[] = "abc"; 971 const GRState *state = C.getState(); 972 973 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 974 I != E; ++I) { 975 const VarDecl *D = dyn_cast<VarDecl>(*I); 976 if (!D) 977 continue; 978 979 // FIXME: Handle array fields of structs. 980 if (!D->getType()->isArrayType()) 981 continue; 982 983 const Expr *Init = D->getInit(); 984 if (!Init) 985 continue; 986 if (!isa<StringLiteral>(Init)) 987 continue; 988 989 Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext()); 990 const MemRegion *MR = VarLoc.getAsRegion(); 991 if (!MR) 992 continue; 993 994 SVal StrVal = state->getSVal(Init); 995 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 996 DefinedOrUnknownSVal strLength 997 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 998 999 state = state->set<CStringLength>(MR, strLength); 1000 } 1001 1002 C.addTransition(state); 1003} 1004 1005bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) { 1006 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1007 return !Entries.isEmpty(); 1008} 1009 1010const GRState *CStringChecker::EvalRegionChanges(const GRState *state, 1011 const MemRegion * const *Begin, 1012 const MemRegion * const *End, 1013 bool *) { 1014 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1015 if (Entries.isEmpty()) 1016 return state; 1017 1018 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 1019 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 1020 1021 // First build sets for the changed regions and their super-regions. 1022 for ( ; Begin != End; ++Begin) { 1023 const MemRegion *MR = *Begin; 1024 Invalidated.insert(MR); 1025 1026 SuperRegions.insert(MR); 1027 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 1028 MR = SR->getSuperRegion(); 1029 SuperRegions.insert(MR); 1030 } 1031 } 1032 1033 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1034 1035 // Then loop over the entries in the current state. 1036 for (CStringLength::EntryMap::iterator I = Entries.begin(), 1037 E = Entries.end(); I != E; ++I) { 1038 const MemRegion *MR = I.getKey(); 1039 1040 // Is this entry for a super-region of a changed region? 1041 if (SuperRegions.count(MR)) { 1042 Entries = F.remove(Entries, MR); 1043 continue; 1044 } 1045 1046 // Is this entry for a sub-region of a changed region? 1047 const MemRegion *Super = MR; 1048 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1049 Super = SR->getSuperRegion(); 1050 if (Invalidated.count(Super)) { 1051 Entries = F.remove(Entries, MR); 1052 break; 1053 } 1054 } 1055 } 1056 1057 return state->set<CStringLength>(Entries); 1058} 1059 1060void CStringChecker::MarkLiveSymbols(const GRState *state, SymbolReaper &SR) { 1061 // Mark all symbols in our string length map as valid. 1062 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1063 1064 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1065 I != E; ++I) { 1066 SVal Len = I.getData(); 1067 if (SymbolRef Sym = Len.getAsSymbol()) 1068 SR.markInUse(Sym); 1069 } 1070} 1071 1072void CStringChecker::evalDeadSymbols(CheckerContext &C, SymbolReaper &SR) { 1073 if (!SR.hasDeadSymbols()) 1074 return; 1075 1076 const GRState *state = C.getState(); 1077 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1078 if (Entries.isEmpty()) 1079 return; 1080 1081 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1082 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1083 I != E; ++I) { 1084 SVal Len = I.getData(); 1085 if (SymbolRef Sym = Len.getAsSymbol()) { 1086 if (SR.isDead(Sym)) 1087 Entries = F.remove(Entries, I.getKey()); 1088 } 1089 } 1090 1091 state = state->set<CStringLength>(Entries); 1092 C.generateNode(state); 1093} 1094