CStringChecker.cpp revision 695fb502825a53ccd178ec1c85c77929d88acb71
11d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// 21d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling// 31d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling// The LLVM Compiler Infrastructure 41d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling// 51d2624a10e2c559f8ba9ef89eaa30832c0a83a96Sascha Haeberling// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This defines CStringChecker, which is an assortment of checks on calls 11// to functions in <string.h>. 12// 13//===----------------------------------------------------------------------===// 14 15#include "ClangSACheckers.h" 16#include "clang/StaticAnalyzer/Core/CheckerManager.h" 17#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 18#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerVisitor.h" 19#include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h" 20#include "llvm/ADT/StringSwitch.h" 21 22using namespace clang; 23using namespace ento; 24 25namespace { 26class CStringChecker : public CheckerVisitor<CStringChecker> { 27 BugType *BT_Null, *BT_Bounds, *BT_BoundsWrite, *BT_Overlap, *BT_NotCString; 28public: 29 CStringChecker() 30 : BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0) 31 {} 32 static void *getTag() { static int tag; return &tag; } 33 34 bool evalCallExpr(CheckerContext &C, const CallExpr *CE); 35 void PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS); 36 void MarkLiveSymbols(const GRState *state, SymbolReaper &SR); 37 void evalDeadSymbols(CheckerContext &C, SymbolReaper &SR); 38 bool wantsRegionChangeUpdate(const GRState *state); 39 40 const GRState *EvalRegionChanges(const GRState *state, 41 const MemRegion * const *Begin, 42 const MemRegion * const *End, 43 bool*); 44 45 typedef void (CStringChecker::*FnCheck)(CheckerContext &, const CallExpr *); 46 47 void evalMemcpy(CheckerContext &C, const CallExpr *CE); 48 void evalMemmove(CheckerContext &C, const CallExpr *CE); 49 void evalBcopy(CheckerContext &C, const CallExpr *CE); 50 void evalCopyCommon(CheckerContext &C, const GRState *state, 51 const Expr *Size, const Expr *Source, const Expr *Dest, 52 bool Restricted = false); 53 54 void evalMemcmp(CheckerContext &C, const CallExpr *CE); 55 56 void evalstrLength(CheckerContext &C, const CallExpr *CE); 57 58 void evalStrcpy(CheckerContext &C, const CallExpr *CE); 59 void evalStpcpy(CheckerContext &C, const CallExpr *CE); 60 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd); 61 62 // Utility methods 63 std::pair<const GRState*, const GRState*> 64 assumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty); 65 66 const GRState *setCStringLength(const GRState *state, const MemRegion *MR, 67 SVal strLength); 68 SVal getCStringLengthForRegion(CheckerContext &C, const GRState *&state, 69 const Expr *Ex, const MemRegion *MR); 70 SVal getCStringLength(CheckerContext &C, const GRState *&state, 71 const Expr *Ex, SVal Buf); 72 73 const GRState *InvalidateBuffer(CheckerContext &C, const GRState *state, 74 const Expr *Ex, SVal V); 75 76 bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 77 const MemRegion *MR); 78 79 // Re-usable checks 80 const GRState *checkNonNull(CheckerContext &C, const GRState *state, 81 const Expr *S, SVal l); 82 const GRState *CheckLocation(CheckerContext &C, const GRState *state, 83 const Expr *S, SVal l, 84 bool IsDestination = false); 85 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 86 const Expr *Size, 87 const Expr *FirstBuf, 88 const Expr *SecondBuf = NULL, 89 bool FirstIsDestination = false); 90 const GRState *CheckOverlap(CheckerContext &C, const GRState *state, 91 const Expr *Size, const Expr *First, 92 const Expr *Second); 93 void emitOverlapBug(CheckerContext &C, const GRState *state, 94 const Stmt *First, const Stmt *Second); 95}; 96 97class CStringLength { 98public: 99 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 100}; 101} //end anonymous namespace 102 103namespace clang { 104namespace ento { 105 template <> 106 struct GRStateTrait<CStringLength> 107 : public GRStatePartialTrait<CStringLength::EntryMap> { 108 static void *GDMIndex() { return CStringChecker::getTag(); } 109 }; 110} 111} 112 113static void RegisterCStringChecker(ExprEngine &Eng) { 114 Eng.registerCheck(new CStringChecker()); 115} 116 117void ento::registerCStringChecker(CheckerManager &mgr) { 118 mgr.addCheckerRegisterFunction(RegisterCStringChecker); 119} 120 121//===----------------------------------------------------------------------===// 122// Individual checks and utility methods. 123//===----------------------------------------------------------------------===// 124 125std::pair<const GRState*, const GRState*> 126CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V, 127 QualType Ty) { 128 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 129 if (!val) 130 return std::pair<const GRState*, const GRState *>(state, state); 131 132 SValBuilder &svalBuilder = C.getSValBuilder(); 133 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 134 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 135} 136 137const GRState *CStringChecker::checkNonNull(CheckerContext &C, 138 const GRState *state, 139 const Expr *S, SVal l) { 140 // If a previous check has failed, propagate the failure. 141 if (!state) 142 return NULL; 143 144 const GRState *stateNull, *stateNonNull; 145 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 146 147 if (stateNull && !stateNonNull) { 148 ExplodedNode *N = C.generateSink(stateNull); 149 if (!N) 150 return NULL; 151 152 if (!BT_Null) 153 BT_Null = new BuiltinBug("API", 154 "Null pointer argument in call to byte string function"); 155 156 // Generate a report for this bug. 157 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null); 158 EnhancedBugReport *report = new EnhancedBugReport(*BT, 159 BT->getDescription(), N); 160 161 report->addRange(S->getSourceRange()); 162 report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); 163 C.EmitReport(report); 164 return NULL; 165 } 166 167 // From here on, assume that the value is non-null. 168 assert(stateNonNull); 169 return stateNonNull; 170} 171 172// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 173const GRState *CStringChecker::CheckLocation(CheckerContext &C, 174 const GRState *state, 175 const Expr *S, SVal l, 176 bool IsDestination) { 177 // If a previous check has failed, propagate the failure. 178 if (!state) 179 return NULL; 180 181 // Check for out of bound array element access. 182 const MemRegion *R = l.getAsRegion(); 183 if (!R) 184 return state; 185 186 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 187 if (!ER) 188 return state; 189 190 assert(ER->getValueType() == C.getASTContext().CharTy && 191 "CheckLocation should only be called with char* ElementRegions"); 192 193 // Get the size of the array. 194 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 195 SValBuilder &svalBuilder = C.getSValBuilder(); 196 SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 197 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 198 199 // Get the index of the accessed element. 200 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 201 202 const GRState *StInBound = state->assumeInBound(Idx, Size, true); 203 const GRState *StOutBound = state->assumeInBound(Idx, Size, false); 204 if (StOutBound && !StInBound) { 205 ExplodedNode *N = C.generateSink(StOutBound); 206 if (!N) 207 return NULL; 208 209 BuiltinBug *BT; 210 if (IsDestination) { 211 if (!BT_BoundsWrite) { 212 BT_BoundsWrite = new BuiltinBug("Out-of-bound array access", 213 "Byte string function overflows destination buffer"); 214 } 215 BT = static_cast<BuiltinBug*>(BT_BoundsWrite); 216 } else { 217 if (!BT_Bounds) { 218 BT_Bounds = new BuiltinBug("Out-of-bound array access", 219 "Byte string function accesses out-of-bound array element"); 220 } 221 BT = static_cast<BuiltinBug*>(BT_Bounds); 222 } 223 224 // FIXME: It would be nice to eventually make this diagnostic more clear, 225 // e.g., by referencing the original declaration or by saying *why* this 226 // reference is outside the range. 227 228 // Generate a report for this bug. 229 RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); 230 231 report->addRange(S->getSourceRange()); 232 C.EmitReport(report); 233 return NULL; 234 } 235 236 // Array bound check succeeded. From this point forward the array bound 237 // should always succeed. 238 return StInBound; 239} 240 241const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, 242 const GRState *state, 243 const Expr *Size, 244 const Expr *FirstBuf, 245 const Expr *SecondBuf, 246 bool FirstIsDestination) { 247 // If a previous check has failed, propagate the failure. 248 if (!state) 249 return NULL; 250 251 SValBuilder &svalBuilder = C.getSValBuilder(); 252 ASTContext &Ctx = C.getASTContext(); 253 254 QualType sizeTy = Size->getType(); 255 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 256 257 // Check that the first buffer is non-null. 258 SVal BufVal = state->getSVal(FirstBuf); 259 state = checkNonNull(C, state, FirstBuf, BufVal); 260 if (!state) 261 return NULL; 262 263 // Get the access length and make sure it is known. 264 SVal LengthVal = state->getSVal(Size); 265 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 266 if (!Length) 267 return state; 268 269 // Compute the offset of the last element to be accessed: size-1. 270 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 271 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 272 *Length, One, sizeTy)); 273 274 // Check that the first buffer is sufficently long. 275 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 276 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 277 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 278 LastOffset, PtrTy); 279 state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); 280 281 // If the buffer isn't large enough, abort. 282 if (!state) 283 return NULL; 284 } 285 286 // If there's a second buffer, check it as well. 287 if (SecondBuf) { 288 BufVal = state->getSVal(SecondBuf); 289 state = checkNonNull(C, state, SecondBuf, BufVal); 290 if (!state) 291 return NULL; 292 293 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 294 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 295 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 296 LastOffset, PtrTy); 297 state = CheckLocation(C, state, SecondBuf, BufEnd); 298 } 299 } 300 301 // Large enough or not, return this state! 302 return state; 303} 304 305const GRState *CStringChecker::CheckOverlap(CheckerContext &C, 306 const GRState *state, 307 const Expr *Size, 308 const Expr *First, 309 const Expr *Second) { 310 // Do a simple check for overlap: if the two arguments are from the same 311 // buffer, see if the end of the first is greater than the start of the second 312 // or vice versa. 313 314 // If a previous check has failed, propagate the failure. 315 if (!state) 316 return NULL; 317 318 const GRState *stateTrue, *stateFalse; 319 320 // Get the buffer values and make sure they're known locations. 321 SVal firstVal = state->getSVal(First); 322 SVal secondVal = state->getSVal(Second); 323 324 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 325 if (!firstLoc) 326 return state; 327 328 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 329 if (!secondLoc) 330 return state; 331 332 // Are the two values the same? 333 SValBuilder &svalBuilder = C.getSValBuilder(); 334 llvm::tie(stateTrue, stateFalse) = 335 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 336 337 if (stateTrue && !stateFalse) { 338 // If the values are known to be equal, that's automatically an overlap. 339 emitOverlapBug(C, stateTrue, First, Second); 340 return NULL; 341 } 342 343 // assume the two expressions are not equal. 344 assert(stateFalse); 345 state = stateFalse; 346 347 // Which value comes first? 348 ASTContext &Ctx = svalBuilder.getContext(); 349 QualType cmpTy = Ctx.IntTy; 350 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 351 *firstLoc, *secondLoc, cmpTy); 352 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 353 if (!reverseTest) 354 return state; 355 356 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 357 if (stateTrue) { 358 if (stateFalse) { 359 // If we don't know which one comes first, we can't perform this test. 360 return state; 361 } else { 362 // Switch the values so that firstVal is before secondVal. 363 Loc *tmpLoc = firstLoc; 364 firstLoc = secondLoc; 365 secondLoc = tmpLoc; 366 367 // Switch the Exprs as well, so that they still correspond. 368 const Expr *tmpExpr = First; 369 First = Second; 370 Second = tmpExpr; 371 } 372 } 373 374 // Get the length, and make sure it too is known. 375 SVal LengthVal = state->getSVal(Size); 376 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 377 if (!Length) 378 return state; 379 380 // Convert the first buffer's start address to char*. 381 // Bail out if the cast fails. 382 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 383 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType()); 384 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 385 if (!FirstStartLoc) 386 return state; 387 388 // Compute the end of the first buffer. Bail out if THAT fails. 389 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 390 *FirstStartLoc, *Length, CharPtrTy); 391 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 392 if (!FirstEndLoc) 393 return state; 394 395 // Is the end of the first buffer past the start of the second buffer? 396 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 397 *FirstEndLoc, *secondLoc, cmpTy); 398 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 399 if (!OverlapTest) 400 return state; 401 402 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 403 404 if (stateTrue && !stateFalse) { 405 // Overlap! 406 emitOverlapBug(C, stateTrue, First, Second); 407 return NULL; 408 } 409 410 // assume the two expressions don't overlap. 411 assert(stateFalse); 412 return stateFalse; 413} 414 415void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, 416 const Stmt *First, const Stmt *Second) { 417 ExplodedNode *N = C.generateSink(state); 418 if (!N) 419 return; 420 421 if (!BT_Overlap) 422 BT_Overlap = new BugType("Unix API", "Improper arguments"); 423 424 // Generate a report for this bug. 425 RangedBugReport *report = 426 new RangedBugReport(*BT_Overlap, 427 "Arguments must not be overlapping buffers", N); 428 report->addRange(First->getSourceRange()); 429 report->addRange(Second->getSourceRange()); 430 431 C.EmitReport(report); 432} 433 434const GRState *CStringChecker::setCStringLength(const GRState *state, 435 const MemRegion *MR, 436 SVal strLength) { 437 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 438 if (strLength.isUnknown()) 439 return state; 440 441 MR = MR->StripCasts(); 442 443 switch (MR->getKind()) { 444 case MemRegion::StringRegionKind: 445 // FIXME: This can happen if we strcpy() into a string region. This is 446 // undefined [C99 6.4.5p6], but we should still warn about it. 447 return state; 448 449 case MemRegion::SymbolicRegionKind: 450 case MemRegion::AllocaRegionKind: 451 case MemRegion::VarRegionKind: 452 case MemRegion::FieldRegionKind: 453 case MemRegion::ObjCIvarRegionKind: 454 return state->set<CStringLength>(MR, strLength); 455 456 case MemRegion::ElementRegionKind: 457 // FIXME: Handle element regions by upper-bounding the parent region's 458 // string length. 459 return state; 460 461 default: 462 // Other regions (mostly non-data) can't have a reliable C string length. 463 // For now, just ignore the change. 464 // FIXME: These are rare but not impossible. We should output some kind of 465 // warning for things like strcpy((char[]){'a', 0}, "b"); 466 return state; 467 } 468} 469 470SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 471 const GRState *&state, 472 const Expr *Ex, 473 const MemRegion *MR) { 474 // If there's a recorded length, go ahead and return it. 475 const SVal *Recorded = state->get<CStringLength>(MR); 476 if (Recorded) 477 return *Recorded; 478 479 // Otherwise, get a new symbol and update the state. 480 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 481 SValBuilder &svalBuilder = C.getSValBuilder(); 482 QualType sizeTy = svalBuilder.getContext().getSizeType(); 483 SVal strLength = svalBuilder.getMetadataSymbolVal(getTag(), MR, Ex, sizeTy, Count); 484 state = state->set<CStringLength>(MR, strLength); 485 return strLength; 486} 487 488SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, 489 const Expr *Ex, SVal Buf) { 490 const MemRegion *MR = Buf.getAsRegion(); 491 if (!MR) { 492 // If we can't get a region, see if it's something we /know/ isn't a 493 // C string. In the context of locations, the only time we can issue such 494 // a warning is for labels. 495 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 496 if (ExplodedNode *N = C.generateNode(state)) { 497 if (!BT_NotCString) 498 BT_NotCString = new BuiltinBug("API", 499 "Argument is not a null-terminated string."); 500 501 llvm::SmallString<120> buf; 502 llvm::raw_svector_ostream os(buf); 503 os << "Argument to byte string function is the address of the label '" 504 << Label->getLabel()->getName() 505 << "', which is not a null-terminated string"; 506 507 // Generate a report for this bug. 508 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 509 os.str(), N); 510 511 report->addRange(Ex->getSourceRange()); 512 C.EmitReport(report); 513 } 514 515 return UndefinedVal(); 516 } 517 518 // If it's not a region and not a label, give up. 519 return UnknownVal(); 520 } 521 522 // If we have a region, strip casts from it and see if we can figure out 523 // its length. For anything we can't figure out, just return UnknownVal. 524 MR = MR->StripCasts(); 525 526 switch (MR->getKind()) { 527 case MemRegion::StringRegionKind: { 528 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 529 // so we can assume that the byte length is the correct C string length. 530 SValBuilder &svalBuilder = C.getSValBuilder(); 531 QualType sizeTy = svalBuilder.getContext().getSizeType(); 532 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 533 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 534 } 535 case MemRegion::SymbolicRegionKind: 536 case MemRegion::AllocaRegionKind: 537 case MemRegion::VarRegionKind: 538 case MemRegion::FieldRegionKind: 539 case MemRegion::ObjCIvarRegionKind: 540 return getCStringLengthForRegion(C, state, Ex, MR); 541 case MemRegion::CompoundLiteralRegionKind: 542 // FIXME: Can we track this? Is it necessary? 543 return UnknownVal(); 544 case MemRegion::ElementRegionKind: 545 // FIXME: How can we handle this? It's not good enough to subtract the 546 // offset from the base string length; consider "123\x00567" and &a[5]. 547 return UnknownVal(); 548 default: 549 // Other regions (mostly non-data) can't have a reliable C string length. 550 // In this case, an error is emitted and UndefinedVal is returned. 551 // The caller should always be prepared to handle this case. 552 if (ExplodedNode *N = C.generateNode(state)) { 553 if (!BT_NotCString) 554 BT_NotCString = new BuiltinBug("API", 555 "Argument is not a null-terminated string."); 556 557 llvm::SmallString<120> buf; 558 llvm::raw_svector_ostream os(buf); 559 560 os << "Argument to byte string function is "; 561 562 if (SummarizeRegion(os, C.getASTContext(), MR)) 563 os << ", which is not a null-terminated string"; 564 else 565 os << "not a null-terminated string"; 566 567 // Generate a report for this bug. 568 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 569 os.str(), N); 570 571 report->addRange(Ex->getSourceRange()); 572 C.EmitReport(report); 573 } 574 575 return UndefinedVal(); 576 } 577} 578 579const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, 580 const GRState *state, 581 const Expr *E, SVal V) { 582 Loc *L = dyn_cast<Loc>(&V); 583 if (!L) 584 return state; 585 586 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 587 // some assumptions about the value that CFRefCount can't. Even so, it should 588 // probably be refactored. 589 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 590 const MemRegion *R = MR->getRegion()->StripCasts(); 591 592 // Are we dealing with an ElementRegion? If so, we should be invalidating 593 // the super-region. 594 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 595 R = ER->getSuperRegion(); 596 // FIXME: What about layers of ElementRegions? 597 } 598 599 // Invalidate this region. 600 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 601 return state->invalidateRegion(R, E, Count, NULL); 602 } 603 604 // If we have a non-region value by chance, just remove the binding. 605 // FIXME: is this necessary or correct? This handles the non-Region 606 // cases. Is it ever valid to store to these? 607 return state->unbindLoc(*L); 608} 609 610bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 611 const MemRegion *MR) { 612 const TypedRegion *TR = dyn_cast<TypedRegion>(MR); 613 if (!TR) 614 return false; 615 616 switch (TR->getKind()) { 617 case MemRegion::FunctionTextRegionKind: { 618 const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl(); 619 if (FD) 620 os << "the address of the function '" << FD << "'"; 621 else 622 os << "the address of a function"; 623 return true; 624 } 625 case MemRegion::BlockTextRegionKind: 626 os << "block text"; 627 return true; 628 case MemRegion::BlockDataRegionKind: 629 os << "a block"; 630 return true; 631 case MemRegion::CXXThisRegionKind: 632 case MemRegion::CXXTempObjectRegionKind: 633 os << "a C++ temp object of type " << TR->getValueType().getAsString(); 634 return true; 635 case MemRegion::VarRegionKind: 636 os << "a variable of type" << TR->getValueType().getAsString(); 637 return true; 638 case MemRegion::FieldRegionKind: 639 os << "a field of type " << TR->getValueType().getAsString(); 640 return true; 641 case MemRegion::ObjCIvarRegionKind: 642 os << "an instance variable of type " << TR->getValueType().getAsString(); 643 return true; 644 default: 645 return false; 646 } 647} 648 649//===----------------------------------------------------------------------===// 650// evaluation of individual function calls. 651//===----------------------------------------------------------------------===// 652 653void CStringChecker::evalCopyCommon(CheckerContext &C, const GRState *state, 654 const Expr *Size, const Expr *Dest, 655 const Expr *Source, bool Restricted) { 656 // See if the size argument is zero. 657 SVal sizeVal = state->getSVal(Size); 658 QualType sizeTy = Size->getType(); 659 660 const GRState *stateZeroSize, *stateNonZeroSize; 661 llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy); 662 663 // If the size is zero, there won't be any actual memory access. 664 if (stateZeroSize) 665 C.addTransition(stateZeroSize); 666 667 // If the size can be nonzero, we have to check the other arguments. 668 if (stateNonZeroSize) { 669 state = stateNonZeroSize; 670 state = CheckBufferAccess(C, state, Size, Dest, Source, 671 /* FirstIsDst = */ true); 672 if (Restricted) 673 state = CheckOverlap(C, state, Size, Dest, Source); 674 675 if (state) { 676 // Invalidate the destination. 677 // FIXME: Even if we can't perfectly model the copy, we should see if we 678 // can use LazyCompoundVals to copy the source values into the destination. 679 // This would probably remove any existing bindings past the end of the 680 // copied region, but that's still an improvement over blank invalidation. 681 state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); 682 C.addTransition(state); 683 } 684 } 685} 686 687 688void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) { 689 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 690 // The return value is the address of the destination buffer. 691 const Expr *Dest = CE->getArg(0); 692 const GRState *state = C.getState(); 693 state = state->BindExpr(CE, state->getSVal(Dest)); 694 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1), true); 695} 696 697void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) { 698 // void *memmove(void *dst, const void *src, size_t n); 699 // The return value is the address of the destination buffer. 700 const Expr *Dest = CE->getArg(0); 701 const GRState *state = C.getState(); 702 state = state->BindExpr(CE, state->getSVal(Dest)); 703 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1)); 704} 705 706void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) { 707 // void bcopy(const void *src, void *dst, size_t n); 708 evalCopyCommon(C, C.getState(), CE->getArg(2), CE->getArg(1), CE->getArg(0)); 709} 710 711void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) { 712 // int memcmp(const void *s1, const void *s2, size_t n); 713 const Expr *Left = CE->getArg(0); 714 const Expr *Right = CE->getArg(1); 715 const Expr *Size = CE->getArg(2); 716 717 const GRState *state = C.getState(); 718 SValBuilder &svalBuilder = C.getSValBuilder(); 719 720 // See if the size argument is zero. 721 SVal sizeVal = state->getSVal(Size); 722 QualType sizeTy = Size->getType(); 723 724 const GRState *stateZeroSize, *stateNonZeroSize; 725 llvm::tie(stateZeroSize, stateNonZeroSize) = 726 assumeZero(C, state, sizeVal, sizeTy); 727 728 // If the size can be zero, the result will be 0 in that case, and we don't 729 // have to check either of the buffers. 730 if (stateZeroSize) { 731 state = stateZeroSize; 732 state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 733 C.addTransition(state); 734 } 735 736 // If the size can be nonzero, we have to check the other arguments. 737 if (stateNonZeroSize) { 738 state = stateNonZeroSize; 739 // If we know the two buffers are the same, we know the result is 0. 740 // First, get the two buffers' addresses. Another checker will have already 741 // made sure they're not undefined. 742 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left)); 743 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right)); 744 745 // See if they are the same. 746 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 747 const GRState *StSameBuf, *StNotSameBuf; 748 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 749 750 // If the two arguments might be the same buffer, we know the result is zero, 751 // and we only need to check one size. 752 if (StSameBuf) { 753 state = StSameBuf; 754 state = CheckBufferAccess(C, state, Size, Left); 755 if (state) { 756 state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 757 C.addTransition(state); 758 } 759 } 760 761 // If the two arguments might be different buffers, we have to check the 762 // size of both of them. 763 if (StNotSameBuf) { 764 state = StNotSameBuf; 765 state = CheckBufferAccess(C, state, Size, Left, Right); 766 if (state) { 767 // The return value is the comparison result, which we don't know. 768 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 769 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 770 state = state->BindExpr(CE, CmpV); 771 C.addTransition(state); 772 } 773 } 774 } 775} 776 777void CStringChecker::evalstrLength(CheckerContext &C, const CallExpr *CE) { 778 // size_t strlen(const char *s); 779 const GRState *state = C.getState(); 780 const Expr *Arg = CE->getArg(0); 781 SVal ArgVal = state->getSVal(Arg); 782 783 // Check that the argument is non-null. 784 state = checkNonNull(C, state, Arg, ArgVal); 785 786 if (state) { 787 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 788 789 // If the argument isn't a valid C string, there's no valid state to 790 // transition to. 791 if (strLength.isUndef()) 792 return; 793 794 // If getCStringLength couldn't figure out the length, conjure a return 795 // value, so it can be used in constraints, at least. 796 if (strLength.isUnknown()) { 797 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 798 strLength = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 799 } 800 801 // Bind the return value. 802 state = state->BindExpr(CE, strLength); 803 C.addTransition(state); 804 } 805} 806 807void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) { 808 // char *strcpy(char *restrict dst, const char *restrict src); 809 evalStrcpyCommon(C, CE, /* returnEnd = */ false); 810} 811 812void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) { 813 // char *stpcpy(char *restrict dst, const char *restrict src); 814 evalStrcpyCommon(C, CE, /* returnEnd = */ true); 815} 816 817void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 818 bool returnEnd) { 819 const GRState *state = C.getState(); 820 821 // Check that the destination is non-null 822 const Expr *Dst = CE->getArg(0); 823 SVal DstVal = state->getSVal(Dst); 824 825 state = checkNonNull(C, state, Dst, DstVal); 826 if (!state) 827 return; 828 829 // Check that the source is non-null. 830 const Expr *srcExpr = CE->getArg(1); 831 SVal srcVal = state->getSVal(srcExpr); 832 state = checkNonNull(C, state, srcExpr, srcVal); 833 if (!state) 834 return; 835 836 // Get the string length of the source. 837 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 838 839 // If the source isn't a valid C string, give up. 840 if (strLength.isUndef()) 841 return; 842 843 SVal Result = (returnEnd ? UnknownVal() : DstVal); 844 845 // If the destination is a MemRegion, try to check for a buffer overflow and 846 // record the new string length. 847 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 848 // If the length is known, we can check for an overflow. 849 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&strLength)) { 850 SVal lastElement = 851 C.getSValBuilder().evalBinOpLN(state, BO_Add, *dstRegVal, 852 *knownStrLength, Dst->getType()); 853 854 state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true); 855 if (!state) 856 return; 857 858 // If this is a stpcpy-style copy, the last element is the return value. 859 if (returnEnd) 860 Result = lastElement; 861 } 862 863 // Invalidate the destination. This must happen before we set the C string 864 // length because invalidation will clear the length. 865 // FIXME: Even if we can't perfectly model the copy, we should see if we 866 // can use LazyCompoundVals to copy the source values into the destination. 867 // This would probably remove any existing bindings past the end of the 868 // string, but that's still an improvement over blank invalidation. 869 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 870 871 // Set the C string length of the destination. 872 state = setCStringLength(state, dstRegVal->getRegion(), strLength); 873 } 874 875 // If this is a stpcpy-style copy, but we were unable to check for a buffer 876 // overflow, we still need a result. Conjure a return value. 877 if (returnEnd && Result.isUnknown()) { 878 SValBuilder &svalBuilder = C.getSValBuilder(); 879 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 880 strLength = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 881 } 882 883 // Set the return value. 884 state = state->BindExpr(CE, Result); 885 C.addTransition(state); 886} 887 888//===----------------------------------------------------------------------===// 889// The driver method, and other Checker callbacks. 890//===----------------------------------------------------------------------===// 891 892bool CStringChecker::evalCallExpr(CheckerContext &C, const CallExpr *CE) { 893 // Get the callee. All the functions we care about are C functions 894 // with simple identifiers. 895 const GRState *state = C.getState(); 896 const Expr *Callee = CE->getCallee(); 897 const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl(); 898 899 if (!FD) 900 return false; 901 902 // Get the name of the callee. If it's a builtin, strip off the prefix. 903 IdentifierInfo *II = FD->getIdentifier(); 904 if (!II) // if no identifier, not a simple C function 905 return false; 906 llvm::StringRef Name = II->getName(); 907 if (Name.startswith("__builtin_")) 908 Name = Name.substr(10); 909 910 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 911 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy) 912 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) 913 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) 914 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) 915 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) 916 .Case("strlen", &CStringChecker::evalstrLength) 917 .Case("bcopy", &CStringChecker::evalBcopy) 918 .Default(NULL); 919 920 // If the callee isn't a string function, let another checker handle it. 921 if (!evalFunction) 922 return false; 923 924 // Check and evaluate the call. 925 (this->*evalFunction)(C, CE); 926 return true; 927} 928 929void CStringChecker::PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS) { 930 // Record string length for char a[] = "abc"; 931 const GRState *state = C.getState(); 932 933 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 934 I != E; ++I) { 935 const VarDecl *D = dyn_cast<VarDecl>(*I); 936 if (!D) 937 continue; 938 939 // FIXME: Handle array fields of structs. 940 if (!D->getType()->isArrayType()) 941 continue; 942 943 const Expr *Init = D->getInit(); 944 if (!Init) 945 continue; 946 if (!isa<StringLiteral>(Init)) 947 continue; 948 949 Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext()); 950 const MemRegion *MR = VarLoc.getAsRegion(); 951 if (!MR) 952 continue; 953 954 SVal StrVal = state->getSVal(Init); 955 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 956 DefinedOrUnknownSVal strLength 957 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 958 959 state = state->set<CStringLength>(MR, strLength); 960 } 961 962 C.addTransition(state); 963} 964 965bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) { 966 CStringLength::EntryMap Entries = state->get<CStringLength>(); 967 return !Entries.isEmpty(); 968} 969 970const GRState *CStringChecker::EvalRegionChanges(const GRState *state, 971 const MemRegion * const *Begin, 972 const MemRegion * const *End, 973 bool *) { 974 CStringLength::EntryMap Entries = state->get<CStringLength>(); 975 if (Entries.isEmpty()) 976 return state; 977 978 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 979 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 980 981 // First build sets for the changed regions and their super-regions. 982 for ( ; Begin != End; ++Begin) { 983 const MemRegion *MR = *Begin; 984 Invalidated.insert(MR); 985 986 SuperRegions.insert(MR); 987 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 988 MR = SR->getSuperRegion(); 989 SuperRegions.insert(MR); 990 } 991 } 992 993 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 994 995 // Then loop over the entries in the current state. 996 for (CStringLength::EntryMap::iterator I = Entries.begin(), 997 E = Entries.end(); I != E; ++I) { 998 const MemRegion *MR = I.getKey(); 999 1000 // Is this entry for a super-region of a changed region? 1001 if (SuperRegions.count(MR)) { 1002 Entries = F.remove(Entries, MR); 1003 continue; 1004 } 1005 1006 // Is this entry for a sub-region of a changed region? 1007 const MemRegion *Super = MR; 1008 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1009 Super = SR->getSuperRegion(); 1010 if (Invalidated.count(Super)) { 1011 Entries = F.remove(Entries, MR); 1012 break; 1013 } 1014 } 1015 } 1016 1017 return state->set<CStringLength>(Entries); 1018} 1019 1020void CStringChecker::MarkLiveSymbols(const GRState *state, SymbolReaper &SR) { 1021 // Mark all symbols in our string length map as valid. 1022 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1023 1024 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1025 I != E; ++I) { 1026 SVal Len = I.getData(); 1027 if (SymbolRef Sym = Len.getAsSymbol()) 1028 SR.markInUse(Sym); 1029 } 1030} 1031 1032void CStringChecker::evalDeadSymbols(CheckerContext &C, SymbolReaper &SR) { 1033 if (!SR.hasDeadSymbols()) 1034 return; 1035 1036 const GRState *state = C.getState(); 1037 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1038 if (Entries.isEmpty()) 1039 return; 1040 1041 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1042 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1043 I != E; ++I) { 1044 SVal Len = I.getData(); 1045 if (SymbolRef Sym = Len.getAsSymbol()) { 1046 if (SR.isDead(Sym)) 1047 Entries = F.remove(Entries, I.getKey()); 1048 } 1049 } 1050 1051 state = state->set<CStringLength>(Entries); 1052 C.generateNode(state); 1053} 1054