CStringChecker.cpp revision 183ff98f425d470c2a0276880aaf43496c9dad14
1//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This defines CStringChecker, which is an assortment of checks on calls 11// to functions in <string.h>. 12// 13//===----------------------------------------------------------------------===// 14 15#include "ClangSACheckers.h" 16#include "clang/StaticAnalyzer/Core/CheckerV2.h" 17#include "clang/StaticAnalyzer/Core/CheckerManager.h" 18#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 19#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h" 21#include "llvm/ADT/StringSwitch.h" 22 23using namespace clang; 24using namespace ento; 25 26namespace { 27class CStringChecker : public CheckerV2< eval::Call, 28 check::PreStmt<DeclStmt>, 29 check::LiveSymbols, 30 check::DeadSymbols, 31 check::RegionChanges 32 > { 33 mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite, 34 BT_Overlap, BT_NotCString; 35public: 36 static void *getTag() { static int tag; return &tag; } 37 38 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 39 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 40 void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const; 41 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 42 bool wantsRegionChangeUpdate(const GRState *state) const; 43 44 const GRState *checkRegionChanges(const GRState *state, 45 const MemRegion * const *Begin, 46 const MemRegion * const *End) const; 47 48 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 49 const CallExpr *) const; 50 51 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 52 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 53 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 54 void evalCopyCommon(CheckerContext &C, const GRState *state, 55 const Expr *Size, const Expr *Source, const Expr *Dest, 56 bool Restricted = false) const; 57 58 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 59 60 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 61 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 62 void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 63 bool IsStrnlen = false) const; 64 65 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 66 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 67 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 68 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd, 69 bool isStrncpy) const; 70 71 // Utility methods 72 std::pair<const GRState*, const GRState*> 73 static assumeZero(CheckerContext &C, 74 const GRState *state, SVal V, QualType Ty); 75 76 static const GRState *setCStringLength(const GRState *state, 77 const MemRegion *MR, SVal strLength); 78 static SVal getCStringLengthForRegion(CheckerContext &C, 79 const GRState *&state, 80 const Expr *Ex, const MemRegion *MR); 81 SVal getCStringLength(CheckerContext &C, const GRState *&state, 82 const Expr *Ex, SVal Buf) const; 83 84 static const GRState *InvalidateBuffer(CheckerContext &C, 85 const GRState *state, 86 const Expr *Ex, SVal V); 87 88 static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 89 const MemRegion *MR); 90 91 // Re-usable checks 92 const GRState *checkNonNull(CheckerContext &C, const GRState *state, 93 const Expr *S, SVal l) const; 94 const GRState *CheckLocation(CheckerContext &C, const GRState *state, 95 const Expr *S, SVal l, 96 bool IsDestination = false) const; 97 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 98 const Expr *Size, 99 const Expr *FirstBuf, 100 const Expr *SecondBuf = NULL, 101 bool FirstIsDestination = false) const; 102 const GRState *CheckOverlap(CheckerContext &C, const GRState *state, 103 const Expr *Size, const Expr *First, 104 const Expr *Second) const; 105 void emitOverlapBug(CheckerContext &C, const GRState *state, 106 const Stmt *First, const Stmt *Second) const; 107}; 108 109class CStringLength { 110public: 111 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 112}; 113} //end anonymous namespace 114 115namespace clang { 116namespace ento { 117 template <> 118 struct GRStateTrait<CStringLength> 119 : public GRStatePartialTrait<CStringLength::EntryMap> { 120 static void *GDMIndex() { return CStringChecker::getTag(); } 121 }; 122} 123} 124 125//===----------------------------------------------------------------------===// 126// Individual checks and utility methods. 127//===----------------------------------------------------------------------===// 128 129std::pair<const GRState*, const GRState*> 130CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V, 131 QualType Ty) { 132 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 133 if (!val) 134 return std::pair<const GRState*, const GRState *>(state, state); 135 136 SValBuilder &svalBuilder = C.getSValBuilder(); 137 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 138 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 139} 140 141const GRState *CStringChecker::checkNonNull(CheckerContext &C, 142 const GRState *state, 143 const Expr *S, SVal l) const { 144 // If a previous check has failed, propagate the failure. 145 if (!state) 146 return NULL; 147 148 const GRState *stateNull, *stateNonNull; 149 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 150 151 if (stateNull && !stateNonNull) { 152 ExplodedNode *N = C.generateSink(stateNull); 153 if (!N) 154 return NULL; 155 156 if (!BT_Null) 157 BT_Null.reset(new BuiltinBug("API", 158 "Null pointer argument in call to byte string function")); 159 160 // Generate a report for this bug. 161 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 162 EnhancedBugReport *report = new EnhancedBugReport(*BT, 163 BT->getDescription(), N); 164 165 report->addRange(S->getSourceRange()); 166 report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); 167 C.EmitReport(report); 168 return NULL; 169 } 170 171 // From here on, assume that the value is non-null. 172 assert(stateNonNull); 173 return stateNonNull; 174} 175 176// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 177const GRState *CStringChecker::CheckLocation(CheckerContext &C, 178 const GRState *state, 179 const Expr *S, SVal l, 180 bool IsDestination) const { 181 // If a previous check has failed, propagate the failure. 182 if (!state) 183 return NULL; 184 185 // Check for out of bound array element access. 186 const MemRegion *R = l.getAsRegion(); 187 if (!R) 188 return state; 189 190 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 191 if (!ER) 192 return state; 193 194 assert(ER->getValueType() == C.getASTContext().CharTy && 195 "CheckLocation should only be called with char* ElementRegions"); 196 197 // Get the size of the array. 198 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 199 SValBuilder &svalBuilder = C.getSValBuilder(); 200 SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 201 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 202 203 // Get the index of the accessed element. 204 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 205 206 const GRState *StInBound = state->assumeInBound(Idx, Size, true); 207 const GRState *StOutBound = state->assumeInBound(Idx, Size, false); 208 if (StOutBound && !StInBound) { 209 ExplodedNode *N = C.generateSink(StOutBound); 210 if (!N) 211 return NULL; 212 213 BuiltinBug *BT; 214 if (IsDestination) { 215 if (!BT_BoundsWrite) { 216 BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access", 217 "Byte string function overflows destination buffer")); 218 } 219 BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get()); 220 } else { 221 if (!BT_Bounds) { 222 BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", 223 "Byte string function accesses out-of-bound array element")); 224 } 225 BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 226 } 227 228 // FIXME: It would be nice to eventually make this diagnostic more clear, 229 // e.g., by referencing the original declaration or by saying *why* this 230 // reference is outside the range. 231 232 // Generate a report for this bug. 233 RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); 234 235 report->addRange(S->getSourceRange()); 236 C.EmitReport(report); 237 return NULL; 238 } 239 240 // Array bound check succeeded. From this point forward the array bound 241 // should always succeed. 242 return StInBound; 243} 244 245const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, 246 const GRState *state, 247 const Expr *Size, 248 const Expr *FirstBuf, 249 const Expr *SecondBuf, 250 bool FirstIsDestination) const { 251 // If a previous check has failed, propagate the failure. 252 if (!state) 253 return NULL; 254 255 SValBuilder &svalBuilder = C.getSValBuilder(); 256 ASTContext &Ctx = C.getASTContext(); 257 258 QualType sizeTy = Size->getType(); 259 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 260 261 // Check that the first buffer is non-null. 262 SVal BufVal = state->getSVal(FirstBuf); 263 state = checkNonNull(C, state, FirstBuf, BufVal); 264 if (!state) 265 return NULL; 266 267 // Get the access length and make sure it is known. 268 SVal LengthVal = state->getSVal(Size); 269 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 270 if (!Length) 271 return state; 272 273 // Compute the offset of the last element to be accessed: size-1. 274 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 275 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 276 *Length, One, sizeTy)); 277 278 // Check that the first buffer is sufficently long. 279 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 280 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 281 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 282 LastOffset, PtrTy); 283 state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); 284 285 // If the buffer isn't large enough, abort. 286 if (!state) 287 return NULL; 288 } 289 290 // If there's a second buffer, check it as well. 291 if (SecondBuf) { 292 BufVal = state->getSVal(SecondBuf); 293 state = checkNonNull(C, state, SecondBuf, BufVal); 294 if (!state) 295 return NULL; 296 297 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 298 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 299 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 300 LastOffset, PtrTy); 301 state = CheckLocation(C, state, SecondBuf, BufEnd); 302 } 303 } 304 305 // Large enough or not, return this state! 306 return state; 307} 308 309const GRState *CStringChecker::CheckOverlap(CheckerContext &C, 310 const GRState *state, 311 const Expr *Size, 312 const Expr *First, 313 const Expr *Second) const { 314 // Do a simple check for overlap: if the two arguments are from the same 315 // buffer, see if the end of the first is greater than the start of the second 316 // or vice versa. 317 318 // If a previous check has failed, propagate the failure. 319 if (!state) 320 return NULL; 321 322 const GRState *stateTrue, *stateFalse; 323 324 // Get the buffer values and make sure they're known locations. 325 SVal firstVal = state->getSVal(First); 326 SVal secondVal = state->getSVal(Second); 327 328 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 329 if (!firstLoc) 330 return state; 331 332 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 333 if (!secondLoc) 334 return state; 335 336 // Are the two values the same? 337 SValBuilder &svalBuilder = C.getSValBuilder(); 338 llvm::tie(stateTrue, stateFalse) = 339 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 340 341 if (stateTrue && !stateFalse) { 342 // If the values are known to be equal, that's automatically an overlap. 343 emitOverlapBug(C, stateTrue, First, Second); 344 return NULL; 345 } 346 347 // assume the two expressions are not equal. 348 assert(stateFalse); 349 state = stateFalse; 350 351 // Which value comes first? 352 ASTContext &Ctx = svalBuilder.getContext(); 353 QualType cmpTy = Ctx.IntTy; 354 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 355 *firstLoc, *secondLoc, cmpTy); 356 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 357 if (!reverseTest) 358 return state; 359 360 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 361 if (stateTrue) { 362 if (stateFalse) { 363 // If we don't know which one comes first, we can't perform this test. 364 return state; 365 } else { 366 // Switch the values so that firstVal is before secondVal. 367 Loc *tmpLoc = firstLoc; 368 firstLoc = secondLoc; 369 secondLoc = tmpLoc; 370 371 // Switch the Exprs as well, so that they still correspond. 372 const Expr *tmpExpr = First; 373 First = Second; 374 Second = tmpExpr; 375 } 376 } 377 378 // Get the length, and make sure it too is known. 379 SVal LengthVal = state->getSVal(Size); 380 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 381 if (!Length) 382 return state; 383 384 // Convert the first buffer's start address to char*. 385 // Bail out if the cast fails. 386 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 387 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType()); 388 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 389 if (!FirstStartLoc) 390 return state; 391 392 // Compute the end of the first buffer. Bail out if THAT fails. 393 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 394 *FirstStartLoc, *Length, CharPtrTy); 395 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 396 if (!FirstEndLoc) 397 return state; 398 399 // Is the end of the first buffer past the start of the second buffer? 400 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 401 *FirstEndLoc, *secondLoc, cmpTy); 402 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 403 if (!OverlapTest) 404 return state; 405 406 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 407 408 if (stateTrue && !stateFalse) { 409 // Overlap! 410 emitOverlapBug(C, stateTrue, First, Second); 411 return NULL; 412 } 413 414 // assume the two expressions don't overlap. 415 assert(stateFalse); 416 return stateFalse; 417} 418 419void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, 420 const Stmt *First, const Stmt *Second) const { 421 ExplodedNode *N = C.generateSink(state); 422 if (!N) 423 return; 424 425 if (!BT_Overlap) 426 BT_Overlap.reset(new BugType("Unix API", "Improper arguments")); 427 428 // Generate a report for this bug. 429 RangedBugReport *report = 430 new RangedBugReport(*BT_Overlap, 431 "Arguments must not be overlapping buffers", N); 432 report->addRange(First->getSourceRange()); 433 report->addRange(Second->getSourceRange()); 434 435 C.EmitReport(report); 436} 437 438const GRState *CStringChecker::setCStringLength(const GRState *state, 439 const MemRegion *MR, 440 SVal strLength) { 441 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 442 if (strLength.isUnknown()) 443 return state; 444 445 MR = MR->StripCasts(); 446 447 switch (MR->getKind()) { 448 case MemRegion::StringRegionKind: 449 // FIXME: This can happen if we strcpy() into a string region. This is 450 // undefined [C99 6.4.5p6], but we should still warn about it. 451 return state; 452 453 case MemRegion::SymbolicRegionKind: 454 case MemRegion::AllocaRegionKind: 455 case MemRegion::VarRegionKind: 456 case MemRegion::FieldRegionKind: 457 case MemRegion::ObjCIvarRegionKind: 458 return state->set<CStringLength>(MR, strLength); 459 460 case MemRegion::ElementRegionKind: 461 // FIXME: Handle element regions by upper-bounding the parent region's 462 // string length. 463 return state; 464 465 default: 466 // Other regions (mostly non-data) can't have a reliable C string length. 467 // For now, just ignore the change. 468 // FIXME: These are rare but not impossible. We should output some kind of 469 // warning for things like strcpy((char[]){'a', 0}, "b"); 470 return state; 471 } 472} 473 474SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 475 const GRState *&state, 476 const Expr *Ex, 477 const MemRegion *MR) { 478 // If there's a recorded length, go ahead and return it. 479 const SVal *Recorded = state->get<CStringLength>(MR); 480 if (Recorded) 481 return *Recorded; 482 483 // Otherwise, get a new symbol and update the state. 484 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 485 SValBuilder &svalBuilder = C.getSValBuilder(); 486 QualType sizeTy = svalBuilder.getContext().getSizeType(); 487 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 488 MR, Ex, sizeTy, Count); 489 state = state->set<CStringLength>(MR, strLength); 490 return strLength; 491} 492 493SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, 494 const Expr *Ex, SVal Buf) const { 495 const MemRegion *MR = Buf.getAsRegion(); 496 if (!MR) { 497 // If we can't get a region, see if it's something we /know/ isn't a 498 // C string. In the context of locations, the only time we can issue such 499 // a warning is for labels. 500 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 501 if (ExplodedNode *N = C.generateNode(state)) { 502 if (!BT_NotCString) 503 BT_NotCString.reset(new BuiltinBug("API", 504 "Argument is not a null-terminated string.")); 505 506 llvm::SmallString<120> buf; 507 llvm::raw_svector_ostream os(buf); 508 os << "Argument to byte string function is the address of the label '" 509 << Label->getLabel()->getName() 510 << "', which is not a null-terminated string"; 511 512 // Generate a report for this bug. 513 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 514 os.str(), N); 515 516 report->addRange(Ex->getSourceRange()); 517 C.EmitReport(report); 518 } 519 520 return UndefinedVal(); 521 } 522 523 // If it's not a region and not a label, give up. 524 return UnknownVal(); 525 } 526 527 // If we have a region, strip casts from it and see if we can figure out 528 // its length. For anything we can't figure out, just return UnknownVal. 529 MR = MR->StripCasts(); 530 531 switch (MR->getKind()) { 532 case MemRegion::StringRegionKind: { 533 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 534 // so we can assume that the byte length is the correct C string length. 535 SValBuilder &svalBuilder = C.getSValBuilder(); 536 QualType sizeTy = svalBuilder.getContext().getSizeType(); 537 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 538 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 539 } 540 case MemRegion::SymbolicRegionKind: 541 case MemRegion::AllocaRegionKind: 542 case MemRegion::VarRegionKind: 543 case MemRegion::FieldRegionKind: 544 case MemRegion::ObjCIvarRegionKind: 545 return getCStringLengthForRegion(C, state, Ex, MR); 546 case MemRegion::CompoundLiteralRegionKind: 547 // FIXME: Can we track this? Is it necessary? 548 return UnknownVal(); 549 case MemRegion::ElementRegionKind: 550 // FIXME: How can we handle this? It's not good enough to subtract the 551 // offset from the base string length; consider "123\x00567" and &a[5]. 552 return UnknownVal(); 553 default: 554 // Other regions (mostly non-data) can't have a reliable C string length. 555 // In this case, an error is emitted and UndefinedVal is returned. 556 // The caller should always be prepared to handle this case. 557 if (ExplodedNode *N = C.generateNode(state)) { 558 if (!BT_NotCString) 559 BT_NotCString.reset(new BuiltinBug("API", 560 "Argument is not a null-terminated string.")); 561 562 llvm::SmallString<120> buf; 563 llvm::raw_svector_ostream os(buf); 564 565 os << "Argument to byte string function is "; 566 567 if (SummarizeRegion(os, C.getASTContext(), MR)) 568 os << ", which is not a null-terminated string"; 569 else 570 os << "not a null-terminated string"; 571 572 // Generate a report for this bug. 573 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 574 os.str(), N); 575 576 report->addRange(Ex->getSourceRange()); 577 C.EmitReport(report); 578 } 579 580 return UndefinedVal(); 581 } 582} 583 584const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, 585 const GRState *state, 586 const Expr *E, SVal V) { 587 Loc *L = dyn_cast<Loc>(&V); 588 if (!L) 589 return state; 590 591 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 592 // some assumptions about the value that CFRefCount can't. Even so, it should 593 // probably be refactored. 594 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 595 const MemRegion *R = MR->getRegion()->StripCasts(); 596 597 // Are we dealing with an ElementRegion? If so, we should be invalidating 598 // the super-region. 599 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 600 R = ER->getSuperRegion(); 601 // FIXME: What about layers of ElementRegions? 602 } 603 604 // Invalidate this region. 605 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 606 return state->invalidateRegion(R, E, Count, NULL); 607 } 608 609 // If we have a non-region value by chance, just remove the binding. 610 // FIXME: is this necessary or correct? This handles the non-Region 611 // cases. Is it ever valid to store to these? 612 return state->unbindLoc(*L); 613} 614 615bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 616 const MemRegion *MR) { 617 const TypedRegion *TR = dyn_cast<TypedRegion>(MR); 618 if (!TR) 619 return false; 620 621 switch (TR->getKind()) { 622 case MemRegion::FunctionTextRegionKind: { 623 const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl(); 624 if (FD) 625 os << "the address of the function '" << FD << "'"; 626 else 627 os << "the address of a function"; 628 return true; 629 } 630 case MemRegion::BlockTextRegionKind: 631 os << "block text"; 632 return true; 633 case MemRegion::BlockDataRegionKind: 634 os << "a block"; 635 return true; 636 case MemRegion::CXXThisRegionKind: 637 case MemRegion::CXXTempObjectRegionKind: 638 os << "a C++ temp object of type " << TR->getValueType().getAsString(); 639 return true; 640 case MemRegion::VarRegionKind: 641 os << "a variable of type" << TR->getValueType().getAsString(); 642 return true; 643 case MemRegion::FieldRegionKind: 644 os << "a field of type " << TR->getValueType().getAsString(); 645 return true; 646 case MemRegion::ObjCIvarRegionKind: 647 os << "an instance variable of type " << TR->getValueType().getAsString(); 648 return true; 649 default: 650 return false; 651 } 652} 653 654//===----------------------------------------------------------------------===// 655// evaluation of individual function calls. 656//===----------------------------------------------------------------------===// 657 658void CStringChecker::evalCopyCommon(CheckerContext &C, const GRState *state, 659 const Expr *Size, const Expr *Dest, 660 const Expr *Source, bool Restricted) const { 661 // See if the size argument is zero. 662 SVal sizeVal = state->getSVal(Size); 663 QualType sizeTy = Size->getType(); 664 665 const GRState *stateZeroSize, *stateNonZeroSize; 666 llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy); 667 668 // If the size is zero, there won't be any actual memory access. 669 if (stateZeroSize) 670 C.addTransition(stateZeroSize); 671 672 // If the size can be nonzero, we have to check the other arguments. 673 if (stateNonZeroSize) { 674 state = stateNonZeroSize; 675 state = CheckBufferAccess(C, state, Size, Dest, Source, 676 /* FirstIsDst = */ true); 677 if (Restricted) 678 state = CheckOverlap(C, state, Size, Dest, Source); 679 680 if (state) { 681 // Invalidate the destination. 682 // FIXME: Even if we can't perfectly model the copy, we should see if we 683 // can use LazyCompoundVals to copy the source values into the destination. 684 // This would probably remove any existing bindings past the end of the 685 // copied region, but that's still an improvement over blank invalidation. 686 state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); 687 C.addTransition(state); 688 } 689 } 690} 691 692 693void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 694 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 695 // The return value is the address of the destination buffer. 696 const Expr *Dest = CE->getArg(0); 697 const GRState *state = C.getState(); 698 state = state->BindExpr(CE, state->getSVal(Dest)); 699 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1), true); 700} 701 702void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 703 // void *memmove(void *dst, const void *src, size_t n); 704 // The return value is the address of the destination buffer. 705 const Expr *Dest = CE->getArg(0); 706 const GRState *state = C.getState(); 707 state = state->BindExpr(CE, state->getSVal(Dest)); 708 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1)); 709} 710 711void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 712 // void bcopy(const void *src, void *dst, size_t n); 713 evalCopyCommon(C, C.getState(), CE->getArg(2), CE->getArg(1), CE->getArg(0)); 714} 715 716void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 717 // int memcmp(const void *s1, const void *s2, size_t n); 718 const Expr *Left = CE->getArg(0); 719 const Expr *Right = CE->getArg(1); 720 const Expr *Size = CE->getArg(2); 721 722 const GRState *state = C.getState(); 723 SValBuilder &svalBuilder = C.getSValBuilder(); 724 725 // See if the size argument is zero. 726 SVal sizeVal = state->getSVal(Size); 727 QualType sizeTy = Size->getType(); 728 729 const GRState *stateZeroSize, *stateNonZeroSize; 730 llvm::tie(stateZeroSize, stateNonZeroSize) = 731 assumeZero(C, state, sizeVal, sizeTy); 732 733 // If the size can be zero, the result will be 0 in that case, and we don't 734 // have to check either of the buffers. 735 if (stateZeroSize) { 736 state = stateZeroSize; 737 state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 738 C.addTransition(state); 739 } 740 741 // If the size can be nonzero, we have to check the other arguments. 742 if (stateNonZeroSize) { 743 state = stateNonZeroSize; 744 // If we know the two buffers are the same, we know the result is 0. 745 // First, get the two buffers' addresses. Another checker will have already 746 // made sure they're not undefined. 747 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left)); 748 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right)); 749 750 // See if they are the same. 751 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 752 const GRState *StSameBuf, *StNotSameBuf; 753 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 754 755 // If the two arguments might be the same buffer, we know the result is zero, 756 // and we only need to check one size. 757 if (StSameBuf) { 758 state = StSameBuf; 759 state = CheckBufferAccess(C, state, Size, Left); 760 if (state) { 761 state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 762 C.addTransition(state); 763 } 764 } 765 766 // If the two arguments might be different buffers, we have to check the 767 // size of both of them. 768 if (StNotSameBuf) { 769 state = StNotSameBuf; 770 state = CheckBufferAccess(C, state, Size, Left, Right); 771 if (state) { 772 // The return value is the comparison result, which we don't know. 773 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 774 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 775 state = state->BindExpr(CE, CmpV); 776 C.addTransition(state); 777 } 778 } 779 } 780} 781 782void CStringChecker::evalstrLength(CheckerContext &C, 783 const CallExpr *CE) const { 784 // size_t strlen(const char *s); 785 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 786} 787 788void CStringChecker::evalstrnLength(CheckerContext &C, 789 const CallExpr *CE) const { 790 // size_t strnlen(const char *s, size_t maxlen); 791 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 792} 793 794void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 795 bool IsStrnlen) const { 796 const GRState *state = C.getState(); 797 const Expr *Arg = CE->getArg(0); 798 SVal ArgVal = state->getSVal(Arg); 799 800 // Check that the argument is non-null. 801 state = checkNonNull(C, state, Arg, ArgVal); 802 803 if (state) { 804 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 805 806 // If the argument isn't a valid C string, there's no valid state to 807 // transition to. 808 if (strLength.isUndef()) 809 return; 810 811 // If the check is for strnlen() then bind the return value to no more than 812 // the maxlen value. 813 if (IsStrnlen) { 814 const Expr *maxlenExpr = CE->getArg(1); 815 SVal maxlenVal = state->getSVal(maxlenExpr); 816 817 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 818 NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); 819 820 QualType cmpTy = C.getSValBuilder().getContext().IntTy; 821 const GRState *stateTrue, *stateFalse; 822 823 // Check if the strLength is greater than or equal to the maxlen 824 llvm::tie(stateTrue, stateFalse) = 825 state->assume(cast<DefinedOrUnknownSVal> 826 (C.getSValBuilder().evalBinOpNN(state, BO_GE, 827 *strLengthNL, *maxlenValNL, 828 cmpTy))); 829 830 // If the strLength is greater than or equal to the maxlen, set strLength 831 // to maxlen 832 if (stateTrue && !stateFalse) { 833 strLength = maxlenVal; 834 } 835 } 836 837 // If getCStringLength couldn't figure out the length, conjure a return 838 // value, so it can be used in constraints, at least. 839 if (strLength.isUnknown()) { 840 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 841 strLength = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 842 } 843 844 // Bind the return value. 845 state = state->BindExpr(CE, strLength); 846 C.addTransition(state); 847 } 848} 849 850void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 851 // char *strcpy(char *restrict dst, const char *restrict src); 852 evalStrcpyCommon(C, CE, /* returnEnd = */ false, /* isStrncpy = */ false); 853} 854 855void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 856 // char *strcpy(char *restrict dst, const char *restrict src); 857 evalStrcpyCommon(C, CE, /* returnEnd = */ false, /* isStrncpy = */ true); 858} 859 860void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 861 // char *stpcpy(char *restrict dst, const char *restrict src); 862 evalStrcpyCommon(C, CE, /* returnEnd = */ true, /* isStrncpy = */ false); 863} 864 865void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 866 bool returnEnd, bool isStrncpy) const { 867 const GRState *state = C.getState(); 868 869 // Check that the destination is non-null 870 const Expr *Dst = CE->getArg(0); 871 SVal DstVal = state->getSVal(Dst); 872 873 state = checkNonNull(C, state, Dst, DstVal); 874 if (!state) 875 return; 876 877 // Check that the source is non-null. 878 const Expr *srcExpr = CE->getArg(1); 879 SVal srcVal = state->getSVal(srcExpr); 880 state = checkNonNull(C, state, srcExpr, srcVal); 881 if (!state) 882 return; 883 884 // Get the string length of the source. 885 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 886 887 // If the source isn't a valid C string, give up. 888 if (strLength.isUndef()) 889 return; 890 891 if (isStrncpy) { 892 // Get the max number of characters to copy 893 const Expr *lenExpr = CE->getArg(2); 894 SVal lenVal = state->getSVal(lenExpr); 895 896 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 897 NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal); 898 899 QualType cmpTy = C.getSValBuilder().getContext().IntTy; 900 const GRState *stateTrue, *stateFalse; 901 902 // Check if the max number to copy is less than the length of the src 903 llvm::tie(stateTrue, stateFalse) = 904 state->assume(cast<DefinedOrUnknownSVal> 905 (C.getSValBuilder().evalBinOpNN(state, BO_GT, 906 *strLengthNL, *lenValNL, 907 cmpTy))); 908 909 if (stateTrue) { 910 // Max number to copy is less than the length of the src, so the actual 911 // strLength copied is the max number arg. 912 strLength = lenVal; 913 } 914 } 915 916 SVal Result = (returnEnd ? UnknownVal() : DstVal); 917 918 // If the destination is a MemRegion, try to check for a buffer overflow and 919 // record the new string length. 920 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 921 // If the length is known, we can check for an overflow. 922 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&strLength)) { 923 SVal lastElement = 924 C.getSValBuilder().evalBinOpLN(state, BO_Add, *dstRegVal, 925 *knownStrLength, Dst->getType()); 926 927 state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true); 928 if (!state) 929 return; 930 931 // If this is a stpcpy-style copy, the last element is the return value. 932 if (returnEnd) 933 Result = lastElement; 934 } 935 936 // Invalidate the destination. This must happen before we set the C string 937 // length because invalidation will clear the length. 938 // FIXME: Even if we can't perfectly model the copy, we should see if we 939 // can use LazyCompoundVals to copy the source values into the destination. 940 // This would probably remove any existing bindings past the end of the 941 // string, but that's still an improvement over blank invalidation. 942 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 943 944 // Set the C string length of the destination. 945 state = setCStringLength(state, dstRegVal->getRegion(), strLength); 946 } 947 948 // If this is a stpcpy-style copy, but we were unable to check for a buffer 949 // overflow, we still need a result. Conjure a return value. 950 if (returnEnd && Result.isUnknown()) { 951 SValBuilder &svalBuilder = C.getSValBuilder(); 952 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 953 strLength = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 954 } 955 956 // Set the return value. 957 state = state->BindExpr(CE, Result); 958 C.addTransition(state); 959} 960 961//===----------------------------------------------------------------------===// 962// The driver method, and other Checker callbacks. 963//===----------------------------------------------------------------------===// 964 965bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 966 // Get the callee. All the functions we care about are C functions 967 // with simple identifiers. 968 const GRState *state = C.getState(); 969 const Expr *Callee = CE->getCallee(); 970 const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl(); 971 972 if (!FD) 973 return false; 974 975 // Get the name of the callee. If it's a builtin, strip off the prefix. 976 IdentifierInfo *II = FD->getIdentifier(); 977 if (!II) // if no identifier, not a simple C function 978 return false; 979 llvm::StringRef Name = II->getName(); 980 if (Name.startswith("__builtin_")) 981 Name = Name.substr(10); 982 983 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 984 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy) 985 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) 986 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) 987 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) 988 .Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy) 989 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) 990 .Case("strlen", &CStringChecker::evalstrLength) 991 .Case("strnlen", &CStringChecker::evalstrnLength) 992 .Case("bcopy", &CStringChecker::evalBcopy) 993 .Default(NULL); 994 995 // If the callee isn't a string function, let another checker handle it. 996 if (!evalFunction) 997 return false; 998 999 // Check and evaluate the call. 1000 (this->*evalFunction)(C, CE); 1001 return true; 1002} 1003 1004void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 1005 // Record string length for char a[] = "abc"; 1006 const GRState *state = C.getState(); 1007 1008 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 1009 I != E; ++I) { 1010 const VarDecl *D = dyn_cast<VarDecl>(*I); 1011 if (!D) 1012 continue; 1013 1014 // FIXME: Handle array fields of structs. 1015 if (!D->getType()->isArrayType()) 1016 continue; 1017 1018 const Expr *Init = D->getInit(); 1019 if (!Init) 1020 continue; 1021 if (!isa<StringLiteral>(Init)) 1022 continue; 1023 1024 Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext()); 1025 const MemRegion *MR = VarLoc.getAsRegion(); 1026 if (!MR) 1027 continue; 1028 1029 SVal StrVal = state->getSVal(Init); 1030 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 1031 DefinedOrUnknownSVal strLength 1032 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 1033 1034 state = state->set<CStringLength>(MR, strLength); 1035 } 1036 1037 C.addTransition(state); 1038} 1039 1040bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const { 1041 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1042 return !Entries.isEmpty(); 1043} 1044 1045const GRState * 1046CStringChecker::checkRegionChanges(const GRState *state, 1047 const MemRegion * const *Begin, 1048 const MemRegion * const *End) const { 1049 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1050 if (Entries.isEmpty()) 1051 return state; 1052 1053 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 1054 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 1055 1056 // First build sets for the changed regions and their super-regions. 1057 for ( ; Begin != End; ++Begin) { 1058 const MemRegion *MR = *Begin; 1059 Invalidated.insert(MR); 1060 1061 SuperRegions.insert(MR); 1062 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 1063 MR = SR->getSuperRegion(); 1064 SuperRegions.insert(MR); 1065 } 1066 } 1067 1068 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1069 1070 // Then loop over the entries in the current state. 1071 for (CStringLength::EntryMap::iterator I = Entries.begin(), 1072 E = Entries.end(); I != E; ++I) { 1073 const MemRegion *MR = I.getKey(); 1074 1075 // Is this entry for a super-region of a changed region? 1076 if (SuperRegions.count(MR)) { 1077 Entries = F.remove(Entries, MR); 1078 continue; 1079 } 1080 1081 // Is this entry for a sub-region of a changed region? 1082 const MemRegion *Super = MR; 1083 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1084 Super = SR->getSuperRegion(); 1085 if (Invalidated.count(Super)) { 1086 Entries = F.remove(Entries, MR); 1087 break; 1088 } 1089 } 1090 } 1091 1092 return state->set<CStringLength>(Entries); 1093} 1094 1095void CStringChecker::checkLiveSymbols(const GRState *state, 1096 SymbolReaper &SR) const { 1097 // Mark all symbols in our string length map as valid. 1098 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1099 1100 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1101 I != E; ++I) { 1102 SVal Len = I.getData(); 1103 if (SymbolRef Sym = Len.getAsSymbol()) 1104 SR.markInUse(Sym); 1105 } 1106} 1107 1108void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 1109 CheckerContext &C) const { 1110 if (!SR.hasDeadSymbols()) 1111 return; 1112 1113 const GRState *state = C.getState(); 1114 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1115 if (Entries.isEmpty()) 1116 return; 1117 1118 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1119 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1120 I != E; ++I) { 1121 SVal Len = I.getData(); 1122 if (SymbolRef Sym = Len.getAsSymbol()) { 1123 if (SR.isDead(Sym)) 1124 Entries = F.remove(Entries, I.getKey()); 1125 } 1126 } 1127 1128 state = state->set<CStringLength>(Entries); 1129 C.generateNode(state); 1130} 1131 1132void ento::registerCStringChecker(CheckerManager &mgr) { 1133 mgr.registerChecker<CStringChecker>(); 1134} 1135