CStringChecker.cpp revision af1a9330ffc0757e1534206f4f50eb420ef57b23
1//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This defines CStringChecker, which is an assortment of checks on calls 11// to functions in <string.h>. 12// 13//===----------------------------------------------------------------------===// 14 15#include "ExperimentalChecks.h" 16#include "clang/StaticAnalyzer/BugReporter/BugType.h" 17#include "clang/StaticAnalyzer/PathSensitive/CheckerVisitor.h" 18#include "clang/StaticAnalyzer/PathSensitive/GRStateTrait.h" 19#include "llvm/ADT/StringSwitch.h" 20 21using namespace clang; 22using namespace ento; 23 24namespace { 25class CStringChecker : public CheckerVisitor<CStringChecker> { 26 BugType *BT_Null, *BT_Bounds, *BT_BoundsWrite, *BT_Overlap, *BT_NotCString; 27public: 28 CStringChecker() 29 : BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0) 30 {} 31 static void *getTag() { static int tag; return &tag; } 32 33 bool evalCallExpr(CheckerContext &C, const CallExpr *CE); 34 void PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS); 35 void MarkLiveSymbols(const GRState *state, SymbolReaper &SR); 36 void evalDeadSymbols(CheckerContext &C, SymbolReaper &SR); 37 bool wantsRegionChangeUpdate(const GRState *state); 38 39 const GRState *EvalRegionChanges(const GRState *state, 40 const MemRegion * const *Begin, 41 const MemRegion * const *End, 42 bool*); 43 44 typedef void (CStringChecker::*FnCheck)(CheckerContext &, const CallExpr *); 45 46 void evalMemcpy(CheckerContext &C, const CallExpr *CE); 47 void evalMemmove(CheckerContext &C, const CallExpr *CE); 48 void evalBcopy(CheckerContext &C, const CallExpr *CE); 49 void evalCopyCommon(CheckerContext &C, const GRState *state, 50 const Expr *Size, const Expr *Source, const Expr *Dest, 51 bool Restricted = false); 52 53 void evalMemcmp(CheckerContext &C, const CallExpr *CE); 54 55 void evalstrLength(CheckerContext &C, const CallExpr *CE); 56 57 void evalStrcpy(CheckerContext &C, const CallExpr *CE); 58 void evalStpcpy(CheckerContext &C, const CallExpr *CE); 59 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd); 60 61 // Utility methods 62 std::pair<const GRState*, const GRState*> 63 assumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty); 64 65 const GRState *setCStringLength(const GRState *state, const MemRegion *MR, 66 SVal strLength); 67 SVal getCStringLengthForRegion(CheckerContext &C, const GRState *&state, 68 const Expr *Ex, const MemRegion *MR); 69 SVal getCStringLength(CheckerContext &C, const GRState *&state, 70 const Expr *Ex, SVal Buf); 71 72 const GRState *InvalidateBuffer(CheckerContext &C, const GRState *state, 73 const Expr *Ex, SVal V); 74 75 bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 76 const MemRegion *MR); 77 78 // Re-usable checks 79 const GRState *checkNonNull(CheckerContext &C, const GRState *state, 80 const Expr *S, SVal l); 81 const GRState *CheckLocation(CheckerContext &C, const GRState *state, 82 const Expr *S, SVal l, 83 bool IsDestination = false); 84 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 85 const Expr *Size, 86 const Expr *FirstBuf, 87 const Expr *SecondBuf = NULL, 88 bool FirstIsDestination = false); 89 const GRState *CheckOverlap(CheckerContext &C, const GRState *state, 90 const Expr *Size, const Expr *First, 91 const Expr *Second); 92 void emitOverlapBug(CheckerContext &C, const GRState *state, 93 const Stmt *First, const Stmt *Second); 94}; 95 96class CStringLength { 97public: 98 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 99}; 100} //end anonymous namespace 101 102namespace clang { 103namespace ento { 104 template <> 105 struct GRStateTrait<CStringLength> 106 : public GRStatePartialTrait<CStringLength::EntryMap> { 107 static void *GDMIndex() { return CStringChecker::getTag(); } 108 }; 109} 110} 111 112void ento::RegisterCStringChecker(ExprEngine &Eng) { 113 Eng.registerCheck(new CStringChecker()); 114} 115 116//===----------------------------------------------------------------------===// 117// Individual checks and utility methods. 118//===----------------------------------------------------------------------===// 119 120std::pair<const GRState*, const GRState*> 121CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V, 122 QualType Ty) { 123 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 124 if (!val) 125 return std::pair<const GRState*, const GRState *>(state, state); 126 127 SValBuilder &svalBuilder = C.getSValBuilder(); 128 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 129 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 130} 131 132const GRState *CStringChecker::checkNonNull(CheckerContext &C, 133 const GRState *state, 134 const Expr *S, SVal l) { 135 // If a previous check has failed, propagate the failure. 136 if (!state) 137 return NULL; 138 139 const GRState *stateNull, *stateNonNull; 140 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 141 142 if (stateNull && !stateNonNull) { 143 ExplodedNode *N = C.generateSink(stateNull); 144 if (!N) 145 return NULL; 146 147 if (!BT_Null) 148 BT_Null = new BuiltinBug("API", 149 "Null pointer argument in call to byte string function"); 150 151 // Generate a report for this bug. 152 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null); 153 EnhancedBugReport *report = new EnhancedBugReport(*BT, 154 BT->getDescription(), N); 155 156 report->addRange(S->getSourceRange()); 157 report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); 158 C.EmitReport(report); 159 return NULL; 160 } 161 162 // From here on, assume that the value is non-null. 163 assert(stateNonNull); 164 return stateNonNull; 165} 166 167// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 168const GRState *CStringChecker::CheckLocation(CheckerContext &C, 169 const GRState *state, 170 const Expr *S, SVal l, 171 bool IsDestination) { 172 // If a previous check has failed, propagate the failure. 173 if (!state) 174 return NULL; 175 176 // Check for out of bound array element access. 177 const MemRegion *R = l.getAsRegion(); 178 if (!R) 179 return state; 180 181 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 182 if (!ER) 183 return state; 184 185 assert(ER->getValueType() == C.getASTContext().CharTy && 186 "CheckLocation should only be called with char* ElementRegions"); 187 188 // Get the size of the array. 189 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 190 SValBuilder &svalBuilder = C.getSValBuilder(); 191 SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 192 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 193 194 // Get the index of the accessed element. 195 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 196 197 const GRState *StInBound = state->assumeInBound(Idx, Size, true); 198 const GRState *StOutBound = state->assumeInBound(Idx, Size, false); 199 if (StOutBound && !StInBound) { 200 ExplodedNode *N = C.generateSink(StOutBound); 201 if (!N) 202 return NULL; 203 204 BuiltinBug *BT; 205 if (IsDestination) { 206 if (!BT_BoundsWrite) { 207 BT_BoundsWrite = new BuiltinBug("Out-of-bound array access", 208 "Byte string function overflows destination buffer"); 209 } 210 BT = static_cast<BuiltinBug*>(BT_BoundsWrite); 211 } else { 212 if (!BT_Bounds) { 213 BT_Bounds = new BuiltinBug("Out-of-bound array access", 214 "Byte string function accesses out-of-bound array element"); 215 } 216 BT = static_cast<BuiltinBug*>(BT_Bounds); 217 } 218 219 // FIXME: It would be nice to eventually make this diagnostic more clear, 220 // e.g., by referencing the original declaration or by saying *why* this 221 // reference is outside the range. 222 223 // Generate a report for this bug. 224 RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); 225 226 report->addRange(S->getSourceRange()); 227 C.EmitReport(report); 228 return NULL; 229 } 230 231 // Array bound check succeeded. From this point forward the array bound 232 // should always succeed. 233 return StInBound; 234} 235 236const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, 237 const GRState *state, 238 const Expr *Size, 239 const Expr *FirstBuf, 240 const Expr *SecondBuf, 241 bool FirstIsDestination) { 242 // If a previous check has failed, propagate the failure. 243 if (!state) 244 return NULL; 245 246 SValBuilder &svalBuilder = C.getSValBuilder(); 247 ASTContext &Ctx = C.getASTContext(); 248 249 QualType sizeTy = Size->getType(); 250 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 251 252 // Check that the first buffer is non-null. 253 SVal BufVal = state->getSVal(FirstBuf); 254 state = checkNonNull(C, state, FirstBuf, BufVal); 255 if (!state) 256 return NULL; 257 258 // Get the access length and make sure it is known. 259 SVal LengthVal = state->getSVal(Size); 260 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 261 if (!Length) 262 return state; 263 264 // Compute the offset of the last element to be accessed: size-1. 265 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 266 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 267 *Length, One, sizeTy)); 268 269 // Check that the first buffer is sufficently long. 270 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 271 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 272 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 273 LastOffset, PtrTy); 274 state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); 275 276 // If the buffer isn't large enough, abort. 277 if (!state) 278 return NULL; 279 } 280 281 // If there's a second buffer, check it as well. 282 if (SecondBuf) { 283 BufVal = state->getSVal(SecondBuf); 284 state = checkNonNull(C, state, SecondBuf, BufVal); 285 if (!state) 286 return NULL; 287 288 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 289 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 290 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 291 LastOffset, PtrTy); 292 state = CheckLocation(C, state, SecondBuf, BufEnd); 293 } 294 } 295 296 // Large enough or not, return this state! 297 return state; 298} 299 300const GRState *CStringChecker::CheckOverlap(CheckerContext &C, 301 const GRState *state, 302 const Expr *Size, 303 const Expr *First, 304 const Expr *Second) { 305 // Do a simple check for overlap: if the two arguments are from the same 306 // buffer, see if the end of the first is greater than the start of the second 307 // or vice versa. 308 309 // If a previous check has failed, propagate the failure. 310 if (!state) 311 return NULL; 312 313 const GRState *stateTrue, *stateFalse; 314 315 // Get the buffer values and make sure they're known locations. 316 SVal firstVal = state->getSVal(First); 317 SVal secondVal = state->getSVal(Second); 318 319 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 320 if (!firstLoc) 321 return state; 322 323 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 324 if (!secondLoc) 325 return state; 326 327 // Are the two values the same? 328 SValBuilder &svalBuilder = C.getSValBuilder(); 329 llvm::tie(stateTrue, stateFalse) = 330 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 331 332 if (stateTrue && !stateFalse) { 333 // If the values are known to be equal, that's automatically an overlap. 334 emitOverlapBug(C, stateTrue, First, Second); 335 return NULL; 336 } 337 338 // assume the two expressions are not equal. 339 assert(stateFalse); 340 state = stateFalse; 341 342 // Which value comes first? 343 ASTContext &Ctx = svalBuilder.getContext(); 344 QualType cmpTy = Ctx.IntTy; 345 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 346 *firstLoc, *secondLoc, cmpTy); 347 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 348 if (!reverseTest) 349 return state; 350 351 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 352 if (stateTrue) { 353 if (stateFalse) { 354 // If we don't know which one comes first, we can't perform this test. 355 return state; 356 } else { 357 // Switch the values so that firstVal is before secondVal. 358 Loc *tmpLoc = firstLoc; 359 firstLoc = secondLoc; 360 secondLoc = tmpLoc; 361 362 // Switch the Exprs as well, so that they still correspond. 363 const Expr *tmpExpr = First; 364 First = Second; 365 Second = tmpExpr; 366 } 367 } 368 369 // Get the length, and make sure it too is known. 370 SVal LengthVal = state->getSVal(Size); 371 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 372 if (!Length) 373 return state; 374 375 // Convert the first buffer's start address to char*. 376 // Bail out if the cast fails. 377 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 378 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType()); 379 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 380 if (!FirstStartLoc) 381 return state; 382 383 // Compute the end of the first buffer. Bail out if THAT fails. 384 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 385 *FirstStartLoc, *Length, CharPtrTy); 386 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 387 if (!FirstEndLoc) 388 return state; 389 390 // Is the end of the first buffer past the start of the second buffer? 391 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 392 *FirstEndLoc, *secondLoc, cmpTy); 393 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 394 if (!OverlapTest) 395 return state; 396 397 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 398 399 if (stateTrue && !stateFalse) { 400 // Overlap! 401 emitOverlapBug(C, stateTrue, First, Second); 402 return NULL; 403 } 404 405 // assume the two expressions don't overlap. 406 assert(stateFalse); 407 return stateFalse; 408} 409 410void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, 411 const Stmt *First, const Stmt *Second) { 412 ExplodedNode *N = C.generateSink(state); 413 if (!N) 414 return; 415 416 if (!BT_Overlap) 417 BT_Overlap = new BugType("Unix API", "Improper arguments"); 418 419 // Generate a report for this bug. 420 RangedBugReport *report = 421 new RangedBugReport(*BT_Overlap, 422 "Arguments must not be overlapping buffers", N); 423 report->addRange(First->getSourceRange()); 424 report->addRange(Second->getSourceRange()); 425 426 C.EmitReport(report); 427} 428 429const GRState *CStringChecker::setCStringLength(const GRState *state, 430 const MemRegion *MR, 431 SVal strLength) { 432 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 433 if (strLength.isUnknown()) 434 return state; 435 436 MR = MR->StripCasts(); 437 438 switch (MR->getKind()) { 439 case MemRegion::StringRegionKind: 440 // FIXME: This can happen if we strcpy() into a string region. This is 441 // undefined [C99 6.4.5p6], but we should still warn about it. 442 return state; 443 444 case MemRegion::SymbolicRegionKind: 445 case MemRegion::AllocaRegionKind: 446 case MemRegion::VarRegionKind: 447 case MemRegion::FieldRegionKind: 448 case MemRegion::ObjCIvarRegionKind: 449 return state->set<CStringLength>(MR, strLength); 450 451 case MemRegion::ElementRegionKind: 452 // FIXME: Handle element regions by upper-bounding the parent region's 453 // string length. 454 return state; 455 456 default: 457 // Other regions (mostly non-data) can't have a reliable C string length. 458 // For now, just ignore the change. 459 // FIXME: These are rare but not impossible. We should output some kind of 460 // warning for things like strcpy((char[]){'a', 0}, "b"); 461 return state; 462 } 463} 464 465SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 466 const GRState *&state, 467 const Expr *Ex, 468 const MemRegion *MR) { 469 // If there's a recorded length, go ahead and return it. 470 const SVal *Recorded = state->get<CStringLength>(MR); 471 if (Recorded) 472 return *Recorded; 473 474 // Otherwise, get a new symbol and update the state. 475 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 476 SValBuilder &svalBuilder = C.getSValBuilder(); 477 QualType sizeTy = svalBuilder.getContext().getSizeType(); 478 SVal strLength = svalBuilder.getMetadataSymbolVal(getTag(), MR, Ex, sizeTy, Count); 479 state = state->set<CStringLength>(MR, strLength); 480 return strLength; 481} 482 483SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, 484 const Expr *Ex, SVal Buf) { 485 const MemRegion *MR = Buf.getAsRegion(); 486 if (!MR) { 487 // If we can't get a region, see if it's something we /know/ isn't a 488 // C string. In the context of locations, the only time we can issue such 489 // a warning is for labels. 490 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 491 if (ExplodedNode *N = C.generateNode(state)) { 492 if (!BT_NotCString) 493 BT_NotCString = new BuiltinBug("API", 494 "Argument is not a null-terminated string."); 495 496 llvm::SmallString<120> buf; 497 llvm::raw_svector_ostream os(buf); 498 os << "Argument to byte string function is the address of the label '" 499 << Label->getLabel()->getID()->getName() 500 << "', which is not a null-terminated string"; 501 502 // Generate a report for this bug. 503 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 504 os.str(), N); 505 506 report->addRange(Ex->getSourceRange()); 507 C.EmitReport(report); 508 } 509 510 return UndefinedVal(); 511 } 512 513 // If it's not a region and not a label, give up. 514 return UnknownVal(); 515 } 516 517 // If we have a region, strip casts from it and see if we can figure out 518 // its length. For anything we can't figure out, just return UnknownVal. 519 MR = MR->StripCasts(); 520 521 switch (MR->getKind()) { 522 case MemRegion::StringRegionKind: { 523 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 524 // so we can assume that the byte length is the correct C string length. 525 SValBuilder &svalBuilder = C.getSValBuilder(); 526 QualType sizeTy = svalBuilder.getContext().getSizeType(); 527 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 528 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 529 } 530 case MemRegion::SymbolicRegionKind: 531 case MemRegion::AllocaRegionKind: 532 case MemRegion::VarRegionKind: 533 case MemRegion::FieldRegionKind: 534 case MemRegion::ObjCIvarRegionKind: 535 return getCStringLengthForRegion(C, state, Ex, MR); 536 case MemRegion::CompoundLiteralRegionKind: 537 // FIXME: Can we track this? Is it necessary? 538 return UnknownVal(); 539 case MemRegion::ElementRegionKind: 540 // FIXME: How can we handle this? It's not good enough to subtract the 541 // offset from the base string length; consider "123\x00567" and &a[5]. 542 return UnknownVal(); 543 default: 544 // Other regions (mostly non-data) can't have a reliable C string length. 545 // In this case, an error is emitted and UndefinedVal is returned. 546 // The caller should always be prepared to handle this case. 547 if (ExplodedNode *N = C.generateNode(state)) { 548 if (!BT_NotCString) 549 BT_NotCString = new BuiltinBug("API", 550 "Argument is not a null-terminated string."); 551 552 llvm::SmallString<120> buf; 553 llvm::raw_svector_ostream os(buf); 554 555 os << "Argument to byte string function is "; 556 557 if (SummarizeRegion(os, C.getASTContext(), MR)) 558 os << ", which is not a null-terminated string"; 559 else 560 os << "not a null-terminated string"; 561 562 // Generate a report for this bug. 563 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 564 os.str(), N); 565 566 report->addRange(Ex->getSourceRange()); 567 C.EmitReport(report); 568 } 569 570 return UndefinedVal(); 571 } 572} 573 574const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, 575 const GRState *state, 576 const Expr *E, SVal V) { 577 Loc *L = dyn_cast<Loc>(&V); 578 if (!L) 579 return state; 580 581 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 582 // some assumptions about the value that CFRefCount can't. Even so, it should 583 // probably be refactored. 584 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 585 const MemRegion *R = MR->getRegion()->StripCasts(); 586 587 // Are we dealing with an ElementRegion? If so, we should be invalidating 588 // the super-region. 589 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 590 R = ER->getSuperRegion(); 591 // FIXME: What about layers of ElementRegions? 592 } 593 594 // Invalidate this region. 595 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 596 return state->InvalidateRegion(R, E, Count, NULL); 597 } 598 599 // If we have a non-region value by chance, just remove the binding. 600 // FIXME: is this necessary or correct? This handles the non-Region 601 // cases. Is it ever valid to store to these? 602 return state->unbindLoc(*L); 603} 604 605bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 606 const MemRegion *MR) { 607 const TypedRegion *TR = dyn_cast<TypedRegion>(MR); 608 if (!TR) 609 return false; 610 611 switch (TR->getKind()) { 612 case MemRegion::FunctionTextRegionKind: { 613 const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl(); 614 if (FD) 615 os << "the address of the function '" << FD << "'"; 616 else 617 os << "the address of a function"; 618 return true; 619 } 620 case MemRegion::BlockTextRegionKind: 621 os << "block text"; 622 return true; 623 case MemRegion::BlockDataRegionKind: 624 os << "a block"; 625 return true; 626 case MemRegion::CXXThisRegionKind: 627 case MemRegion::CXXTempObjectRegionKind: 628 os << "a C++ temp object of type " << TR->getValueType().getAsString(); 629 return true; 630 case MemRegion::VarRegionKind: 631 os << "a variable of type" << TR->getValueType().getAsString(); 632 return true; 633 case MemRegion::FieldRegionKind: 634 os << "a field of type " << TR->getValueType().getAsString(); 635 return true; 636 case MemRegion::ObjCIvarRegionKind: 637 os << "an instance variable of type " << TR->getValueType().getAsString(); 638 return true; 639 default: 640 return false; 641 } 642} 643 644//===----------------------------------------------------------------------===// 645// evaluation of individual function calls. 646//===----------------------------------------------------------------------===// 647 648void CStringChecker::evalCopyCommon(CheckerContext &C, const GRState *state, 649 const Expr *Size, const Expr *Dest, 650 const Expr *Source, bool Restricted) { 651 // See if the size argument is zero. 652 SVal sizeVal = state->getSVal(Size); 653 QualType sizeTy = Size->getType(); 654 655 const GRState *stateZeroSize, *stateNonZeroSize; 656 llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy); 657 658 // If the size is zero, there won't be any actual memory access. 659 if (stateZeroSize) 660 C.addTransition(stateZeroSize); 661 662 // If the size can be nonzero, we have to check the other arguments. 663 if (stateNonZeroSize) { 664 state = stateNonZeroSize; 665 state = CheckBufferAccess(C, state, Size, Dest, Source, 666 /* FirstIsDst = */ true); 667 if (Restricted) 668 state = CheckOverlap(C, state, Size, Dest, Source); 669 670 if (state) { 671 // Invalidate the destination. 672 // FIXME: Even if we can't perfectly model the copy, we should see if we 673 // can use LazyCompoundVals to copy the source values into the destination. 674 // This would probably remove any existing bindings past the end of the 675 // copied region, but that's still an improvement over blank invalidation. 676 state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); 677 C.addTransition(state); 678 } 679 } 680} 681 682 683void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) { 684 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 685 // The return value is the address of the destination buffer. 686 const Expr *Dest = CE->getArg(0); 687 const GRState *state = C.getState(); 688 state = state->BindExpr(CE, state->getSVal(Dest)); 689 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1), true); 690} 691 692void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) { 693 // void *memmove(void *dst, const void *src, size_t n); 694 // The return value is the address of the destination buffer. 695 const Expr *Dest = CE->getArg(0); 696 const GRState *state = C.getState(); 697 state = state->BindExpr(CE, state->getSVal(Dest)); 698 evalCopyCommon(C, state, CE->getArg(2), Dest, CE->getArg(1)); 699} 700 701void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) { 702 // void bcopy(const void *src, void *dst, size_t n); 703 evalCopyCommon(C, C.getState(), CE->getArg(2), CE->getArg(1), CE->getArg(0)); 704} 705 706void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) { 707 // int memcmp(const void *s1, const void *s2, size_t n); 708 const Expr *Left = CE->getArg(0); 709 const Expr *Right = CE->getArg(1); 710 const Expr *Size = CE->getArg(2); 711 712 const GRState *state = C.getState(); 713 SValBuilder &svalBuilder = C.getSValBuilder(); 714 715 // See if the size argument is zero. 716 SVal sizeVal = state->getSVal(Size); 717 QualType sizeTy = Size->getType(); 718 719 const GRState *stateZeroSize, *stateNonZeroSize; 720 llvm::tie(stateZeroSize, stateNonZeroSize) = 721 assumeZero(C, state, sizeVal, sizeTy); 722 723 // If the size can be zero, the result will be 0 in that case, and we don't 724 // have to check either of the buffers. 725 if (stateZeroSize) { 726 state = stateZeroSize; 727 state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 728 C.addTransition(state); 729 } 730 731 // If the size can be nonzero, we have to check the other arguments. 732 if (stateNonZeroSize) { 733 state = stateNonZeroSize; 734 // If we know the two buffers are the same, we know the result is 0. 735 // First, get the two buffers' addresses. Another checker will have already 736 // made sure they're not undefined. 737 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left)); 738 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right)); 739 740 // See if they are the same. 741 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 742 const GRState *StSameBuf, *StNotSameBuf; 743 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 744 745 // If the two arguments might be the same buffer, we know the result is zero, 746 // and we only need to check one size. 747 if (StSameBuf) { 748 state = StSameBuf; 749 state = CheckBufferAccess(C, state, Size, Left); 750 if (state) { 751 state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 752 C.addTransition(state); 753 } 754 } 755 756 // If the two arguments might be different buffers, we have to check the 757 // size of both of them. 758 if (StNotSameBuf) { 759 state = StNotSameBuf; 760 state = CheckBufferAccess(C, state, Size, Left, Right); 761 if (state) { 762 // The return value is the comparison result, which we don't know. 763 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 764 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 765 state = state->BindExpr(CE, CmpV); 766 C.addTransition(state); 767 } 768 } 769 } 770} 771 772void CStringChecker::evalstrLength(CheckerContext &C, const CallExpr *CE) { 773 // size_t strlen(const char *s); 774 const GRState *state = C.getState(); 775 const Expr *Arg = CE->getArg(0); 776 SVal ArgVal = state->getSVal(Arg); 777 778 // Check that the argument is non-null. 779 state = checkNonNull(C, state, Arg, ArgVal); 780 781 if (state) { 782 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 783 784 // If the argument isn't a valid C string, there's no valid state to 785 // transition to. 786 if (strLength.isUndef()) 787 return; 788 789 // If getCStringLength couldn't figure out the length, conjure a return 790 // value, so it can be used in constraints, at least. 791 if (strLength.isUnknown()) { 792 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 793 strLength = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 794 } 795 796 // Bind the return value. 797 state = state->BindExpr(CE, strLength); 798 C.addTransition(state); 799 } 800} 801 802void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) { 803 // char *strcpy(char *restrict dst, const char *restrict src); 804 evalStrcpyCommon(C, CE, /* returnEnd = */ false); 805} 806 807void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) { 808 // char *stpcpy(char *restrict dst, const char *restrict src); 809 evalStrcpyCommon(C, CE, /* returnEnd = */ true); 810} 811 812void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 813 bool returnEnd) { 814 const GRState *state = C.getState(); 815 816 // Check that the destination is non-null 817 const Expr *Dst = CE->getArg(0); 818 SVal DstVal = state->getSVal(Dst); 819 820 state = checkNonNull(C, state, Dst, DstVal); 821 if (!state) 822 return; 823 824 // Check that the source is non-null. 825 const Expr *srcExpr = CE->getArg(1); 826 SVal srcVal = state->getSVal(srcExpr); 827 state = checkNonNull(C, state, srcExpr, srcVal); 828 if (!state) 829 return; 830 831 // Get the string length of the source. 832 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 833 834 // If the source isn't a valid C string, give up. 835 if (strLength.isUndef()) 836 return; 837 838 SVal Result = (returnEnd ? UnknownVal() : DstVal); 839 840 // If the destination is a MemRegion, try to check for a buffer overflow and 841 // record the new string length. 842 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 843 // If the length is known, we can check for an overflow. 844 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&strLength)) { 845 SVal lastElement = 846 C.getSValBuilder().evalBinOpLN(state, BO_Add, *dstRegVal, 847 *knownStrLength, Dst->getType()); 848 849 state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true); 850 if (!state) 851 return; 852 853 // If this is a stpcpy-style copy, the last element is the return value. 854 if (returnEnd) 855 Result = lastElement; 856 } 857 858 // Invalidate the destination. This must happen before we set the C string 859 // length because invalidation will clear the length. 860 // FIXME: Even if we can't perfectly model the copy, we should see if we 861 // can use LazyCompoundVals to copy the source values into the destination. 862 // This would probably remove any existing bindings past the end of the 863 // string, but that's still an improvement over blank invalidation. 864 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 865 866 // Set the C string length of the destination. 867 state = setCStringLength(state, dstRegVal->getRegion(), strLength); 868 } 869 870 // If this is a stpcpy-style copy, but we were unable to check for a buffer 871 // overflow, we still need a result. Conjure a return value. 872 if (returnEnd && Result.isUnknown()) { 873 SValBuilder &svalBuilder = C.getSValBuilder(); 874 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 875 strLength = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 876 } 877 878 // Set the return value. 879 state = state->BindExpr(CE, Result); 880 C.addTransition(state); 881} 882 883//===----------------------------------------------------------------------===// 884// The driver method, and other Checker callbacks. 885//===----------------------------------------------------------------------===// 886 887bool CStringChecker::evalCallExpr(CheckerContext &C, const CallExpr *CE) { 888 // Get the callee. All the functions we care about are C functions 889 // with simple identifiers. 890 const GRState *state = C.getState(); 891 const Expr *Callee = CE->getCallee(); 892 const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl(); 893 894 if (!FD) 895 return false; 896 897 // Get the name of the callee. If it's a builtin, strip off the prefix. 898 IdentifierInfo *II = FD->getIdentifier(); 899 if (!II) // if no identifier, not a simple C function 900 return false; 901 llvm::StringRef Name = II->getName(); 902 if (Name.startswith("__builtin_")) 903 Name = Name.substr(10); 904 905 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 906 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy) 907 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) 908 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) 909 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) 910 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) 911 .Case("strlen", &CStringChecker::evalstrLength) 912 .Case("bcopy", &CStringChecker::evalBcopy) 913 .Default(NULL); 914 915 // If the callee isn't a string function, let another checker handle it. 916 if (!evalFunction) 917 return false; 918 919 // Check and evaluate the call. 920 (this->*evalFunction)(C, CE); 921 return true; 922} 923 924void CStringChecker::PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS) { 925 // Record string length for char a[] = "abc"; 926 const GRState *state = C.getState(); 927 928 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 929 I != E; ++I) { 930 const VarDecl *D = dyn_cast<VarDecl>(*I); 931 if (!D) 932 continue; 933 934 // FIXME: Handle array fields of structs. 935 if (!D->getType()->isArrayType()) 936 continue; 937 938 const Expr *Init = D->getInit(); 939 if (!Init) 940 continue; 941 if (!isa<StringLiteral>(Init)) 942 continue; 943 944 Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext()); 945 const MemRegion *MR = VarLoc.getAsRegion(); 946 if (!MR) 947 continue; 948 949 SVal StrVal = state->getSVal(Init); 950 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 951 DefinedOrUnknownSVal strLength 952 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 953 954 state = state->set<CStringLength>(MR, strLength); 955 } 956 957 C.addTransition(state); 958} 959 960bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) { 961 CStringLength::EntryMap Entries = state->get<CStringLength>(); 962 return !Entries.isEmpty(); 963} 964 965const GRState *CStringChecker::EvalRegionChanges(const GRState *state, 966 const MemRegion * const *Begin, 967 const MemRegion * const *End, 968 bool *) { 969 CStringLength::EntryMap Entries = state->get<CStringLength>(); 970 if (Entries.isEmpty()) 971 return state; 972 973 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 974 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 975 976 // First build sets for the changed regions and their super-regions. 977 for ( ; Begin != End; ++Begin) { 978 const MemRegion *MR = *Begin; 979 Invalidated.insert(MR); 980 981 SuperRegions.insert(MR); 982 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 983 MR = SR->getSuperRegion(); 984 SuperRegions.insert(MR); 985 } 986 } 987 988 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 989 990 // Then loop over the entries in the current state. 991 for (CStringLength::EntryMap::iterator I = Entries.begin(), 992 E = Entries.end(); I != E; ++I) { 993 const MemRegion *MR = I.getKey(); 994 995 // Is this entry for a super-region of a changed region? 996 if (SuperRegions.count(MR)) { 997 Entries = F.remove(Entries, MR); 998 continue; 999 } 1000 1001 // Is this entry for a sub-region of a changed region? 1002 const MemRegion *Super = MR; 1003 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1004 Super = SR->getSuperRegion(); 1005 if (Invalidated.count(Super)) { 1006 Entries = F.remove(Entries, MR); 1007 break; 1008 } 1009 } 1010 } 1011 1012 return state->set<CStringLength>(Entries); 1013} 1014 1015void CStringChecker::MarkLiveSymbols(const GRState *state, SymbolReaper &SR) { 1016 // Mark all symbols in our string length map as valid. 1017 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1018 1019 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1020 I != E; ++I) { 1021 SVal Len = I.getData(); 1022 if (SymbolRef Sym = Len.getAsSymbol()) 1023 SR.markInUse(Sym); 1024 } 1025} 1026 1027void CStringChecker::evalDeadSymbols(CheckerContext &C, SymbolReaper &SR) { 1028 if (!SR.hasDeadSymbols()) 1029 return; 1030 1031 const GRState *state = C.getState(); 1032 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1033 if (Entries.isEmpty()) 1034 return; 1035 1036 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1037 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1038 I != E; ++I) { 1039 SVal Len = I.getData(); 1040 if (SymbolRef Sym = Len.getAsSymbol()) { 1041 if (SR.isDead(Sym)) 1042 Entries = F.remove(Entries, I.getKey()); 1043 } 1044 } 1045 1046 state = state->set<CStringLength>(Entries); 1047 C.generateNode(state); 1048} 1049