CStringChecker.cpp revision d5af0e17b00ab2ee6a8c1f352bb9eeb1cc5b2d07
1//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This defines CStringChecker, which is an assortment of checks on calls 11// to functions in <string.h>. 12// 13//===----------------------------------------------------------------------===// 14 15#include "ClangSACheckers.h" 16#include "clang/StaticAnalyzer/Core/Checker.h" 17#include "clang/StaticAnalyzer/Core/CheckerManager.h" 18#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 19#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h" 21#include "llvm/ADT/StringSwitch.h" 22 23using namespace clang; 24using namespace ento; 25 26namespace { 27class CStringChecker : public Checker< eval::Call, 28 check::PreStmt<DeclStmt>, 29 check::LiveSymbols, 30 check::DeadSymbols, 31 check::RegionChanges 32 > { 33 mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite, 34 BT_Overlap, BT_NotCString, 35 BT_AdditionOverflow; 36public: 37 static void *getTag() { static int tag; return &tag; } 38 39 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 40 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 41 void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const; 42 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 43 bool wantsRegionChangeUpdate(const GRState *state) const; 44 45 const GRState *checkRegionChanges(const GRState *state, 46 const StoreManager::InvalidatedSymbols *, 47 const MemRegion * const *Begin, 48 const MemRegion * const *End) const; 49 50 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 51 const CallExpr *) const; 52 53 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 54 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 55 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 56 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 57 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 58 const GRState *state, 59 const Expr *Size, const Expr *Source, const Expr *Dest, 60 bool Restricted = false, 61 bool IsMempcpy = false) const; 62 63 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 64 65 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 66 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 67 void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 68 bool IsStrnlen = false) const; 69 70 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 71 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 72 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 73 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd, 74 bool isBounded, bool isAppending) const; 75 76 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 77 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 78 79 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 80 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 81 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 82 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 83 void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 84 bool isBounded = false, bool ignoreCase = false) const; 85 86 // Utility methods 87 std::pair<const GRState*, const GRState*> 88 static assumeZero(CheckerContext &C, 89 const GRState *state, SVal V, QualType Ty); 90 91 static const GRState *setCStringLength(const GRState *state, 92 const MemRegion *MR, SVal strLength); 93 static SVal getCStringLengthForRegion(CheckerContext &C, 94 const GRState *&state, 95 const Expr *Ex, const MemRegion *MR, 96 bool hypothetical); 97 SVal getCStringLength(CheckerContext &C, const GRState *&state, 98 const Expr *Ex, SVal Buf, 99 bool hypothetical = false) const; 100 101 const StringLiteral *getCStringLiteral(CheckerContext &C, 102 const GRState *&state, 103 const Expr *expr, 104 SVal val) const; 105 106 static const GRState *InvalidateBuffer(CheckerContext &C, 107 const GRState *state, 108 const Expr *Ex, SVal V); 109 110 static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 111 const MemRegion *MR); 112 113 // Re-usable checks 114 const GRState *checkNonNull(CheckerContext &C, const GRState *state, 115 const Expr *S, SVal l) const; 116 const GRState *CheckLocation(CheckerContext &C, const GRState *state, 117 const Expr *S, SVal l, 118 bool IsDestination = false) const; 119 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 120 const Expr *Size, 121 const Expr *FirstBuf, 122 const Expr *SecondBuf = NULL, 123 bool FirstIsDestination = false) const; 124 const GRState *CheckOverlap(CheckerContext &C, const GRState *state, 125 const Expr *Size, const Expr *First, 126 const Expr *Second) const; 127 void emitOverlapBug(CheckerContext &C, const GRState *state, 128 const Stmt *First, const Stmt *Second) const; 129 const GRState *checkAdditionOverflow(CheckerContext &C, const GRState *state, 130 NonLoc left, NonLoc right) const; 131}; 132 133class CStringLength { 134public: 135 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 136}; 137} //end anonymous namespace 138 139namespace clang { 140namespace ento { 141 template <> 142 struct GRStateTrait<CStringLength> 143 : public GRStatePartialTrait<CStringLength::EntryMap> { 144 static void *GDMIndex() { return CStringChecker::getTag(); } 145 }; 146} 147} 148 149//===----------------------------------------------------------------------===// 150// Individual checks and utility methods. 151//===----------------------------------------------------------------------===// 152 153std::pair<const GRState*, const GRState*> 154CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V, 155 QualType Ty) { 156 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 157 if (!val) 158 return std::pair<const GRState*, const GRState *>(state, state); 159 160 SValBuilder &svalBuilder = C.getSValBuilder(); 161 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 162 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 163} 164 165const GRState *CStringChecker::checkNonNull(CheckerContext &C, 166 const GRState *state, 167 const Expr *S, SVal l) const { 168 // If a previous check has failed, propagate the failure. 169 if (!state) 170 return NULL; 171 172 const GRState *stateNull, *stateNonNull; 173 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 174 175 if (stateNull && !stateNonNull) { 176 ExplodedNode *N = C.generateSink(stateNull); 177 if (!N) 178 return NULL; 179 180 if (!BT_Null) 181 BT_Null.reset(new BuiltinBug("API", 182 "Null pointer argument in call to byte string function")); 183 184 // Generate a report for this bug. 185 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 186 EnhancedBugReport *report = new EnhancedBugReport(*BT, 187 BT->getDescription(), N); 188 189 report->addRange(S->getSourceRange()); 190 report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); 191 C.EmitReport(report); 192 return NULL; 193 } 194 195 // From here on, assume that the value is non-null. 196 assert(stateNonNull); 197 return stateNonNull; 198} 199 200// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 201const GRState *CStringChecker::CheckLocation(CheckerContext &C, 202 const GRState *state, 203 const Expr *S, SVal l, 204 bool IsDestination) const { 205 // If a previous check has failed, propagate the failure. 206 if (!state) 207 return NULL; 208 209 // Check for out of bound array element access. 210 const MemRegion *R = l.getAsRegion(); 211 if (!R) 212 return state; 213 214 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 215 if (!ER) 216 return state; 217 218 assert(ER->getValueType() == C.getASTContext().CharTy && 219 "CheckLocation should only be called with char* ElementRegions"); 220 221 // Get the size of the array. 222 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 223 SValBuilder &svalBuilder = C.getSValBuilder(); 224 SVal Extent = svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 225 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 226 227 // Get the index of the accessed element. 228 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 229 230 const GRState *StInBound = state->assumeInBound(Idx, Size, true); 231 const GRState *StOutBound = state->assumeInBound(Idx, Size, false); 232 if (StOutBound && !StInBound) { 233 ExplodedNode *N = C.generateSink(StOutBound); 234 if (!N) 235 return NULL; 236 237 BuiltinBug *BT; 238 if (IsDestination) { 239 if (!BT_BoundsWrite) { 240 BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access", 241 "Byte string function overflows destination buffer")); 242 } 243 BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get()); 244 } else { 245 if (!BT_Bounds) { 246 BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", 247 "Byte string function accesses out-of-bound array element")); 248 } 249 BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 250 } 251 252 // FIXME: It would be nice to eventually make this diagnostic more clear, 253 // e.g., by referencing the original declaration or by saying *why* this 254 // reference is outside the range. 255 256 // Generate a report for this bug. 257 RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); 258 259 report->addRange(S->getSourceRange()); 260 C.EmitReport(report); 261 return NULL; 262 } 263 264 // Array bound check succeeded. From this point forward the array bound 265 // should always succeed. 266 return StInBound; 267} 268 269const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, 270 const GRState *state, 271 const Expr *Size, 272 const Expr *FirstBuf, 273 const Expr *SecondBuf, 274 bool FirstIsDestination) const { 275 // If a previous check has failed, propagate the failure. 276 if (!state) 277 return NULL; 278 279 SValBuilder &svalBuilder = C.getSValBuilder(); 280 ASTContext &Ctx = C.getASTContext(); 281 282 QualType sizeTy = Size->getType(); 283 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 284 285 // Check that the first buffer is non-null. 286 SVal BufVal = state->getSVal(FirstBuf); 287 state = checkNonNull(C, state, FirstBuf, BufVal); 288 if (!state) 289 return NULL; 290 291 // Get the access length and make sure it is known. 292 SVal LengthVal = state->getSVal(Size); 293 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 294 if (!Length) 295 return state; 296 297 // Compute the offset of the last element to be accessed: size-1. 298 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 299 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 300 *Length, One, sizeTy)); 301 302 // Check that the first buffer is sufficiently long. 303 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 304 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 305 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 306 LastOffset, PtrTy); 307 state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); 308 309 // If the buffer isn't large enough, abort. 310 if (!state) 311 return NULL; 312 } 313 314 // If there's a second buffer, check it as well. 315 if (SecondBuf) { 316 BufVal = state->getSVal(SecondBuf); 317 state = checkNonNull(C, state, SecondBuf, BufVal); 318 if (!state) 319 return NULL; 320 321 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 322 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 323 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 324 LastOffset, PtrTy); 325 state = CheckLocation(C, state, SecondBuf, BufEnd); 326 } 327 } 328 329 // Large enough or not, return this state! 330 return state; 331} 332 333const GRState *CStringChecker::CheckOverlap(CheckerContext &C, 334 const GRState *state, 335 const Expr *Size, 336 const Expr *First, 337 const Expr *Second) const { 338 // Do a simple check for overlap: if the two arguments are from the same 339 // buffer, see if the end of the first is greater than the start of the second 340 // or vice versa. 341 342 // If a previous check has failed, propagate the failure. 343 if (!state) 344 return NULL; 345 346 const GRState *stateTrue, *stateFalse; 347 348 // Get the buffer values and make sure they're known locations. 349 SVal firstVal = state->getSVal(First); 350 SVal secondVal = state->getSVal(Second); 351 352 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 353 if (!firstLoc) 354 return state; 355 356 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 357 if (!secondLoc) 358 return state; 359 360 // Are the two values the same? 361 SValBuilder &svalBuilder = C.getSValBuilder(); 362 llvm::tie(stateTrue, stateFalse) = 363 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 364 365 if (stateTrue && !stateFalse) { 366 // If the values are known to be equal, that's automatically an overlap. 367 emitOverlapBug(C, stateTrue, First, Second); 368 return NULL; 369 } 370 371 // assume the two expressions are not equal. 372 assert(stateFalse); 373 state = stateFalse; 374 375 // Which value comes first? 376 ASTContext &Ctx = svalBuilder.getContext(); 377 QualType cmpTy = Ctx.IntTy; 378 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 379 *firstLoc, *secondLoc, cmpTy); 380 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 381 if (!reverseTest) 382 return state; 383 384 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 385 if (stateTrue) { 386 if (stateFalse) { 387 // If we don't know which one comes first, we can't perform this test. 388 return state; 389 } else { 390 // Switch the values so that firstVal is before secondVal. 391 Loc *tmpLoc = firstLoc; 392 firstLoc = secondLoc; 393 secondLoc = tmpLoc; 394 395 // Switch the Exprs as well, so that they still correspond. 396 const Expr *tmpExpr = First; 397 First = Second; 398 Second = tmpExpr; 399 } 400 } 401 402 // Get the length, and make sure it too is known. 403 SVal LengthVal = state->getSVal(Size); 404 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 405 if (!Length) 406 return state; 407 408 // Convert the first buffer's start address to char*. 409 // Bail out if the cast fails. 410 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 411 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, First->getType()); 412 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 413 if (!FirstStartLoc) 414 return state; 415 416 // Compute the end of the first buffer. Bail out if THAT fails. 417 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 418 *FirstStartLoc, *Length, CharPtrTy); 419 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 420 if (!FirstEndLoc) 421 return state; 422 423 // Is the end of the first buffer past the start of the second buffer? 424 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 425 *FirstEndLoc, *secondLoc, cmpTy); 426 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 427 if (!OverlapTest) 428 return state; 429 430 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 431 432 if (stateTrue && !stateFalse) { 433 // Overlap! 434 emitOverlapBug(C, stateTrue, First, Second); 435 return NULL; 436 } 437 438 // assume the two expressions don't overlap. 439 assert(stateFalse); 440 return stateFalse; 441} 442 443void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, 444 const Stmt *First, const Stmt *Second) const { 445 ExplodedNode *N = C.generateSink(state); 446 if (!N) 447 return; 448 449 if (!BT_Overlap) 450 BT_Overlap.reset(new BugType("Unix API", "Improper arguments")); 451 452 // Generate a report for this bug. 453 RangedBugReport *report = 454 new RangedBugReport(*BT_Overlap, 455 "Arguments must not be overlapping buffers", N); 456 report->addRange(First->getSourceRange()); 457 report->addRange(Second->getSourceRange()); 458 459 C.EmitReport(report); 460} 461 462const GRState *CStringChecker::checkAdditionOverflow(CheckerContext &C, 463 const GRState *state, 464 NonLoc left, 465 NonLoc right) const { 466 // If a previous check has failed, propagate the failure. 467 if (!state) 468 return NULL; 469 470 SValBuilder &svalBuilder = C.getSValBuilder(); 471 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 472 473 QualType sizeTy = svalBuilder.getContext().getSizeType(); 474 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 475 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 476 477 SVal maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 478 sizeTy); 479 480 if (maxMinusRight.isUnknownOrUndef()) { 481 // Try switching the operands. (The order of these two assignments is 482 // important!) 483 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 484 sizeTy); 485 left = right; 486 } 487 488 if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) { 489 QualType cmpTy = svalBuilder.getConditionType(); 490 // If left > max - right, we have an overflow. 491 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 492 *maxMinusRightNL, cmpTy); 493 494 const GRState *stateOverflow, *stateOkay; 495 llvm::tie(stateOverflow, stateOkay) = 496 state->assume(cast<DefinedOrUnknownSVal>(willOverflow)); 497 498 if (stateOverflow && !stateOkay) { 499 // We have an overflow. Emit a bug report. 500 ExplodedNode *N = C.generateSink(stateOverflow); 501 if (!N) 502 return NULL; 503 504 if (!BT_AdditionOverflow) 505 BT_AdditionOverflow.reset(new BuiltinBug("API", 506 "Sum of expressions causes overflow")); 507 508 llvm::SmallString<120> buf; 509 llvm::raw_svector_ostream os(buf); 510 // This isn't a great error message, but this should never occur in real 511 // code anyway -- you'd have to create a buffer longer than a size_t can 512 // represent, which is sort of a contradiction. 513 os << "This expression will create a string whose length is too big to " 514 << "be represented as a size_t"; 515 516 // Generate a report for this bug. 517 BugReport *report = new BugReport(*BT_AdditionOverflow, os.str(), N); 518 C.EmitReport(report); 519 520 return NULL; 521 } 522 523 // From now on, assume an overflow didn't occur. 524 assert(stateOkay); 525 state = stateOkay; 526 } 527 528 return state; 529} 530 531const GRState *CStringChecker::setCStringLength(const GRState *state, 532 const MemRegion *MR, 533 SVal strLength) { 534 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 535 536 MR = MR->StripCasts(); 537 538 switch (MR->getKind()) { 539 case MemRegion::StringRegionKind: 540 // FIXME: This can happen if we strcpy() into a string region. This is 541 // undefined [C99 6.4.5p6], but we should still warn about it. 542 return state; 543 544 case MemRegion::SymbolicRegionKind: 545 case MemRegion::AllocaRegionKind: 546 case MemRegion::VarRegionKind: 547 case MemRegion::FieldRegionKind: 548 case MemRegion::ObjCIvarRegionKind: 549 // These are the types we can currently track string lengths for. 550 break; 551 552 case MemRegion::ElementRegionKind: 553 // FIXME: Handle element regions by upper-bounding the parent region's 554 // string length. 555 return state; 556 557 default: 558 // Other regions (mostly non-data) can't have a reliable C string length. 559 // For now, just ignore the change. 560 // FIXME: These are rare but not impossible. We should output some kind of 561 // warning for things like strcpy((char[]){'a', 0}, "b"); 562 return state; 563 } 564 565 if (strLength.isUnknown()) 566 return state->remove<CStringLength>(MR); 567 568 return state->set<CStringLength>(MR, strLength); 569} 570 571SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 572 const GRState *&state, 573 const Expr *Ex, 574 const MemRegion *MR, 575 bool hypothetical) { 576 if (!hypothetical) { 577 // If there's a recorded length, go ahead and return it. 578 const SVal *Recorded = state->get<CStringLength>(MR); 579 if (Recorded) 580 return *Recorded; 581 } 582 583 // Otherwise, get a new symbol and update the state. 584 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 585 SValBuilder &svalBuilder = C.getSValBuilder(); 586 QualType sizeTy = svalBuilder.getContext().getSizeType(); 587 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 588 MR, Ex, sizeTy, Count); 589 590 if (!hypothetical) 591 state = state->set<CStringLength>(MR, strLength); 592 593 return strLength; 594} 595 596SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, 597 const Expr *Ex, SVal Buf, 598 bool hypothetical) const { 599 const MemRegion *MR = Buf.getAsRegion(); 600 if (!MR) { 601 // If we can't get a region, see if it's something we /know/ isn't a 602 // C string. In the context of locations, the only time we can issue such 603 // a warning is for labels. 604 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 605 if (ExplodedNode *N = C.generateNode(state)) { 606 if (!BT_NotCString) 607 BT_NotCString.reset(new BuiltinBug("API", 608 "Argument is not a null-terminated string.")); 609 610 llvm::SmallString<120> buf; 611 llvm::raw_svector_ostream os(buf); 612 os << "Argument to byte string function is the address of the label '" 613 << Label->getLabel()->getName() 614 << "', which is not a null-terminated string"; 615 616 // Generate a report for this bug. 617 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 618 os.str(), N); 619 620 report->addRange(Ex->getSourceRange()); 621 C.EmitReport(report); 622 } 623 624 return UndefinedVal(); 625 } 626 627 // If it's not a region and not a label, give up. 628 return UnknownVal(); 629 } 630 631 // If we have a region, strip casts from it and see if we can figure out 632 // its length. For anything we can't figure out, just return UnknownVal. 633 MR = MR->StripCasts(); 634 635 switch (MR->getKind()) { 636 case MemRegion::StringRegionKind: { 637 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 638 // so we can assume that the byte length is the correct C string length. 639 SValBuilder &svalBuilder = C.getSValBuilder(); 640 QualType sizeTy = svalBuilder.getContext().getSizeType(); 641 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 642 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 643 } 644 case MemRegion::SymbolicRegionKind: 645 case MemRegion::AllocaRegionKind: 646 case MemRegion::VarRegionKind: 647 case MemRegion::FieldRegionKind: 648 case MemRegion::ObjCIvarRegionKind: 649 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 650 case MemRegion::CompoundLiteralRegionKind: 651 // FIXME: Can we track this? Is it necessary? 652 return UnknownVal(); 653 case MemRegion::ElementRegionKind: 654 // FIXME: How can we handle this? It's not good enough to subtract the 655 // offset from the base string length; consider "123\x00567" and &a[5]. 656 return UnknownVal(); 657 default: 658 // Other regions (mostly non-data) can't have a reliable C string length. 659 // In this case, an error is emitted and UndefinedVal is returned. 660 // The caller should always be prepared to handle this case. 661 if (ExplodedNode *N = C.generateNode(state)) { 662 if (!BT_NotCString) 663 BT_NotCString.reset(new BuiltinBug("API", 664 "Argument is not a null-terminated string.")); 665 666 llvm::SmallString<120> buf; 667 llvm::raw_svector_ostream os(buf); 668 669 os << "Argument to byte string function is "; 670 671 if (SummarizeRegion(os, C.getASTContext(), MR)) 672 os << ", which is not a null-terminated string"; 673 else 674 os << "not a null-terminated string"; 675 676 // Generate a report for this bug. 677 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 678 os.str(), N); 679 680 report->addRange(Ex->getSourceRange()); 681 C.EmitReport(report); 682 } 683 684 return UndefinedVal(); 685 } 686} 687 688const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 689 const GRState *&state, const Expr *expr, SVal val) const { 690 691 // Get the memory region pointed to by the val. 692 const MemRegion *bufRegion = val.getAsRegion(); 693 if (!bufRegion) 694 return NULL; 695 696 // Strip casts off the memory region. 697 bufRegion = bufRegion->StripCasts(); 698 699 // Cast the memory region to a string region. 700 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 701 if (!strRegion) 702 return NULL; 703 704 // Return the actual string in the string region. 705 return strRegion->getStringLiteral(); 706} 707 708const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, 709 const GRState *state, 710 const Expr *E, SVal V) { 711 Loc *L = dyn_cast<Loc>(&V); 712 if (!L) 713 return state; 714 715 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 716 // some assumptions about the value that CFRefCount can't. Even so, it should 717 // probably be refactored. 718 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 719 const MemRegion *R = MR->getRegion()->StripCasts(); 720 721 // Are we dealing with an ElementRegion? If so, we should be invalidating 722 // the super-region. 723 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 724 R = ER->getSuperRegion(); 725 // FIXME: What about layers of ElementRegions? 726 } 727 728 // Invalidate this region. 729 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 730 return state->invalidateRegion(R, E, Count, NULL); 731 } 732 733 // If we have a non-region value by chance, just remove the binding. 734 // FIXME: is this necessary or correct? This handles the non-Region 735 // cases. Is it ever valid to store to these? 736 return state->unbindLoc(*L); 737} 738 739bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 740 const MemRegion *MR) { 741 const TypedRegion *TR = dyn_cast<TypedRegion>(MR); 742 if (!TR) 743 return false; 744 745 switch (TR->getKind()) { 746 case MemRegion::FunctionTextRegionKind: { 747 const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl(); 748 if (FD) 749 os << "the address of the function '" << FD << "'"; 750 else 751 os << "the address of a function"; 752 return true; 753 } 754 case MemRegion::BlockTextRegionKind: 755 os << "block text"; 756 return true; 757 case MemRegion::BlockDataRegionKind: 758 os << "a block"; 759 return true; 760 case MemRegion::CXXThisRegionKind: 761 case MemRegion::CXXTempObjectRegionKind: 762 os << "a C++ temp object of type " << TR->getValueType().getAsString(); 763 return true; 764 case MemRegion::VarRegionKind: 765 os << "a variable of type" << TR->getValueType().getAsString(); 766 return true; 767 case MemRegion::FieldRegionKind: 768 os << "a field of type " << TR->getValueType().getAsString(); 769 return true; 770 case MemRegion::ObjCIvarRegionKind: 771 os << "an instance variable of type " << TR->getValueType().getAsString(); 772 return true; 773 default: 774 return false; 775 } 776} 777 778//===----------------------------------------------------------------------===// 779// evaluation of individual function calls. 780//===----------------------------------------------------------------------===// 781 782void CStringChecker::evalCopyCommon(CheckerContext &C, 783 const CallExpr *CE, 784 const GRState *state, 785 const Expr *Size, const Expr *Dest, 786 const Expr *Source, bool Restricted, 787 bool IsMempcpy) const { 788 // See if the size argument is zero. 789 SVal sizeVal = state->getSVal(Size); 790 QualType sizeTy = Size->getType(); 791 792 const GRState *stateZeroSize, *stateNonZeroSize; 793 llvm::tie(stateZeroSize, stateNonZeroSize) = assumeZero(C, state, sizeVal, sizeTy); 794 795 // Get the value of the Dest. 796 SVal destVal = state->getSVal(Dest); 797 798 // If the size is zero, there won't be any actual memory access, so 799 // just bind the return value to the destination buffer and return. 800 if (stateZeroSize) { 801 stateZeroSize = stateZeroSize->BindExpr(CE, destVal); 802 C.addTransition(stateZeroSize); 803 } 804 805 // If the size can be nonzero, we have to check the other arguments. 806 if (stateNonZeroSize) { 807 state = stateNonZeroSize; 808 809 // Ensure the destination is not null. If it is NULL there will be a 810 // NULL pointer dereference. 811 state = checkNonNull(C, state, Dest, destVal); 812 if (!state) 813 return; 814 815 // Get the value of the Src. 816 SVal srcVal = state->getSVal(Source); 817 818 // Ensure the source is not null. If it is NULL there will be a 819 // NULL pointer dereference. 820 state = checkNonNull(C, state, Source, srcVal); 821 if (!state) 822 return; 823 824 // Ensure the accesses are valid and that the buffers do not overlap. 825 state = CheckBufferAccess(C, state, Size, Dest, Source, 826 /* FirstIsDst = */ true); 827 if (Restricted) 828 state = CheckOverlap(C, state, Size, Dest, Source); 829 830 if (!state) 831 return; 832 833 // If this is mempcpy, get the byte after the last byte copied and 834 // bind the expr. 835 if (IsMempcpy) { 836 loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal); 837 assert(destRegVal && "Destination should be a known MemRegionVal here"); 838 839 // Get the length to copy. 840 NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal); 841 842 if (lenValNonLoc) { 843 // Get the byte after the last byte copied. 844 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 845 *destRegVal, 846 *lenValNonLoc, 847 Dest->getType()); 848 849 // The byte after the last byte copied is the return value. 850 state = state->BindExpr(CE, lastElement); 851 } else { 852 // If we don't know how much we copied, we can at least 853 // conjure a return value for later. 854 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 855 SVal result = 856 C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 857 state = state->BindExpr(CE, result); 858 } 859 860 } else { 861 // All other copies return the destination buffer. 862 // (Well, bcopy() has a void return type, but this won't hurt.) 863 state = state->BindExpr(CE, destVal); 864 } 865 866 // Invalidate the destination. 867 // FIXME: Even if we can't perfectly model the copy, we should see if we 868 // can use LazyCompoundVals to copy the source values into the destination. 869 // This would probably remove any existing bindings past the end of the 870 // copied region, but that's still an improvement over blank invalidation. 871 state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); 872 C.addTransition(state); 873 } 874} 875 876 877void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 878 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 879 // The return value is the address of the destination buffer. 880 const Expr *Dest = CE->getArg(0); 881 const GRState *state = C.getState(); 882 883 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 884} 885 886void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 887 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 888 // The return value is a pointer to the byte following the last written byte. 889 const Expr *Dest = CE->getArg(0); 890 const GRState *state = C.getState(); 891 892 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 893} 894 895void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 896 // void *memmove(void *dst, const void *src, size_t n); 897 // The return value is the address of the destination buffer. 898 const Expr *Dest = CE->getArg(0); 899 const GRState *state = C.getState(); 900 901 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 902} 903 904void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 905 // void bcopy(const void *src, void *dst, size_t n); 906 evalCopyCommon(C, CE, C.getState(), 907 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 908} 909 910void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 911 // int memcmp(const void *s1, const void *s2, size_t n); 912 const Expr *Left = CE->getArg(0); 913 const Expr *Right = CE->getArg(1); 914 const Expr *Size = CE->getArg(2); 915 916 const GRState *state = C.getState(); 917 SValBuilder &svalBuilder = C.getSValBuilder(); 918 919 // See if the size argument is zero. 920 SVal sizeVal = state->getSVal(Size); 921 QualType sizeTy = Size->getType(); 922 923 const GRState *stateZeroSize, *stateNonZeroSize; 924 llvm::tie(stateZeroSize, stateNonZeroSize) = 925 assumeZero(C, state, sizeVal, sizeTy); 926 927 // If the size can be zero, the result will be 0 in that case, and we don't 928 // have to check either of the buffers. 929 if (stateZeroSize) { 930 state = stateZeroSize; 931 state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 932 C.addTransition(state); 933 } 934 935 // If the size can be nonzero, we have to check the other arguments. 936 if (stateNonZeroSize) { 937 state = stateNonZeroSize; 938 // If we know the two buffers are the same, we know the result is 0. 939 // First, get the two buffers' addresses. Another checker will have already 940 // made sure they're not undefined. 941 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left)); 942 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right)); 943 944 // See if they are the same. 945 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 946 const GRState *StSameBuf, *StNotSameBuf; 947 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 948 949 // If the two arguments might be the same buffer, we know the result is zero, 950 // and we only need to check one size. 951 if (StSameBuf) { 952 state = StSameBuf; 953 state = CheckBufferAccess(C, state, Size, Left); 954 if (state) { 955 state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 956 C.addTransition(state); 957 } 958 } 959 960 // If the two arguments might be different buffers, we have to check the 961 // size of both of them. 962 if (StNotSameBuf) { 963 state = StNotSameBuf; 964 state = CheckBufferAccess(C, state, Size, Left, Right); 965 if (state) { 966 // The return value is the comparison result, which we don't know. 967 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 968 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 969 state = state->BindExpr(CE, CmpV); 970 C.addTransition(state); 971 } 972 } 973 } 974} 975 976void CStringChecker::evalstrLength(CheckerContext &C, 977 const CallExpr *CE) const { 978 // size_t strlen(const char *s); 979 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 980} 981 982void CStringChecker::evalstrnLength(CheckerContext &C, 983 const CallExpr *CE) const { 984 // size_t strnlen(const char *s, size_t maxlen); 985 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 986} 987 988void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 989 bool IsStrnlen) const { 990 const GRState *state = C.getState(); 991 992 if (IsStrnlen) { 993 const Expr *maxlenExpr = CE->getArg(1); 994 SVal maxlenVal = state->getSVal(maxlenExpr); 995 996 const GRState *stateZeroSize, *stateNonZeroSize; 997 llvm::tie(stateZeroSize, stateNonZeroSize) = 998 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 999 1000 // If the size can be zero, the result will be 0 in that case, and we don't 1001 // have to check the string itself. 1002 if (stateZeroSize) { 1003 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1004 stateZeroSize = stateZeroSize->BindExpr(CE, zero); 1005 C.addTransition(stateZeroSize); 1006 } 1007 1008 // If the size is GUARANTEED to be zero, we're done! 1009 if (!stateNonZeroSize) 1010 return; 1011 1012 // Otherwise, record the assumption that the size is nonzero. 1013 state = stateNonZeroSize; 1014 } 1015 1016 // Check that the string argument is non-null. 1017 const Expr *Arg = CE->getArg(0); 1018 SVal ArgVal = state->getSVal(Arg); 1019 1020 state = checkNonNull(C, state, Arg, ArgVal); 1021 1022 if (!state) 1023 return; 1024 1025 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 1026 1027 // If the argument isn't a valid C string, there's no valid state to 1028 // transition to. 1029 if (strLength.isUndef()) 1030 return; 1031 1032 DefinedOrUnknownSVal result = UnknownVal(); 1033 1034 // If the check is for strnlen() then bind the return value to no more than 1035 // the maxlen value. 1036 if (IsStrnlen) { 1037 QualType cmpTy = C.getSValBuilder().getContext().IntTy; 1038 1039 // It's a little unfortunate to be getting this again, 1040 // but it's not that expensive... 1041 const Expr *maxlenExpr = CE->getArg(1); 1042 SVal maxlenVal = state->getSVal(maxlenExpr); 1043 1044 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 1045 NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); 1046 1047 if (strLengthNL && maxlenValNL) { 1048 const GRState *stateStringTooLong, *stateStringNotTooLong; 1049 1050 // Check if the strLength is greater than the maxlen. 1051 llvm::tie(stateStringTooLong, stateStringNotTooLong) = 1052 state->assume(cast<DefinedOrUnknownSVal> 1053 (C.getSValBuilder().evalBinOpNN(state, BO_GT, 1054 *strLengthNL, 1055 *maxlenValNL, 1056 cmpTy))); 1057 1058 if (stateStringTooLong && !stateStringNotTooLong) { 1059 // If the string is longer than maxlen, return maxlen. 1060 result = *maxlenValNL; 1061 } else if (stateStringNotTooLong && !stateStringTooLong) { 1062 // If the string is shorter than maxlen, return its length. 1063 result = *strLengthNL; 1064 } 1065 } 1066 1067 if (result.isUnknown()) { 1068 // If we don't have enough information for a comparison, there's 1069 // no guarantee the full string length will actually be returned. 1070 // All we know is the return value is the min of the string length 1071 // and the limit. This is better than nothing. 1072 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1073 result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 1074 NonLoc *resultNL = cast<NonLoc>(&result); 1075 1076 if (strLengthNL) { 1077 state = state->assume(cast<DefinedOrUnknownSVal> 1078 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 1079 *resultNL, 1080 *strLengthNL, 1081 cmpTy)), true); 1082 } 1083 1084 if (maxlenValNL) { 1085 state = state->assume(cast<DefinedOrUnknownSVal> 1086 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 1087 *resultNL, 1088 *maxlenValNL, 1089 cmpTy)), true); 1090 } 1091 } 1092 1093 } else { 1094 // This is a plain strlen(), not strnlen(). 1095 result = cast<DefinedOrUnknownSVal>(strLength); 1096 1097 // If we don't know the length of the string, conjure a return 1098 // value, so it can be used in constraints, at least. 1099 if (result.isUnknown()) { 1100 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1101 result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 1102 } 1103 } 1104 1105 // Bind the return value. 1106 assert(!result.isUnknown() && "Should have conjured a value by now"); 1107 state = state->BindExpr(CE, result); 1108 C.addTransition(state); 1109} 1110 1111void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 1112 // char *strcpy(char *restrict dst, const char *restrict src); 1113 evalStrcpyCommon(C, CE, 1114 /* returnEnd = */ false, 1115 /* isBounded = */ false, 1116 /* isAppending = */ false); 1117} 1118 1119void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 1120 // char *strncpy(char *restrict dst, const char *restrict src, size_t n); 1121 evalStrcpyCommon(C, CE, 1122 /* returnEnd = */ false, 1123 /* isBounded = */ true, 1124 /* isAppending = */ false); 1125} 1126 1127void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 1128 // char *stpcpy(char *restrict dst, const char *restrict src); 1129 evalStrcpyCommon(C, CE, 1130 /* returnEnd = */ true, 1131 /* isBounded = */ false, 1132 /* isAppending = */ false); 1133} 1134 1135void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { 1136 //char *strcat(char *restrict s1, const char *restrict s2); 1137 evalStrcpyCommon(C, CE, 1138 /* returnEnd = */ false, 1139 /* isBounded = */ false, 1140 /* isAppending = */ true); 1141} 1142 1143void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { 1144 //char *strncat(char *restrict s1, const char *restrict s2, size_t n); 1145 evalStrcpyCommon(C, CE, 1146 /* returnEnd = */ false, 1147 /* isBounded = */ true, 1148 /* isAppending = */ true); 1149} 1150 1151void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 1152 bool returnEnd, bool isBounded, 1153 bool isAppending) const { 1154 const GRState *state = C.getState(); 1155 1156 // Check that the destination is non-null. 1157 const Expr *Dst = CE->getArg(0); 1158 SVal DstVal = state->getSVal(Dst); 1159 1160 state = checkNonNull(C, state, Dst, DstVal); 1161 if (!state) 1162 return; 1163 1164 // Check that the source is non-null. 1165 const Expr *srcExpr = CE->getArg(1); 1166 SVal srcVal = state->getSVal(srcExpr); 1167 state = checkNonNull(C, state, srcExpr, srcVal); 1168 if (!state) 1169 return; 1170 1171 // Get the string length of the source. 1172 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 1173 1174 // If the source isn't a valid C string, give up. 1175 if (strLength.isUndef()) 1176 return; 1177 1178 SValBuilder &svalBuilder = C.getSValBuilder(); 1179 QualType cmpTy = svalBuilder.getConditionType(); 1180 1181 SVal amountCopied = UnknownVal(); 1182 1183 // If the function is strncpy, strncat, etc... it is bounded. 1184 if (isBounded) { 1185 // Get the max number of characters to copy. 1186 const Expr *lenExpr = CE->getArg(2); 1187 SVal lenVal = state->getSVal(lenExpr); 1188 1189 // Protect against misdeclared strncpy(). 1190 lenVal = svalBuilder.evalCast(lenVal, 1191 svalBuilder.getContext().getSizeType(), 1192 lenExpr->getType()); 1193 1194 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 1195 NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal); 1196 1197 // If we know both values, we might be able to figure out how much 1198 // we're copying. 1199 if (strLengthNL && lenValNL) { 1200 const GRState *stateSourceTooLong, *stateSourceNotTooLong; 1201 1202 // Check if the max number to copy is less than the length of the src. 1203 llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = 1204 state->assume(cast<DefinedOrUnknownSVal> 1205 (svalBuilder.evalBinOpNN(state, BO_GT, *strLengthNL, 1206 *lenValNL, cmpTy))); 1207 1208 if (stateSourceTooLong && !stateSourceNotTooLong) { 1209 // Max number to copy is less than the length of the src, so the actual 1210 // strLength copied is the max number arg. 1211 state = stateSourceTooLong; 1212 amountCopied = lenVal; 1213 1214 } else if (!stateSourceTooLong && stateSourceNotTooLong) { 1215 // The source buffer entirely fits in the bound. 1216 state = stateSourceNotTooLong; 1217 amountCopied = strLength; 1218 } 1219 } 1220 1221 // If we couldn't pin down the copy length, at least bound it. 1222 if (amountCopied.isUnknown()) { 1223 // Try to get a "hypothetical" string length symbol, which we can later 1224 // set as a real value if that turns out to be the case. 1225 amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); 1226 assert(!amountCopied.isUndef()); 1227 1228 if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) { 1229 if (lenValNL) { 1230 // amountCopied <= lenVal 1231 SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, 1232 *amountCopiedNL, 1233 *lenValNL, 1234 cmpTy); 1235 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound), 1236 true); 1237 if (!state) 1238 return; 1239 } 1240 1241 if (strLengthNL) { 1242 // amountCopied <= strlen(source) 1243 SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, 1244 *amountCopiedNL, 1245 *strLengthNL, 1246 cmpTy); 1247 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc), 1248 true); 1249 if (!state) 1250 return; 1251 } 1252 } 1253 } 1254 1255 } else { 1256 // The function isn't bounded. The amount copied should match the length 1257 // of the source buffer. 1258 amountCopied = strLength; 1259 } 1260 1261 assert(state); 1262 1263 // This represents the number of characters copied into the destination 1264 // buffer. (It may not actually be the strlen if the destination buffer 1265 // is not terminated.) 1266 SVal finalStrLength = UnknownVal(); 1267 1268 // If this is an appending function (strcat, strncat...) then set the 1269 // string length to strlen(src) + strlen(dst) since the buffer will 1270 // ultimately contain both. 1271 if (isAppending) { 1272 // Get the string length of the destination, or give up. 1273 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 1274 if (dstStrLength.isUndef()) 1275 return; 1276 1277 QualType sizeTy = svalBuilder.getContext().getSizeType(); 1278 1279 NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied); 1280 NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength); 1281 1282 // If we know both string lengths, we might know the final string length. 1283 if (srcStrLengthNL && dstStrLengthNL) { 1284 // Make sure the two lengths together don't overflow a size_t. 1285 state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); 1286 if (!state) 1287 return; 1288 1289 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 1290 *dstStrLengthNL, sizeTy); 1291 } 1292 1293 // If we couldn't get a single value for the final string length, 1294 // we can at least bound it by the individual lengths. 1295 if (finalStrLength.isUnknown()) { 1296 // Try to get a "hypothetical" string length symbol, which we can later 1297 // set as a real value if that turns out to be the case. 1298 finalStrLength = getCStringLength(C, state, CE, DstVal, true); 1299 assert(!finalStrLength.isUndef()); 1300 1301 if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) { 1302 if (srcStrLengthNL) { 1303 // finalStrLength >= srcStrLength 1304 SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1305 *finalStrLengthNL, 1306 *srcStrLengthNL, 1307 cmpTy); 1308 state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult), 1309 true); 1310 if (!state) 1311 return; 1312 } 1313 1314 if (dstStrLengthNL) { 1315 // finalStrLength >= dstStrLength 1316 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1317 *finalStrLengthNL, 1318 *dstStrLengthNL, 1319 cmpTy); 1320 state = state->assume(cast<DefinedOrUnknownSVal>(destInResult), 1321 true); 1322 if (!state) 1323 return; 1324 } 1325 } 1326 } 1327 1328 } else { 1329 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and 1330 // the final string length will match the input string length. 1331 finalStrLength = amountCopied; 1332 } 1333 1334 // The final result of the function will either be a pointer past the last 1335 // copied element, or a pointer to the start of the destination buffer. 1336 SVal Result = (returnEnd ? UnknownVal() : DstVal); 1337 1338 assert(state); 1339 1340 // If the destination is a MemRegion, try to check for a buffer overflow and 1341 // record the new string length. 1342 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 1343 // If the final length is known, we can check for an overflow. 1344 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) { 1345 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 1346 *knownStrLength, 1347 Dst->getType()); 1348 1349 state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true); 1350 if (!state) 1351 return; 1352 1353 // If this is a stpcpy-style copy, the last element is the return value. 1354 if (returnEnd) 1355 Result = lastElement; 1356 } 1357 1358 // Invalidate the destination. This must happen before we set the C string 1359 // length because invalidation will clear the length. 1360 // FIXME: Even if we can't perfectly model the copy, we should see if we 1361 // can use LazyCompoundVals to copy the source values into the destination. 1362 // This would probably remove any existing bindings past the end of the 1363 // string, but that's still an improvement over blank invalidation. 1364 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 1365 1366 // Set the C string length of the destination. 1367 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); 1368 } 1369 1370 assert(state); 1371 1372 // If this is a stpcpy-style copy, but we were unable to check for a buffer 1373 // overflow, we still need a result. Conjure a return value. 1374 if (returnEnd && Result.isUnknown()) { 1375 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1376 Result = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 1377 } 1378 1379 // Set the return value. 1380 state = state->BindExpr(CE, Result); 1381 C.addTransition(state); 1382} 1383 1384void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { 1385 //int strcmp(const char *restrict s1, const char *restrict s2); 1386 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); 1387} 1388 1389void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { 1390 //int strncmp(const char *restrict s1, const char *restrict s2, size_t n); 1391 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); 1392} 1393 1394void CStringChecker::evalStrcasecmp(CheckerContext &C, 1395 const CallExpr *CE) const { 1396 //int strcasecmp(const char *restrict s1, const char *restrict s2); 1397 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); 1398} 1399 1400void CStringChecker::evalStrncasecmp(CheckerContext &C, 1401 const CallExpr *CE) const { 1402 //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n); 1403 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); 1404} 1405 1406void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 1407 bool isBounded, bool ignoreCase) const { 1408 const GRState *state = C.getState(); 1409 1410 // Check that the first string is non-null 1411 const Expr *s1 = CE->getArg(0); 1412 SVal s1Val = state->getSVal(s1); 1413 state = checkNonNull(C, state, s1, s1Val); 1414 if (!state) 1415 return; 1416 1417 // Check that the second string is non-null. 1418 const Expr *s2 = CE->getArg(1); 1419 SVal s2Val = state->getSVal(s2); 1420 state = checkNonNull(C, state, s2, s2Val); 1421 if (!state) 1422 return; 1423 1424 // Get the string length of the first string or give up. 1425 SVal s1Length = getCStringLength(C, state, s1, s1Val); 1426 if (s1Length.isUndef()) 1427 return; 1428 1429 // Get the string length of the second string or give up. 1430 SVal s2Length = getCStringLength(C, state, s2, s2Val); 1431 if (s2Length.isUndef()) 1432 return; 1433 1434 // Get the string literal of the first string. 1435 const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); 1436 if (!s1StrLiteral) 1437 return; 1438 llvm::StringRef s1StrRef = s1StrLiteral->getString(); 1439 1440 // Get the string literal of the second string. 1441 const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); 1442 if (!s2StrLiteral) 1443 return; 1444 llvm::StringRef s2StrRef = s2StrLiteral->getString(); 1445 1446 int result; 1447 if (isBounded) { 1448 // Get the max number of characters to compare. 1449 const Expr *lenExpr = CE->getArg(2); 1450 SVal lenVal = state->getSVal(lenExpr); 1451 1452 // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt 1453 // then give up, otherwise get the value and use it as the bounds. 1454 nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal); 1455 if (!CI) 1456 return; 1457 llvm::APSInt lenInt(CI->getValue()); 1458 1459 // Create substrings of each to compare the prefix. 1460 s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue()); 1461 s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue()); 1462 } 1463 1464 if (ignoreCase) { 1465 // Compare string 1 to string 2 the same way strcasecmp() does. 1466 result = s1StrRef.compare_lower(s2StrRef); 1467 } else { 1468 // Compare string 1 to string 2 the same way strcmp() does. 1469 result = s1StrRef.compare(s2StrRef); 1470 } 1471 1472 // Build the SVal of the comparison to bind the return value. 1473 SValBuilder &svalBuilder = C.getSValBuilder(); 1474 QualType intTy = svalBuilder.getContext().IntTy; 1475 SVal resultVal = svalBuilder.makeIntVal(result, intTy); 1476 1477 // Bind the return value of the expression. 1478 // Set the return value. 1479 state = state->BindExpr(CE, resultVal); 1480 C.addTransition(state); 1481} 1482 1483//===----------------------------------------------------------------------===// 1484// The driver method, and other Checker callbacks. 1485//===----------------------------------------------------------------------===// 1486 1487bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 1488 // Get the callee. All the functions we care about are C functions 1489 // with simple identifiers. 1490 const GRState *state = C.getState(); 1491 const Expr *Callee = CE->getCallee(); 1492 const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl(); 1493 1494 if (!FD) 1495 return false; 1496 1497 // Get the name of the callee. If it's a builtin, strip off the prefix. 1498 IdentifierInfo *II = FD->getIdentifier(); 1499 if (!II) // if no identifier, not a simple C function 1500 return false; 1501 llvm::StringRef Name = II->getName(); 1502 if (Name.startswith("__builtin_")) 1503 Name = Name.substr(10); 1504 1505 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 1506 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy) 1507 .Cases("mempcpy", "__mempcpy_chk", &CStringChecker::evalMempcpy) 1508 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) 1509 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) 1510 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) 1511 //.Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy) 1512 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) 1513 .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat) 1514 .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat) 1515 .Case("strlen", &CStringChecker::evalstrLength) 1516 .Case("strnlen", &CStringChecker::evalstrnLength) 1517 .Case("strcmp", &CStringChecker::evalStrcmp) 1518 .Case("strncmp", &CStringChecker::evalStrncmp) 1519 .Case("strcasecmp", &CStringChecker::evalStrcasecmp) 1520 .Case("strncasecmp", &CStringChecker::evalStrncasecmp) 1521 .Case("bcopy", &CStringChecker::evalBcopy) 1522 .Default(NULL); 1523 1524 // If the callee isn't a string function, let another checker handle it. 1525 if (!evalFunction) 1526 return false; 1527 1528 // Check and evaluate the call. 1529 (this->*evalFunction)(C, CE); 1530 return true; 1531} 1532 1533void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 1534 // Record string length for char a[] = "abc"; 1535 const GRState *state = C.getState(); 1536 1537 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 1538 I != E; ++I) { 1539 const VarDecl *D = dyn_cast<VarDecl>(*I); 1540 if (!D) 1541 continue; 1542 1543 // FIXME: Handle array fields of structs. 1544 if (!D->getType()->isArrayType()) 1545 continue; 1546 1547 const Expr *Init = D->getInit(); 1548 if (!Init) 1549 continue; 1550 if (!isa<StringLiteral>(Init)) 1551 continue; 1552 1553 Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext()); 1554 const MemRegion *MR = VarLoc.getAsRegion(); 1555 if (!MR) 1556 continue; 1557 1558 SVal StrVal = state->getSVal(Init); 1559 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 1560 DefinedOrUnknownSVal strLength 1561 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 1562 1563 state = state->set<CStringLength>(MR, strLength); 1564 } 1565 1566 C.addTransition(state); 1567} 1568 1569bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const { 1570 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1571 return !Entries.isEmpty(); 1572} 1573 1574const GRState * 1575CStringChecker::checkRegionChanges(const GRState *state, 1576 const StoreManager::InvalidatedSymbols *, 1577 const MemRegion * const *Begin, 1578 const MemRegion * const *End) const { 1579 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1580 if (Entries.isEmpty()) 1581 return state; 1582 1583 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 1584 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 1585 1586 // First build sets for the changed regions and their super-regions. 1587 for ( ; Begin != End; ++Begin) { 1588 const MemRegion *MR = *Begin; 1589 Invalidated.insert(MR); 1590 1591 SuperRegions.insert(MR); 1592 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 1593 MR = SR->getSuperRegion(); 1594 SuperRegions.insert(MR); 1595 } 1596 } 1597 1598 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1599 1600 // Then loop over the entries in the current state. 1601 for (CStringLength::EntryMap::iterator I = Entries.begin(), 1602 E = Entries.end(); I != E; ++I) { 1603 const MemRegion *MR = I.getKey(); 1604 1605 // Is this entry for a super-region of a changed region? 1606 if (SuperRegions.count(MR)) { 1607 Entries = F.remove(Entries, MR); 1608 continue; 1609 } 1610 1611 // Is this entry for a sub-region of a changed region? 1612 const MemRegion *Super = MR; 1613 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1614 Super = SR->getSuperRegion(); 1615 if (Invalidated.count(Super)) { 1616 Entries = F.remove(Entries, MR); 1617 break; 1618 } 1619 } 1620 } 1621 1622 return state->set<CStringLength>(Entries); 1623} 1624 1625void CStringChecker::checkLiveSymbols(const GRState *state, 1626 SymbolReaper &SR) const { 1627 // Mark all symbols in our string length map as valid. 1628 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1629 1630 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1631 I != E; ++I) { 1632 SVal Len = I.getData(); 1633 1634 for (SVal::symbol_iterator si = Len.symbol_begin(), se = Len.symbol_end(); 1635 si != se; ++si) 1636 SR.markInUse(*si); 1637 } 1638} 1639 1640void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 1641 CheckerContext &C) const { 1642 if (!SR.hasDeadSymbols()) 1643 return; 1644 1645 const GRState *state = C.getState(); 1646 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1647 if (Entries.isEmpty()) 1648 return; 1649 1650 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1651 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1652 I != E; ++I) { 1653 SVal Len = I.getData(); 1654 if (SymbolRef Sym = Len.getAsSymbol()) { 1655 if (SR.isDead(Sym)) 1656 Entries = F.remove(Entries, I.getKey()); 1657 } 1658 } 1659 1660 state = state->set<CStringLength>(Entries); 1661 C.generateNode(state); 1662} 1663 1664void ento::registerCStringChecker(CheckerManager &mgr) { 1665 mgr.registerChecker<CStringChecker>(); 1666} 1667