CStringChecker.cpp revision 1e022415b9a66c84a9005b4e0bb2d4becb76d189
1//= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This defines CStringChecker, which is an assortment of checks on calls 11// to functions in <string.h>. 12// 13//===----------------------------------------------------------------------===// 14 15#include "ClangSACheckers.h" 16#include "clang/StaticAnalyzer/Core/Checker.h" 17#include "clang/StaticAnalyzer/Core/CheckerManager.h" 18#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 19#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 20#include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h" 21#include "llvm/ADT/StringSwitch.h" 22 23using namespace clang; 24using namespace ento; 25 26namespace { 27class CStringChecker : public Checker< eval::Call, 28 check::PreStmt<DeclStmt>, 29 check::LiveSymbols, 30 check::DeadSymbols, 31 check::RegionChanges 32 > { 33 mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, BT_BoundsWrite, 34 BT_Overlap, BT_NotCString, 35 BT_AdditionOverflow; 36public: 37 static void *getTag() { static int tag; return &tag; } 38 39 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 40 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 41 void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const; 42 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 43 bool wantsRegionChangeUpdate(const GRState *state) const; 44 45 const GRState *checkRegionChanges(const GRState *state, 46 const StoreManager::InvalidatedSymbols *, 47 const MemRegion * const *Begin, 48 const MemRegion * const *End) const; 49 50 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 51 const CallExpr *) const; 52 53 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 54 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 55 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 56 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 57 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 58 const GRState *state, 59 const Expr *Size, const Expr *Source, const Expr *Dest, 60 bool Restricted = false, 61 bool IsMempcpy = false) const; 62 63 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 64 65 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 66 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 67 void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 68 bool IsStrnlen = false) const; 69 70 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 71 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 72 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 73 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd, 74 bool isBounded, bool isAppending) const; 75 76 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 77 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 78 79 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 80 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 81 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 82 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 83 void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 84 bool isBounded = false, bool ignoreCase = false) const; 85 86 // Utility methods 87 std::pair<const GRState*, const GRState*> 88 static assumeZero(CheckerContext &C, 89 const GRState *state, SVal V, QualType Ty); 90 91 static const GRState *setCStringLength(const GRState *state, 92 const MemRegion *MR, SVal strLength); 93 static SVal getCStringLengthForRegion(CheckerContext &C, 94 const GRState *&state, 95 const Expr *Ex, const MemRegion *MR, 96 bool hypothetical); 97 SVal getCStringLength(CheckerContext &C, const GRState *&state, 98 const Expr *Ex, SVal Buf, 99 bool hypothetical = false) const; 100 101 const StringLiteral *getCStringLiteral(CheckerContext &C, 102 const GRState *&state, 103 const Expr *expr, 104 SVal val) const; 105 106 static const GRState *InvalidateBuffer(CheckerContext &C, 107 const GRState *state, 108 const Expr *Ex, SVal V); 109 110 static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 111 const MemRegion *MR); 112 113 // Re-usable checks 114 const GRState *checkNonNull(CheckerContext &C, const GRState *state, 115 const Expr *S, SVal l) const; 116 const GRState *CheckLocation(CheckerContext &C, const GRState *state, 117 const Expr *S, SVal l, 118 bool IsDestination = false) const; 119 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 120 const Expr *Size, 121 const Expr *FirstBuf, 122 const Expr *SecondBuf = NULL, 123 bool FirstIsDestination = false) const; 124 const GRState *CheckOverlap(CheckerContext &C, const GRState *state, 125 const Expr *Size, const Expr *First, 126 const Expr *Second) const; 127 void emitOverlapBug(CheckerContext &C, const GRState *state, 128 const Stmt *First, const Stmt *Second) const; 129 const GRState *checkAdditionOverflow(CheckerContext &C, const GRState *state, 130 NonLoc left, NonLoc right) const; 131}; 132 133class CStringLength { 134public: 135 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 136}; 137} //end anonymous namespace 138 139namespace clang { 140namespace ento { 141 template <> 142 struct GRStateTrait<CStringLength> 143 : public GRStatePartialTrait<CStringLength::EntryMap> { 144 static void *GDMIndex() { return CStringChecker::getTag(); } 145 }; 146} 147} 148 149//===----------------------------------------------------------------------===// 150// Individual checks and utility methods. 151//===----------------------------------------------------------------------===// 152 153std::pair<const GRState*, const GRState*> 154CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V, 155 QualType Ty) { 156 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 157 if (!val) 158 return std::pair<const GRState*, const GRState *>(state, state); 159 160 SValBuilder &svalBuilder = C.getSValBuilder(); 161 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 162 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 163} 164 165const GRState *CStringChecker::checkNonNull(CheckerContext &C, 166 const GRState *state, 167 const Expr *S, SVal l) const { 168 // If a previous check has failed, propagate the failure. 169 if (!state) 170 return NULL; 171 172 const GRState *stateNull, *stateNonNull; 173 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 174 175 if (stateNull && !stateNonNull) { 176 ExplodedNode *N = C.generateSink(stateNull); 177 if (!N) 178 return NULL; 179 180 if (!BT_Null) 181 BT_Null.reset(new BuiltinBug("API", 182 "Null pointer argument in call to byte string function")); 183 184 // Generate a report for this bug. 185 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 186 EnhancedBugReport *report = new EnhancedBugReport(*BT, 187 BT->getDescription(), N); 188 189 report->addRange(S->getSourceRange()); 190 report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); 191 C.EmitReport(report); 192 return NULL; 193 } 194 195 // From here on, assume that the value is non-null. 196 assert(stateNonNull); 197 return stateNonNull; 198} 199 200// FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 201const GRState *CStringChecker::CheckLocation(CheckerContext &C, 202 const GRState *state, 203 const Expr *S, SVal l, 204 bool IsDestination) const { 205 // If a previous check has failed, propagate the failure. 206 if (!state) 207 return NULL; 208 209 // Check for out of bound array element access. 210 const MemRegion *R = l.getAsRegion(); 211 if (!R) 212 return state; 213 214 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 215 if (!ER) 216 return state; 217 218 assert(ER->getValueType() == C.getASTContext().CharTy && 219 "CheckLocation should only be called with char* ElementRegions"); 220 221 // Get the size of the array. 222 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 223 SValBuilder &svalBuilder = C.getSValBuilder(); 224 SVal Extent = 225 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 226 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 227 228 // Get the index of the accessed element. 229 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 230 231 const GRState *StInBound = state->assumeInBound(Idx, Size, true); 232 const GRState *StOutBound = state->assumeInBound(Idx, Size, false); 233 if (StOutBound && !StInBound) { 234 ExplodedNode *N = C.generateSink(StOutBound); 235 if (!N) 236 return NULL; 237 238 BuiltinBug *BT; 239 if (IsDestination) { 240 if (!BT_BoundsWrite) { 241 BT_BoundsWrite.reset(new BuiltinBug("Out-of-bound array access", 242 "Byte string function overflows destination buffer")); 243 } 244 BT = static_cast<BuiltinBug*>(BT_BoundsWrite.get()); 245 } else { 246 if (!BT_Bounds) { 247 BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", 248 "Byte string function accesses out-of-bound array element")); 249 } 250 BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 251 } 252 253 // FIXME: It would be nice to eventually make this diagnostic more clear, 254 // e.g., by referencing the original declaration or by saying *why* this 255 // reference is outside the range. 256 257 // Generate a report for this bug. 258 RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N); 259 260 report->addRange(S->getSourceRange()); 261 C.EmitReport(report); 262 return NULL; 263 } 264 265 // Array bound check succeeded. From this point forward the array bound 266 // should always succeed. 267 return StInBound; 268} 269 270const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, 271 const GRState *state, 272 const Expr *Size, 273 const Expr *FirstBuf, 274 const Expr *SecondBuf, 275 bool FirstIsDestination) const { 276 // If a previous check has failed, propagate the failure. 277 if (!state) 278 return NULL; 279 280 SValBuilder &svalBuilder = C.getSValBuilder(); 281 ASTContext &Ctx = svalBuilder.getContext(); 282 283 QualType sizeTy = Size->getType(); 284 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 285 286 // Check that the first buffer is non-null. 287 SVal BufVal = state->getSVal(FirstBuf); 288 state = checkNonNull(C, state, FirstBuf, BufVal); 289 if (!state) 290 return NULL; 291 292 // Get the access length and make sure it is known. 293 SVal LengthVal = state->getSVal(Size); 294 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 295 if (!Length) 296 return state; 297 298 // Compute the offset of the last element to be accessed: size-1. 299 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 300 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 301 *Length, One, sizeTy)); 302 303 // Check that the first buffer is sufficiently long. 304 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 305 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 306 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 307 LastOffset, PtrTy); 308 state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination); 309 310 // If the buffer isn't large enough, abort. 311 if (!state) 312 return NULL; 313 } 314 315 // If there's a second buffer, check it as well. 316 if (SecondBuf) { 317 BufVal = state->getSVal(SecondBuf); 318 state = checkNonNull(C, state, SecondBuf, BufVal); 319 if (!state) 320 return NULL; 321 322 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 323 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 324 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 325 LastOffset, PtrTy); 326 state = CheckLocation(C, state, SecondBuf, BufEnd); 327 } 328 } 329 330 // Large enough or not, return this state! 331 return state; 332} 333 334const GRState *CStringChecker::CheckOverlap(CheckerContext &C, 335 const GRState *state, 336 const Expr *Size, 337 const Expr *First, 338 const Expr *Second) const { 339 // Do a simple check for overlap: if the two arguments are from the same 340 // buffer, see if the end of the first is greater than the start of the second 341 // or vice versa. 342 343 // If a previous check has failed, propagate the failure. 344 if (!state) 345 return NULL; 346 347 const GRState *stateTrue, *stateFalse; 348 349 // Get the buffer values and make sure they're known locations. 350 SVal firstVal = state->getSVal(First); 351 SVal secondVal = state->getSVal(Second); 352 353 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 354 if (!firstLoc) 355 return state; 356 357 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 358 if (!secondLoc) 359 return state; 360 361 // Are the two values the same? 362 SValBuilder &svalBuilder = C.getSValBuilder(); 363 llvm::tie(stateTrue, stateFalse) = 364 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 365 366 if (stateTrue && !stateFalse) { 367 // If the values are known to be equal, that's automatically an overlap. 368 emitOverlapBug(C, stateTrue, First, Second); 369 return NULL; 370 } 371 372 // assume the two expressions are not equal. 373 assert(stateFalse); 374 state = stateFalse; 375 376 // Which value comes first? 377 QualType cmpTy = svalBuilder.getComparisonType(); 378 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 379 *firstLoc, *secondLoc, cmpTy); 380 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 381 if (!reverseTest) 382 return state; 383 384 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 385 if (stateTrue) { 386 if (stateFalse) { 387 // If we don't know which one comes first, we can't perform this test. 388 return state; 389 } else { 390 // Switch the values so that firstVal is before secondVal. 391 Loc *tmpLoc = firstLoc; 392 firstLoc = secondLoc; 393 secondLoc = tmpLoc; 394 395 // Switch the Exprs as well, so that they still correspond. 396 const Expr *tmpExpr = First; 397 First = Second; 398 Second = tmpExpr; 399 } 400 } 401 402 // Get the length, and make sure it too is known. 403 SVal LengthVal = state->getSVal(Size); 404 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 405 if (!Length) 406 return state; 407 408 // Convert the first buffer's start address to char*. 409 // Bail out if the cast fails. 410 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 411 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 412 First->getType()); 413 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 414 if (!FirstStartLoc) 415 return state; 416 417 // Compute the end of the first buffer. Bail out if THAT fails. 418 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 419 *FirstStartLoc, *Length, CharPtrTy); 420 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 421 if (!FirstEndLoc) 422 return state; 423 424 // Is the end of the first buffer past the start of the second buffer? 425 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 426 *FirstEndLoc, *secondLoc, cmpTy); 427 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 428 if (!OverlapTest) 429 return state; 430 431 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 432 433 if (stateTrue && !stateFalse) { 434 // Overlap! 435 emitOverlapBug(C, stateTrue, First, Second); 436 return NULL; 437 } 438 439 // assume the two expressions don't overlap. 440 assert(stateFalse); 441 return stateFalse; 442} 443 444void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, 445 const Stmt *First, const Stmt *Second) const { 446 ExplodedNode *N = C.generateSink(state); 447 if (!N) 448 return; 449 450 if (!BT_Overlap) 451 BT_Overlap.reset(new BugType("Unix API", "Improper arguments")); 452 453 // Generate a report for this bug. 454 RangedBugReport *report = 455 new RangedBugReport(*BT_Overlap, 456 "Arguments must not be overlapping buffers", N); 457 report->addRange(First->getSourceRange()); 458 report->addRange(Second->getSourceRange()); 459 460 C.EmitReport(report); 461} 462 463const GRState *CStringChecker::checkAdditionOverflow(CheckerContext &C, 464 const GRState *state, 465 NonLoc left, 466 NonLoc right) const { 467 // If a previous check has failed, propagate the failure. 468 if (!state) 469 return NULL; 470 471 SValBuilder &svalBuilder = C.getSValBuilder(); 472 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 473 474 QualType sizeTy = svalBuilder.getContext().getSizeType(); 475 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 476 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 477 478 SVal maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 479 sizeTy); 480 481 if (maxMinusRight.isUnknownOrUndef()) { 482 // Try switching the operands. (The order of these two assignments is 483 // important!) 484 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 485 sizeTy); 486 left = right; 487 } 488 489 if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) { 490 QualType cmpTy = svalBuilder.getConditionType(); 491 // If left > max - right, we have an overflow. 492 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 493 *maxMinusRightNL, cmpTy); 494 495 const GRState *stateOverflow, *stateOkay; 496 llvm::tie(stateOverflow, stateOkay) = 497 state->assume(cast<DefinedOrUnknownSVal>(willOverflow)); 498 499 if (stateOverflow && !stateOkay) { 500 // We have an overflow. Emit a bug report. 501 ExplodedNode *N = C.generateSink(stateOverflow); 502 if (!N) 503 return NULL; 504 505 if (!BT_AdditionOverflow) 506 BT_AdditionOverflow.reset(new BuiltinBug("API", 507 "Sum of expressions causes overflow")); 508 509 llvm::SmallString<120> buf; 510 llvm::raw_svector_ostream os(buf); 511 // This isn't a great error message, but this should never occur in real 512 // code anyway -- you'd have to create a buffer longer than a size_t can 513 // represent, which is sort of a contradiction. 514 os << "This expression will create a string whose length is too big to " 515 << "be represented as a size_t"; 516 517 // Generate a report for this bug. 518 BugReport *report = new BugReport(*BT_AdditionOverflow, os.str(), N); 519 C.EmitReport(report); 520 521 return NULL; 522 } 523 524 // From now on, assume an overflow didn't occur. 525 assert(stateOkay); 526 state = stateOkay; 527 } 528 529 return state; 530} 531 532const GRState *CStringChecker::setCStringLength(const GRState *state, 533 const MemRegion *MR, 534 SVal strLength) { 535 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 536 537 MR = MR->StripCasts(); 538 539 switch (MR->getKind()) { 540 case MemRegion::StringRegionKind: 541 // FIXME: This can happen if we strcpy() into a string region. This is 542 // undefined [C99 6.4.5p6], but we should still warn about it. 543 return state; 544 545 case MemRegion::SymbolicRegionKind: 546 case MemRegion::AllocaRegionKind: 547 case MemRegion::VarRegionKind: 548 case MemRegion::FieldRegionKind: 549 case MemRegion::ObjCIvarRegionKind: 550 // These are the types we can currently track string lengths for. 551 break; 552 553 case MemRegion::ElementRegionKind: 554 // FIXME: Handle element regions by upper-bounding the parent region's 555 // string length. 556 return state; 557 558 default: 559 // Other regions (mostly non-data) can't have a reliable C string length. 560 // For now, just ignore the change. 561 // FIXME: These are rare but not impossible. We should output some kind of 562 // warning for things like strcpy((char[]){'a', 0}, "b"); 563 return state; 564 } 565 566 if (strLength.isUnknown()) 567 return state->remove<CStringLength>(MR); 568 569 return state->set<CStringLength>(MR, strLength); 570} 571 572SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 573 const GRState *&state, 574 const Expr *Ex, 575 const MemRegion *MR, 576 bool hypothetical) { 577 if (!hypothetical) { 578 // If there's a recorded length, go ahead and return it. 579 const SVal *Recorded = state->get<CStringLength>(MR); 580 if (Recorded) 581 return *Recorded; 582 } 583 584 // Otherwise, get a new symbol and update the state. 585 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 586 SValBuilder &svalBuilder = C.getSValBuilder(); 587 QualType sizeTy = svalBuilder.getContext().getSizeType(); 588 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 589 MR, Ex, sizeTy, Count); 590 591 if (!hypothetical) 592 state = state->set<CStringLength>(MR, strLength); 593 594 return strLength; 595} 596 597SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, 598 const Expr *Ex, SVal Buf, 599 bool hypothetical) const { 600 const MemRegion *MR = Buf.getAsRegion(); 601 if (!MR) { 602 // If we can't get a region, see if it's something we /know/ isn't a 603 // C string. In the context of locations, the only time we can issue such 604 // a warning is for labels. 605 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 606 if (ExplodedNode *N = C.generateNode(state)) { 607 if (!BT_NotCString) 608 BT_NotCString.reset(new BuiltinBug("API", 609 "Argument is not a null-terminated string.")); 610 611 llvm::SmallString<120> buf; 612 llvm::raw_svector_ostream os(buf); 613 os << "Argument to byte string function is the address of the label '" 614 << Label->getLabel()->getName() 615 << "', which is not a null-terminated string"; 616 617 // Generate a report for this bug. 618 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 619 os.str(), N); 620 621 report->addRange(Ex->getSourceRange()); 622 C.EmitReport(report); 623 } 624 625 return UndefinedVal(); 626 } 627 628 // If it's not a region and not a label, give up. 629 return UnknownVal(); 630 } 631 632 // If we have a region, strip casts from it and see if we can figure out 633 // its length. For anything we can't figure out, just return UnknownVal. 634 MR = MR->StripCasts(); 635 636 switch (MR->getKind()) { 637 case MemRegion::StringRegionKind: { 638 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 639 // so we can assume that the byte length is the correct C string length. 640 SValBuilder &svalBuilder = C.getSValBuilder(); 641 QualType sizeTy = svalBuilder.getContext().getSizeType(); 642 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 643 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 644 } 645 case MemRegion::SymbolicRegionKind: 646 case MemRegion::AllocaRegionKind: 647 case MemRegion::VarRegionKind: 648 case MemRegion::FieldRegionKind: 649 case MemRegion::ObjCIvarRegionKind: 650 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 651 case MemRegion::CompoundLiteralRegionKind: 652 // FIXME: Can we track this? Is it necessary? 653 return UnknownVal(); 654 case MemRegion::ElementRegionKind: 655 // FIXME: How can we handle this? It's not good enough to subtract the 656 // offset from the base string length; consider "123\x00567" and &a[5]. 657 return UnknownVal(); 658 default: 659 // Other regions (mostly non-data) can't have a reliable C string length. 660 // In this case, an error is emitted and UndefinedVal is returned. 661 // The caller should always be prepared to handle this case. 662 if (ExplodedNode *N = C.generateNode(state)) { 663 if (!BT_NotCString) 664 BT_NotCString.reset(new BuiltinBug("API", 665 "Argument is not a null-terminated string.")); 666 667 llvm::SmallString<120> buf; 668 llvm::raw_svector_ostream os(buf); 669 670 os << "Argument to byte string function is "; 671 672 if (SummarizeRegion(os, C.getASTContext(), MR)) 673 os << ", which is not a null-terminated string"; 674 else 675 os << "not a null-terminated string"; 676 677 // Generate a report for this bug. 678 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 679 os.str(), N); 680 681 report->addRange(Ex->getSourceRange()); 682 C.EmitReport(report); 683 } 684 685 return UndefinedVal(); 686 } 687} 688 689const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 690 const GRState *&state, const Expr *expr, SVal val) const { 691 692 // Get the memory region pointed to by the val. 693 const MemRegion *bufRegion = val.getAsRegion(); 694 if (!bufRegion) 695 return NULL; 696 697 // Strip casts off the memory region. 698 bufRegion = bufRegion->StripCasts(); 699 700 // Cast the memory region to a string region. 701 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 702 if (!strRegion) 703 return NULL; 704 705 // Return the actual string in the string region. 706 return strRegion->getStringLiteral(); 707} 708 709const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, 710 const GRState *state, 711 const Expr *E, SVal V) { 712 Loc *L = dyn_cast<Loc>(&V); 713 if (!L) 714 return state; 715 716 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 717 // some assumptions about the value that CFRefCount can't. Even so, it should 718 // probably be refactored. 719 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 720 const MemRegion *R = MR->getRegion()->StripCasts(); 721 722 // Are we dealing with an ElementRegion? If so, we should be invalidating 723 // the super-region. 724 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 725 R = ER->getSuperRegion(); 726 // FIXME: What about layers of ElementRegions? 727 } 728 729 // Invalidate this region. 730 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 731 return state->invalidateRegion(R, E, Count, NULL); 732 } 733 734 // If we have a non-region value by chance, just remove the binding. 735 // FIXME: is this necessary or correct? This handles the non-Region 736 // cases. Is it ever valid to store to these? 737 return state->unbindLoc(*L); 738} 739 740bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 741 const MemRegion *MR) { 742 const TypedRegion *TR = dyn_cast<TypedRegion>(MR); 743 if (!TR) 744 return false; 745 746 switch (TR->getKind()) { 747 case MemRegion::FunctionTextRegionKind: { 748 const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl(); 749 if (FD) 750 os << "the address of the function '" << FD << "'"; 751 else 752 os << "the address of a function"; 753 return true; 754 } 755 case MemRegion::BlockTextRegionKind: 756 os << "block text"; 757 return true; 758 case MemRegion::BlockDataRegionKind: 759 os << "a block"; 760 return true; 761 case MemRegion::CXXThisRegionKind: 762 case MemRegion::CXXTempObjectRegionKind: 763 os << "a C++ temp object of type " << TR->getValueType().getAsString(); 764 return true; 765 case MemRegion::VarRegionKind: 766 os << "a variable of type" << TR->getValueType().getAsString(); 767 return true; 768 case MemRegion::FieldRegionKind: 769 os << "a field of type " << TR->getValueType().getAsString(); 770 return true; 771 case MemRegion::ObjCIvarRegionKind: 772 os << "an instance variable of type " << TR->getValueType().getAsString(); 773 return true; 774 default: 775 return false; 776 } 777} 778 779//===----------------------------------------------------------------------===// 780// evaluation of individual function calls. 781//===----------------------------------------------------------------------===// 782 783void CStringChecker::evalCopyCommon(CheckerContext &C, 784 const CallExpr *CE, 785 const GRState *state, 786 const Expr *Size, const Expr *Dest, 787 const Expr *Source, bool Restricted, 788 bool IsMempcpy) const { 789 // See if the size argument is zero. 790 SVal sizeVal = state->getSVal(Size); 791 QualType sizeTy = Size->getType(); 792 793 const GRState *stateZeroSize, *stateNonZeroSize; 794 llvm::tie(stateZeroSize, stateNonZeroSize) = 795 assumeZero(C, state, sizeVal, sizeTy); 796 797 // Get the value of the Dest. 798 SVal destVal = state->getSVal(Dest); 799 800 // If the size is zero, there won't be any actual memory access, so 801 // just bind the return value to the destination buffer and return. 802 if (stateZeroSize) { 803 stateZeroSize = stateZeroSize->BindExpr(CE, destVal); 804 C.addTransition(stateZeroSize); 805 } 806 807 // If the size can be nonzero, we have to check the other arguments. 808 if (stateNonZeroSize) { 809 state = stateNonZeroSize; 810 811 // Ensure the destination is not null. If it is NULL there will be a 812 // NULL pointer dereference. 813 state = checkNonNull(C, state, Dest, destVal); 814 if (!state) 815 return; 816 817 // Get the value of the Src. 818 SVal srcVal = state->getSVal(Source); 819 820 // Ensure the source is not null. If it is NULL there will be a 821 // NULL pointer dereference. 822 state = checkNonNull(C, state, Source, srcVal); 823 if (!state) 824 return; 825 826 // Ensure the accesses are valid and that the buffers do not overlap. 827 state = CheckBufferAccess(C, state, Size, Dest, Source, 828 /* FirstIsDst = */ true); 829 if (Restricted) 830 state = CheckOverlap(C, state, Size, Dest, Source); 831 832 if (!state) 833 return; 834 835 // If this is mempcpy, get the byte after the last byte copied and 836 // bind the expr. 837 if (IsMempcpy) { 838 loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal); 839 assert(destRegVal && "Destination should be a known MemRegionVal here"); 840 841 // Get the length to copy. 842 NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal); 843 844 if (lenValNonLoc) { 845 // Get the byte after the last byte copied. 846 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 847 *destRegVal, 848 *lenValNonLoc, 849 Dest->getType()); 850 851 // The byte after the last byte copied is the return value. 852 state = state->BindExpr(CE, lastElement); 853 } else { 854 // If we don't know how much we copied, we can at least 855 // conjure a return value for later. 856 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 857 SVal result = 858 C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 859 state = state->BindExpr(CE, result); 860 } 861 862 } else { 863 // All other copies return the destination buffer. 864 // (Well, bcopy() has a void return type, but this won't hurt.) 865 state = state->BindExpr(CE, destVal); 866 } 867 868 // Invalidate the destination. 869 // FIXME: Even if we can't perfectly model the copy, we should see if we 870 // can use LazyCompoundVals to copy the source values into the destination. 871 // This would probably remove any existing bindings past the end of the 872 // copied region, but that's still an improvement over blank invalidation. 873 state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); 874 C.addTransition(state); 875 } 876} 877 878 879void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 880 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 881 // The return value is the address of the destination buffer. 882 const Expr *Dest = CE->getArg(0); 883 const GRState *state = C.getState(); 884 885 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 886} 887 888void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 889 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 890 // The return value is a pointer to the byte following the last written byte. 891 const Expr *Dest = CE->getArg(0); 892 const GRState *state = C.getState(); 893 894 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 895} 896 897void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 898 // void *memmove(void *dst, const void *src, size_t n); 899 // The return value is the address of the destination buffer. 900 const Expr *Dest = CE->getArg(0); 901 const GRState *state = C.getState(); 902 903 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 904} 905 906void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 907 // void bcopy(const void *src, void *dst, size_t n); 908 evalCopyCommon(C, CE, C.getState(), 909 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 910} 911 912void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 913 // int memcmp(const void *s1, const void *s2, size_t n); 914 const Expr *Left = CE->getArg(0); 915 const Expr *Right = CE->getArg(1); 916 const Expr *Size = CE->getArg(2); 917 918 const GRState *state = C.getState(); 919 SValBuilder &svalBuilder = C.getSValBuilder(); 920 921 // See if the size argument is zero. 922 SVal sizeVal = state->getSVal(Size); 923 QualType sizeTy = Size->getType(); 924 925 const GRState *stateZeroSize, *stateNonZeroSize; 926 llvm::tie(stateZeroSize, stateNonZeroSize) = 927 assumeZero(C, state, sizeVal, sizeTy); 928 929 // If the size can be zero, the result will be 0 in that case, and we don't 930 // have to check either of the buffers. 931 if (stateZeroSize) { 932 state = stateZeroSize; 933 state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 934 C.addTransition(state); 935 } 936 937 // If the size can be nonzero, we have to check the other arguments. 938 if (stateNonZeroSize) { 939 state = stateNonZeroSize; 940 // If we know the two buffers are the same, we know the result is 0. 941 // First, get the two buffers' addresses. Another checker will have already 942 // made sure they're not undefined. 943 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left)); 944 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right)); 945 946 // See if they are the same. 947 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 948 const GRState *StSameBuf, *StNotSameBuf; 949 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 950 951 // If the two arguments might be the same buffer, we know the result is 0, 952 // and we only need to check one size. 953 if (StSameBuf) { 954 state = StSameBuf; 955 state = CheckBufferAccess(C, state, Size, Left); 956 if (state) { 957 state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 958 C.addTransition(state); 959 } 960 } 961 962 // If the two arguments might be different buffers, we have to check the 963 // size of both of them. 964 if (StNotSameBuf) { 965 state = StNotSameBuf; 966 state = CheckBufferAccess(C, state, Size, Left, Right); 967 if (state) { 968 // The return value is the comparison result, which we don't know. 969 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 970 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 971 state = state->BindExpr(CE, CmpV); 972 C.addTransition(state); 973 } 974 } 975 } 976} 977 978void CStringChecker::evalstrLength(CheckerContext &C, 979 const CallExpr *CE) const { 980 // size_t strlen(const char *s); 981 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 982} 983 984void CStringChecker::evalstrnLength(CheckerContext &C, 985 const CallExpr *CE) const { 986 // size_t strnlen(const char *s, size_t maxlen); 987 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 988} 989 990void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 991 bool IsStrnlen) const { 992 const GRState *state = C.getState(); 993 994 if (IsStrnlen) { 995 const Expr *maxlenExpr = CE->getArg(1); 996 SVal maxlenVal = state->getSVal(maxlenExpr); 997 998 const GRState *stateZeroSize, *stateNonZeroSize; 999 llvm::tie(stateZeroSize, stateNonZeroSize) = 1000 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1001 1002 // If the size can be zero, the result will be 0 in that case, and we don't 1003 // have to check the string itself. 1004 if (stateZeroSize) { 1005 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1006 stateZeroSize = stateZeroSize->BindExpr(CE, zero); 1007 C.addTransition(stateZeroSize); 1008 } 1009 1010 // If the size is GUARANTEED to be zero, we're done! 1011 if (!stateNonZeroSize) 1012 return; 1013 1014 // Otherwise, record the assumption that the size is nonzero. 1015 state = stateNonZeroSize; 1016 } 1017 1018 // Check that the string argument is non-null. 1019 const Expr *Arg = CE->getArg(0); 1020 SVal ArgVal = state->getSVal(Arg); 1021 1022 state = checkNonNull(C, state, Arg, ArgVal); 1023 1024 if (!state) 1025 return; 1026 1027 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 1028 1029 // If the argument isn't a valid C string, there's no valid state to 1030 // transition to. 1031 if (strLength.isUndef()) 1032 return; 1033 1034 DefinedOrUnknownSVal result = UnknownVal(); 1035 1036 // If the check is for strnlen() then bind the return value to no more than 1037 // the maxlen value. 1038 if (IsStrnlen) { 1039 QualType cmpTy = C.getSValBuilder().getComparisonType(); 1040 1041 // It's a little unfortunate to be getting this again, 1042 // but it's not that expensive... 1043 const Expr *maxlenExpr = CE->getArg(1); 1044 SVal maxlenVal = state->getSVal(maxlenExpr); 1045 1046 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 1047 NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); 1048 1049 if (strLengthNL && maxlenValNL) { 1050 const GRState *stateStringTooLong, *stateStringNotTooLong; 1051 1052 // Check if the strLength is greater than the maxlen. 1053 llvm::tie(stateStringTooLong, stateStringNotTooLong) = 1054 state->assume(cast<DefinedOrUnknownSVal> 1055 (C.getSValBuilder().evalBinOpNN(state, BO_GT, 1056 *strLengthNL, 1057 *maxlenValNL, 1058 cmpTy))); 1059 1060 if (stateStringTooLong && !stateStringNotTooLong) { 1061 // If the string is longer than maxlen, return maxlen. 1062 result = *maxlenValNL; 1063 } else if (stateStringNotTooLong && !stateStringTooLong) { 1064 // If the string is shorter than maxlen, return its length. 1065 result = *strLengthNL; 1066 } 1067 } 1068 1069 if (result.isUnknown()) { 1070 // If we don't have enough information for a comparison, there's 1071 // no guarantee the full string length will actually be returned. 1072 // All we know is the return value is the min of the string length 1073 // and the limit. This is better than nothing. 1074 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1075 result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 1076 NonLoc *resultNL = cast<NonLoc>(&result); 1077 1078 if (strLengthNL) { 1079 state = state->assume(cast<DefinedOrUnknownSVal> 1080 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 1081 *resultNL, 1082 *strLengthNL, 1083 cmpTy)), true); 1084 } 1085 1086 if (maxlenValNL) { 1087 state = state->assume(cast<DefinedOrUnknownSVal> 1088 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 1089 *resultNL, 1090 *maxlenValNL, 1091 cmpTy)), true); 1092 } 1093 } 1094 1095 } else { 1096 // This is a plain strlen(), not strnlen(). 1097 result = cast<DefinedOrUnknownSVal>(strLength); 1098 1099 // If we don't know the length of the string, conjure a return 1100 // value, so it can be used in constraints, at least. 1101 if (result.isUnknown()) { 1102 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1103 result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 1104 } 1105 } 1106 1107 // Bind the return value. 1108 assert(!result.isUnknown() && "Should have conjured a value by now"); 1109 state = state->BindExpr(CE, result); 1110 C.addTransition(state); 1111} 1112 1113void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 1114 // char *strcpy(char *restrict dst, const char *restrict src); 1115 evalStrcpyCommon(C, CE, 1116 /* returnEnd = */ false, 1117 /* isBounded = */ false, 1118 /* isAppending = */ false); 1119} 1120 1121void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 1122 // char *strncpy(char *restrict dst, const char *restrict src, size_t n); 1123 evalStrcpyCommon(C, CE, 1124 /* returnEnd = */ false, 1125 /* isBounded = */ true, 1126 /* isAppending = */ false); 1127} 1128 1129void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 1130 // char *stpcpy(char *restrict dst, const char *restrict src); 1131 evalStrcpyCommon(C, CE, 1132 /* returnEnd = */ true, 1133 /* isBounded = */ false, 1134 /* isAppending = */ false); 1135} 1136 1137void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { 1138 //char *strcat(char *restrict s1, const char *restrict s2); 1139 evalStrcpyCommon(C, CE, 1140 /* returnEnd = */ false, 1141 /* isBounded = */ false, 1142 /* isAppending = */ true); 1143} 1144 1145void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { 1146 //char *strncat(char *restrict s1, const char *restrict s2, size_t n); 1147 evalStrcpyCommon(C, CE, 1148 /* returnEnd = */ false, 1149 /* isBounded = */ true, 1150 /* isAppending = */ true); 1151} 1152 1153void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 1154 bool returnEnd, bool isBounded, 1155 bool isAppending) const { 1156 const GRState *state = C.getState(); 1157 1158 // Check that the destination is non-null. 1159 const Expr *Dst = CE->getArg(0); 1160 SVal DstVal = state->getSVal(Dst); 1161 1162 state = checkNonNull(C, state, Dst, DstVal); 1163 if (!state) 1164 return; 1165 1166 // Check that the source is non-null. 1167 const Expr *srcExpr = CE->getArg(1); 1168 SVal srcVal = state->getSVal(srcExpr); 1169 state = checkNonNull(C, state, srcExpr, srcVal); 1170 if (!state) 1171 return; 1172 1173 // Get the string length of the source. 1174 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 1175 1176 // If the source isn't a valid C string, give up. 1177 if (strLength.isUndef()) 1178 return; 1179 1180 SValBuilder &svalBuilder = C.getSValBuilder(); 1181 QualType cmpTy = svalBuilder.getConditionType(); 1182 1183 SVal amountCopied = UnknownVal(); 1184 1185 // If the function is strncpy, strncat, etc... it is bounded. 1186 if (isBounded) { 1187 // Get the max number of characters to copy. 1188 const Expr *lenExpr = CE->getArg(2); 1189 SVal lenVal = state->getSVal(lenExpr); 1190 1191 // Protect against misdeclared strncpy(). 1192 lenVal = svalBuilder.evalCast(lenVal, 1193 svalBuilder.getContext().getSizeType(), 1194 lenExpr->getType()); 1195 1196 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 1197 NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal); 1198 1199 // If we know both values, we might be able to figure out how much 1200 // we're copying. 1201 if (strLengthNL && lenValNL) { 1202 const GRState *stateSourceTooLong, *stateSourceNotTooLong; 1203 1204 // Check if the max number to copy is less than the length of the src. 1205 llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = 1206 state->assume(cast<DefinedOrUnknownSVal> 1207 (svalBuilder.evalBinOpNN(state, BO_GT, *strLengthNL, 1208 *lenValNL, cmpTy))); 1209 1210 if (stateSourceTooLong && !stateSourceNotTooLong) { 1211 // Max number to copy is less than the length of the src, so the actual 1212 // strLength copied is the max number arg. 1213 state = stateSourceTooLong; 1214 amountCopied = lenVal; 1215 1216 } else if (!stateSourceTooLong && stateSourceNotTooLong) { 1217 // The source buffer entirely fits in the bound. 1218 state = stateSourceNotTooLong; 1219 amountCopied = strLength; 1220 } 1221 } 1222 1223 // If we couldn't pin down the copy length, at least bound it. 1224 if (amountCopied.isUnknown()) { 1225 // Try to get a "hypothetical" string length symbol, which we can later 1226 // set as a real value if that turns out to be the case. 1227 amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); 1228 assert(!amountCopied.isUndef()); 1229 1230 if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) { 1231 if (lenValNL) { 1232 // amountCopied <= lenVal 1233 SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, 1234 *amountCopiedNL, 1235 *lenValNL, 1236 cmpTy); 1237 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound), 1238 true); 1239 if (!state) 1240 return; 1241 } 1242 1243 if (strLengthNL) { 1244 // amountCopied <= strlen(source) 1245 SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, 1246 *amountCopiedNL, 1247 *strLengthNL, 1248 cmpTy); 1249 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc), 1250 true); 1251 if (!state) 1252 return; 1253 } 1254 } 1255 } 1256 1257 } else { 1258 // The function isn't bounded. The amount copied should match the length 1259 // of the source buffer. 1260 amountCopied = strLength; 1261 } 1262 1263 assert(state); 1264 1265 // This represents the number of characters copied into the destination 1266 // buffer. (It may not actually be the strlen if the destination buffer 1267 // is not terminated.) 1268 SVal finalStrLength = UnknownVal(); 1269 1270 // If this is an appending function (strcat, strncat...) then set the 1271 // string length to strlen(src) + strlen(dst) since the buffer will 1272 // ultimately contain both. 1273 if (isAppending) { 1274 // Get the string length of the destination. If the destination is memory 1275 // that can't have a string length, we shouldn't be copying into it anyway. 1276 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 1277 if (dstStrLength.isUndef()) 1278 return; 1279 1280 QualType sizeTy = svalBuilder.getContext().getSizeType(); 1281 1282 NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied); 1283 NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength); 1284 1285 // If we know both string lengths, we might know the final string length. 1286 if (srcStrLengthNL && dstStrLengthNL) { 1287 // Make sure the two lengths together don't overflow a size_t. 1288 state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); 1289 if (!state) 1290 return; 1291 1292 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 1293 *dstStrLengthNL, sizeTy); 1294 } 1295 1296 // If we couldn't get a single value for the final string length, 1297 // we can at least bound it by the individual lengths. 1298 if (finalStrLength.isUnknown()) { 1299 // Try to get a "hypothetical" string length symbol, which we can later 1300 // set as a real value if that turns out to be the case. 1301 finalStrLength = getCStringLength(C, state, CE, DstVal, true); 1302 assert(!finalStrLength.isUndef()); 1303 1304 if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) { 1305 if (srcStrLengthNL) { 1306 // finalStrLength >= srcStrLength 1307 SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1308 *finalStrLengthNL, 1309 *srcStrLengthNL, 1310 cmpTy); 1311 state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult), 1312 true); 1313 if (!state) 1314 return; 1315 } 1316 1317 if (dstStrLengthNL) { 1318 // finalStrLength >= dstStrLength 1319 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1320 *finalStrLengthNL, 1321 *dstStrLengthNL, 1322 cmpTy); 1323 state = state->assume(cast<DefinedOrUnknownSVal>(destInResult), 1324 true); 1325 if (!state) 1326 return; 1327 } 1328 } 1329 } 1330 1331 } else { 1332 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and 1333 // the final string length will match the input string length. 1334 finalStrLength = amountCopied; 1335 } 1336 1337 // The final result of the function will either be a pointer past the last 1338 // copied element, or a pointer to the start of the destination buffer. 1339 SVal Result = (returnEnd ? UnknownVal() : DstVal); 1340 1341 assert(state); 1342 1343 // If the destination is a MemRegion, try to check for a buffer overflow and 1344 // record the new string length. 1345 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 1346 // If the final length is known, we can check for an overflow. 1347 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) { 1348 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 1349 *knownStrLength, 1350 Dst->getType()); 1351 1352 state = CheckLocation(C, state, Dst, lastElement, /* IsDst = */ true); 1353 if (!state) 1354 return; 1355 1356 // If this is a stpcpy-style copy, the last element is the return value. 1357 if (returnEnd) 1358 Result = lastElement; 1359 } 1360 1361 // Invalidate the destination. This must happen before we set the C string 1362 // length because invalidation will clear the length. 1363 // FIXME: Even if we can't perfectly model the copy, we should see if we 1364 // can use LazyCompoundVals to copy the source values into the destination. 1365 // This would probably remove any existing bindings past the end of the 1366 // string, but that's still an improvement over blank invalidation. 1367 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 1368 1369 // Set the C string length of the destination. 1370 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); 1371 } 1372 1373 assert(state); 1374 1375 // If this is a stpcpy-style copy, but we were unable to check for a buffer 1376 // overflow, we still need a result. Conjure a return value. 1377 if (returnEnd && Result.isUnknown()) { 1378 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1379 Result = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 1380 } 1381 1382 // Set the return value. 1383 state = state->BindExpr(CE, Result); 1384 C.addTransition(state); 1385} 1386 1387void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { 1388 //int strcmp(const char *restrict s1, const char *restrict s2); 1389 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); 1390} 1391 1392void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { 1393 //int strncmp(const char *restrict s1, const char *restrict s2, size_t n); 1394 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); 1395} 1396 1397void CStringChecker::evalStrcasecmp(CheckerContext &C, 1398 const CallExpr *CE) const { 1399 //int strcasecmp(const char *restrict s1, const char *restrict s2); 1400 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); 1401} 1402 1403void CStringChecker::evalStrncasecmp(CheckerContext &C, 1404 const CallExpr *CE) const { 1405 //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n); 1406 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); 1407} 1408 1409void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 1410 bool isBounded, bool ignoreCase) const { 1411 const GRState *state = C.getState(); 1412 1413 // Check that the first string is non-null 1414 const Expr *s1 = CE->getArg(0); 1415 SVal s1Val = state->getSVal(s1); 1416 state = checkNonNull(C, state, s1, s1Val); 1417 if (!state) 1418 return; 1419 1420 // Check that the second string is non-null. 1421 const Expr *s2 = CE->getArg(1); 1422 SVal s2Val = state->getSVal(s2); 1423 state = checkNonNull(C, state, s2, s2Val); 1424 if (!state) 1425 return; 1426 1427 // Get the string length of the first string or give up. 1428 SVal s1Length = getCStringLength(C, state, s1, s1Val); 1429 if (s1Length.isUndef()) 1430 return; 1431 1432 // Get the string length of the second string or give up. 1433 SVal s2Length = getCStringLength(C, state, s2, s2Val); 1434 if (s2Length.isUndef()) 1435 return; 1436 1437 // Get the string literal of the first string. 1438 const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); 1439 if (!s1StrLiteral) 1440 return; 1441 llvm::StringRef s1StrRef = s1StrLiteral->getString(); 1442 1443 // Get the string literal of the second string. 1444 const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); 1445 if (!s2StrLiteral) 1446 return; 1447 llvm::StringRef s2StrRef = s2StrLiteral->getString(); 1448 1449 int result; 1450 if (isBounded) { 1451 // Get the max number of characters to compare. 1452 const Expr *lenExpr = CE->getArg(2); 1453 SVal lenVal = state->getSVal(lenExpr); 1454 1455 // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt 1456 // then give up, otherwise get the value and use it as the bounds. 1457 nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal); 1458 if (!CI) 1459 return; 1460 llvm::APSInt lenInt(CI->getValue()); 1461 1462 // Create substrings of each to compare the prefix. 1463 s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue()); 1464 s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue()); 1465 } 1466 1467 if (ignoreCase) { 1468 // Compare string 1 to string 2 the same way strcasecmp() does. 1469 result = s1StrRef.compare_lower(s2StrRef); 1470 } else { 1471 // Compare string 1 to string 2 the same way strcmp() does. 1472 result = s1StrRef.compare(s2StrRef); 1473 } 1474 1475 // Build the SVal of the comparison to bind the return value. 1476 SValBuilder &svalBuilder = C.getSValBuilder(); 1477 QualType intTy = svalBuilder.getContext().IntTy; 1478 SVal resultVal = svalBuilder.makeIntVal(result, intTy); 1479 1480 // Bind the return value of the expression. 1481 // Set the return value. 1482 state = state->BindExpr(CE, resultVal); 1483 C.addTransition(state); 1484} 1485 1486//===----------------------------------------------------------------------===// 1487// The driver method, and other Checker callbacks. 1488//===----------------------------------------------------------------------===// 1489 1490bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 1491 // Get the callee. All the functions we care about are C functions 1492 // with simple identifiers. 1493 const GRState *state = C.getState(); 1494 const Expr *Callee = CE->getCallee(); 1495 const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl(); 1496 1497 if (!FD) 1498 return false; 1499 1500 // Get the name of the callee. If it's a builtin, strip off the prefix. 1501 IdentifierInfo *II = FD->getIdentifier(); 1502 if (!II) // if no identifier, not a simple C function 1503 return false; 1504 llvm::StringRef Name = II->getName(); 1505 if (Name.startswith("__builtin_")) 1506 Name = Name.substr(10); 1507 1508 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 1509 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy) 1510 .Cases("mempcpy", "__mempcpy_chk", &CStringChecker::evalMempcpy) 1511 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) 1512 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) 1513 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) 1514 //.Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy) 1515 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) 1516 .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat) 1517 .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat) 1518 .Case("strlen", &CStringChecker::evalstrLength) 1519 .Case("strnlen", &CStringChecker::evalstrnLength) 1520 .Case("strcmp", &CStringChecker::evalStrcmp) 1521 .Case("strncmp", &CStringChecker::evalStrncmp) 1522 .Case("strcasecmp", &CStringChecker::evalStrcasecmp) 1523 .Case("strncasecmp", &CStringChecker::evalStrncasecmp) 1524 .Case("bcopy", &CStringChecker::evalBcopy) 1525 .Default(NULL); 1526 1527 // If the callee isn't a string function, let another checker handle it. 1528 if (!evalFunction) 1529 return false; 1530 1531 // Check and evaluate the call. 1532 (this->*evalFunction)(C, CE); 1533 return true; 1534} 1535 1536void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 1537 // Record string length for char a[] = "abc"; 1538 const GRState *state = C.getState(); 1539 1540 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 1541 I != E; ++I) { 1542 const VarDecl *D = dyn_cast<VarDecl>(*I); 1543 if (!D) 1544 continue; 1545 1546 // FIXME: Handle array fields of structs. 1547 if (!D->getType()->isArrayType()) 1548 continue; 1549 1550 const Expr *Init = D->getInit(); 1551 if (!Init) 1552 continue; 1553 if (!isa<StringLiteral>(Init)) 1554 continue; 1555 1556 Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext()); 1557 const MemRegion *MR = VarLoc.getAsRegion(); 1558 if (!MR) 1559 continue; 1560 1561 SVal StrVal = state->getSVal(Init); 1562 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 1563 DefinedOrUnknownSVal strLength 1564 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 1565 1566 state = state->set<CStringLength>(MR, strLength); 1567 } 1568 1569 C.addTransition(state); 1570} 1571 1572bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const { 1573 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1574 return !Entries.isEmpty(); 1575} 1576 1577const GRState * 1578CStringChecker::checkRegionChanges(const GRState *state, 1579 const StoreManager::InvalidatedSymbols *, 1580 const MemRegion * const *Begin, 1581 const MemRegion * const *End) const { 1582 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1583 if (Entries.isEmpty()) 1584 return state; 1585 1586 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 1587 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 1588 1589 // First build sets for the changed regions and their super-regions. 1590 for ( ; Begin != End; ++Begin) { 1591 const MemRegion *MR = *Begin; 1592 Invalidated.insert(MR); 1593 1594 SuperRegions.insert(MR); 1595 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 1596 MR = SR->getSuperRegion(); 1597 SuperRegions.insert(MR); 1598 } 1599 } 1600 1601 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1602 1603 // Then loop over the entries in the current state. 1604 for (CStringLength::EntryMap::iterator I = Entries.begin(), 1605 E = Entries.end(); I != E; ++I) { 1606 const MemRegion *MR = I.getKey(); 1607 1608 // Is this entry for a super-region of a changed region? 1609 if (SuperRegions.count(MR)) { 1610 Entries = F.remove(Entries, MR); 1611 continue; 1612 } 1613 1614 // Is this entry for a sub-region of a changed region? 1615 const MemRegion *Super = MR; 1616 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1617 Super = SR->getSuperRegion(); 1618 if (Invalidated.count(Super)) { 1619 Entries = F.remove(Entries, MR); 1620 break; 1621 } 1622 } 1623 } 1624 1625 return state->set<CStringLength>(Entries); 1626} 1627 1628void CStringChecker::checkLiveSymbols(const GRState *state, 1629 SymbolReaper &SR) const { 1630 // Mark all symbols in our string length map as valid. 1631 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1632 1633 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1634 I != E; ++I) { 1635 SVal Len = I.getData(); 1636 1637 for (SVal::symbol_iterator si = Len.symbol_begin(), se = Len.symbol_end(); 1638 si != se; ++si) 1639 SR.markInUse(*si); 1640 } 1641} 1642 1643void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 1644 CheckerContext &C) const { 1645 if (!SR.hasDeadSymbols()) 1646 return; 1647 1648 const GRState *state = C.getState(); 1649 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1650 if (Entries.isEmpty()) 1651 return; 1652 1653 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1654 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1655 I != E; ++I) { 1656 SVal Len = I.getData(); 1657 if (SymbolRef Sym = Len.getAsSymbol()) { 1658 if (SR.isDead(Sym)) 1659 Entries = F.remove(Entries, I.getKey()); 1660 } 1661 } 1662 1663 state = state->set<CStringLength>(Entries); 1664 C.generateNode(state); 1665} 1666 1667void ento::registerCStringChecker(CheckerManager &mgr) { 1668 mgr.registerChecker<CStringChecker>(); 1669} 1670