SemaChecking.cpp revision 4493f79fce48cd9cbd9f55fa9d452cde736747a0
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/Decl.h" 18#include "clang/AST/Expr.h" 19#include "clang/AST/ExprCXX.h" 20#include "clang/AST/ExprObjC.h" 21#include "clang/Lex/Preprocessor.h" 22#include "clang/Lex/LiteralSupport.h" 23#include "clang/Basic/SourceManager.h" 24#include "clang/Basic/Diagnostic.h" 25#include "clang/Basic/LangOptions.h" 26#include "clang/Basic/TargetInfo.h" 27#include "llvm/ADT/OwningPtr.h" 28#include "llvm/ADT/SmallString.h" 29#include "llvm/ADT/StringExtras.h" 30#include "SemaUtil.h" 31using namespace clang; 32 33/// CheckFunctionCall - Check a direct function call for various correctness 34/// and safety properties not strictly enforced by the C type system. 35Action::ExprResult 36Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 37 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 38 // Get the IdentifierInfo* for the called function. 39 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 40 41 switch (FnInfo->getBuiltinID()) { 42 case Builtin::BI__builtin___CFStringMakeConstantString: 43 assert(TheCall->getNumArgs() == 1 && 44 "Wrong # arguments to builtin CFStringMakeConstantString"); 45 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 46 return true; 47 return TheCall.take(); 48 case Builtin::BI__builtin_stdarg_start: 49 case Builtin::BI__builtin_va_start: 50 if (SemaBuiltinVAStart(TheCall.get())) 51 return true; 52 return TheCall.take(); 53 case Builtin::BI__builtin_isgreater: 54 case Builtin::BI__builtin_isgreaterequal: 55 case Builtin::BI__builtin_isless: 56 case Builtin::BI__builtin_islessequal: 57 case Builtin::BI__builtin_islessgreater: 58 case Builtin::BI__builtin_isunordered: 59 if (SemaBuiltinUnorderedCompare(TheCall.get())) 60 return true; 61 return TheCall.take(); 62 case Builtin::BI__builtin_return_address: 63 case Builtin::BI__builtin_frame_address: 64 if (SemaBuiltinStackAddress(TheCall.get())) 65 return true; 66 return TheCall.take(); 67 case Builtin::BI__builtin_shufflevector: 68 return SemaBuiltinShuffleVector(TheCall.get()); 69 case Builtin::BI__builtin_prefetch: 70 if (SemaBuiltinPrefetch(TheCall.get())) 71 return true; 72 return TheCall.take(); 73 } 74 75 // Search the KnownFunctionIDs for the identifier. 76 unsigned i = 0, e = id_num_known_functions; 77 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 78 if (i == e) return TheCall.take(); 79 80 // Printf checking. 81 if (i <= id_vprintf) { 82 // Retrieve the index of the format string parameter and determine 83 // if the function is passed a va_arg argument. 84 unsigned format_idx = 0; 85 bool HasVAListArg = false; 86 87 switch (i) { 88 default: assert(false && "No format string argument index."); 89 case id_printf: format_idx = 0; break; 90 case id_fprintf: format_idx = 1; break; 91 case id_sprintf: format_idx = 1; break; 92 case id_snprintf: format_idx = 2; break; 93 case id_asprintf: format_idx = 1; break; 94 case id_NSLog: format_idx = 0; break; 95 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 96 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 97 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 98 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 99 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 100 } 101 102 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 103 } 104 105 return TheCall.take(); 106} 107 108/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 109/// CFString constructor is correct 110bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 111 Arg = Arg->IgnoreParenCasts(); 112 113 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 114 115 if (!Literal || Literal->isWide()) { 116 Diag(Arg->getLocStart(), 117 diag::err_cfstring_literal_not_string_constant, 118 Arg->getSourceRange()); 119 return true; 120 } 121 122 const char *Data = Literal->getStrData(); 123 unsigned Length = Literal->getByteLength(); 124 125 for (unsigned i = 0; i < Length; ++i) { 126 if (!isascii(Data[i])) { 127 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 128 diag::warn_cfstring_literal_contains_non_ascii_character, 129 Arg->getSourceRange()); 130 break; 131 } 132 133 if (!Data[i]) { 134 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 135 diag::warn_cfstring_literal_contains_nul_character, 136 Arg->getSourceRange()); 137 break; 138 } 139 } 140 141 return false; 142} 143 144/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 145/// Emit an error and return true on failure, return false on success. 146bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 147 Expr *Fn = TheCall->getCallee(); 148 if (TheCall->getNumArgs() > 2) { 149 Diag(TheCall->getArg(2)->getLocStart(), 150 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), 151 SourceRange(TheCall->getArg(2)->getLocStart(), 152 (*(TheCall->arg_end()-1))->getLocEnd())); 153 return true; 154 } 155 156 // Determine whether the current function is variadic or not. 157 bool isVariadic; 158 if (getCurFunctionDecl()) 159 isVariadic = 160 cast<FunctionTypeProto>(getCurFunctionDecl()->getType())->isVariadic(); 161 else 162 isVariadic = getCurMethodDecl()->isVariadic(); 163 164 if (!isVariadic) { 165 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 166 return true; 167 } 168 169 // Verify that the second argument to the builtin is the last argument of the 170 // current function or method. 171 bool SecondArgIsLastNamedArgument = false; 172 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 173 174 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 175 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 176 // FIXME: This isn't correct for methods (results in bogus warning). 177 // Get the last formal in the current function. 178 const ParmVarDecl *LastArg; 179 if (getCurFunctionDecl()) 180 LastArg = *(getCurFunctionDecl()->param_end()-1); 181 else 182 LastArg = *(getCurMethodDecl()->param_end()-1); 183 SecondArgIsLastNamedArgument = PV == LastArg; 184 } 185 } 186 187 if (!SecondArgIsLastNamedArgument) 188 Diag(TheCall->getArg(1)->getLocStart(), 189 diag::warn_second_parameter_of_va_start_not_last_named_argument); 190 return false; 191} 192 193/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 194/// friends. This is declared to take (...), so we have to check everything. 195bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 196 if (TheCall->getNumArgs() < 2) 197 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); 198 if (TheCall->getNumArgs() > 2) 199 return Diag(TheCall->getArg(2)->getLocStart(), 200 diag::err_typecheck_call_too_many_args, 201 SourceRange(TheCall->getArg(2)->getLocStart(), 202 (*(TheCall->arg_end()-1))->getLocEnd())); 203 204 Expr *OrigArg0 = TheCall->getArg(0); 205 Expr *OrigArg1 = TheCall->getArg(1); 206 207 // Do standard promotions between the two arguments, returning their common 208 // type. 209 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 210 211 // If the common type isn't a real floating type, then the arguments were 212 // invalid for this operation. 213 if (!Res->isRealFloatingType()) 214 return Diag(OrigArg0->getLocStart(), 215 diag::err_typecheck_call_invalid_ordered_compare, 216 OrigArg0->getType().getAsString(), 217 OrigArg1->getType().getAsString(), 218 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); 219 220 return false; 221} 222 223bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 224 // The signature for these builtins is exact; the only thing we need 225 // to check is that the argument is a constant. 226 SourceLocation Loc; 227 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) { 228 return Diag(Loc, diag::err_stack_const_level, TheCall->getSourceRange()); 229 } 230 return false; 231} 232 233/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 234// This is declared to take (...), so we have to check everything. 235Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 236 if (TheCall->getNumArgs() < 3) 237 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 238 TheCall->getSourceRange()); 239 240 QualType FAType = TheCall->getArg(0)->getType(); 241 QualType SAType = TheCall->getArg(1)->getType(); 242 243 if (!FAType->isVectorType() || !SAType->isVectorType()) { 244 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector, 245 SourceRange(TheCall->getArg(0)->getLocStart(), 246 TheCall->getArg(1)->getLocEnd())); 247 return true; 248 } 249 250 if (FAType.getCanonicalType().getUnqualifiedType() != 251 SAType.getCanonicalType().getUnqualifiedType()) { 252 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector, 253 SourceRange(TheCall->getArg(0)->getLocStart(), 254 TheCall->getArg(1)->getLocEnd())); 255 return true; 256 } 257 258 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 259 if (TheCall->getNumArgs() != numElements+2) { 260 if (TheCall->getNumArgs() < numElements+2) 261 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 262 TheCall->getSourceRange()); 263 else 264 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 265 TheCall->getSourceRange()); 266 return true; 267 } 268 269 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 270 llvm::APSInt Result(32); 271 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) { 272 Diag(TheCall->getLocStart(), 273 diag::err_shufflevector_nonconstant_argument, 274 TheCall->getArg(i)->getSourceRange()); 275 return true; 276 } 277 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) { 278 Diag(TheCall->getLocStart(), 279 diag::err_shufflevector_argument_too_large, 280 TheCall->getArg(i)->getSourceRange()); 281 return true; 282 } 283 } 284 285 llvm::SmallVector<Expr*, 32> exprs; 286 287 for (unsigned i = 0; i < TheCall->getNumArgs(); i++) { 288 exprs.push_back(TheCall->getArg(i)); 289 TheCall->setArg(i, 0); 290 } 291 292 ShuffleVectorExpr* E = new ShuffleVectorExpr( 293 exprs.begin(), numElements+2, FAType, 294 TheCall->getCallee()->getLocStart(), 295 TheCall->getRParenLoc()); 296 297 return E; 298} 299 300/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 301// This is declared to take (const void*, ...) and can take two 302// optional constant int args. 303bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 304 unsigned numArgs = TheCall->getNumArgs(); 305 bool res = false; 306 307 if (numArgs > 3) { 308 res |= Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 309 TheCall->getSourceRange()); 310 } 311 312 // Argument 0 is checked for us and the remaining arguments must be 313 // constant integers. 314 for (unsigned i=1; i<numArgs; ++i) { 315 Expr *Arg = TheCall->getArg(i); 316 QualType RWType = Arg->getType(); 317 318 const BuiltinType *BT = RWType->getAsBuiltinType(); 319 // FIXME: 32 is wrong, needs to be proper width of Int 320 llvm::APSInt Result(32); 321 if (!BT || BT->getKind() != BuiltinType::Int || 322 !Arg->isIntegerConstantExpr(Result, Context)) { 323 if (Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument, 324 SourceRange(Arg->getLocStart(), Arg->getLocEnd()))) { 325 res = true; 326 continue; 327 } 328 } 329 330 // FIXME: gcc issues a warning and rewrites these to 0. These 331 // seems especially odd for the third argument since the default 332 // is 3. 333 if (i==1) { 334 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 335 res |= Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_range, 336 "0", "1", 337 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 338 } else { 339 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 340 res |= Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_range, 341 "0", "3", 342 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 343 } 344 } 345 346 return res; 347} 348 349/// CheckPrintfArguments - Check calls to printf (and similar functions) for 350/// correct use of format strings. 351/// 352/// HasVAListArg - A predicate indicating whether the printf-like 353/// function is passed an explicit va_arg argument (e.g., vprintf) 354/// 355/// format_idx - The index into Args for the format string. 356/// 357/// Improper format strings to functions in the printf family can be 358/// the source of bizarre bugs and very serious security holes. A 359/// good source of information is available in the following paper 360/// (which includes additional references): 361/// 362/// FormatGuard: Automatic Protection From printf Format String 363/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 364/// 365/// Functionality implemented: 366/// 367/// We can statically check the following properties for string 368/// literal format strings for non v.*printf functions (where the 369/// arguments are passed directly): 370// 371/// (1) Are the number of format conversions equal to the number of 372/// data arguments? 373/// 374/// (2) Does each format conversion correctly match the type of the 375/// corresponding data argument? (TODO) 376/// 377/// Moreover, for all printf functions we can: 378/// 379/// (3) Check for a missing format string (when not caught by type checking). 380/// 381/// (4) Check for no-operation flags; e.g. using "#" with format 382/// conversion 'c' (TODO) 383/// 384/// (5) Check the use of '%n', a major source of security holes. 385/// 386/// (6) Check for malformed format conversions that don't specify anything. 387/// 388/// (7) Check for empty format strings. e.g: printf(""); 389/// 390/// (8) Check that the format string is a wide literal. 391/// 392/// (9) Also check the arguments of functions with the __format__ attribute. 393/// (TODO). 394/// 395/// All of these checks can be done by parsing the format string. 396/// 397/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 398void 399Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 400 unsigned format_idx) { 401 Expr *Fn = TheCall->getCallee(); 402 403 // CHECK: printf-like function is called with no format string. 404 if (format_idx >= TheCall->getNumArgs()) { 405 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, 406 Fn->getSourceRange()); 407 return; 408 } 409 410 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 411 412 // CHECK: format string is not a string literal. 413 // 414 // Dynamically generated format strings are difficult to 415 // automatically vet at compile time. Requiring that format strings 416 // are string literals: (1) permits the checking of format strings by 417 // the compiler and thereby (2) can practically remove the source of 418 // many format string exploits. 419 420 // Format string can be either ObjC string (e.g. @"%d") or 421 // C string (e.g. "%d") 422 // ObjC string uses the same format specifiers as C string, so we can use 423 // the same format string checking logic for both ObjC and C strings. 424 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 425 StringLiteral *FExpr = NULL; 426 427 if(ObjCFExpr != NULL) 428 FExpr = ObjCFExpr->getString(); 429 else 430 FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 431 432 if (FExpr == NULL) { 433 // For vprintf* functions (i.e., HasVAListArg==true), we add a 434 // special check to see if the format string is a function parameter 435 // of the function calling the printf function. If the function 436 // has an attribute indicating it is a printf-like function, then we 437 // should suppress warnings concerning non-literals being used in a call 438 // to a vprintf function. For example: 439 // 440 // void 441 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 442 // va_list ap; 443 // va_start(ap, fmt); 444 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 445 // ... 446 // 447 // 448 // FIXME: We don't have full attribute support yet, so just check to see 449 // if the argument is a DeclRefExpr that references a parameter. We'll 450 // add proper support for checking the attribute later. 451 if (HasVAListArg) 452 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 453 if (isa<ParmVarDecl>(DR->getDecl())) 454 return; 455 456 Diag(TheCall->getArg(format_idx)->getLocStart(), 457 diag::warn_printf_not_string_constant, Fn->getSourceRange()); 458 return; 459 } 460 461 // CHECK: is the format string a wide literal? 462 if (FExpr->isWide()) { 463 Diag(FExpr->getLocStart(), 464 diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange()); 465 return; 466 } 467 468 // Str - The format string. NOTE: this is NOT null-terminated! 469 const char * const Str = FExpr->getStrData(); 470 471 // CHECK: empty format string? 472 const unsigned StrLen = FExpr->getByteLength(); 473 474 if (StrLen == 0) { 475 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, 476 Fn->getSourceRange()); 477 return; 478 } 479 480 // We process the format string using a binary state machine. The 481 // current state is stored in CurrentState. 482 enum { 483 state_OrdChr, 484 state_Conversion 485 } CurrentState = state_OrdChr; 486 487 // numConversions - The number of conversions seen so far. This is 488 // incremented as we traverse the format string. 489 unsigned numConversions = 0; 490 491 // numDataArgs - The number of data arguments after the format 492 // string. This can only be determined for non vprintf-like 493 // functions. For those functions, this value is 1 (the sole 494 // va_arg argument). 495 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 496 497 // Inspect the format string. 498 unsigned StrIdx = 0; 499 500 // LastConversionIdx - Index within the format string where we last saw 501 // a '%' character that starts a new format conversion. 502 unsigned LastConversionIdx = 0; 503 504 for (; StrIdx < StrLen; ++StrIdx) { 505 506 // Is the number of detected conversion conversions greater than 507 // the number of matching data arguments? If so, stop. 508 if (!HasVAListArg && numConversions > numDataArgs) break; 509 510 // Handle "\0" 511 if (Str[StrIdx] == '\0') { 512 // The string returned by getStrData() is not null-terminated, 513 // so the presence of a null character is likely an error. 514 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 515 diag::warn_printf_format_string_contains_null_char, 516 Fn->getSourceRange()); 517 return; 518 } 519 520 // Ordinary characters (not processing a format conversion). 521 if (CurrentState == state_OrdChr) { 522 if (Str[StrIdx] == '%') { 523 CurrentState = state_Conversion; 524 LastConversionIdx = StrIdx; 525 } 526 continue; 527 } 528 529 // Seen '%'. Now processing a format conversion. 530 switch (Str[StrIdx]) { 531 // Handle dynamic precision or width specifier. 532 case '*': { 533 ++numConversions; 534 535 if (!HasVAListArg && numConversions > numDataArgs) { 536 SourceLocation Loc = FExpr->getLocStart(); 537 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 538 539 if (Str[StrIdx-1] == '.') 540 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, 541 Fn->getSourceRange()); 542 else 543 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, 544 Fn->getSourceRange()); 545 546 // Don't do any more checking. We'll just emit spurious errors. 547 return; 548 } 549 550 // Perform type checking on width/precision specifier. 551 Expr *E = TheCall->getArg(format_idx+numConversions); 552 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 553 if (BT->getKind() == BuiltinType::Int) 554 break; 555 556 SourceLocation Loc = 557 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 558 559 if (Str[StrIdx-1] == '.') 560 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, 561 E->getType().getAsString(), E->getSourceRange()); 562 else 563 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, 564 E->getType().getAsString(), E->getSourceRange()); 565 566 break; 567 } 568 569 // Characters which can terminate a format conversion 570 // (e.g. "%d"). Characters that specify length modifiers or 571 // other flags are handled by the default case below. 572 // 573 // FIXME: additional checks will go into the following cases. 574 case 'i': 575 case 'd': 576 case 'o': 577 case 'u': 578 case 'x': 579 case 'X': 580 case 'D': 581 case 'O': 582 case 'U': 583 case 'e': 584 case 'E': 585 case 'f': 586 case 'F': 587 case 'g': 588 case 'G': 589 case 'a': 590 case 'A': 591 case 'c': 592 case 'C': 593 case 'S': 594 case 's': 595 case 'p': 596 ++numConversions; 597 CurrentState = state_OrdChr; 598 break; 599 600 // CHECK: Are we using "%n"? Issue a warning. 601 case 'n': { 602 ++numConversions; 603 CurrentState = state_OrdChr; 604 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 605 LastConversionIdx+1); 606 607 Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange()); 608 break; 609 } 610 611 // Handle "%@" 612 case '@': 613 // %@ is allowed in ObjC format strings only. 614 if(ObjCFExpr != NULL) 615 CurrentState = state_OrdChr; 616 else { 617 // Issue a warning: invalid format conversion. 618 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 619 LastConversionIdx+1); 620 621 Diag(Loc, diag::warn_printf_invalid_conversion, 622 std::string(Str+LastConversionIdx, 623 Str+std::min(LastConversionIdx+2, StrLen)), 624 Fn->getSourceRange()); 625 } 626 ++numConversions; 627 break; 628 629 // Handle "%%" 630 case '%': 631 // Sanity check: Was the first "%" character the previous one? 632 // If not, we will assume that we have a malformed format 633 // conversion, and that the current "%" character is the start 634 // of a new conversion. 635 if (StrIdx - LastConversionIdx == 1) 636 CurrentState = state_OrdChr; 637 else { 638 // Issue a warning: invalid format conversion. 639 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 640 LastConversionIdx+1); 641 642 Diag(Loc, diag::warn_printf_invalid_conversion, 643 std::string(Str+LastConversionIdx, Str+StrIdx), 644 Fn->getSourceRange()); 645 646 // This conversion is broken. Advance to the next format 647 // conversion. 648 LastConversionIdx = StrIdx; 649 ++numConversions; 650 } 651 break; 652 653 default: 654 // This case catches all other characters: flags, widths, etc. 655 // We should eventually process those as well. 656 break; 657 } 658 } 659 660 if (CurrentState == state_Conversion) { 661 // Issue a warning: invalid format conversion. 662 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 663 LastConversionIdx+1); 664 665 Diag(Loc, diag::warn_printf_invalid_conversion, 666 std::string(Str+LastConversionIdx, 667 Str+std::min(LastConversionIdx+2, StrLen)), 668 Fn->getSourceRange()); 669 return; 670 } 671 672 if (!HasVAListArg) { 673 // CHECK: Does the number of format conversions exceed the number 674 // of data arguments? 675 if (numConversions > numDataArgs) { 676 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 677 LastConversionIdx); 678 679 Diag(Loc, diag::warn_printf_insufficient_data_args, 680 Fn->getSourceRange()); 681 } 682 // CHECK: Does the number of data arguments exceed the number of 683 // format conversions in the format string? 684 else if (numConversions < numDataArgs) 685 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 686 diag::warn_printf_too_many_data_args, Fn->getSourceRange()); 687 } 688} 689 690//===--- CHECK: Return Address of Stack Variable --------------------------===// 691 692static DeclRefExpr* EvalVal(Expr *E); 693static DeclRefExpr* EvalAddr(Expr* E); 694 695/// CheckReturnStackAddr - Check if a return statement returns the address 696/// of a stack variable. 697void 698Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 699 SourceLocation ReturnLoc) { 700 701 // Perform checking for returned stack addresses. 702 if (lhsType->isPointerType()) { 703 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 704 Diag(DR->getLocStart(), diag::warn_ret_stack_addr, 705 DR->getDecl()->getIdentifier()->getName(), 706 RetValExp->getSourceRange()); 707 } 708 // Perform checking for stack values returned by reference. 709 else if (lhsType->isReferenceType()) { 710 // Check for an implicit cast to a reference. 711 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp)) 712 if (DeclRefExpr *DR = EvalVal(I->getSubExpr())) 713 Diag(DR->getLocStart(), diag::warn_ret_stack_ref, 714 DR->getDecl()->getIdentifier()->getName(), 715 RetValExp->getSourceRange()); 716 } 717} 718 719/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 720/// check if the expression in a return statement evaluates to an address 721/// to a location on the stack. The recursion is used to traverse the 722/// AST of the return expression, with recursion backtracking when we 723/// encounter a subexpression that (1) clearly does not lead to the address 724/// of a stack variable or (2) is something we cannot determine leads to 725/// the address of a stack variable based on such local checking. 726/// 727/// EvalAddr processes expressions that are pointers that are used as 728/// references (and not L-values). EvalVal handles all other values. 729/// At the base case of the recursion is a check for a DeclRefExpr* in 730/// the refers to a stack variable. 731/// 732/// This implementation handles: 733/// 734/// * pointer-to-pointer casts 735/// * implicit conversions from array references to pointers 736/// * taking the address of fields 737/// * arbitrary interplay between "&" and "*" operators 738/// * pointer arithmetic from an address of a stack variable 739/// * taking the address of an array element where the array is on the stack 740static DeclRefExpr* EvalAddr(Expr *E) { 741 // We should only be called for evaluating pointer expressions. 742 assert((E->getType()->isPointerType() || 743 E->getType()->isObjCQualifiedIdType()) && 744 "EvalAddr only works on pointers"); 745 746 // Our "symbolic interpreter" is just a dispatch off the currently 747 // viewed AST node. We then recursively traverse the AST by calling 748 // EvalAddr and EvalVal appropriately. 749 switch (E->getStmtClass()) { 750 case Stmt::ParenExprClass: 751 // Ignore parentheses. 752 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 753 754 case Stmt::UnaryOperatorClass: { 755 // The only unary operator that make sense to handle here 756 // is AddrOf. All others don't make sense as pointers. 757 UnaryOperator *U = cast<UnaryOperator>(E); 758 759 if (U->getOpcode() == UnaryOperator::AddrOf) 760 return EvalVal(U->getSubExpr()); 761 else 762 return NULL; 763 } 764 765 case Stmt::BinaryOperatorClass: { 766 // Handle pointer arithmetic. All other binary operators are not valid 767 // in this context. 768 BinaryOperator *B = cast<BinaryOperator>(E); 769 BinaryOperator::Opcode op = B->getOpcode(); 770 771 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 772 return NULL; 773 774 Expr *Base = B->getLHS(); 775 776 // Determine which argument is the real pointer base. It could be 777 // the RHS argument instead of the LHS. 778 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 779 780 assert (Base->getType()->isPointerType()); 781 return EvalAddr(Base); 782 } 783 784 // For conditional operators we need to see if either the LHS or RHS are 785 // valid DeclRefExpr*s. If one of them is valid, we return it. 786 case Stmt::ConditionalOperatorClass: { 787 ConditionalOperator *C = cast<ConditionalOperator>(E); 788 789 // Handle the GNU extension for missing LHS. 790 if (Expr *lhsExpr = C->getLHS()) 791 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 792 return LHS; 793 794 return EvalAddr(C->getRHS()); 795 } 796 797 // For implicit casts, we need to handle conversions from arrays to 798 // pointer values, and implicit pointer-to-pointer conversions. 799 case Stmt::ImplicitCastExprClass: { 800 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E); 801 Expr* SubExpr = IE->getSubExpr(); 802 803 if (SubExpr->getType()->isPointerType() || 804 SubExpr->getType()->isObjCQualifiedIdType()) 805 return EvalAddr(SubExpr); 806 else 807 return EvalVal(SubExpr); 808 } 809 810 // For casts, we handle pointer-to-pointer conversions (which 811 // is essentially a no-op from our mini-interpreter's standpoint). 812 // For other casts we abort. 813 case Stmt::CastExprClass: { 814 CastExpr *C = cast<CastExpr>(E); 815 Expr *SubExpr = C->getSubExpr(); 816 817 if (SubExpr->getType()->isPointerType()) 818 return EvalAddr(SubExpr); 819 else 820 return NULL; 821 } 822 823 // C++ casts. For dynamic casts, static casts, and const casts, we 824 // are always converting from a pointer-to-pointer, so we just blow 825 // through the cast. In the case the dynamic cast doesn't fail 826 // (and return NULL), we take the conservative route and report cases 827 // where we return the address of a stack variable. For Reinterpre 828 case Stmt::CXXCastExprClass: { 829 CXXCastExpr *C = cast<CXXCastExpr>(E); 830 831 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) { 832 Expr *S = C->getSubExpr(); 833 if (S->getType()->isPointerType()) 834 return EvalAddr(S); 835 else 836 return NULL; 837 } 838 else 839 return EvalAddr(C->getSubExpr()); 840 } 841 842 // Everything else: we simply don't reason about them. 843 default: 844 return NULL; 845 } 846} 847 848 849/// EvalVal - This function is complements EvalAddr in the mutual recursion. 850/// See the comments for EvalAddr for more details. 851static DeclRefExpr* EvalVal(Expr *E) { 852 853 // We should only be called for evaluating non-pointer expressions, or 854 // expressions with a pointer type that are not used as references but instead 855 // are l-values (e.g., DeclRefExpr with a pointer type). 856 857 // Our "symbolic interpreter" is just a dispatch off the currently 858 // viewed AST node. We then recursively traverse the AST by calling 859 // EvalAddr and EvalVal appropriately. 860 switch (E->getStmtClass()) { 861 case Stmt::DeclRefExprClass: { 862 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 863 // at code that refers to a variable's name. We check if it has local 864 // storage within the function, and if so, return the expression. 865 DeclRefExpr *DR = cast<DeclRefExpr>(E); 866 867 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 868 if(V->hasLocalStorage()) return DR; 869 870 return NULL; 871 } 872 873 case Stmt::ParenExprClass: 874 // Ignore parentheses. 875 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 876 877 case Stmt::UnaryOperatorClass: { 878 // The only unary operator that make sense to handle here 879 // is Deref. All others don't resolve to a "name." This includes 880 // handling all sorts of rvalues passed to a unary operator. 881 UnaryOperator *U = cast<UnaryOperator>(E); 882 883 if (U->getOpcode() == UnaryOperator::Deref) 884 return EvalAddr(U->getSubExpr()); 885 886 return NULL; 887 } 888 889 case Stmt::ArraySubscriptExprClass: { 890 // Array subscripts are potential references to data on the stack. We 891 // retrieve the DeclRefExpr* for the array variable if it indeed 892 // has local storage. 893 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 894 } 895 896 case Stmt::ConditionalOperatorClass: { 897 // For conditional operators we need to see if either the LHS or RHS are 898 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 899 ConditionalOperator *C = cast<ConditionalOperator>(E); 900 901 // Handle the GNU extension for missing LHS. 902 if (Expr *lhsExpr = C->getLHS()) 903 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 904 return LHS; 905 906 return EvalVal(C->getRHS()); 907 } 908 909 // Accesses to members are potential references to data on the stack. 910 case Stmt::MemberExprClass: { 911 MemberExpr *M = cast<MemberExpr>(E); 912 913 // Check for indirect access. We only want direct field accesses. 914 if (!M->isArrow()) 915 return EvalVal(M->getBase()); 916 else 917 return NULL; 918 } 919 920 // Everything else: we simply don't reason about them. 921 default: 922 return NULL; 923 } 924} 925 926//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 927 928/// Check for comparisons of floating point operands using != and ==. 929/// Issue a warning if these are no self-comparisons, as they are not likely 930/// to do what the programmer intended. 931void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 932 bool EmitWarning = true; 933 934 Expr* LeftExprSansParen = lex->IgnoreParens(); 935 Expr* RightExprSansParen = rex->IgnoreParens(); 936 937 // Special case: check for x == x (which is OK). 938 // Do not emit warnings for such cases. 939 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 940 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 941 if (DRL->getDecl() == DRR->getDecl()) 942 EmitWarning = false; 943 944 945 // Special case: check for comparisons against literals that can be exactly 946 // represented by APFloat. In such cases, do not emit a warning. This 947 // is a heuristic: often comparison against such literals are used to 948 // detect if a value in a variable has not changed. This clearly can 949 // lead to false negatives. 950 if (EmitWarning) { 951 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 952 if (FLL->isExact()) 953 EmitWarning = false; 954 } 955 else 956 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 957 if (FLR->isExact()) 958 EmitWarning = false; 959 } 960 } 961 962 // Check for comparisons with builtin types. 963 if (EmitWarning) 964 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 965 if (isCallBuiltin(CL)) 966 EmitWarning = false; 967 968 if (EmitWarning) 969 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 970 if (isCallBuiltin(CR)) 971 EmitWarning = false; 972 973 // Emit the diagnostic. 974 if (EmitWarning) 975 Diag(loc, diag::warn_floatingpoint_eq, 976 lex->getSourceRange(),rex->getSourceRange()); 977} 978