// SemaChecking.cpp revision b77792eabf5882cf9af8cc810599b20432fda6c2
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/Decl.h" 18#include "clang/AST/Expr.h" 19#include "clang/AST/ExprCXX.h" 20#include "clang/AST/ExprObjC.h" 21#include "clang/Lex/Preprocessor.h" 22#include "clang/Lex/LiteralSupport.h" 23#include "clang/Basic/SourceManager.h" 24#include "clang/Basic/Diagnostic.h" 25#include "clang/Basic/LangOptions.h" 26#include "clang/Basic/TargetInfo.h" 27#include "llvm/ADT/OwningPtr.h" 28#include "llvm/ADT/SmallString.h" 29#include "llvm/ADT/StringExtras.h" 30#include "SemaUtil.h" 31using namespace clang; 32 33/// CheckFunctionCall - Check a direct function call for various correctness 34/// and safety properties not strictly enforced by the C type system. 35Action::ExprResult 36Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 37 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 38 // Get the IdentifierInfo* for the called function. 
39 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 40 41 switch (FnInfo->getBuiltinID()) { 42 case Builtin::BI__builtin___CFStringMakeConstantString: 43 assert(TheCall->getNumArgs() == 1 && 44 "Wrong # arguments to builtin CFStringMakeConstantString"); 45 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 46 return true; 47 return TheCall.take(); 48 case Builtin::BI__builtin_stdarg_start: 49 case Builtin::BI__builtin_va_start: 50 if (SemaBuiltinVAStart(TheCall.get())) 51 return true; 52 return TheCall.take(); 53 case Builtin::BI__builtin_isgreater: 54 case Builtin::BI__builtin_isgreaterequal: 55 case Builtin::BI__builtin_isless: 56 case Builtin::BI__builtin_islessequal: 57 case Builtin::BI__builtin_islessgreater: 58 case Builtin::BI__builtin_isunordered: 59 if (SemaBuiltinUnorderedCompare(TheCall.get())) 60 return true; 61 return TheCall.take(); 62 case Builtin::BI__builtin_return_address: 63 case Builtin::BI__builtin_frame_address: 64 if (SemaBuiltinStackAddress(TheCall.get())) 65 return true; 66 return TheCall.take(); 67 case Builtin::BI__builtin_shufflevector: 68 return SemaBuiltinShuffleVector(TheCall.get()); 69 case Builtin::BI__builtin_prefetch: 70 if (SemaBuiltinPrefetch(TheCall.get())) 71 return true; 72 return TheCall.take(); 73 } 74 75 // Search the KnownFunctionIDs for the identifier. 76 unsigned i = 0, e = id_num_known_functions; 77 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 78 if (i == e) return TheCall.take(); 79 80 // Printf checking. 81 if (i <= id_vprintf) { 82 // Retrieve the index of the format string parameter and determine 83 // if the function is passed a va_arg argument. 
84 unsigned format_idx = 0; 85 bool HasVAListArg = false; 86 87 switch (i) { 88 default: assert(false && "No format string argument index."); 89 case id_printf: format_idx = 0; break; 90 case id_fprintf: format_idx = 1; break; 91 case id_sprintf: format_idx = 1; break; 92 case id_snprintf: format_idx = 2; break; 93 case id_asprintf: format_idx = 1; break; 94 case id_NSLog: format_idx = 0; break; 95 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 96 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 97 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 98 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 99 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 100 } 101 102 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 103 } 104 105 return TheCall.take(); 106} 107 108/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 109/// CFString constructor is correct 110bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 111 Arg = Arg->IgnoreParenCasts(); 112 113 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 114 115 if (!Literal || Literal->isWide()) { 116 Diag(Arg->getLocStart(), 117 diag::err_cfstring_literal_not_string_constant, 118 Arg->getSourceRange()); 119 return true; 120 } 121 122 const char *Data = Literal->getStrData(); 123 unsigned Length = Literal->getByteLength(); 124 125 for (unsigned i = 0; i < Length; ++i) { 126 if (!isascii(Data[i])) { 127 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 128 diag::warn_cfstring_literal_contains_non_ascii_character, 129 Arg->getSourceRange()); 130 break; 131 } 132 133 if (!Data[i]) { 134 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 135 diag::warn_cfstring_literal_contains_nul_character, 136 Arg->getSourceRange()); 137 break; 138 } 139 } 140 141 return false; 142} 143 144/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 
145/// Emit an error and return true on failure, return false on success. 146bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 147 Expr *Fn = TheCall->getCallee(); 148 if (TheCall->getNumArgs() > 2) { 149 Diag(TheCall->getArg(2)->getLocStart(), 150 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), 151 SourceRange(TheCall->getArg(2)->getLocStart(), 152 (*(TheCall->arg_end()-1))->getLocEnd())); 153 return true; 154 } 155 156 // Determine whether the current function is variadic or not. 157 bool isVariadic; 158 if (getCurFunctionDecl()) 159 isVariadic = 160 cast<FunctionTypeProto>(getCurFunctionDecl()->getType())->isVariadic(); 161 else 162 isVariadic = getCurMethodDecl()->isVariadic(); 163 164 if (!isVariadic) { 165 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 166 return true; 167 } 168 169 // Verify that the second argument to the builtin is the last argument of the 170 // current function or method. 171 bool SecondArgIsLastNamedArgument = false; 172 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 173 174 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 175 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 176 // FIXME: This isn't correct for methods (results in bogus warning). 177 // Get the last formal in the current function. 178 const ParmVarDecl *LastArg; 179 if (getCurFunctionDecl()) 180 LastArg = *(getCurFunctionDecl()->param_end()-1); 181 else 182 LastArg = *(getCurMethodDecl()->param_end()-1); 183 SecondArgIsLastNamedArgument = PV == LastArg; 184 } 185 } 186 187 if (!SecondArgIsLastNamedArgument) 188 Diag(TheCall->getArg(1)->getLocStart(), 189 diag::warn_second_parameter_of_va_start_not_last_named_argument); 190 return false; 191} 192 193/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 194/// friends. This is declared to take (...), so we have to check everything. 
195bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 196 if (TheCall->getNumArgs() < 2) 197 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); 198 if (TheCall->getNumArgs() > 2) 199 return Diag(TheCall->getArg(2)->getLocStart(), 200 diag::err_typecheck_call_too_many_args, 201 SourceRange(TheCall->getArg(2)->getLocStart(), 202 (*(TheCall->arg_end()-1))->getLocEnd())); 203 204 Expr *OrigArg0 = TheCall->getArg(0); 205 Expr *OrigArg1 = TheCall->getArg(1); 206 207 // Do standard promotions between the two arguments, returning their common 208 // type. 209 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 210 211 // If the common type isn't a real floating type, then the arguments were 212 // invalid for this operation. 213 if (!Res->isRealFloatingType()) 214 return Diag(OrigArg0->getLocStart(), 215 diag::err_typecheck_call_invalid_ordered_compare, 216 OrigArg0->getType().getAsString(), 217 OrigArg1->getType().getAsString(), 218 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); 219 220 return false; 221} 222 223bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 224 // The signature for these builtins is exact; the only thing we need 225 // to check is that the argument is a constant. 226 SourceLocation Loc; 227 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) { 228 return Diag(Loc, diag::err_stack_const_level, TheCall->getSourceRange()); 229 } 230 return false; 231} 232 233/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 234// This is declared to take (...), so we have to check everything. 
235Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 236 if (TheCall->getNumArgs() < 3) 237 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 238 TheCall->getSourceRange()); 239 240 QualType FAType = TheCall->getArg(0)->getType(); 241 QualType SAType = TheCall->getArg(1)->getType(); 242 243 if (!FAType->isVectorType() || !SAType->isVectorType()) { 244 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector, 245 SourceRange(TheCall->getArg(0)->getLocStart(), 246 TheCall->getArg(1)->getLocEnd())); 247 return true; 248 } 249 250 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 251 Context.getCanonicalType(SAType).getUnqualifiedType()) { 252 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector, 253 SourceRange(TheCall->getArg(0)->getLocStart(), 254 TheCall->getArg(1)->getLocEnd())); 255 return true; 256 } 257 258 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 259 if (TheCall->getNumArgs() != numElements+2) { 260 if (TheCall->getNumArgs() < numElements+2) 261 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 262 TheCall->getSourceRange()); 263 else 264 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 265 TheCall->getSourceRange()); 266 return true; 267 } 268 269 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 270 llvm::APSInt Result(32); 271 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) { 272 Diag(TheCall->getLocStart(), 273 diag::err_shufflevector_nonconstant_argument, 274 TheCall->getArg(i)->getSourceRange()); 275 return true; 276 } 277 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) { 278 Diag(TheCall->getLocStart(), 279 diag::err_shufflevector_argument_too_large, 280 TheCall->getArg(i)->getSourceRange()); 281 return true; 282 } 283 } 284 285 llvm::SmallVector<Expr*, 32> exprs; 286 287 for (unsigned i = 0; i < TheCall->getNumArgs(); i++) { 288 
exprs.push_back(TheCall->getArg(i)); 289 TheCall->setArg(i, 0); 290 } 291 292 ShuffleVectorExpr* E = new ShuffleVectorExpr( 293 exprs.begin(), numElements+2, FAType, 294 TheCall->getCallee()->getLocStart(), 295 TheCall->getRParenLoc()); 296 297 return E; 298} 299 300/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 301// This is declared to take (const void*, ...) and can take two 302// optional constant int args. 303bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 304 unsigned numArgs = TheCall->getNumArgs(); 305 bool res = false; 306 307 if (numArgs > 3) { 308 res |= Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 309 TheCall->getSourceRange()); 310 } 311 312 // Argument 0 is checked for us and the remaining arguments must be 313 // constant integers. 314 for (unsigned i=1; i<numArgs; ++i) { 315 Expr *Arg = TheCall->getArg(i); 316 QualType RWType = Arg->getType(); 317 318 const BuiltinType *BT = RWType->getAsBuiltinType(); 319 // FIXME: 32 is wrong, needs to be proper width of Int 320 llvm::APSInt Result(32); 321 if (!BT || BT->getKind() != BuiltinType::Int || 322 !Arg->isIntegerConstantExpr(Result, Context)) { 323 if (Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument, 324 SourceRange(Arg->getLocStart(), Arg->getLocEnd()))) { 325 res = true; 326 continue; 327 } 328 } 329 330 // FIXME: gcc issues a warning and rewrites these to 0. These 331 // seems especially odd for the third argument since the default 332 // is 3. 
333 if (i==1) { 334 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 335 res |= Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_range, 336 "0", "1", 337 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 338 } else { 339 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 340 res |= Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_range, 341 "0", "3", 342 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 343 } 344 } 345 346 return res; 347} 348 349/// CheckPrintfArguments - Check calls to printf (and similar functions) for 350/// correct use of format strings. 351/// 352/// HasVAListArg - A predicate indicating whether the printf-like 353/// function is passed an explicit va_arg argument (e.g., vprintf) 354/// 355/// format_idx - The index into Args for the format string. 356/// 357/// Improper format strings to functions in the printf family can be 358/// the source of bizarre bugs and very serious security holes. A 359/// good source of information is available in the following paper 360/// (which includes additional references): 361/// 362/// FormatGuard: Automatic Protection From printf Format String 363/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 364/// 365/// Functionality implemented: 366/// 367/// We can statically check the following properties for string 368/// literal format strings for non v.*printf functions (where the 369/// arguments are passed directly): 370// 371/// (1) Are the number of format conversions equal to the number of 372/// data arguments? 373/// 374/// (2) Does each format conversion correctly match the type of the 375/// corresponding data argument? (TODO) 376/// 377/// Moreover, for all printf functions we can: 378/// 379/// (3) Check for a missing format string (when not caught by type checking). 380/// 381/// (4) Check for no-operation flags; e.g. 
using "#" with format 382/// conversion 'c' (TODO) 383/// 384/// (5) Check the use of '%n', a major source of security holes. 385/// 386/// (6) Check for malformed format conversions that don't specify anything. 387/// 388/// (7) Check for empty format strings. e.g: printf(""); 389/// 390/// (8) Check that the format string is a wide literal. 391/// 392/// (9) Also check the arguments of functions with the __format__ attribute. 393/// (TODO). 394/// 395/// All of these checks can be done by parsing the format string. 396/// 397/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 398void 399Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 400 unsigned format_idx) { 401 Expr *Fn = TheCall->getCallee(); 402 403 // CHECK: printf-like function is called with no format string. 404 if (format_idx >= TheCall->getNumArgs()) { 405 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, 406 Fn->getSourceRange()); 407 return; 408 } 409 410 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 411 412 // CHECK: format string is not a string literal. 413 // 414 // Dynamically generated format strings are difficult to 415 // automatically vet at compile time. Requiring that format strings 416 // are string literals: (1) permits the checking of format strings by 417 // the compiler and thereby (2) can practically remove the source of 418 // many format string exploits. 419 420 // Format string can be either ObjC string (e.g. @"%d") or 421 // C string (e.g. "%d") 422 // ObjC string uses the same format specifiers as C string, so we can use 423 // the same format string checking logic for both ObjC and C strings. 
424 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 425 StringLiteral *FExpr = NULL; 426 427 if(ObjCFExpr != NULL) 428 FExpr = ObjCFExpr->getString(); 429 else 430 FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 431 432 if (FExpr == NULL) { 433 // For vprintf* functions (i.e., HasVAListArg==true), we add a 434 // special check to see if the format string is a function parameter 435 // of the function calling the printf function. If the function 436 // has an attribute indicating it is a printf-like function, then we 437 // should suppress warnings concerning non-literals being used in a call 438 // to a vprintf function. For example: 439 // 440 // void 441 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 442 // va_list ap; 443 // va_start(ap, fmt); 444 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 445 // ... 446 // 447 // 448 // FIXME: We don't have full attribute support yet, so just check to see 449 // if the argument is a DeclRefExpr that references a parameter. We'll 450 // add proper support for checking the attribute later. 451 if (HasVAListArg) 452 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 453 if (isa<ParmVarDecl>(DR->getDecl())) 454 return; 455 456 Diag(TheCall->getArg(format_idx)->getLocStart(), 457 diag::warn_printf_not_string_constant, 458 OrigFormatExpr->getSourceRange()); 459 return; 460 } 461 462 // CHECK: is the format string a wide literal? 463 if (FExpr->isWide()) { 464 Diag(FExpr->getLocStart(), 465 diag::warn_printf_format_string_is_wide_literal, 466 OrigFormatExpr->getSourceRange()); 467 return; 468 } 469 470 // Str - The format string. NOTE: this is NOT null-terminated! 471 const char * const Str = FExpr->getStrData(); 472 473 // CHECK: empty format string? 
474 const unsigned StrLen = FExpr->getByteLength(); 475 476 if (StrLen == 0) { 477 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, 478 OrigFormatExpr->getSourceRange()); 479 return; 480 } 481 482 // We process the format string using a binary state machine. The 483 // current state is stored in CurrentState. 484 enum { 485 state_OrdChr, 486 state_Conversion 487 } CurrentState = state_OrdChr; 488 489 // numConversions - The number of conversions seen so far. This is 490 // incremented as we traverse the format string. 491 unsigned numConversions = 0; 492 493 // numDataArgs - The number of data arguments after the format 494 // string. This can only be determined for non vprintf-like 495 // functions. For those functions, this value is 1 (the sole 496 // va_arg argument). 497 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 498 499 // Inspect the format string. 500 unsigned StrIdx = 0; 501 502 // LastConversionIdx - Index within the format string where we last saw 503 // a '%' character that starts a new format conversion. 504 unsigned LastConversionIdx = 0; 505 506 for (; StrIdx < StrLen; ++StrIdx) { 507 508 // Is the number of detected conversion conversions greater than 509 // the number of matching data arguments? If so, stop. 510 if (!HasVAListArg && numConversions > numDataArgs) break; 511 512 // Handle "\0" 513 if (Str[StrIdx] == '\0') { 514 // The string returned by getStrData() is not null-terminated, 515 // so the presence of a null character is likely an error. 516 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 517 diag::warn_printf_format_string_contains_null_char, 518 OrigFormatExpr->getSourceRange()); 519 return; 520 } 521 522 // Ordinary characters (not processing a format conversion). 523 if (CurrentState == state_OrdChr) { 524 if (Str[StrIdx] == '%') { 525 CurrentState = state_Conversion; 526 LastConversionIdx = StrIdx; 527 } 528 continue; 529 } 530 531 // Seen '%'. Now processing a format conversion. 
532 switch (Str[StrIdx]) { 533 // Handle dynamic precision or width specifier. 534 case '*': { 535 ++numConversions; 536 537 if (!HasVAListArg && numConversions > numDataArgs) { 538 SourceLocation Loc = FExpr->getLocStart(); 539 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 540 541 if (Str[StrIdx-1] == '.') 542 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, 543 OrigFormatExpr->getSourceRange()); 544 else 545 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, 546 OrigFormatExpr->getSourceRange()); 547 548 // Don't do any more checking. We'll just emit spurious errors. 549 return; 550 } 551 552 // Perform type checking on width/precision specifier. 553 Expr *E = TheCall->getArg(format_idx+numConversions); 554 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 555 if (BT->getKind() == BuiltinType::Int) 556 break; 557 558 SourceLocation Loc = 559 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 560 561 if (Str[StrIdx-1] == '.') 562 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, 563 E->getType().getAsString(), E->getSourceRange()); 564 else 565 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, 566 E->getType().getAsString(), E->getSourceRange()); 567 568 break; 569 } 570 571 // Characters which can terminate a format conversion 572 // (e.g. "%d"). Characters that specify length modifiers or 573 // other flags are handled by the default case below. 574 // 575 // FIXME: additional checks will go into the following cases. 576 case 'i': 577 case 'd': 578 case 'o': 579 case 'u': 580 case 'x': 581 case 'X': 582 case 'D': 583 case 'O': 584 case 'U': 585 case 'e': 586 case 'E': 587 case 'f': 588 case 'F': 589 case 'g': 590 case 'G': 591 case 'a': 592 case 'A': 593 case 'c': 594 case 'C': 595 case 'S': 596 case 's': 597 case 'p': 598 ++numConversions; 599 CurrentState = state_OrdChr; 600 break; 601 602 // CHECK: Are we using "%n"? Issue a warning. 
603 case 'n': { 604 ++numConversions; 605 CurrentState = state_OrdChr; 606 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 607 LastConversionIdx+1); 608 609 Diag(Loc, diag::warn_printf_write_back, OrigFormatExpr->getSourceRange()); 610 break; 611 } 612 613 // Handle "%@" 614 case '@': 615 // %@ is allowed in ObjC format strings only. 616 if(ObjCFExpr != NULL) 617 CurrentState = state_OrdChr; 618 else { 619 // Issue a warning: invalid format conversion. 620 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 621 LastConversionIdx+1); 622 623 Diag(Loc, diag::warn_printf_invalid_conversion, 624 std::string(Str+LastConversionIdx, 625 Str+std::min(LastConversionIdx+2, StrLen)), 626 OrigFormatExpr->getSourceRange()); 627 } 628 ++numConversions; 629 break; 630 631 // Handle "%%" 632 case '%': 633 // Sanity check: Was the first "%" character the previous one? 634 // If not, we will assume that we have a malformed format 635 // conversion, and that the current "%" character is the start 636 // of a new conversion. 637 if (StrIdx - LastConversionIdx == 1) 638 CurrentState = state_OrdChr; 639 else { 640 // Issue a warning: invalid format conversion. 641 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 642 LastConversionIdx+1); 643 644 Diag(Loc, diag::warn_printf_invalid_conversion, 645 std::string(Str+LastConversionIdx, Str+StrIdx), 646 OrigFormatExpr->getSourceRange()); 647 648 // This conversion is broken. Advance to the next format 649 // conversion. 650 LastConversionIdx = StrIdx; 651 ++numConversions; 652 } 653 break; 654 655 default: 656 // This case catches all other characters: flags, widths, etc. 657 // We should eventually process those as well. 658 break; 659 } 660 } 661 662 if (CurrentState == state_Conversion) { 663 // Issue a warning: invalid format conversion. 
664 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 665 LastConversionIdx+1); 666 667 Diag(Loc, diag::warn_printf_invalid_conversion, 668 std::string(Str+LastConversionIdx, 669 Str+std::min(LastConversionIdx+2, StrLen)), 670 OrigFormatExpr->getSourceRange()); 671 return; 672 } 673 674 if (!HasVAListArg) { 675 // CHECK: Does the number of format conversions exceed the number 676 // of data arguments? 677 if (numConversions > numDataArgs) { 678 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 679 LastConversionIdx); 680 681 Diag(Loc, diag::warn_printf_insufficient_data_args, 682 OrigFormatExpr->getSourceRange()); 683 } 684 // CHECK: Does the number of data arguments exceed the number of 685 // format conversions in the format string? 686 else if (numConversions < numDataArgs) 687 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 688 diag::warn_printf_too_many_data_args, 689 OrigFormatExpr->getSourceRange()); 690 } 691} 692 693//===--- CHECK: Return Address of Stack Variable --------------------------===// 694 695static DeclRefExpr* EvalVal(Expr *E); 696static DeclRefExpr* EvalAddr(Expr* E); 697 698/// CheckReturnStackAddr - Check if a return statement returns the address 699/// of a stack variable. 700void 701Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 702 SourceLocation ReturnLoc) { 703 704 // Perform checking for returned stack addresses. 705 if (lhsType->isPointerType()) { 706 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 707 Diag(DR->getLocStart(), diag::warn_ret_stack_addr, 708 DR->getDecl()->getIdentifier()->getName(), 709 RetValExp->getSourceRange()); 710 } 711 // Perform checking for stack values returned by reference. 712 else if (lhsType->isReferenceType()) { 713 // Check for an implicit cast to a reference. 
714 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp)) 715 if (DeclRefExpr *DR = EvalVal(I->getSubExpr())) 716 Diag(DR->getLocStart(), diag::warn_ret_stack_ref, 717 DR->getDecl()->getIdentifier()->getName(), 718 RetValExp->getSourceRange()); 719 } 720} 721 722/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 723/// check if the expression in a return statement evaluates to an address 724/// to a location on the stack. The recursion is used to traverse the 725/// AST of the return expression, with recursion backtracking when we 726/// encounter a subexpression that (1) clearly does not lead to the address 727/// of a stack variable or (2) is something we cannot determine leads to 728/// the address of a stack variable based on such local checking. 729/// 730/// EvalAddr processes expressions that are pointers that are used as 731/// references (and not L-values). EvalVal handles all other values. 732/// At the base case of the recursion is a check for a DeclRefExpr* in 733/// the refers to a stack variable. 734/// 735/// This implementation handles: 736/// 737/// * pointer-to-pointer casts 738/// * implicit conversions from array references to pointers 739/// * taking the address of fields 740/// * arbitrary interplay between "&" and "*" operators 741/// * pointer arithmetic from an address of a stack variable 742/// * taking the address of an array element where the array is on the stack 743static DeclRefExpr* EvalAddr(Expr *E) { 744 // We should only be called for evaluating pointer expressions. 745 assert((E->getType()->isPointerType() || 746 E->getType()->isObjCQualifiedIdType()) && 747 "EvalAddr only works on pointers"); 748 749 // Our "symbolic interpreter" is just a dispatch off the currently 750 // viewed AST node. We then recursively traverse the AST by calling 751 // EvalAddr and EvalVal appropriately. 752 switch (E->getStmtClass()) { 753 case Stmt::ParenExprClass: 754 // Ignore parentheses. 
755 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 756 757 case Stmt::UnaryOperatorClass: { 758 // The only unary operator that make sense to handle here 759 // is AddrOf. All others don't make sense as pointers. 760 UnaryOperator *U = cast<UnaryOperator>(E); 761 762 if (U->getOpcode() == UnaryOperator::AddrOf) 763 return EvalVal(U->getSubExpr()); 764 else 765 return NULL; 766 } 767 768 case Stmt::BinaryOperatorClass: { 769 // Handle pointer arithmetic. All other binary operators are not valid 770 // in this context. 771 BinaryOperator *B = cast<BinaryOperator>(E); 772 BinaryOperator::Opcode op = B->getOpcode(); 773 774 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 775 return NULL; 776 777 Expr *Base = B->getLHS(); 778 779 // Determine which argument is the real pointer base. It could be 780 // the RHS argument instead of the LHS. 781 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 782 783 assert (Base->getType()->isPointerType()); 784 return EvalAddr(Base); 785 } 786 787 // For conditional operators we need to see if either the LHS or RHS are 788 // valid DeclRefExpr*s. If one of them is valid, we return it. 789 case Stmt::ConditionalOperatorClass: { 790 ConditionalOperator *C = cast<ConditionalOperator>(E); 791 792 // Handle the GNU extension for missing LHS. 793 if (Expr *lhsExpr = C->getLHS()) 794 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 795 return LHS; 796 797 return EvalAddr(C->getRHS()); 798 } 799 800 // For implicit casts, we need to handle conversions from arrays to 801 // pointer values, and implicit pointer-to-pointer conversions. 
802 case Stmt::ImplicitCastExprClass: { 803 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E); 804 Expr* SubExpr = IE->getSubExpr(); 805 806 if (SubExpr->getType()->isPointerType() || 807 SubExpr->getType()->isObjCQualifiedIdType()) 808 return EvalAddr(SubExpr); 809 else 810 return EvalVal(SubExpr); 811 } 812 813 // For casts, we handle pointer-to-pointer conversions (which 814 // is essentially a no-op from our mini-interpreter's standpoint). 815 // For other casts we abort. 816 case Stmt::CastExprClass: { 817 CastExpr *C = cast<CastExpr>(E); 818 Expr *SubExpr = C->getSubExpr(); 819 820 if (SubExpr->getType()->isPointerType()) 821 return EvalAddr(SubExpr); 822 else 823 return NULL; 824 } 825 826 // C++ casts. For dynamic casts, static casts, and const casts, we 827 // are always converting from a pointer-to-pointer, so we just blow 828 // through the cast. In the case the dynamic cast doesn't fail 829 // (and return NULL), we take the conservative route and report cases 830 // where we return the address of a stack variable. For Reinterpre 831 case Stmt::CXXCastExprClass: { 832 CXXCastExpr *C = cast<CXXCastExpr>(E); 833 834 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) { 835 Expr *S = C->getSubExpr(); 836 if (S->getType()->isPointerType()) 837 return EvalAddr(S); 838 else 839 return NULL; 840 } 841 else 842 return EvalAddr(C->getSubExpr()); 843 } 844 845 // Everything else: we simply don't reason about them. 846 default: 847 return NULL; 848 } 849} 850 851 852/// EvalVal - This function is complements EvalAddr in the mutual recursion. 853/// See the comments for EvalAddr for more details. 854static DeclRefExpr* EvalVal(Expr *E) { 855 856 // We should only be called for evaluating non-pointer expressions, or 857 // expressions with a pointer type that are not used as references but instead 858 // are l-values (e.g., DeclRefExpr with a pointer type). 859 860 // Our "symbolic interpreter" is just a dispatch off the currently 861 // viewed AST node. 
We then recursively traverse the AST by calling 862 // EvalAddr and EvalVal appropriately. 863 switch (E->getStmtClass()) { 864 case Stmt::DeclRefExprClass: { 865 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 866 // at code that refers to a variable's name. We check if it has local 867 // storage within the function, and if so, return the expression. 868 DeclRefExpr *DR = cast<DeclRefExpr>(E); 869 870 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 871 if(V->hasLocalStorage()) return DR; 872 873 return NULL; 874 } 875 876 case Stmt::ParenExprClass: 877 // Ignore parentheses. 878 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 879 880 case Stmt::UnaryOperatorClass: { 881 // The only unary operator that make sense to handle here 882 // is Deref. All others don't resolve to a "name." This includes 883 // handling all sorts of rvalues passed to a unary operator. 884 UnaryOperator *U = cast<UnaryOperator>(E); 885 886 if (U->getOpcode() == UnaryOperator::Deref) 887 return EvalAddr(U->getSubExpr()); 888 889 return NULL; 890 } 891 892 case Stmt::ArraySubscriptExprClass: { 893 // Array subscripts are potential references to data on the stack. We 894 // retrieve the DeclRefExpr* for the array variable if it indeed 895 // has local storage. 896 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 897 } 898 899 case Stmt::ConditionalOperatorClass: { 900 // For conditional operators we need to see if either the LHS or RHS are 901 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 902 ConditionalOperator *C = cast<ConditionalOperator>(E); 903 904 // Handle the GNU extension for missing LHS. 905 if (Expr *lhsExpr = C->getLHS()) 906 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 907 return LHS; 908 909 return EvalVal(C->getRHS()); 910 } 911 912 // Accesses to members are potential references to data on the stack. 913 case Stmt::MemberExprClass: { 914 MemberExpr *M = cast<MemberExpr>(E); 915 916 // Check for indirect access. 
We only want direct field accesses. 917 if (!M->isArrow()) 918 return EvalVal(M->getBase()); 919 else 920 return NULL; 921 } 922 923 // Everything else: we simply don't reason about them. 924 default: 925 return NULL; 926 } 927} 928 929//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 930 931/// Check for comparisons of floating point operands using != and ==. 932/// Issue a warning if these are no self-comparisons, as they are not likely 933/// to do what the programmer intended. 934void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 935 bool EmitWarning = true; 936 937 Expr* LeftExprSansParen = lex->IgnoreParens(); 938 Expr* RightExprSansParen = rex->IgnoreParens(); 939 940 // Special case: check for x == x (which is OK). 941 // Do not emit warnings for such cases. 942 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 943 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 944 if (DRL->getDecl() == DRR->getDecl()) 945 EmitWarning = false; 946 947 948 // Special case: check for comparisons against literals that can be exactly 949 // represented by APFloat. In such cases, do not emit a warning. This 950 // is a heuristic: often comparison against such literals are used to 951 // detect if a value in a variable has not changed. This clearly can 952 // lead to false negatives. 953 if (EmitWarning) { 954 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 955 if (FLL->isExact()) 956 EmitWarning = false; 957 } 958 else 959 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 960 if (FLR->isExact()) 961 EmitWarning = false; 962 } 963 } 964 965 // Check for comparisons with builtin types. 
966 if (EmitWarning) 967 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 968 if (isCallBuiltin(CL)) 969 EmitWarning = false; 970 971 if (EmitWarning) 972 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 973 if (isCallBuiltin(CR)) 974 EmitWarning = false; 975 976 // Emit the diagnostic. 977 if (EmitWarning) 978 Diag(loc, diag::warn_floatingpoint_eq, 979 lex->getSourceRange(),rex->getSourceRange()); 980} 981