SemaChecking.cpp revision 7ff22b259d4d4729f701679e3a7f0e242365e07f
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/Decl.h" 18#include "clang/AST/Expr.h" 19#include "clang/AST/ExprCXX.h" 20#include "clang/AST/ExprObjC.h" 21#include "clang/Lex/Preprocessor.h" 22#include "clang/Lex/LiteralSupport.h" 23#include "clang/Basic/SourceManager.h" 24#include "clang/Basic/Diagnostic.h" 25#include "clang/Basic/LangOptions.h" 26#include "clang/Basic/TargetInfo.h" 27#include "llvm/ADT/OwningPtr.h" 28#include "llvm/ADT/SmallString.h" 29#include "llvm/ADT/StringExtras.h" 30#include "SemaUtil.h" 31using namespace clang; 32 33/// CheckFunctionCall - Check a direct function call for various correctness 34/// and safety properties not strictly enforced by the C type system. 35Action::ExprResult 36Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 37 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 38 // Get the IdentifierInfo* for the called function. 39 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 40 41 switch (FnInfo->getBuiltinID()) { 42 case Builtin::BI__builtin___CFStringMakeConstantString: 43 assert(TheCall->getNumArgs() == 1 && 44 "Wrong # arguments to builtin CFStringMakeConstantString"); 45 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 46 return true; 47 return TheCall.take(); 48 case Builtin::BI__builtin_va_start: 49 if (SemaBuiltinVAStart(TheCall.get())) { 50 return true; 51 } 52 return TheCall.take(); 53 case Builtin::BI__builtin_isgreater: 54 case Builtin::BI__builtin_isgreaterequal: 55 case Builtin::BI__builtin_isless: 56 case Builtin::BI__builtin_islessequal: 57 case Builtin::BI__builtin_islessgreater: 58 case Builtin::BI__builtin_isunordered: 59 if (SemaBuiltinUnorderedCompare(TheCall.get())) 60 return true; 61 return TheCall.take(); 62 case Builtin::BI__builtin_return_address: 63 case Builtin::BI__builtin_frame_address: 64 if (SemaBuiltinStackAddress(TheCall.get())) 65 return true; 66 return TheCall.take(); 67 case Builtin::BI__builtin_shufflevector: 68 return SemaBuiltinShuffleVector(TheCall.get()); 69 } 70 71 // Search the KnownFunctionIDs for the identifier. 72 unsigned i = 0, e = id_num_known_functions; 73 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 74 if (i == e) return TheCall.take(); 75 76 // Printf checking. 77 if (i <= id_vprintf) { 78 // Retrieve the index of the format string parameter and determine 79 // if the function is passed a va_arg argument. 80 unsigned format_idx = 0; 81 bool HasVAListArg = false; 82 83 switch (i) { 84 default: assert(false && "No format string argument index."); 85 case id_printf: format_idx = 0; break; 86 case id_fprintf: format_idx = 1; break; 87 case id_sprintf: format_idx = 1; break; 88 case id_snprintf: format_idx = 2; break; 89 case id_asprintf: format_idx = 1; break; 90 case id_NSLog: format_idx = 0; break; 91 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 92 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 93 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 94 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 95 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 96 } 97 98 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 99 } 100 101 return TheCall.take(); 102} 103 104/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 105/// CFString constructor is correct 106bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 107 Arg = Arg->IgnoreParenCasts(); 108 109 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 110 111 if (!Literal || Literal->isWide()) { 112 Diag(Arg->getLocStart(), 113 diag::err_cfstring_literal_not_string_constant, 114 Arg->getSourceRange()); 115 return true; 116 } 117 118 const char *Data = Literal->getStrData(); 119 unsigned Length = Literal->getByteLength(); 120 121 for (unsigned i = 0; i < Length; ++i) { 122 if (!isascii(Data[i])) { 123 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 124 diag::warn_cfstring_literal_contains_non_ascii_character, 125 Arg->getSourceRange()); 126 break; 127 } 128 129 if (!Data[i]) { 130 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 131 diag::warn_cfstring_literal_contains_nul_character, 132 Arg->getSourceRange()); 133 break; 134 } 135 } 136 137 return false; 138} 139 140/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 141/// Emit an error and return true on failure, return false on success. 142bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 143 Expr *Fn = TheCall->getCallee(); 144 if (TheCall->getNumArgs() > 2) { 145 Diag(TheCall->getArg(2)->getLocStart(), 146 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), 147 SourceRange(TheCall->getArg(2)->getLocStart(), 148 (*(TheCall->arg_end()-1))->getLocEnd())); 149 return true; 150 } 151 152 // Determine whether the current function is variadic or not. 153 bool isVariadic; 154 if (CurFunctionDecl) 155 isVariadic = 156 cast<FunctionTypeProto>(CurFunctionDecl->getType())->isVariadic(); 157 else 158 isVariadic = CurMethodDecl->isVariadic(); 159 160 if (!isVariadic) { 161 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 162 return true; 163 } 164 165 // Verify that the second argument to the builtin is the last argument of the 166 // current function or method. 167 bool SecondArgIsLastNamedArgument = false; 168 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 169 170 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 171 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 172 // FIXME: This isn't correct for methods (results in bogus warning). 173 // Get the last formal in the current function. 174 const ParmVarDecl *LastArg; 175 if (CurFunctionDecl) 176 LastArg = *(CurFunctionDecl->param_end()-1); 177 else 178 LastArg = *(CurMethodDecl->param_end()-1); 179 SecondArgIsLastNamedArgument = PV == LastArg; 180 } 181 } 182 183 if (!SecondArgIsLastNamedArgument) 184 Diag(TheCall->getArg(1)->getLocStart(), 185 diag::warn_second_parameter_of_va_start_not_last_named_argument); 186 return false; 187} 188 189/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 190/// friends. This is declared to take (...), so we have to check everything. 191bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 192 if (TheCall->getNumArgs() < 2) 193 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); 194 if (TheCall->getNumArgs() > 2) 195 return Diag(TheCall->getArg(2)->getLocStart(), 196 diag::err_typecheck_call_too_many_args, 197 SourceRange(TheCall->getArg(2)->getLocStart(), 198 (*(TheCall->arg_end()-1))->getLocEnd())); 199 200 Expr *OrigArg0 = TheCall->getArg(0); 201 Expr *OrigArg1 = TheCall->getArg(1); 202 203 // Do standard promotions between the two arguments, returning their common 204 // type. 205 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 206 207 // If the common type isn't a real floating type, then the arguments were 208 // invalid for this operation. 209 if (!Res->isRealFloatingType()) 210 return Diag(OrigArg0->getLocStart(), 211 diag::err_typecheck_call_invalid_ordered_compare, 212 OrigArg0->getType().getAsString(), 213 OrigArg1->getType().getAsString(), 214 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); 215 216 return false; 217} 218 219bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 220 // The signature for these builtins is exact; the only thing we need 221 // to check is that the argument is a constant. 222 SourceLocation Loc; 223 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) { 224 return Diag(Loc, diag::err_stack_const_level, TheCall->getSourceRange()); 225 } 226 return false; 227} 228 229/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 230// This is declared to take (...), so we have to check everything. 231Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 232 if (TheCall->getNumArgs() < 3) 233 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 234 TheCall->getSourceRange()); 235 236 QualType FAType = TheCall->getArg(0)->getType(); 237 QualType SAType = TheCall->getArg(1)->getType(); 238 239 if (!FAType->isVectorType() || !SAType->isVectorType()) { 240 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector, 241 SourceRange(TheCall->getArg(0)->getLocStart(), 242 TheCall->getArg(1)->getLocEnd())); 243 return true; 244 } 245 246 if (FAType.getCanonicalType().getUnqualifiedType() != 247 SAType.getCanonicalType().getUnqualifiedType()) { 248 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector, 249 SourceRange(TheCall->getArg(0)->getLocStart(), 250 TheCall->getArg(1)->getLocEnd())); 251 return true; 252 } 253 254 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 255 if (TheCall->getNumArgs() != numElements+2) { 256 if (TheCall->getNumArgs() < numElements+2) 257 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 258 TheCall->getSourceRange()); 259 else 260 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 261 TheCall->getSourceRange()); 262 return true; 263 } 264 265 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 266 llvm::APSInt Result(32); 267 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) { 268 Diag(TheCall->getLocStart(), 269 diag::err_shufflevector_nonconstant_argument, 270 TheCall->getArg(i)->getSourceRange()); 271 return true; 272 } 273 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) { 274 Diag(TheCall->getLocStart(), 275 diag::err_shufflevector_argument_too_large, 276 TheCall->getArg(i)->getSourceRange()); 277 return true; 278 } 279 } 280 281 llvm::SmallVector<Expr*, 32> exprs; 282 283 for (unsigned i = 0; i < TheCall->getNumArgs(); i++) { 284 exprs.push_back(TheCall->getArg(i)); 285 TheCall->setArg(i, 0); 286 } 287 288 ShuffleVectorExpr* E = new ShuffleVectorExpr( 289 exprs.begin(), numElements+2, FAType, 290 TheCall->getCallee()->getLocStart(), 291 TheCall->getRParenLoc()); 292 293 return E; 294} 295 296/// CheckPrintfArguments - Check calls to printf (and similar functions) for 297/// correct use of format strings. 298/// 299/// HasVAListArg - A predicate indicating whether the printf-like 300/// function is passed an explicit va_arg argument (e.g., vprintf) 301/// 302/// format_idx - The index into Args for the format string. 303/// 304/// Improper format strings to functions in the printf family can be 305/// the source of bizarre bugs and very serious security holes. A 306/// good source of information is available in the following paper 307/// (which includes additional references): 308/// 309/// FormatGuard: Automatic Protection From printf Format String 310/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 311/// 312/// Functionality implemented: 313/// 314/// We can statically check the following properties for string 315/// literal format strings for non v.*printf functions (where the 316/// arguments are passed directly): 317// 318/// (1) Are the number of format conversions equal to the number of 319/// data arguments? 320/// 321/// (2) Does each format conversion correctly match the type of the 322/// corresponding data argument? (TODO) 323/// 324/// Moreover, for all printf functions we can: 325/// 326/// (3) Check for a missing format string (when not caught by type checking). 327/// 328/// (4) Check for no-operation flags; e.g. using "#" with format 329/// conversion 'c' (TODO) 330/// 331/// (5) Check the use of '%n', a major source of security holes. 332/// 333/// (6) Check for malformed format conversions that don't specify anything. 334/// 335/// (7) Check for empty format strings. e.g: printf(""); 336/// 337/// (8) Check that the format string is a wide literal. 338/// 339/// (9) Also check the arguments of functions with the __format__ attribute. 340/// (TODO). 341/// 342/// All of these checks can be done by parsing the format string. 343/// 344/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 345void 346Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 347 unsigned format_idx) { 348 Expr *Fn = TheCall->getCallee(); 349 350 // CHECK: printf-like function is called with no format string. 351 if (format_idx >= TheCall->getNumArgs()) { 352 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, 353 Fn->getSourceRange()); 354 return; 355 } 356 357 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 358 359 // CHECK: format string is not a string literal. 360 // 361 // Dynamically generated format strings are difficult to 362 // automatically vet at compile time. Requiring that format strings 363 // are string literals: (1) permits the checking of format strings by 364 // the compiler and thereby (2) can practically remove the source of 365 // many format string exploits. 366 367 // Format string can be either ObjC string (e.g. @"%d") or 368 // C string (e.g. "%d") 369 // ObjC string uses the same format specifiers as C string, so we can use 370 // the same format string checking logic for both ObjC and C strings. 371 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 372 StringLiteral *FExpr = NULL; 373 374 if(ObjCFExpr != NULL) 375 FExpr = ObjCFExpr->getString(); 376 else 377 FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 378 379 if (FExpr == NULL) { 380 // For vprintf* functions (i.e., HasVAListArg==true), we add a 381 // special check to see if the format string is a function parameter 382 // of the function calling the printf function. If the function 383 // has an attribute indicating it is a printf-like function, then we 384 // should suppress warnings concerning non-literals being used in a call 385 // to a vprintf function. For example: 386 // 387 // void 388 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 389 // va_list ap; 390 // va_start(ap, fmt); 391 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 392 // ... 393 // 394 // 395 // FIXME: We don't have full attribute support yet, so just check to see 396 // if the argument is a DeclRefExpr that references a parameter. We'll 397 // add proper support for checking the attribute later. 398 if (HasVAListArg) 399 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 400 if (isa<ParmVarDecl>(DR->getDecl())) 401 return; 402 403 Diag(TheCall->getArg(format_idx)->getLocStart(), 404 diag::warn_printf_not_string_constant, Fn->getSourceRange()); 405 return; 406 } 407 408 // CHECK: is the format string a wide literal? 409 if (FExpr->isWide()) { 410 Diag(FExpr->getLocStart(), 411 diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange()); 412 return; 413 } 414 415 // Str - The format string. NOTE: this is NOT null-terminated! 416 const char * const Str = FExpr->getStrData(); 417 418 // CHECK: empty format string? 419 const unsigned StrLen = FExpr->getByteLength(); 420 421 if (StrLen == 0) { 422 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, 423 Fn->getSourceRange()); 424 return; 425 } 426 427 // We process the format string using a binary state machine. The 428 // current state is stored in CurrentState. 429 enum { 430 state_OrdChr, 431 state_Conversion 432 } CurrentState = state_OrdChr; 433 434 // numConversions - The number of conversions seen so far. This is 435 // incremented as we traverse the format string. 436 unsigned numConversions = 0; 437 438 // numDataArgs - The number of data arguments after the format 439 // string. This can only be determined for non vprintf-like 440 // functions. For those functions, this value is 1 (the sole 441 // va_arg argument). 442 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 443 444 // Inspect the format string. 445 unsigned StrIdx = 0; 446 447 // LastConversionIdx - Index within the format string where we last saw 448 // a '%' character that starts a new format conversion. 449 unsigned LastConversionIdx = 0; 450 451 for (; StrIdx < StrLen; ++StrIdx) { 452 453 // Is the number of detected conversion conversions greater than 454 // the number of matching data arguments? If so, stop. 455 if (!HasVAListArg && numConversions > numDataArgs) break; 456 457 // Handle "\0" 458 if (Str[StrIdx] == '\0') { 459 // The string returned by getStrData() is not null-terminated, 460 // so the presence of a null character is likely an error. 461 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 462 diag::warn_printf_format_string_contains_null_char, 463 Fn->getSourceRange()); 464 return; 465 } 466 467 // Ordinary characters (not processing a format conversion). 468 if (CurrentState == state_OrdChr) { 469 if (Str[StrIdx] == '%') { 470 CurrentState = state_Conversion; 471 LastConversionIdx = StrIdx; 472 } 473 continue; 474 } 475 476 // Seen '%'. Now processing a format conversion. 477 switch (Str[StrIdx]) { 478 // Handle dynamic precision or width specifier. 479 case '*': { 480 ++numConversions; 481 482 if (!HasVAListArg && numConversions > numDataArgs) { 483 SourceLocation Loc = FExpr->getLocStart(); 484 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 485 486 if (Str[StrIdx-1] == '.') 487 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, 488 Fn->getSourceRange()); 489 else 490 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, 491 Fn->getSourceRange()); 492 493 // Don't do any more checking. We'll just emit spurious errors. 494 return; 495 } 496 497 // Perform type checking on width/precision specifier. 498 Expr *E = TheCall->getArg(format_idx+numConversions); 499 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 500 if (BT->getKind() == BuiltinType::Int) 501 break; 502 503 SourceLocation Loc = 504 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 505 506 if (Str[StrIdx-1] == '.') 507 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, 508 E->getType().getAsString(), E->getSourceRange()); 509 else 510 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, 511 E->getType().getAsString(), E->getSourceRange()); 512 513 break; 514 } 515 516 // Characters which can terminate a format conversion 517 // (e.g. "%d"). Characters that specify length modifiers or 518 // other flags are handled by the default case below. 519 // 520 // FIXME: additional checks will go into the following cases. 521 case 'i': 522 case 'd': 523 case 'o': 524 case 'u': 525 case 'x': 526 case 'X': 527 case 'D': 528 case 'O': 529 case 'U': 530 case 'e': 531 case 'E': 532 case 'f': 533 case 'F': 534 case 'g': 535 case 'G': 536 case 'a': 537 case 'A': 538 case 'c': 539 case 'C': 540 case 'S': 541 case 's': 542 case 'p': 543 ++numConversions; 544 CurrentState = state_OrdChr; 545 break; 546 547 // CHECK: Are we using "%n"? Issue a warning. 548 case 'n': { 549 ++numConversions; 550 CurrentState = state_OrdChr; 551 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 552 LastConversionIdx+1); 553 554 Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange()); 555 break; 556 } 557 558 // Handle "%@" 559 case '@': 560 // %@ is allowed in ObjC format strings only. 561 if(ObjCFExpr != NULL) 562 CurrentState = state_OrdChr; 563 else { 564 // Issue a warning: invalid format conversion. 565 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 566 LastConversionIdx+1); 567 568 Diag(Loc, diag::warn_printf_invalid_conversion, 569 std::string(Str+LastConversionIdx, 570 Str+std::min(LastConversionIdx+2, StrLen)), 571 Fn->getSourceRange()); 572 } 573 ++numConversions; 574 break; 575 576 // Handle "%%" 577 case '%': 578 // Sanity check: Was the first "%" character the previous one? 579 // If not, we will assume that we have a malformed format 580 // conversion, and that the current "%" character is the start 581 // of a new conversion. 582 if (StrIdx - LastConversionIdx == 1) 583 CurrentState = state_OrdChr; 584 else { 585 // Issue a warning: invalid format conversion. 586 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 587 LastConversionIdx+1); 588 589 Diag(Loc, diag::warn_printf_invalid_conversion, 590 std::string(Str+LastConversionIdx, Str+StrIdx), 591 Fn->getSourceRange()); 592 593 // This conversion is broken. Advance to the next format 594 // conversion. 595 LastConversionIdx = StrIdx; 596 ++numConversions; 597 } 598 break; 599 600 default: 601 // This case catches all other characters: flags, widths, etc. 602 // We should eventually process those as well. 603 break; 604 } 605 } 606 607 if (CurrentState == state_Conversion) { 608 // Issue a warning: invalid format conversion. 609 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 610 LastConversionIdx+1); 611 612 Diag(Loc, diag::warn_printf_invalid_conversion, 613 std::string(Str+LastConversionIdx, 614 Str+std::min(LastConversionIdx+2, StrLen)), 615 Fn->getSourceRange()); 616 return; 617 } 618 619 if (!HasVAListArg) { 620 // CHECK: Does the number of format conversions exceed the number 621 // of data arguments? 622 if (numConversions > numDataArgs) { 623 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 624 LastConversionIdx); 625 626 Diag(Loc, diag::warn_printf_insufficient_data_args, 627 Fn->getSourceRange()); 628 } 629 // CHECK: Does the number of data arguments exceed the number of 630 // format conversions in the format string? 631 else if (numConversions < numDataArgs) 632 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 633 diag::warn_printf_too_many_data_args, Fn->getSourceRange()); 634 } 635} 636 637//===--- CHECK: Return Address of Stack Variable --------------------------===// 638 639static DeclRefExpr* EvalVal(Expr *E); 640static DeclRefExpr* EvalAddr(Expr* E); 641 642/// CheckReturnStackAddr - Check if a return statement returns the address 643/// of a stack variable. 644void 645Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 646 SourceLocation ReturnLoc) { 647 648 // Perform checking for returned stack addresses. 649 if (lhsType->isPointerType()) { 650 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 651 Diag(DR->getLocStart(), diag::warn_ret_stack_addr, 652 DR->getDecl()->getIdentifier()->getName(), 653 RetValExp->getSourceRange()); 654 } 655 // Perform checking for stack values returned by reference. 656 else if (lhsType->isReferenceType()) { 657 // Check for an implicit cast to a reference. 658 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp)) 659 if (DeclRefExpr *DR = EvalVal(I->getSubExpr())) 660 Diag(DR->getLocStart(), diag::warn_ret_stack_ref, 661 DR->getDecl()->getIdentifier()->getName(), 662 RetValExp->getSourceRange()); 663 } 664} 665 666/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 667/// check if the expression in a return statement evaluates to an address 668/// to a location on the stack. The recursion is used to traverse the 669/// AST of the return expression, with recursion backtracking when we 670/// encounter a subexpression that (1) clearly does not lead to the address 671/// of a stack variable or (2) is something we cannot determine leads to 672/// the address of a stack variable based on such local checking. 673/// 674/// EvalAddr processes expressions that are pointers that are used as 675/// references (and not L-values). EvalVal handles all other values. 676/// At the base case of the recursion is a check for a DeclRefExpr* in 677/// the refers to a stack variable. 678/// 679/// This implementation handles: 680/// 681/// * pointer-to-pointer casts 682/// * implicit conversions from array references to pointers 683/// * taking the address of fields 684/// * arbitrary interplay between "&" and "*" operators 685/// * pointer arithmetic from an address of a stack variable 686/// * taking the address of an array element where the array is on the stack 687static DeclRefExpr* EvalAddr(Expr *E) { 688 // We should only be called for evaluating pointer expressions. 689 assert((E->getType()->isPointerType() || 690 E->getType()->isObjCQualifiedIdType()) && 691 "EvalAddr only works on pointers"); 692 693 // Our "symbolic interpreter" is just a dispatch off the currently 694 // viewed AST node. We then recursively traverse the AST by calling 695 // EvalAddr and EvalVal appropriately. 696 switch (E->getStmtClass()) { 697 case Stmt::ParenExprClass: 698 // Ignore parentheses. 699 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 700 701 case Stmt::UnaryOperatorClass: { 702 // The only unary operator that make sense to handle here 703 // is AddrOf. All others don't make sense as pointers. 704 UnaryOperator *U = cast<UnaryOperator>(E); 705 706 if (U->getOpcode() == UnaryOperator::AddrOf) 707 return EvalVal(U->getSubExpr()); 708 else 709 return NULL; 710 } 711 712 case Stmt::BinaryOperatorClass: { 713 // Handle pointer arithmetic. All other binary operators are not valid 714 // in this context. 715 BinaryOperator *B = cast<BinaryOperator>(E); 716 BinaryOperator::Opcode op = B->getOpcode(); 717 718 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 719 return NULL; 720 721 Expr *Base = B->getLHS(); 722 723 // Determine which argument is the real pointer base. It could be 724 // the RHS argument instead of the LHS. 725 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 726 727 assert (Base->getType()->isPointerType()); 728 return EvalAddr(Base); 729 } 730 731 // For conditional operators we need to see if either the LHS or RHS are 732 // valid DeclRefExpr*s. If one of them is valid, we return it. 733 case Stmt::ConditionalOperatorClass: { 734 ConditionalOperator *C = cast<ConditionalOperator>(E); 735 736 // Handle the GNU extension for missing LHS. 737 if (Expr *lhsExpr = C->getLHS()) 738 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 739 return LHS; 740 741 return EvalAddr(C->getRHS()); 742 } 743 744 // For implicit casts, we need to handle conversions from arrays to 745 // pointer values, and implicit pointer-to-pointer conversions. 746 case Stmt::ImplicitCastExprClass: { 747 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E); 748 Expr* SubExpr = IE->getSubExpr(); 749 750 if (SubExpr->getType()->isPointerType() || 751 SubExpr->getType()->isObjCQualifiedIdType()) 752 return EvalAddr(SubExpr); 753 else 754 return EvalVal(SubExpr); 755 } 756 757 // For casts, we handle pointer-to-pointer conversions (which 758 // is essentially a no-op from our mini-interpreter's standpoint). 759 // For other casts we abort. 760 case Stmt::CastExprClass: { 761 CastExpr *C = cast<CastExpr>(E); 762 Expr *SubExpr = C->getSubExpr(); 763 764 if (SubExpr->getType()->isPointerType()) 765 return EvalAddr(SubExpr); 766 else 767 return NULL; 768 } 769 770 // C++ casts. For dynamic casts, static casts, and const casts, we 771 // are always converting from a pointer-to-pointer, so we just blow 772 // through the cast. In the case the dynamic cast doesn't fail 773 // (and return NULL), we take the conservative route and report cases 774 // where we return the address of a stack variable. For Reinterpre 775 case Stmt::CXXCastExprClass: { 776 CXXCastExpr *C = cast<CXXCastExpr>(E); 777 778 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) { 779 Expr *S = C->getSubExpr(); 780 if (S->getType()->isPointerType()) 781 return EvalAddr(S); 782 else 783 return NULL; 784 } 785 else 786 return EvalAddr(C->getSubExpr()); 787 } 788 789 // Everything else: we simply don't reason about them. 790 default: 791 return NULL; 792 } 793} 794 795 796/// EvalVal - This function is complements EvalAddr in the mutual recursion. 797/// See the comments for EvalAddr for more details. 798static DeclRefExpr* EvalVal(Expr *E) { 799 800 // We should only be called for evaluating non-pointer expressions, or 801 // expressions with a pointer type that are not used as references but instead 802 // are l-values (e.g., DeclRefExpr with a pointer type). 803 804 // Our "symbolic interpreter" is just a dispatch off the currently 805 // viewed AST node. We then recursively traverse the AST by calling 806 // EvalAddr and EvalVal appropriately. 807 switch (E->getStmtClass()) { 808 case Stmt::DeclRefExprClass: { 809 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 810 // at code that refers to a variable's name. We check if it has local 811 // storage within the function, and if so, return the expression. 812 DeclRefExpr *DR = cast<DeclRefExpr>(E); 813 814 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 815 if(V->hasLocalStorage()) return DR; 816 817 return NULL; 818 } 819 820 case Stmt::ParenExprClass: 821 // Ignore parentheses. 822 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 823 824 case Stmt::UnaryOperatorClass: { 825 // The only unary operator that make sense to handle here 826 // is Deref. All others don't resolve to a "name." This includes 827 // handling all sorts of rvalues passed to a unary operator. 828 UnaryOperator *U = cast<UnaryOperator>(E); 829 830 if (U->getOpcode() == UnaryOperator::Deref) 831 return EvalAddr(U->getSubExpr()); 832 833 return NULL; 834 } 835 836 case Stmt::ArraySubscriptExprClass: { 837 // Array subscripts are potential references to data on the stack. We 838 // retrieve the DeclRefExpr* for the array variable if it indeed 839 // has local storage. 840 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 841 } 842 843 case Stmt::ConditionalOperatorClass: { 844 // For conditional operators we need to see if either the LHS or RHS are 845 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 846 ConditionalOperator *C = cast<ConditionalOperator>(E); 847 848 // Handle the GNU extension for missing LHS. 849 if (Expr *lhsExpr = C->getLHS()) 850 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 851 return LHS; 852 853 return EvalVal(C->getRHS()); 854 } 855 856 // Accesses to members are potential references to data on the stack. 857 case Stmt::MemberExprClass: { 858 MemberExpr *M = cast<MemberExpr>(E); 859 860 // Check for indirect access. We only want direct field accesses. 861 if (!M->isArrow()) 862 return EvalVal(M->getBase()); 863 else 864 return NULL; 865 } 866 867 // Everything else: we simply don't reason about them. 868 default: 869 return NULL; 870 } 871} 872 873//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 874 875/// Check for comparisons of floating point operands using != and ==. 876/// Issue a warning if these are no self-comparisons, as they are not likely 877/// to do what the programmer intended. 878void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 879 bool EmitWarning = true; 880 881 Expr* LeftExprSansParen = lex->IgnoreParens(); 882 Expr* RightExprSansParen = rex->IgnoreParens(); 883 884 // Special case: check for x == x (which is OK). 885 // Do not emit warnings for such cases. 886 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 887 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 888 if (DRL->getDecl() == DRR->getDecl()) 889 EmitWarning = false; 890 891 892 // Special case: check for comparisons against literals that can be exactly 893 // represented by APFloat. In such cases, do not emit a warning. This 894 // is a heuristic: often comparison against such literals are used to 895 // detect if a value in a variable has not changed. This clearly can 896 // lead to false negatives. 897 if (EmitWarning) { 898 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 899 if (FLL->isExact()) 900 EmitWarning = false; 901 } 902 else 903 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 904 if (FLR->isExact()) 905 EmitWarning = false; 906 } 907 } 908 909 // Check for comparisons with builtin types. 910 if (EmitWarning) 911 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 912 if (isCallBuiltin(CL)) 913 EmitWarning = false; 914 915 if (EmitWarning) 916 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 917 if (isCallBuiltin(CR)) 918 EmitWarning = false; 919 920 // Emit the diagnostic. 921 if (EmitWarning) 922 Diag(loc, diag::warn_floatingpoint_eq, 923 lex->getSourceRange(),rex->getSourceRange()); 924} 925