SemaChecking.cpp revision 3e9d2530c4118e0e8207de24f9059cb3a8eb8cf4
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/Decl.h" 18#include "clang/AST/Expr.h" 19#include "clang/AST/ExprCXX.h" 20#include "clang/Lex/Preprocessor.h" 21#include "clang/Lex/LiteralSupport.h" 22#include "clang/Basic/SourceManager.h" 23#include "clang/Basic/Diagnostic.h" 24#include "clang/Basic/LangOptions.h" 25#include "clang/Basic/TargetInfo.h" 26#include "llvm/ADT/SmallString.h" 27#include "llvm/ADT/StringExtras.h" 28#include "SemaUtil.h" 29using namespace clang; 30 31/// CheckFunctionCall - Check a direct function call for various correctness 32/// and safety properties not strictly enforced by the C type system. 33Action::ExprResult 34Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 35 36 // Get the IdentifierInfo* for the called function. 37 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 38 39 switch (FnInfo->getBuiltinID()) { 40 case Builtin::BI__builtin___CFStringMakeConstantString: 41 assert(TheCall->getNumArgs() == 1 && 42 "Wrong # arguments to builtin CFStringMakeConstantString"); 43 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) { 44 delete TheCall; 45 return true; 46 } 47 return TheCall; 48 case Builtin::BI__builtin_va_start: 49 if (SemaBuiltinVAStart(TheCall)) { 50 delete TheCall; 51 return true; 52 } 53 return TheCall; 54 case Builtin::BI__builtin_isgreater: 55 case Builtin::BI__builtin_isgreaterequal: 56 case Builtin::BI__builtin_isless: 57 case Builtin::BI__builtin_islessequal: 58 case Builtin::BI__builtin_islessgreater: 59 case Builtin::BI__builtin_isunordered: 60 if (SemaBuiltinUnorderedCompare(TheCall)) { 61 delete TheCall; 62 return true; 63 } 64 return TheCall; 65 case Builtin::BI__builtin_shufflevector: 66 return SemaBuiltinShuffleVector(TheCall); 67 } 68 69 // Search the KnownFunctionIDs for the identifier. 70 unsigned i = 0, e = id_num_known_functions; 71 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 72 if (i == e) return TheCall; 73 74 // Printf checking. 75 if (i <= id_vprintf) { 76 // Retrieve the index of the format string parameter and determine 77 // if the function is passed a va_arg argument. 78 unsigned format_idx = 0; 79 bool HasVAListArg = false; 80 81 switch (i) { 82 default: assert(false && "No format string argument index."); 83 case id_printf: format_idx = 0; break; 84 case id_fprintf: format_idx = 1; break; 85 case id_sprintf: format_idx = 1; break; 86 case id_snprintf: format_idx = 2; break; 87 case id_asprintf: format_idx = 1; break; 88 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 89 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 90 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 91 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 92 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 93 } 94 95 CheckPrintfArguments(TheCall, HasVAListArg, format_idx); 96 } 97 98 return TheCall; 99} 100 101/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 102/// CFString constructor is correct 103bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 104 Arg = Arg->IgnoreParenCasts(); 105 106 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 107 108 if (!Literal || Literal->isWide()) { 109 Diag(Arg->getLocStart(), 110 diag::err_cfstring_literal_not_string_constant, 111 Arg->getSourceRange()); 112 return true; 113 } 114 115 const char *Data = Literal->getStrData(); 116 unsigned Length = Literal->getByteLength(); 117 118 for (unsigned i = 0; i < Length; ++i) { 119 if (!isascii(Data[i])) { 120 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 121 diag::warn_cfstring_literal_contains_non_ascii_character, 122 Arg->getSourceRange()); 123 break; 124 } 125 126 if (!Data[i]) { 127 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 128 diag::warn_cfstring_literal_contains_nul_character, 129 Arg->getSourceRange()); 130 break; 131 } 132 } 133 134 return false; 135} 136 137/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 138/// Emit an error and return true on failure, return false on success. 139bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 140 Expr *Fn = TheCall->getCallee(); 141 if (TheCall->getNumArgs() > 2) { 142 Diag(TheCall->getArg(2)->getLocStart(), 143 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), 144 SourceRange(TheCall->getArg(2)->getLocStart(), 145 (*(TheCall->arg_end()-1))->getLocEnd())); 146 return true; 147 } 148 149 // Determine whether the current function is variadic or not. 150 bool isVariadic; 151 if (CurFunctionDecl) 152 isVariadic = 153 cast<FunctionTypeProto>(CurFunctionDecl->getType())->isVariadic(); 154 else 155 isVariadic = CurMethodDecl->isVariadic(); 156 157 if (!isVariadic) { 158 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 159 return true; 160 } 161 162 // Verify that the second argument to the builtin is the last argument of the 163 // current function or method. 164 bool SecondArgIsLastNamedArgument = false; 165 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 166 167 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 168 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 169 // FIXME: This isn't correct for methods (results in bogus warning). 170 // Get the last formal in the current function. 171 const ParmVarDecl *LastArg; 172 if (CurFunctionDecl) 173 LastArg = *(CurFunctionDecl->param_end()-1); 174 else 175 LastArg = *(CurMethodDecl->param_end()-1); 176 SecondArgIsLastNamedArgument = PV == LastArg; 177 } 178 } 179 180 if (!SecondArgIsLastNamedArgument) 181 Diag(TheCall->getArg(1)->getLocStart(), 182 diag::warn_second_parameter_of_va_start_not_last_named_argument); 183 return false; 184} 185 186/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 187/// friends. This is declared to take (...), so we have to check everything. 188bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 189 if (TheCall->getNumArgs() < 2) 190 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); 191 if (TheCall->getNumArgs() > 2) 192 return Diag(TheCall->getArg(2)->getLocStart(), 193 diag::err_typecheck_call_too_many_args, 194 SourceRange(TheCall->getArg(2)->getLocStart(), 195 (*(TheCall->arg_end()-1))->getLocEnd())); 196 197 Expr *OrigArg0 = TheCall->getArg(0); 198 Expr *OrigArg1 = TheCall->getArg(1); 199 200 // Do standard promotions between the two arguments, returning their common 201 // type. 202 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 203 204 // If the common type isn't a real floating type, then the arguments were 205 // invalid for this operation. 206 if (!Res->isRealFloatingType()) 207 return Diag(OrigArg0->getLocStart(), 208 diag::err_typecheck_call_invalid_ordered_compare, 209 OrigArg0->getType().getAsString(), 210 OrigArg1->getType().getAsString(), 211 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); 212 213 return false; 214} 215 216/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 217// This is declared to take (...), so we have to check everything. 218Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 219 if (TheCall->getNumArgs() < 3) 220 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 221 TheCall->getSourceRange()); 222 223 QualType FAType = TheCall->getArg(0)->getType(); 224 QualType SAType = TheCall->getArg(1)->getType(); 225 226 if (!FAType->isVectorType() || !SAType->isVectorType()) { 227 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector, 228 SourceRange(TheCall->getArg(0)->getLocStart(), 229 TheCall->getArg(1)->getLocEnd())); 230 delete TheCall; 231 return true; 232 } 233 234 if (TheCall->getArg(0)->getType().getCanonicalType().getUnqualifiedType() != 235 TheCall->getArg(1)->getType().getCanonicalType().getUnqualifiedType()) { 236 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector, 237 SourceRange(TheCall->getArg(0)->getLocStart(), 238 TheCall->getArg(1)->getLocEnd())); 239 delete TheCall; 240 return true; 241 } 242 243 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 244 if (TheCall->getNumArgs() != numElements+2) { 245 if (TheCall->getNumArgs() < numElements+2) 246 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 247 TheCall->getSourceRange()); 248 else 249 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 250 TheCall->getSourceRange()); 251 delete TheCall; 252 return true; 253 } 254 255 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 256 llvm::APSInt Result(32); 257 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) { 258 Diag(TheCall->getLocStart(), 259 diag::err_shufflevector_nonconstant_argument, 260 TheCall->getArg(i)->getSourceRange()); 261 delete TheCall; 262 return true; 263 } 264 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) { 265 Diag(TheCall->getLocStart(), 266 diag::err_shufflevector_argument_too_large, 267 TheCall->getArg(i)->getSourceRange()); 268 delete TheCall; 269 return true; 270 } 271 } 272 273 llvm::SmallVector<Expr*, 32> exprs; 274 275 for (unsigned i = 0; i < TheCall->getNumArgs(); i++) { 276 exprs.push_back(TheCall->getArg(i)); 277 TheCall->setArg(i, 0); 278 } 279 280 ShuffleVectorExpr* E = new ShuffleVectorExpr( 281 exprs.begin(), numElements+2, FAType, 282 TheCall->getCallee()->getLocStart(), 283 TheCall->getRParenLoc()); 284 285 delete TheCall; 286 287 return E; 288} 289 290/// CheckPrintfArguments - Check calls to printf (and similar functions) for 291/// correct use of format strings. 292/// 293/// HasVAListArg - A predicate indicating whether the printf-like 294/// function is passed an explicit va_arg argument (e.g., vprintf) 295/// 296/// format_idx - The index into Args for the format string. 297/// 298/// Improper format strings to functions in the printf family can be 299/// the source of bizarre bugs and very serious security holes. A 300/// good source of information is available in the following paper 301/// (which includes additional references): 302/// 303/// FormatGuard: Automatic Protection From printf Format String 304/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 305/// 306/// Functionality implemented: 307/// 308/// We can statically check the following properties for string 309/// literal format strings for non v.*printf functions (where the 310/// arguments are passed directly): 311// 312/// (1) Are the number of format conversions equal to the number of 313/// data arguments? 314/// 315/// (2) Does each format conversion correctly match the type of the 316/// corresponding data argument? (TODO) 317/// 318/// Moreover, for all printf functions we can: 319/// 320/// (3) Check for a missing format string (when not caught by type checking). 321/// 322/// (4) Check for no-operation flags; e.g. using "#" with format 323/// conversion 'c' (TODO) 324/// 325/// (5) Check the use of '%n', a major source of security holes. 326/// 327/// (6) Check for malformed format conversions that don't specify anything. 328/// 329/// (7) Check for empty format strings. e.g: printf(""); 330/// 331/// (8) Check that the format string is a wide literal. 332/// 333/// (9) Also check the arguments of functions with the __format__ attribute. 334/// (TODO). 335/// 336/// All of these checks can be done by parsing the format string. 337/// 338/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 339void 340Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 341 unsigned format_idx) { 342 Expr *Fn = TheCall->getCallee(); 343 344 // CHECK: printf-like function is called with no format string. 345 if (format_idx >= TheCall->getNumArgs()) { 346 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, 347 Fn->getSourceRange()); 348 return; 349 } 350 351 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 352 353 // CHECK: format string is not a string literal. 354 // 355 // Dynamically generated format strings are difficult to 356 // automatically vet at compile time. Requiring that format strings 357 // are string literals: (1) permits the checking of format strings by 358 // the compiler and thereby (2) can practically remove the source of 359 // many format string exploits. 360 StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 361 if (FExpr == NULL) { 362 // For vprintf* functions (i.e., HasVAListArg==true), we add a 363 // special check to see if the format string is a function parameter 364 // of the function calling the printf function. If the function 365 // has an attribute indicating it is a printf-like function, then we 366 // should suppress warnings concerning non-literals being used in a call 367 // to a vprintf function. For example: 368 // 369 // void 370 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 371 // va_list ap; 372 // va_start(ap, fmt); 373 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 374 // ... 375 // 376 // 377 // FIXME: We don't have full attribute support yet, so just check to see 378 // if the argument is a DeclRefExpr that references a parameter. We'll 379 // add proper support for checking the attribute later. 380 if (HasVAListArg) 381 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 382 if (isa<ParmVarDecl>(DR->getDecl())) 383 return; 384 385 Diag(TheCall->getArg(format_idx)->getLocStart(), 386 diag::warn_printf_not_string_constant, Fn->getSourceRange()); 387 return; 388 } 389 390 // CHECK: is the format string a wide literal? 391 if (FExpr->isWide()) { 392 Diag(FExpr->getLocStart(), 393 diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange()); 394 return; 395 } 396 397 // Str - The format string. NOTE: this is NOT null-terminated! 398 const char * const Str = FExpr->getStrData(); 399 400 // CHECK: empty format string? 401 const unsigned StrLen = FExpr->getByteLength(); 402 403 if (StrLen == 0) { 404 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, 405 Fn->getSourceRange()); 406 return; 407 } 408 409 // We process the format string using a binary state machine. The 410 // current state is stored in CurrentState. 411 enum { 412 state_OrdChr, 413 state_Conversion 414 } CurrentState = state_OrdChr; 415 416 // numConversions - The number of conversions seen so far. This is 417 // incremented as we traverse the format string. 418 unsigned numConversions = 0; 419 420 // numDataArgs - The number of data arguments after the format 421 // string. This can only be determined for non vprintf-like 422 // functions. For those functions, this value is 1 (the sole 423 // va_arg argument). 424 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 425 426 // Inspect the format string. 427 unsigned StrIdx = 0; 428 429 // LastConversionIdx - Index within the format string where we last saw 430 // a '%' character that starts a new format conversion. 431 unsigned LastConversionIdx = 0; 432 433 for (; StrIdx < StrLen; ++StrIdx) { 434 435 // Is the number of detected conversion conversions greater than 436 // the number of matching data arguments? If so, stop. 437 if (!HasVAListArg && numConversions > numDataArgs) break; 438 439 // Handle "\0" 440 if (Str[StrIdx] == '\0') { 441 // The string returned by getStrData() is not null-terminated, 442 // so the presence of a null character is likely an error. 443 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 444 diag::warn_printf_format_string_contains_null_char, 445 Fn->getSourceRange()); 446 return; 447 } 448 449 // Ordinary characters (not processing a format conversion). 450 if (CurrentState == state_OrdChr) { 451 if (Str[StrIdx] == '%') { 452 CurrentState = state_Conversion; 453 LastConversionIdx = StrIdx; 454 } 455 continue; 456 } 457 458 // Seen '%'. Now processing a format conversion. 459 switch (Str[StrIdx]) { 460 // Handle dynamic precision or width specifier. 461 case '*': { 462 ++numConversions; 463 464 if (!HasVAListArg && numConversions > numDataArgs) { 465 SourceLocation Loc = FExpr->getLocStart(); 466 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 467 468 if (Str[StrIdx-1] == '.') 469 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, 470 Fn->getSourceRange()); 471 else 472 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, 473 Fn->getSourceRange()); 474 475 // Don't do any more checking. We'll just emit spurious errors. 476 return; 477 } 478 479 // Perform type checking on width/precision specifier. 480 Expr *E = TheCall->getArg(format_idx+numConversions); 481 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 482 if (BT->getKind() == BuiltinType::Int) 483 break; 484 485 SourceLocation Loc = 486 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 487 488 if (Str[StrIdx-1] == '.') 489 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, 490 E->getType().getAsString(), E->getSourceRange()); 491 else 492 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, 493 E->getType().getAsString(), E->getSourceRange()); 494 495 break; 496 } 497 498 // Characters which can terminate a format conversion 499 // (e.g. "%d"). Characters that specify length modifiers or 500 // other flags are handled by the default case below. 501 // 502 // FIXME: additional checks will go into the following cases. 503 case 'i': 504 case 'd': 505 case 'o': 506 case 'u': 507 case 'x': 508 case 'X': 509 case 'D': 510 case 'O': 511 case 'U': 512 case 'e': 513 case 'E': 514 case 'f': 515 case 'F': 516 case 'g': 517 case 'G': 518 case 'a': 519 case 'A': 520 case 'c': 521 case 'C': 522 case 'S': 523 case 's': 524 case 'p': 525 ++numConversions; 526 CurrentState = state_OrdChr; 527 break; 528 529 // CHECK: Are we using "%n"? Issue a warning. 530 case 'n': { 531 ++numConversions; 532 CurrentState = state_OrdChr; 533 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 534 LastConversionIdx+1); 535 536 Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange()); 537 break; 538 } 539 540 // Handle "%%" 541 case '%': 542 // Sanity check: Was the first "%" character the previous one? 543 // If not, we will assume that we have a malformed format 544 // conversion, and that the current "%" character is the start 545 // of a new conversion. 546 if (StrIdx - LastConversionIdx == 1) 547 CurrentState = state_OrdChr; 548 else { 549 // Issue a warning: invalid format conversion. 550 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 551 LastConversionIdx+1); 552 553 Diag(Loc, diag::warn_printf_invalid_conversion, 554 std::string(Str+LastConversionIdx, Str+StrIdx), 555 Fn->getSourceRange()); 556 557 // This conversion is broken. Advance to the next format 558 // conversion. 559 LastConversionIdx = StrIdx; 560 ++numConversions; 561 } 562 break; 563 564 default: 565 // This case catches all other characters: flags, widths, etc. 566 // We should eventually process those as well. 567 break; 568 } 569 } 570 571 if (CurrentState == state_Conversion) { 572 // Issue a warning: invalid format conversion. 573 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 574 LastConversionIdx+1); 575 576 Diag(Loc, diag::warn_printf_invalid_conversion, 577 std::string(Str+LastConversionIdx, 578 Str+std::min(LastConversionIdx+2, StrLen)), 579 Fn->getSourceRange()); 580 return; 581 } 582 583 if (!HasVAListArg) { 584 // CHECK: Does the number of format conversions exceed the number 585 // of data arguments? 586 if (numConversions > numDataArgs) { 587 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 588 LastConversionIdx); 589 590 Diag(Loc, diag::warn_printf_insufficient_data_args, 591 Fn->getSourceRange()); 592 } 593 // CHECK: Does the number of data arguments exceed the number of 594 // format conversions in the format string? 595 else if (numConversions < numDataArgs) 596 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 597 diag::warn_printf_too_many_data_args, Fn->getSourceRange()); 598 } 599} 600 601//===--- CHECK: Return Address of Stack Variable --------------------------===// 602 603static DeclRefExpr* EvalVal(Expr *E); 604static DeclRefExpr* EvalAddr(Expr* E); 605 606/// CheckReturnStackAddr - Check if a return statement returns the address 607/// of a stack variable. 608void 609Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 610 SourceLocation ReturnLoc) { 611 612 // Perform checking for returned stack addresses. 613 if (lhsType->isPointerType()) { 614 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 615 Diag(DR->getLocStart(), diag::warn_ret_stack_addr, 616 DR->getDecl()->getIdentifier()->getName(), 617 RetValExp->getSourceRange()); 618 } 619 // Perform checking for stack values returned by reference. 620 else if (lhsType->isReferenceType()) { 621 // Check for an implicit cast to a reference. 622 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp)) 623 if (DeclRefExpr *DR = EvalVal(I->getSubExpr())) 624 Diag(DR->getLocStart(), diag::warn_ret_stack_ref, 625 DR->getDecl()->getIdentifier()->getName(), 626 RetValExp->getSourceRange()); 627 } 628} 629 630/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 631/// check if the expression in a return statement evaluates to an address 632/// to a location on the stack. The recursion is used to traverse the 633/// AST of the return expression, with recursion backtracking when we 634/// encounter a subexpression that (1) clearly does not lead to the address 635/// of a stack variable or (2) is something we cannot determine leads to 636/// the address of a stack variable based on such local checking. 637/// 638/// EvalAddr processes expressions that are pointers that are used as 639/// references (and not L-values). EvalVal handles all other values. 640/// At the base case of the recursion is a check for a DeclRefExpr* in 641/// the refers to a stack variable. 642/// 643/// This implementation handles: 644/// 645/// * pointer-to-pointer casts 646/// * implicit conversions from array references to pointers 647/// * taking the address of fields 648/// * arbitrary interplay between "&" and "*" operators 649/// * pointer arithmetic from an address of a stack variable 650/// * taking the address of an array element where the array is on the stack 651static DeclRefExpr* EvalAddr(Expr *E) { 652 // We should only be called for evaluating pointer expressions. 653 assert((E->getType()->isPointerType() || 654 E->getType()->isObjCQualifiedIdType()) && 655 "EvalAddr only works on pointers"); 656 657 // Our "symbolic interpreter" is just a dispatch off the currently 658 // viewed AST node. We then recursively traverse the AST by calling 659 // EvalAddr and EvalVal appropriately. 660 switch (E->getStmtClass()) { 661 case Stmt::ParenExprClass: 662 // Ignore parentheses. 663 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 664 665 case Stmt::UnaryOperatorClass: { 666 // The only unary operator that make sense to handle here 667 // is AddrOf. All others don't make sense as pointers. 668 UnaryOperator *U = cast<UnaryOperator>(E); 669 670 if (U->getOpcode() == UnaryOperator::AddrOf) 671 return EvalVal(U->getSubExpr()); 672 else 673 return NULL; 674 } 675 676 case Stmt::BinaryOperatorClass: { 677 // Handle pointer arithmetic. All other binary operators are not valid 678 // in this context. 679 BinaryOperator *B = cast<BinaryOperator>(E); 680 BinaryOperator::Opcode op = B->getOpcode(); 681 682 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 683 return NULL; 684 685 Expr *Base = B->getLHS(); 686 687 // Determine which argument is the real pointer base. It could be 688 // the RHS argument instead of the LHS. 689 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 690 691 assert (Base->getType()->isPointerType()); 692 return EvalAddr(Base); 693 } 694 695 // For conditional operators we need to see if either the LHS or RHS are 696 // valid DeclRefExpr*s. If one of them is valid, we return it. 697 case Stmt::ConditionalOperatorClass: { 698 ConditionalOperator *C = cast<ConditionalOperator>(E); 699 700 // Handle the GNU extension for missing LHS. 701 if (Expr *lhsExpr = C->getLHS()) 702 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 703 return LHS; 704 705 return EvalAddr(C->getRHS()); 706 } 707 708 // For implicit casts, we need to handle conversions from arrays to 709 // pointer values, and implicit pointer-to-pointer conversions. 710 case Stmt::ImplicitCastExprClass: { 711 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E); 712 Expr* SubExpr = IE->getSubExpr(); 713 714 if (SubExpr->getType()->isPointerType() || 715 SubExpr->getType()->isObjCQualifiedIdType()) 716 return EvalAddr(SubExpr); 717 else 718 return EvalVal(SubExpr); 719 } 720 721 // For casts, we handle pointer-to-pointer conversions (which 722 // is essentially a no-op from our mini-interpreter's standpoint). 723 // For other casts we abort. 724 case Stmt::CastExprClass: { 725 CastExpr *C = cast<CastExpr>(E); 726 Expr *SubExpr = C->getSubExpr(); 727 728 if (SubExpr->getType()->isPointerType()) 729 return EvalAddr(SubExpr); 730 else 731 return NULL; 732 } 733 734 // C++ casts. For dynamic casts, static casts, and const casts, we 735 // are always converting from a pointer-to-pointer, so we just blow 736 // through the cast. In the case the dynamic cast doesn't fail 737 // (and return NULL), we take the conservative route and report cases 738 // where we return the address of a stack variable. For Reinterpre 739 case Stmt::CXXCastExprClass: { 740 CXXCastExpr *C = cast<CXXCastExpr>(E); 741 742 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) { 743 Expr *S = C->getSubExpr(); 744 if (S->getType()->isPointerType()) 745 return EvalAddr(S); 746 else 747 return NULL; 748 } 749 else 750 return EvalAddr(C->getSubExpr()); 751 } 752 753 // Everything else: we simply don't reason about them. 754 default: 755 return NULL; 756 } 757} 758 759 760/// EvalVal - This function is complements EvalAddr in the mutual recursion. 761/// See the comments for EvalAddr for more details. 762static DeclRefExpr* EvalVal(Expr *E) { 763 764 // We should only be called for evaluating non-pointer expressions, or 765 // expressions with a pointer type that are not used as references but instead 766 // are l-values (e.g., DeclRefExpr with a pointer type). 767 768 // Our "symbolic interpreter" is just a dispatch off the currently 769 // viewed AST node. We then recursively traverse the AST by calling 770 // EvalAddr and EvalVal appropriately. 771 switch (E->getStmtClass()) { 772 case Stmt::DeclRefExprClass: { 773 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 774 // at code that refers to a variable's name. We check if it has local 775 // storage within the function, and if so, return the expression. 776 DeclRefExpr *DR = cast<DeclRefExpr>(E); 777 778 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 779 if(V->hasLocalStorage()) return DR; 780 781 return NULL; 782 } 783 784 case Stmt::ParenExprClass: 785 // Ignore parentheses. 786 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 787 788 case Stmt::UnaryOperatorClass: { 789 // The only unary operator that make sense to handle here 790 // is Deref. All others don't resolve to a "name." This includes 791 // handling all sorts of rvalues passed to a unary operator. 792 UnaryOperator *U = cast<UnaryOperator>(E); 793 794 if (U->getOpcode() == UnaryOperator::Deref) 795 return EvalAddr(U->getSubExpr()); 796 797 return NULL; 798 } 799 800 case Stmt::ArraySubscriptExprClass: { 801 // Array subscripts are potential references to data on the stack. We 802 // retrieve the DeclRefExpr* for the array variable if it indeed 803 // has local storage. 804 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 805 } 806 807 case Stmt::ConditionalOperatorClass: { 808 // For conditional operators we need to see if either the LHS or RHS are 809 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 810 ConditionalOperator *C = cast<ConditionalOperator>(E); 811 812 // Handle the GNU extension for missing LHS. 813 if (Expr *lhsExpr = C->getLHS()) 814 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 815 return LHS; 816 817 return EvalVal(C->getRHS()); 818 } 819 820 // Accesses to members are potential references to data on the stack. 821 case Stmt::MemberExprClass: { 822 MemberExpr *M = cast<MemberExpr>(E); 823 824 // Check for indirect access. We only want direct field accesses. 825 if (!M->isArrow()) 826 return EvalVal(M->getBase()); 827 else 828 return NULL; 829 } 830 831 // Everything else: we simply don't reason about them. 832 default: 833 return NULL; 834 } 835} 836 837//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 838 839/// Check for comparisons of floating point operands using != and ==. 840/// Issue a warning if these are no self-comparisons, as they are not likely 841/// to do what the programmer intended. 842void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 843 bool EmitWarning = true; 844 845 Expr* LeftExprSansParen = lex->IgnoreParens(); 846 Expr* RightExprSansParen = rex->IgnoreParens(); 847 848 // Special case: check for x == x (which is OK). 849 // Do not emit warnings for such cases. 850 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 851 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 852 if (DRL->getDecl() == DRR->getDecl()) 853 EmitWarning = false; 854 855 856 // Special case: check for comparisons against literals that can be exactly 857 // represented by APFloat. In such cases, do not emit a warning. This 858 // is a heuristic: often comparison against such literals are used to 859 // detect if a value in a variable has not changed. This clearly can 860 // lead to false negatives. 861 if (EmitWarning) { 862 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 863 if (FLL->isExact()) 864 EmitWarning = false; 865 } 866 else 867 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 868 if (FLR->isExact()) 869 EmitWarning = false; 870 } 871 } 872 873 // Check for comparisons with builtin types. 874 if (EmitWarning) 875 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 876 if (isCallBuiltin(CL)) 877 EmitWarning = false; 878 879 if (EmitWarning) 880 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 881 if (isCallBuiltin(CR)) 882 EmitWarning = false; 883 884 // Emit the diagnostic. 885 if (EmitWarning) 886 Diag(loc, diag::warn_floatingpoint_eq, 887 lex->getSourceRange(),rex->getSourceRange()); 888} 889