SemaChecking.cpp revision ba2561a0ab11afa64014828c759c491378ccc539
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/Decl.h" 18#include "clang/AST/Expr.h" 19#include "clang/AST/ExprCXX.h" 20#include "clang/Lex/Preprocessor.h" 21#include "clang/Lex/LiteralSupport.h" 22#include "clang/Basic/SourceManager.h" 23#include "clang/Basic/Diagnostic.h" 24#include "clang/Basic/LangOptions.h" 25#include "clang/Basic/TargetInfo.h" 26#include "llvm/ADT/OwningPtr.h" 27#include "llvm/ADT/SmallString.h" 28#include "llvm/ADT/StringExtras.h" 29#include "SemaUtil.h" 30using namespace clang; 31 32/// CheckFunctionCall - Check a direct function call for various correctness 33/// and safety properties not strictly enforced by the C type system. 34Action::ExprResult 35Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 36 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 37 // Get the IdentifierInfo* for the called function. 38 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 39 40 switch (FnInfo->getBuiltinID()) { 41 case Builtin::BI__builtin___CFStringMakeConstantString: 42 assert(TheCall->getNumArgs() == 1 && 43 "Wrong # arguments to builtin CFStringMakeConstantString"); 44 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 45 return true; 46 return TheCall.take(); 47 case Builtin::BI__builtin_va_start: 48 if (SemaBuiltinVAStart(TheCall.get())) { 49 return true; 50 } 51 return TheCall.take(); 52 case Builtin::BI__builtin_isgreater: 53 case Builtin::BI__builtin_isgreaterequal: 54 case Builtin::BI__builtin_isless: 55 case Builtin::BI__builtin_islessequal: 56 case Builtin::BI__builtin_islessgreater: 57 case Builtin::BI__builtin_isunordered: 58 if (SemaBuiltinUnorderedCompare(TheCall.get())) 59 return true; 60 return TheCall.take(); 61 case Builtin::BI__builtin_shufflevector: 62 return SemaBuiltinShuffleVector(TheCall.get()); 63 } 64 65 // Search the KnownFunctionIDs for the identifier. 66 unsigned i = 0, e = id_num_known_functions; 67 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 68 if (i == e) return TheCall.take(); 69 70 // Printf checking. 71 if (i <= id_vprintf) { 72 // Retrieve the index of the format string parameter and determine 73 // if the function is passed a va_arg argument. 74 unsigned format_idx = 0; 75 bool HasVAListArg = false; 76 77 switch (i) { 78 default: assert(false && "No format string argument index."); 79 case id_printf: format_idx = 0; break; 80 case id_fprintf: format_idx = 1; break; 81 case id_sprintf: format_idx = 1; break; 82 case id_snprintf: format_idx = 2; break; 83 case id_asprintf: format_idx = 1; break; 84 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 85 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 86 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 87 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 88 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 89 } 90 91 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 92 } 93 94 return TheCall.take(); 95} 96 97/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 98/// CFString constructor is correct 99bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 100 Arg = Arg->IgnoreParenCasts(); 101 102 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 103 104 if (!Literal || Literal->isWide()) { 105 Diag(Arg->getLocStart(), 106 diag::err_cfstring_literal_not_string_constant, 107 Arg->getSourceRange()); 108 return true; 109 } 110 111 const char *Data = Literal->getStrData(); 112 unsigned Length = Literal->getByteLength(); 113 114 for (unsigned i = 0; i < Length; ++i) { 115 if (!isascii(Data[i])) { 116 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 117 diag::warn_cfstring_literal_contains_non_ascii_character, 118 Arg->getSourceRange()); 119 break; 120 } 121 122 if (!Data[i]) { 123 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 124 diag::warn_cfstring_literal_contains_nul_character, 125 Arg->getSourceRange()); 126 break; 127 } 128 } 129 130 return false; 131} 132 133/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 134/// Emit an error and return true on failure, return false on success. 135bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 136 Expr *Fn = TheCall->getCallee(); 137 if (TheCall->getNumArgs() > 2) { 138 Diag(TheCall->getArg(2)->getLocStart(), 139 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), 140 SourceRange(TheCall->getArg(2)->getLocStart(), 141 (*(TheCall->arg_end()-1))->getLocEnd())); 142 return true; 143 } 144 145 // Determine whether the current function is variadic or not. 146 bool isVariadic; 147 if (CurFunctionDecl) 148 isVariadic = 149 cast<FunctionTypeProto>(CurFunctionDecl->getType())->isVariadic(); 150 else 151 isVariadic = CurMethodDecl->isVariadic(); 152 153 if (!isVariadic) { 154 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 155 return true; 156 } 157 158 // Verify that the second argument to the builtin is the last argument of the 159 // current function or method. 160 bool SecondArgIsLastNamedArgument = false; 161 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 162 163 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 164 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 165 // FIXME: This isn't correct for methods (results in bogus warning). 166 // Get the last formal in the current function. 167 const ParmVarDecl *LastArg; 168 if (CurFunctionDecl) 169 LastArg = *(CurFunctionDecl->param_end()-1); 170 else 171 LastArg = *(CurMethodDecl->param_end()-1); 172 SecondArgIsLastNamedArgument = PV == LastArg; 173 } 174 } 175 176 if (!SecondArgIsLastNamedArgument) 177 Diag(TheCall->getArg(1)->getLocStart(), 178 diag::warn_second_parameter_of_va_start_not_last_named_argument); 179 return false; 180} 181 182/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 183/// friends. This is declared to take (...), so we have to check everything. 184bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 185 if (TheCall->getNumArgs() < 2) 186 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); 187 if (TheCall->getNumArgs() > 2) 188 return Diag(TheCall->getArg(2)->getLocStart(), 189 diag::err_typecheck_call_too_many_args, 190 SourceRange(TheCall->getArg(2)->getLocStart(), 191 (*(TheCall->arg_end()-1))->getLocEnd())); 192 193 Expr *OrigArg0 = TheCall->getArg(0); 194 Expr *OrigArg1 = TheCall->getArg(1); 195 196 // Do standard promotions between the two arguments, returning their common 197 // type. 198 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 199 200 // If the common type isn't a real floating type, then the arguments were 201 // invalid for this operation. 202 if (!Res->isRealFloatingType()) 203 return Diag(OrigArg0->getLocStart(), 204 diag::err_typecheck_call_invalid_ordered_compare, 205 OrigArg0->getType().getAsString(), 206 OrigArg1->getType().getAsString(), 207 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); 208 209 return false; 210} 211 212/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 213// This is declared to take (...), so we have to check everything. 214Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 215 if (TheCall->getNumArgs() < 3) 216 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 217 TheCall->getSourceRange()); 218 219 QualType FAType = TheCall->getArg(0)->getType(); 220 QualType SAType = TheCall->getArg(1)->getType(); 221 222 if (!FAType->isVectorType() || !SAType->isVectorType()) { 223 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector, 224 SourceRange(TheCall->getArg(0)->getLocStart(), 225 TheCall->getArg(1)->getLocEnd())); 226 return true; 227 } 228 229 if (FAType.getCanonicalType().getUnqualifiedType() != 230 SAType.getCanonicalType().getUnqualifiedType()) { 231 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector, 232 SourceRange(TheCall->getArg(0)->getLocStart(), 233 TheCall->getArg(1)->getLocEnd())); 234 return true; 235 } 236 237 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 238 if (TheCall->getNumArgs() != numElements+2) { 239 if (TheCall->getNumArgs() < numElements+2) 240 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 241 TheCall->getSourceRange()); 242 else 243 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 244 TheCall->getSourceRange()); 245 return true; 246 } 247 248 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 249 llvm::APSInt Result(32); 250 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) { 251 Diag(TheCall->getLocStart(), 252 diag::err_shufflevector_nonconstant_argument, 253 TheCall->getArg(i)->getSourceRange()); 254 return true; 255 } 256 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) { 257 Diag(TheCall->getLocStart(), 258 diag::err_shufflevector_argument_too_large, 259 TheCall->getArg(i)->getSourceRange()); 260 return true; 261 } 262 } 263 264 llvm::SmallVector<Expr*, 32> exprs; 265 266 for (unsigned i = 0; i < TheCall->getNumArgs(); i++) { 267 exprs.push_back(TheCall->getArg(i)); 268 TheCall->setArg(i, 0); 269 } 270 271 ShuffleVectorExpr* E = new ShuffleVectorExpr( 272 exprs.begin(), numElements+2, FAType, 273 TheCall->getCallee()->getLocStart(), 274 TheCall->getRParenLoc()); 275 276 return E; 277} 278 279/// CheckPrintfArguments - Check calls to printf (and similar functions) for 280/// correct use of format strings. 281/// 282/// HasVAListArg - A predicate indicating whether the printf-like 283/// function is passed an explicit va_arg argument (e.g., vprintf) 284/// 285/// format_idx - The index into Args for the format string. 286/// 287/// Improper format strings to functions in the printf family can be 288/// the source of bizarre bugs and very serious security holes. A 289/// good source of information is available in the following paper 290/// (which includes additional references): 291/// 292/// FormatGuard: Automatic Protection From printf Format String 293/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 294/// 295/// Functionality implemented: 296/// 297/// We can statically check the following properties for string 298/// literal format strings for non v.*printf functions (where the 299/// arguments are passed directly): 300// 301/// (1) Are the number of format conversions equal to the number of 302/// data arguments? 303/// 304/// (2) Does each format conversion correctly match the type of the 305/// corresponding data argument? (TODO) 306/// 307/// Moreover, for all printf functions we can: 308/// 309/// (3) Check for a missing format string (when not caught by type checking). 310/// 311/// (4) Check for no-operation flags; e.g. using "#" with format 312/// conversion 'c' (TODO) 313/// 314/// (5) Check the use of '%n', a major source of security holes. 315/// 316/// (6) Check for malformed format conversions that don't specify anything. 317/// 318/// (7) Check for empty format strings. e.g: printf(""); 319/// 320/// (8) Check that the format string is a wide literal. 321/// 322/// (9) Also check the arguments of functions with the __format__ attribute. 323/// (TODO). 324/// 325/// All of these checks can be done by parsing the format string. 326/// 327/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 328void 329Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 330 unsigned format_idx) { 331 Expr *Fn = TheCall->getCallee(); 332 333 // CHECK: printf-like function is called with no format string. 334 if (format_idx >= TheCall->getNumArgs()) { 335 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, 336 Fn->getSourceRange()); 337 return; 338 } 339 340 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 341 342 // CHECK: format string is not a string literal. 343 // 344 // Dynamically generated format strings are difficult to 345 // automatically vet at compile time. Requiring that format strings 346 // are string literals: (1) permits the checking of format strings by 347 // the compiler and thereby (2) can practically remove the source of 348 // many format string exploits. 349 StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 350 if (FExpr == NULL) { 351 // For vprintf* functions (i.e., HasVAListArg==true), we add a 352 // special check to see if the format string is a function parameter 353 // of the function calling the printf function. If the function 354 // has an attribute indicating it is a printf-like function, then we 355 // should suppress warnings concerning non-literals being used in a call 356 // to a vprintf function. For example: 357 // 358 // void 359 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 360 // va_list ap; 361 // va_start(ap, fmt); 362 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 363 // ... 364 // 365 // 366 // FIXME: We don't have full attribute support yet, so just check to see 367 // if the argument is a DeclRefExpr that references a parameter. We'll 368 // add proper support for checking the attribute later. 369 if (HasVAListArg) 370 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 371 if (isa<ParmVarDecl>(DR->getDecl())) 372 return; 373 374 Diag(TheCall->getArg(format_idx)->getLocStart(), 375 diag::warn_printf_not_string_constant, Fn->getSourceRange()); 376 return; 377 } 378 379 // CHECK: is the format string a wide literal? 380 if (FExpr->isWide()) { 381 Diag(FExpr->getLocStart(), 382 diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange()); 383 return; 384 } 385 386 // Str - The format string. NOTE: this is NOT null-terminated! 387 const char * const Str = FExpr->getStrData(); 388 389 // CHECK: empty format string? 390 const unsigned StrLen = FExpr->getByteLength(); 391 392 if (StrLen == 0) { 393 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, 394 Fn->getSourceRange()); 395 return; 396 } 397 398 // We process the format string using a binary state machine. The 399 // current state is stored in CurrentState. 400 enum { 401 state_OrdChr, 402 state_Conversion 403 } CurrentState = state_OrdChr; 404 405 // numConversions - The number of conversions seen so far. This is 406 // incremented as we traverse the format string. 407 unsigned numConversions = 0; 408 409 // numDataArgs - The number of data arguments after the format 410 // string. This can only be determined for non vprintf-like 411 // functions. For those functions, this value is 1 (the sole 412 // va_arg argument). 413 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 414 415 // Inspect the format string. 416 unsigned StrIdx = 0; 417 418 // LastConversionIdx - Index within the format string where we last saw 419 // a '%' character that starts a new format conversion. 420 unsigned LastConversionIdx = 0; 421 422 for (; StrIdx < StrLen; ++StrIdx) { 423 424 // Is the number of detected conversion conversions greater than 425 // the number of matching data arguments? If so, stop. 426 if (!HasVAListArg && numConversions > numDataArgs) break; 427 428 // Handle "\0" 429 if (Str[StrIdx] == '\0') { 430 // The string returned by getStrData() is not null-terminated, 431 // so the presence of a null character is likely an error. 432 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 433 diag::warn_printf_format_string_contains_null_char, 434 Fn->getSourceRange()); 435 return; 436 } 437 438 // Ordinary characters (not processing a format conversion). 439 if (CurrentState == state_OrdChr) { 440 if (Str[StrIdx] == '%') { 441 CurrentState = state_Conversion; 442 LastConversionIdx = StrIdx; 443 } 444 continue; 445 } 446 447 // Seen '%'. Now processing a format conversion. 448 switch (Str[StrIdx]) { 449 // Handle dynamic precision or width specifier. 450 case '*': { 451 ++numConversions; 452 453 if (!HasVAListArg && numConversions > numDataArgs) { 454 SourceLocation Loc = FExpr->getLocStart(); 455 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 456 457 if (Str[StrIdx-1] == '.') 458 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, 459 Fn->getSourceRange()); 460 else 461 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, 462 Fn->getSourceRange()); 463 464 // Don't do any more checking. We'll just emit spurious errors. 465 return; 466 } 467 468 // Perform type checking on width/precision specifier. 469 Expr *E = TheCall->getArg(format_idx+numConversions); 470 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 471 if (BT->getKind() == BuiltinType::Int) 472 break; 473 474 SourceLocation Loc = 475 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 476 477 if (Str[StrIdx-1] == '.') 478 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, 479 E->getType().getAsString(), E->getSourceRange()); 480 else 481 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, 482 E->getType().getAsString(), E->getSourceRange()); 483 484 break; 485 } 486 487 // Characters which can terminate a format conversion 488 // (e.g. "%d"). Characters that specify length modifiers or 489 // other flags are handled by the default case below. 490 // 491 // FIXME: additional checks will go into the following cases. 492 case 'i': 493 case 'd': 494 case 'o': 495 case 'u': 496 case 'x': 497 case 'X': 498 case 'D': 499 case 'O': 500 case 'U': 501 case 'e': 502 case 'E': 503 case 'f': 504 case 'F': 505 case 'g': 506 case 'G': 507 case 'a': 508 case 'A': 509 case 'c': 510 case 'C': 511 case 'S': 512 case 's': 513 case 'p': 514 ++numConversions; 515 CurrentState = state_OrdChr; 516 break; 517 518 // CHECK: Are we using "%n"? Issue a warning. 519 case 'n': { 520 ++numConversions; 521 CurrentState = state_OrdChr; 522 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 523 LastConversionIdx+1); 524 525 Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange()); 526 break; 527 } 528 529 // Handle "%%" 530 case '%': 531 // Sanity check: Was the first "%" character the previous one? 532 // If not, we will assume that we have a malformed format 533 // conversion, and that the current "%" character is the start 534 // of a new conversion. 535 if (StrIdx - LastConversionIdx == 1) 536 CurrentState = state_OrdChr; 537 else { 538 // Issue a warning: invalid format conversion. 539 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 540 LastConversionIdx+1); 541 542 Diag(Loc, diag::warn_printf_invalid_conversion, 543 std::string(Str+LastConversionIdx, Str+StrIdx), 544 Fn->getSourceRange()); 545 546 // This conversion is broken. Advance to the next format 547 // conversion. 548 LastConversionIdx = StrIdx; 549 ++numConversions; 550 } 551 break; 552 553 default: 554 // This case catches all other characters: flags, widths, etc. 555 // We should eventually process those as well. 556 break; 557 } 558 } 559 560 if (CurrentState == state_Conversion) { 561 // Issue a warning: invalid format conversion. 562 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 563 LastConversionIdx+1); 564 565 Diag(Loc, diag::warn_printf_invalid_conversion, 566 std::string(Str+LastConversionIdx, 567 Str+std::min(LastConversionIdx+2, StrLen)), 568 Fn->getSourceRange()); 569 return; 570 } 571 572 if (!HasVAListArg) { 573 // CHECK: Does the number of format conversions exceed the number 574 // of data arguments? 575 if (numConversions > numDataArgs) { 576 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 577 LastConversionIdx); 578 579 Diag(Loc, diag::warn_printf_insufficient_data_args, 580 Fn->getSourceRange()); 581 } 582 // CHECK: Does the number of data arguments exceed the number of 583 // format conversions in the format string? 584 else if (numConversions < numDataArgs) 585 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 586 diag::warn_printf_too_many_data_args, Fn->getSourceRange()); 587 } 588} 589 590//===--- CHECK: Return Address of Stack Variable --------------------------===// 591 592static DeclRefExpr* EvalVal(Expr *E); 593static DeclRefExpr* EvalAddr(Expr* E); 594 595/// CheckReturnStackAddr - Check if a return statement returns the address 596/// of a stack variable. 597void 598Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 599 SourceLocation ReturnLoc) { 600 601 // Perform checking for returned stack addresses. 602 if (lhsType->isPointerType()) { 603 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 604 Diag(DR->getLocStart(), diag::warn_ret_stack_addr, 605 DR->getDecl()->getIdentifier()->getName(), 606 RetValExp->getSourceRange()); 607 } 608 // Perform checking for stack values returned by reference. 609 else if (lhsType->isReferenceType()) { 610 // Check for an implicit cast to a reference. 611 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp)) 612 if (DeclRefExpr *DR = EvalVal(I->getSubExpr())) 613 Diag(DR->getLocStart(), diag::warn_ret_stack_ref, 614 DR->getDecl()->getIdentifier()->getName(), 615 RetValExp->getSourceRange()); 616 } 617} 618 619/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 620/// check if the expression in a return statement evaluates to an address 621/// to a location on the stack. The recursion is used to traverse the 622/// AST of the return expression, with recursion backtracking when we 623/// encounter a subexpression that (1) clearly does not lead to the address 624/// of a stack variable or (2) is something we cannot determine leads to 625/// the address of a stack variable based on such local checking. 626/// 627/// EvalAddr processes expressions that are pointers that are used as 628/// references (and not L-values). EvalVal handles all other values. 629/// At the base case of the recursion is a check for a DeclRefExpr* in 630/// the refers to a stack variable. 631/// 632/// This implementation handles: 633/// 634/// * pointer-to-pointer casts 635/// * implicit conversions from array references to pointers 636/// * taking the address of fields 637/// * arbitrary interplay between "&" and "*" operators 638/// * pointer arithmetic from an address of a stack variable 639/// * taking the address of an array element where the array is on the stack 640static DeclRefExpr* EvalAddr(Expr *E) { 641 // We should only be called for evaluating pointer expressions. 642 assert((E->getType()->isPointerType() || 643 E->getType()->isObjCQualifiedIdType()) && 644 "EvalAddr only works on pointers"); 645 646 // Our "symbolic interpreter" is just a dispatch off the currently 647 // viewed AST node. We then recursively traverse the AST by calling 648 // EvalAddr and EvalVal appropriately. 649 switch (E->getStmtClass()) { 650 case Stmt::ParenExprClass: 651 // Ignore parentheses. 652 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 653 654 case Stmt::UnaryOperatorClass: { 655 // The only unary operator that make sense to handle here 656 // is AddrOf. All others don't make sense as pointers. 657 UnaryOperator *U = cast<UnaryOperator>(E); 658 659 if (U->getOpcode() == UnaryOperator::AddrOf) 660 return EvalVal(U->getSubExpr()); 661 else 662 return NULL; 663 } 664 665 case Stmt::BinaryOperatorClass: { 666 // Handle pointer arithmetic. All other binary operators are not valid 667 // in this context. 668 BinaryOperator *B = cast<BinaryOperator>(E); 669 BinaryOperator::Opcode op = B->getOpcode(); 670 671 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 672 return NULL; 673 674 Expr *Base = B->getLHS(); 675 676 // Determine which argument is the real pointer base. It could be 677 // the RHS argument instead of the LHS. 678 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 679 680 assert (Base->getType()->isPointerType()); 681 return EvalAddr(Base); 682 } 683 684 // For conditional operators we need to see if either the LHS or RHS are 685 // valid DeclRefExpr*s. If one of them is valid, we return it. 686 case Stmt::ConditionalOperatorClass: { 687 ConditionalOperator *C = cast<ConditionalOperator>(E); 688 689 // Handle the GNU extension for missing LHS. 690 if (Expr *lhsExpr = C->getLHS()) 691 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 692 return LHS; 693 694 return EvalAddr(C->getRHS()); 695 } 696 697 // For implicit casts, we need to handle conversions from arrays to 698 // pointer values, and implicit pointer-to-pointer conversions. 699 case Stmt::ImplicitCastExprClass: { 700 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E); 701 Expr* SubExpr = IE->getSubExpr(); 702 703 if (SubExpr->getType()->isPointerType() || 704 SubExpr->getType()->isObjCQualifiedIdType()) 705 return EvalAddr(SubExpr); 706 else 707 return EvalVal(SubExpr); 708 } 709 710 // For casts, we handle pointer-to-pointer conversions (which 711 // is essentially a no-op from our mini-interpreter's standpoint). 712 // For other casts we abort. 713 case Stmt::CastExprClass: { 714 CastExpr *C = cast<CastExpr>(E); 715 Expr *SubExpr = C->getSubExpr(); 716 717 if (SubExpr->getType()->isPointerType()) 718 return EvalAddr(SubExpr); 719 else 720 return NULL; 721 } 722 723 // C++ casts. For dynamic casts, static casts, and const casts, we 724 // are always converting from a pointer-to-pointer, so we just blow 725 // through the cast. In the case the dynamic cast doesn't fail 726 // (and return NULL), we take the conservative route and report cases 727 // where we return the address of a stack variable. For Reinterpre 728 case Stmt::CXXCastExprClass: { 729 CXXCastExpr *C = cast<CXXCastExpr>(E); 730 731 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) { 732 Expr *S = C->getSubExpr(); 733 if (S->getType()->isPointerType()) 734 return EvalAddr(S); 735 else 736 return NULL; 737 } 738 else 739 return EvalAddr(C->getSubExpr()); 740 } 741 742 // Everything else: we simply don't reason about them. 743 default: 744 return NULL; 745 } 746} 747 748 749/// EvalVal - This function is complements EvalAddr in the mutual recursion. 750/// See the comments for EvalAddr for more details. 751static DeclRefExpr* EvalVal(Expr *E) { 752 753 // We should only be called for evaluating non-pointer expressions, or 754 // expressions with a pointer type that are not used as references but instead 755 // are l-values (e.g., DeclRefExpr with a pointer type). 756 757 // Our "symbolic interpreter" is just a dispatch off the currently 758 // viewed AST node. We then recursively traverse the AST by calling 759 // EvalAddr and EvalVal appropriately. 760 switch (E->getStmtClass()) { 761 case Stmt::DeclRefExprClass: { 762 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 763 // at code that refers to a variable's name. We check if it has local 764 // storage within the function, and if so, return the expression. 765 DeclRefExpr *DR = cast<DeclRefExpr>(E); 766 767 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 768 if(V->hasLocalStorage()) return DR; 769 770 return NULL; 771 } 772 773 case Stmt::ParenExprClass: 774 // Ignore parentheses. 775 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 776 777 case Stmt::UnaryOperatorClass: { 778 // The only unary operator that make sense to handle here 779 // is Deref. All others don't resolve to a "name." This includes 780 // handling all sorts of rvalues passed to a unary operator. 781 UnaryOperator *U = cast<UnaryOperator>(E); 782 783 if (U->getOpcode() == UnaryOperator::Deref) 784 return EvalAddr(U->getSubExpr()); 785 786 return NULL; 787 } 788 789 case Stmt::ArraySubscriptExprClass: { 790 // Array subscripts are potential references to data on the stack. We 791 // retrieve the DeclRefExpr* for the array variable if it indeed 792 // has local storage. 793 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 794 } 795 796 case Stmt::ConditionalOperatorClass: { 797 // For conditional operators we need to see if either the LHS or RHS are 798 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 799 ConditionalOperator *C = cast<ConditionalOperator>(E); 800 801 // Handle the GNU extension for missing LHS. 802 if (Expr *lhsExpr = C->getLHS()) 803 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 804 return LHS; 805 806 return EvalVal(C->getRHS()); 807 } 808 809 // Accesses to members are potential references to data on the stack. 810 case Stmt::MemberExprClass: { 811 MemberExpr *M = cast<MemberExpr>(E); 812 813 // Check for indirect access. We only want direct field accesses. 814 if (!M->isArrow()) 815 return EvalVal(M->getBase()); 816 else 817 return NULL; 818 } 819 820 // Everything else: we simply don't reason about them. 821 default: 822 return NULL; 823 } 824} 825 826//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 827 828/// Check for comparisons of floating point operands using != and ==. 829/// Issue a warning if these are no self-comparisons, as they are not likely 830/// to do what the programmer intended. 831void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 832 bool EmitWarning = true; 833 834 Expr* LeftExprSansParen = lex->IgnoreParens(); 835 Expr* RightExprSansParen = rex->IgnoreParens(); 836 837 // Special case: check for x == x (which is OK). 838 // Do not emit warnings for such cases. 839 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 840 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 841 if (DRL->getDecl() == DRR->getDecl()) 842 EmitWarning = false; 843 844 845 // Special case: check for comparisons against literals that can be exactly 846 // represented by APFloat. In such cases, do not emit a warning. This 847 // is a heuristic: often comparison against such literals are used to 848 // detect if a value in a variable has not changed. This clearly can 849 // lead to false negatives. 850 if (EmitWarning) { 851 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 852 if (FLL->isExact()) 853 EmitWarning = false; 854 } 855 else 856 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 857 if (FLR->isExact()) 858 EmitWarning = false; 859 } 860 } 861 862 // Check for comparisons with builtin types. 863 if (EmitWarning) 864 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 865 if (isCallBuiltin(CL)) 866 EmitWarning = false; 867 868 if (EmitWarning) 869 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 870 if (isCallBuiltin(CR)) 871 EmitWarning = false; 872 873 // Emit the diagnostic. 874 if (EmitWarning) 875 Diag(loc, diag::warn_floatingpoint_eq, 876 lex->getSourceRange(),rex->getSourceRange()); 877} 878