SemaChecking.cpp revision dcd5ef12488e4c7ea844327835896ca86b609a97
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21#include "clang/Basic/Diagnostic.h" 22#include "SemaUtil.h" 23using namespace clang; 24 25/// CheckFunctionCall - Check a direct function call for various correctness 26/// and safety properties not strictly enforced by the C type system. 27Action::ExprResult 28Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 29 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 30 // Get the IdentifierInfo* for the called function. 31 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 32 33 // None of the checks below are needed for functions that don't have 34 // simple names (e.g., C++ conversion functions). 35 if (!FnInfo) 36 return TheCall.take(); 37 38 switch (FnInfo->getBuiltinID()) { 39 case Builtin::BI__builtin___CFStringMakeConstantString: 40 assert(TheCall->getNumArgs() == 1 && 41 "Wrong # arguments to builtin CFStringMakeConstantString"); 42 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 43 return true; 44 return TheCall.take(); 45 case Builtin::BI__builtin_stdarg_start: 46 case Builtin::BI__builtin_va_start: 47 if (SemaBuiltinVAStart(TheCall.get())) 48 return true; 49 return TheCall.take(); 50 case Builtin::BI__builtin_isgreater: 51 case Builtin::BI__builtin_isgreaterequal: 52 case Builtin::BI__builtin_isless: 53 case Builtin::BI__builtin_islessequal: 54 case Builtin::BI__builtin_islessgreater: 55 case Builtin::BI__builtin_isunordered: 56 if (SemaBuiltinUnorderedCompare(TheCall.get())) 57 return true; 58 return TheCall.take(); 59 case Builtin::BI__builtin_return_address: 60 case Builtin::BI__builtin_frame_address: 61 if (SemaBuiltinStackAddress(TheCall.get())) 62 return true; 63 return TheCall.take(); 64 case Builtin::BI__builtin_shufflevector: 65 return SemaBuiltinShuffleVector(TheCall.get()); 66 case Builtin::BI__builtin_prefetch: 67 if (SemaBuiltinPrefetch(TheCall.get())) 68 return true; 69 return TheCall.take(); 70 case Builtin::BI__builtin_object_size: 71 if (SemaBuiltinObjectSize(TheCall.get())) 72 return true; 73 } 74 75 // FIXME: This mechanism should be abstracted to be less fragile and 76 // more efficient. For example, just map function ids to custom 77 // handlers. 78 79 // Search the KnownFunctionIDs for the identifier. 80 unsigned i = 0, e = id_num_known_functions; 81 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 82 if (i == e) return TheCall.take(); 83 84 // Printf checking. 85 if (i <= id_vprintf) { 86 // Retrieve the index of the format string parameter and determine 87 // if the function is passed a va_arg argument. 88 unsigned format_idx = 0; 89 bool HasVAListArg = false; 90 91 switch (i) { 92 default: assert(false && "No format string argument index."); 93 case id_NSLog: format_idx = 0; break; 94 case id_asprintf: format_idx = 1; break; 95 case id_fprintf: format_idx = 1; break; 96 case id_printf: format_idx = 0; break; 97 case id_snprintf: format_idx = 2; break; 98 case id_snprintf_chk: format_idx = 4; break; 99 case id_sprintf: format_idx = 1; break; 100 case id_sprintf_chk: format_idx = 3; break; 101 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 102 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 103 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 104 case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break; 105 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 106 case id_vsprintf_chk: format_idx = 3; HasVAListArg = true; break; 107 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 108 } 109 110 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 111 } 112 113 return TheCall.take(); 114} 115 116/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 117/// CFString constructor is correct 118bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 119 Arg = Arg->IgnoreParenCasts(); 120 121 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 122 123 if (!Literal || Literal->isWide()) { 124 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 125 << Arg->getSourceRange(); 126 return true; 127 } 128 129 const char *Data = Literal->getStrData(); 130 unsigned Length = Literal->getByteLength(); 131 132 for (unsigned i = 0; i < Length; ++i) { 133 if (!isascii(Data[i])) { 134 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 135 diag::warn_cfstring_literal_contains_non_ascii_character) 136 << Arg->getSourceRange(); 137 break; 138 } 139 140 if (!Data[i]) { 141 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 142 diag::warn_cfstring_literal_contains_nul_character) 143 << Arg->getSourceRange(); 144 break; 145 } 146 } 147 148 return false; 149} 150 151/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 152/// Emit an error and return true on failure, return false on success. 153bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 154 Expr *Fn = TheCall->getCallee(); 155 if (TheCall->getNumArgs() > 2) { 156 Diag(TheCall->getArg(2)->getLocStart(), 157 diag::err_typecheck_call_too_many_args) 158 << Fn->getSourceRange() 159 << SourceRange(TheCall->getArg(2)->getLocStart(), 160 (*(TheCall->arg_end()-1))->getLocEnd()); 161 return true; 162 } 163 164 // Determine whether the current function is variadic or not. 165 bool isVariadic; 166 if (getCurFunctionDecl()) 167 isVariadic = 168 cast<FunctionTypeProto>(getCurFunctionDecl()->getType())->isVariadic(); 169 else 170 isVariadic = getCurMethodDecl()->isVariadic(); 171 172 if (!isVariadic) { 173 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 174 return true; 175 } 176 177 // Verify that the second argument to the builtin is the last argument of the 178 // current function or method. 179 bool SecondArgIsLastNamedArgument = false; 180 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 181 182 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 183 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 184 // FIXME: This isn't correct for methods (results in bogus warning). 185 // Get the last formal in the current function. 186 const ParmVarDecl *LastArg; 187 if (getCurFunctionDecl()) 188 LastArg = *(getCurFunctionDecl()->param_end()-1); 189 else 190 LastArg = *(getCurMethodDecl()->param_end()-1); 191 SecondArgIsLastNamedArgument = PV == LastArg; 192 } 193 } 194 195 if (!SecondArgIsLastNamedArgument) 196 Diag(TheCall->getArg(1)->getLocStart(), 197 diag::warn_second_parameter_of_va_start_not_last_named_argument); 198 return false; 199} 200 201/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 202/// friends. This is declared to take (...), so we have to check everything. 203bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 204 if (TheCall->getNumArgs() < 2) 205 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); 206 if (TheCall->getNumArgs() > 2) 207 return Diag(TheCall->getArg(2)->getLocStart(), 208 diag::err_typecheck_call_too_many_args) 209 << SourceRange(TheCall->getArg(2)->getLocStart(), 210 (*(TheCall->arg_end()-1))->getLocEnd()); 211 212 Expr *OrigArg0 = TheCall->getArg(0); 213 Expr *OrigArg1 = TheCall->getArg(1); 214 215 // Do standard promotions between the two arguments, returning their common 216 // type. 217 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 218 219 // If the common type isn't a real floating type, then the arguments were 220 // invalid for this operation. 221 if (!Res->isRealFloatingType()) 222 return Diag(OrigArg0->getLocStart(), 223 diag::err_typecheck_call_invalid_ordered_compare) 224 << OrigArg0->getType().getAsString() << OrigArg1->getType().getAsString() 225 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 226 227 return false; 228} 229 230bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 231 // The signature for these builtins is exact; the only thing we need 232 // to check is that the argument is a constant. 233 SourceLocation Loc; 234 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 235 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 236 237 return false; 238} 239 240/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 241// This is declared to take (...), so we have to check everything. 242Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 243 if (TheCall->getNumArgs() < 3) 244 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 245 << TheCall->getSourceRange(); 246 247 QualType FAType = TheCall->getArg(0)->getType(); 248 QualType SAType = TheCall->getArg(1)->getType(); 249 250 if (!FAType->isVectorType() || !SAType->isVectorType()) { 251 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 252 << SourceRange(TheCall->getArg(0)->getLocStart(), 253 TheCall->getArg(1)->getLocEnd()); 254 return true; 255 } 256 257 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 258 Context.getCanonicalType(SAType).getUnqualifiedType()) { 259 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 260 << SourceRange(TheCall->getArg(0)->getLocStart(), 261 TheCall->getArg(1)->getLocEnd()); 262 return true; 263 } 264 265 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 266 if (TheCall->getNumArgs() != numElements+2) { 267 if (TheCall->getNumArgs() < numElements+2) 268 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 269 << TheCall->getSourceRange(); 270 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 271 << TheCall->getSourceRange(); 272 } 273 274 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 275 llvm::APSInt Result(32); 276 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 277 return Diag(TheCall->getLocStart(), 278 diag::err_shufflevector_nonconstant_argument) 279 << TheCall->getArg(i)->getSourceRange(); 280 281 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 282 return Diag(TheCall->getLocStart(), 283 diag::err_shufflevector_argument_too_large) 284 << TheCall->getArg(i)->getSourceRange(); 285 } 286 287 llvm::SmallVector<Expr*, 32> exprs; 288 289 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 290 exprs.push_back(TheCall->getArg(i)); 291 TheCall->setArg(i, 0); 292 } 293 294 return new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType, 295 TheCall->getCallee()->getLocStart(), 296 TheCall->getRParenLoc()); 297} 298 299/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 300// This is declared to take (const void*, ...) and can take two 301// optional constant int args. 302bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 303 unsigned NumArgs = TheCall->getNumArgs(); 304 305 if (NumArgs > 3) 306 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 307 << TheCall->getSourceRange(); 308 309 // Argument 0 is checked for us and the remaining arguments must be 310 // constant integers. 311 for (unsigned i = 1; i != NumArgs; ++i) { 312 Expr *Arg = TheCall->getArg(i); 313 QualType RWType = Arg->getType(); 314 315 const BuiltinType *BT = RWType->getAsBuiltinType(); 316 llvm::APSInt Result; 317 if (!BT || BT->getKind() != BuiltinType::Int || 318 !Arg->isIntegerConstantExpr(Result, Context)) 319 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 320 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 321 322 // FIXME: gcc issues a warning and rewrites these to 0. These 323 // seems especially odd for the third argument since the default 324 // is 3. 325 if (i == 1) { 326 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 327 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 328 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 329 } else { 330 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 331 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 332 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 333 } 334 } 335 336 return false; 337} 338 339/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 340/// int type). This simply type checks that type is one of the defined 341/// constants (0-3). 342bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 343 Expr *Arg = TheCall->getArg(1); 344 QualType ArgType = Arg->getType(); 345 const BuiltinType *BT = ArgType->getAsBuiltinType(); 346 llvm::APSInt Result(32); 347 if (!BT || BT->getKind() != BuiltinType::Int || 348 !Arg->isIntegerConstantExpr(Result, Context)) { 349 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 350 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 351 } 352 353 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 354 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 355 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 356 } 357 358 return false; 359} 360 361/// CheckPrintfArguments - Check calls to printf (and similar functions) for 362/// correct use of format strings. 363/// 364/// HasVAListArg - A predicate indicating whether the printf-like 365/// function is passed an explicit va_arg argument (e.g., vprintf) 366/// 367/// format_idx - The index into Args for the format string. 368/// 369/// Improper format strings to functions in the printf family can be 370/// the source of bizarre bugs and very serious security holes. A 371/// good source of information is available in the following paper 372/// (which includes additional references): 373/// 374/// FormatGuard: Automatic Protection From printf Format String 375/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 376/// 377/// Functionality implemented: 378/// 379/// We can statically check the following properties for string 380/// literal format strings for non v.*printf functions (where the 381/// arguments are passed directly): 382// 383/// (1) Are the number of format conversions equal to the number of 384/// data arguments? 385/// 386/// (2) Does each format conversion correctly match the type of the 387/// corresponding data argument? (TODO) 388/// 389/// Moreover, for all printf functions we can: 390/// 391/// (3) Check for a missing format string (when not caught by type checking). 392/// 393/// (4) Check for no-operation flags; e.g. using "#" with format 394/// conversion 'c' (TODO) 395/// 396/// (5) Check the use of '%n', a major source of security holes. 397/// 398/// (6) Check for malformed format conversions that don't specify anything. 399/// 400/// (7) Check for empty format strings. e.g: printf(""); 401/// 402/// (8) Check that the format string is a wide literal. 403/// 404/// (9) Also check the arguments of functions with the __format__ attribute. 405/// (TODO). 406/// 407/// All of these checks can be done by parsing the format string. 408/// 409/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 410void 411Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 412 unsigned format_idx) { 413 Expr *Fn = TheCall->getCallee(); 414 415 // CHECK: printf-like function is called with no format string. 416 if (format_idx >= TheCall->getNumArgs()) { 417 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 418 << Fn->getSourceRange(); 419 return; 420 } 421 422 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 423 424 // CHECK: format string is not a string literal. 425 // 426 // Dynamically generated format strings are difficult to 427 // automatically vet at compile time. Requiring that format strings 428 // are string literals: (1) permits the checking of format strings by 429 // the compiler and thereby (2) can practically remove the source of 430 // many format string exploits. 431 432 // Format string can be either ObjC string (e.g. @"%d") or 433 // C string (e.g. "%d") 434 // ObjC string uses the same format specifiers as C string, so we can use 435 // the same format string checking logic for both ObjC and C strings. 436 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 437 StringLiteral *FExpr = NULL; 438 439 if(ObjCFExpr != NULL) 440 FExpr = ObjCFExpr->getString(); 441 else 442 FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 443 444 if (FExpr == NULL) { 445 // For vprintf* functions (i.e., HasVAListArg==true), we add a 446 // special check to see if the format string is a function parameter 447 // of the function calling the printf function. If the function 448 // has an attribute indicating it is a printf-like function, then we 449 // should suppress warnings concerning non-literals being used in a call 450 // to a vprintf function. For example: 451 // 452 // void 453 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 454 // va_list ap; 455 // va_start(ap, fmt); 456 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 457 // ... 458 // 459 // 460 // FIXME: We don't have full attribute support yet, so just check to see 461 // if the argument is a DeclRefExpr that references a parameter. We'll 462 // add proper support for checking the attribute later. 463 if (HasVAListArg) 464 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 465 if (isa<ParmVarDecl>(DR->getDecl())) 466 return; 467 468 Diag(TheCall->getArg(format_idx)->getLocStart(), 469 diag::warn_printf_not_string_constant) 470 << OrigFormatExpr->getSourceRange(); 471 return; 472 } 473 474 // CHECK: is the format string a wide literal? 475 if (FExpr->isWide()) { 476 Diag(FExpr->getLocStart(), 477 diag::warn_printf_format_string_is_wide_literal) 478 << OrigFormatExpr->getSourceRange(); 479 return; 480 } 481 482 // Str - The format string. NOTE: this is NOT null-terminated! 483 const char * const Str = FExpr->getStrData(); 484 485 // CHECK: empty format string? 486 const unsigned StrLen = FExpr->getByteLength(); 487 488 if (StrLen == 0) { 489 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 490 << OrigFormatExpr->getSourceRange(); 491 return; 492 } 493 494 // We process the format string using a binary state machine. The 495 // current state is stored in CurrentState. 496 enum { 497 state_OrdChr, 498 state_Conversion 499 } CurrentState = state_OrdChr; 500 501 // numConversions - The number of conversions seen so far. This is 502 // incremented as we traverse the format string. 503 unsigned numConversions = 0; 504 505 // numDataArgs - The number of data arguments after the format 506 // string. This can only be determined for non vprintf-like 507 // functions. For those functions, this value is 1 (the sole 508 // va_arg argument). 509 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 510 511 // Inspect the format string. 512 unsigned StrIdx = 0; 513 514 // LastConversionIdx - Index within the format string where we last saw 515 // a '%' character that starts a new format conversion. 516 unsigned LastConversionIdx = 0; 517 518 for (; StrIdx < StrLen; ++StrIdx) { 519 520 // Is the number of detected conversion conversions greater than 521 // the number of matching data arguments? If so, stop. 522 if (!HasVAListArg && numConversions > numDataArgs) break; 523 524 // Handle "\0" 525 if (Str[StrIdx] == '\0') { 526 // The string returned by getStrData() is not null-terminated, 527 // so the presence of a null character is likely an error. 528 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 529 diag::warn_printf_format_string_contains_null_char) 530 << OrigFormatExpr->getSourceRange(); 531 return; 532 } 533 534 // Ordinary characters (not processing a format conversion). 535 if (CurrentState == state_OrdChr) { 536 if (Str[StrIdx] == '%') { 537 CurrentState = state_Conversion; 538 LastConversionIdx = StrIdx; 539 } 540 continue; 541 } 542 543 // Seen '%'. Now processing a format conversion. 544 switch (Str[StrIdx]) { 545 // Handle dynamic precision or width specifier. 546 case '*': { 547 ++numConversions; 548 549 if (!HasVAListArg && numConversions > numDataArgs) { 550 SourceLocation Loc = FExpr->getLocStart(); 551 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 552 553 if (Str[StrIdx-1] == '.') 554 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 555 << OrigFormatExpr->getSourceRange(); 556 else 557 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 558 << OrigFormatExpr->getSourceRange(); 559 560 // Don't do any more checking. We'll just emit spurious errors. 561 return; 562 } 563 564 // Perform type checking on width/precision specifier. 565 Expr *E = TheCall->getArg(format_idx+numConversions); 566 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 567 if (BT->getKind() == BuiltinType::Int) 568 break; 569 570 SourceLocation Loc = 571 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 572 573 if (Str[StrIdx-1] == '.') 574 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 575 << E->getType().getAsString() << E->getSourceRange(); 576 else 577 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 578 << E->getType().getAsString() << E->getSourceRange(); 579 580 break; 581 } 582 583 // Characters which can terminate a format conversion 584 // (e.g. "%d"). Characters that specify length modifiers or 585 // other flags are handled by the default case below. 586 // 587 // FIXME: additional checks will go into the following cases. 588 case 'i': 589 case 'd': 590 case 'o': 591 case 'u': 592 case 'x': 593 case 'X': 594 case 'D': 595 case 'O': 596 case 'U': 597 case 'e': 598 case 'E': 599 case 'f': 600 case 'F': 601 case 'g': 602 case 'G': 603 case 'a': 604 case 'A': 605 case 'c': 606 case 'C': 607 case 'S': 608 case 's': 609 case 'p': 610 ++numConversions; 611 CurrentState = state_OrdChr; 612 break; 613 614 // CHECK: Are we using "%n"? Issue a warning. 615 case 'n': { 616 ++numConversions; 617 CurrentState = state_OrdChr; 618 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 619 LastConversionIdx+1); 620 621 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 622 break; 623 } 624 625 // Handle "%@" 626 case '@': 627 // %@ is allowed in ObjC format strings only. 628 if(ObjCFExpr != NULL) 629 CurrentState = state_OrdChr; 630 else { 631 // Issue a warning: invalid format conversion. 632 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 633 LastConversionIdx+1); 634 635 Diag(Loc, diag::warn_printf_invalid_conversion, 636 std::string(Str+LastConversionIdx, 637 Str+std::min(LastConversionIdx+2, StrLen)), 638 OrigFormatExpr->getSourceRange()); 639 } 640 ++numConversions; 641 break; 642 643 // Handle "%%" 644 case '%': 645 // Sanity check: Was the first "%" character the previous one? 646 // If not, we will assume that we have a malformed format 647 // conversion, and that the current "%" character is the start 648 // of a new conversion. 649 if (StrIdx - LastConversionIdx == 1) 650 CurrentState = state_OrdChr; 651 else { 652 // Issue a warning: invalid format conversion. 653 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 654 LastConversionIdx+1); 655 656 Diag(Loc, diag::warn_printf_invalid_conversion, 657 std::string(Str+LastConversionIdx, Str+StrIdx), 658 OrigFormatExpr->getSourceRange()); 659 660 // This conversion is broken. Advance to the next format 661 // conversion. 662 LastConversionIdx = StrIdx; 663 ++numConversions; 664 } 665 break; 666 667 default: 668 // This case catches all other characters: flags, widths, etc. 669 // We should eventually process those as well. 670 break; 671 } 672 } 673 674 if (CurrentState == state_Conversion) { 675 // Issue a warning: invalid format conversion. 676 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 677 LastConversionIdx+1); 678 679 Diag(Loc, diag::warn_printf_invalid_conversion, 680 std::string(Str+LastConversionIdx, 681 Str+std::min(LastConversionIdx+2, StrLen)), 682 OrigFormatExpr->getSourceRange()); 683 return; 684 } 685 686 if (!HasVAListArg) { 687 // CHECK: Does the number of format conversions exceed the number 688 // of data arguments? 689 if (numConversions > numDataArgs) { 690 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 691 LastConversionIdx); 692 693 Diag(Loc, diag::warn_printf_insufficient_data_args) 694 << OrigFormatExpr->getSourceRange(); 695 } 696 // CHECK: Does the number of data arguments exceed the number of 697 // format conversions in the format string? 698 else if (numConversions < numDataArgs) 699 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 700 diag::warn_printf_too_many_data_args) 701 << OrigFormatExpr->getSourceRange(); 702 } 703} 704 705//===--- CHECK: Return Address of Stack Variable --------------------------===// 706 707static DeclRefExpr* EvalVal(Expr *E); 708static DeclRefExpr* EvalAddr(Expr* E); 709 710/// CheckReturnStackAddr - Check if a return statement returns the address 711/// of a stack variable. 712void 713Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 714 SourceLocation ReturnLoc) { 715 716 // Perform checking for returned stack addresses. 717 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 718 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 719 Diag(DR->getLocStart(), diag::warn_ret_stack_addr, 720 DR->getDecl()->getIdentifier()->getName(), 721 RetValExp->getSourceRange()); 722 723 // Skip over implicit cast expressions when checking for block expressions. 724 if (ImplicitCastExpr *IcExpr = 725 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 726 RetValExp = IcExpr->getSubExpr(); 727 728 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 729 Diag(C->getLocStart(), diag::err_ret_local_block) 730 << C->getSourceRange(); 731 } 732 // Perform checking for stack values returned by reference. 733 else if (lhsType->isReferenceType()) { 734 // Check for a reference to the stack 735 if (DeclRefExpr *DR = EvalVal(RetValExp)) 736 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 737 << DR->getDecl()->getIdentifier()->getName() 738 << RetValExp->getSourceRange(); 739 } 740} 741 742/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 743/// check if the expression in a return statement evaluates to an address 744/// to a location on the stack. The recursion is used to traverse the 745/// AST of the return expression, with recursion backtracking when we 746/// encounter a subexpression that (1) clearly does not lead to the address 747/// of a stack variable or (2) is something we cannot determine leads to 748/// the address of a stack variable based on such local checking. 749/// 750/// EvalAddr processes expressions that are pointers that are used as 751/// references (and not L-values). EvalVal handles all other values. 752/// At the base case of the recursion is a check for a DeclRefExpr* in 753/// the refers to a stack variable. 754/// 755/// This implementation handles: 756/// 757/// * pointer-to-pointer casts 758/// * implicit conversions from array references to pointers 759/// * taking the address of fields 760/// * arbitrary interplay between "&" and "*" operators 761/// * pointer arithmetic from an address of a stack variable 762/// * taking the address of an array element where the array is on the stack 763static DeclRefExpr* EvalAddr(Expr *E) { 764 // We should only be called for evaluating pointer expressions. 765 assert((E->getType()->isPointerType() || 766 E->getType()->isBlockPointerType() || 767 E->getType()->isObjCQualifiedIdType()) && 768 "EvalAddr only works on pointers"); 769 770 // Our "symbolic interpreter" is just a dispatch off the currently 771 // viewed AST node. We then recursively traverse the AST by calling 772 // EvalAddr and EvalVal appropriately. 773 switch (E->getStmtClass()) { 774 case Stmt::ParenExprClass: 775 // Ignore parentheses. 776 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 777 778 case Stmt::UnaryOperatorClass: { 779 // The only unary operator that make sense to handle here 780 // is AddrOf. All others don't make sense as pointers. 781 UnaryOperator *U = cast<UnaryOperator>(E); 782 783 if (U->getOpcode() == UnaryOperator::AddrOf) 784 return EvalVal(U->getSubExpr()); 785 else 786 return NULL; 787 } 788 789 case Stmt::BinaryOperatorClass: { 790 // Handle pointer arithmetic. All other binary operators are not valid 791 // in this context. 792 BinaryOperator *B = cast<BinaryOperator>(E); 793 BinaryOperator::Opcode op = B->getOpcode(); 794 795 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 796 return NULL; 797 798 Expr *Base = B->getLHS(); 799 800 // Determine which argument is the real pointer base. It could be 801 // the RHS argument instead of the LHS. 802 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 803 804 assert (Base->getType()->isPointerType()); 805 return EvalAddr(Base); 806 } 807 808 // For conditional operators we need to see if either the LHS or RHS are 809 // valid DeclRefExpr*s. If one of them is valid, we return it. 810 case Stmt::ConditionalOperatorClass: { 811 ConditionalOperator *C = cast<ConditionalOperator>(E); 812 813 // Handle the GNU extension for missing LHS. 814 if (Expr *lhsExpr = C->getLHS()) 815 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 816 return LHS; 817 818 return EvalAddr(C->getRHS()); 819 } 820 821 // For casts, we need to handle conversions from arrays to 822 // pointer values, and pointer-to-pointer conversions. 823 case Stmt::ImplicitCastExprClass: 824 case Stmt::CStyleCastExprClass: 825 case Stmt::CXXFunctionalCastExprClass: { 826 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 827 QualType T = SubExpr->getType(); 828 829 if (SubExpr->getType()->isPointerType() || 830 SubExpr->getType()->isBlockPointerType() || 831 SubExpr->getType()->isObjCQualifiedIdType()) 832 return EvalAddr(SubExpr); 833 else if (T->isArrayType()) 834 return EvalVal(SubExpr); 835 else 836 return 0; 837 } 838 839 // C++ casts. For dynamic casts, static casts, and const casts, we 840 // are always converting from a pointer-to-pointer, so we just blow 841 // through the cast. In the case the dynamic cast doesn't fail (and 842 // return NULL), we take the conservative route and report cases 843 // where we return the address of a stack variable. For Reinterpre 844 // FIXME: The comment about is wrong; we're not always converting 845 // from pointer to pointer. I'm guessing that this code should also 846 // handle references to objects. 847 case Stmt::CXXStaticCastExprClass: 848 case Stmt::CXXDynamicCastExprClass: 849 case Stmt::CXXConstCastExprClass: 850 case Stmt::CXXReinterpretCastExprClass: { 851 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 852 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 853 return EvalAddr(S); 854 else 855 return NULL; 856 } 857 858 // Everything else: we simply don't reason about them. 859 default: 860 return NULL; 861 } 862} 863 864 865/// EvalVal - This function is complements EvalAddr in the mutual recursion. 866/// See the comments for EvalAddr for more details. 867static DeclRefExpr* EvalVal(Expr *E) { 868 869 // We should only be called for evaluating non-pointer expressions, or 870 // expressions with a pointer type that are not used as references but instead 871 // are l-values (e.g., DeclRefExpr with a pointer type). 872 873 // Our "symbolic interpreter" is just a dispatch off the currently 874 // viewed AST node. We then recursively traverse the AST by calling 875 // EvalAddr and EvalVal appropriately. 876 switch (E->getStmtClass()) { 877 case Stmt::DeclRefExprClass: { 878 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 879 // at code that refers to a variable's name. We check if it has local 880 // storage within the function, and if so, return the expression. 881 DeclRefExpr *DR = cast<DeclRefExpr>(E); 882 883 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 884 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 885 886 return NULL; 887 } 888 889 case Stmt::ParenExprClass: 890 // Ignore parentheses. 891 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 892 893 case Stmt::UnaryOperatorClass: { 894 // The only unary operator that make sense to handle here 895 // is Deref. All others don't resolve to a "name." This includes 896 // handling all sorts of rvalues passed to a unary operator. 897 UnaryOperator *U = cast<UnaryOperator>(E); 898 899 if (U->getOpcode() == UnaryOperator::Deref) 900 return EvalAddr(U->getSubExpr()); 901 902 return NULL; 903 } 904 905 case Stmt::ArraySubscriptExprClass: { 906 // Array subscripts are potential references to data on the stack. We 907 // retrieve the DeclRefExpr* for the array variable if it indeed 908 // has local storage. 909 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 910 } 911 912 case Stmt::ConditionalOperatorClass: { 913 // For conditional operators we need to see if either the LHS or RHS are 914 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 915 ConditionalOperator *C = cast<ConditionalOperator>(E); 916 917 // Handle the GNU extension for missing LHS. 918 if (Expr *lhsExpr = C->getLHS()) 919 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 920 return LHS; 921 922 return EvalVal(C->getRHS()); 923 } 924 925 // Accesses to members are potential references to data on the stack. 926 case Stmt::MemberExprClass: { 927 MemberExpr *M = cast<MemberExpr>(E); 928 929 // Check for indirect access. We only want direct field accesses. 930 if (!M->isArrow()) 931 return EvalVal(M->getBase()); 932 else 933 return NULL; 934 } 935 936 // Everything else: we simply don't reason about them. 937 default: 938 return NULL; 939 } 940} 941 942//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 943 944/// Check for comparisons of floating point operands using != and ==. 945/// Issue a warning if these are no self-comparisons, as they are not likely 946/// to do what the programmer intended. 947void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 948 bool EmitWarning = true; 949 950 Expr* LeftExprSansParen = lex->IgnoreParens(); 951 Expr* RightExprSansParen = rex->IgnoreParens(); 952 953 // Special case: check for x == x (which is OK). 954 // Do not emit warnings for such cases. 955 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 956 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 957 if (DRL->getDecl() == DRR->getDecl()) 958 EmitWarning = false; 959 960 961 // Special case: check for comparisons against literals that can be exactly 962 // represented by APFloat. In such cases, do not emit a warning. This 963 // is a heuristic: often comparison against such literals are used to 964 // detect if a value in a variable has not changed. This clearly can 965 // lead to false negatives. 966 if (EmitWarning) { 967 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 968 if (FLL->isExact()) 969 EmitWarning = false; 970 } 971 else 972 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 973 if (FLR->isExact()) 974 EmitWarning = false; 975 } 976 } 977 978 // Check for comparisons with builtin types. 979 if (EmitWarning) 980 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 981 if (isCallBuiltin(CL)) 982 EmitWarning = false; 983 984 if (EmitWarning) 985 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 986 if (isCallBuiltin(CR)) 987 EmitWarning = false; 988 989 // Emit the diagnostic. 990 if (EmitWarning) 991 Diag(loc, diag::warn_floatingpoint_eq) 992 << lex->getSourceRange() << rex->getSourceRange(); 993} 994