SemaChecking.cpp revision 56f20ae1010aa71defd7572f660b41288c56cdd1
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21#include "clang/Basic/Diagnostic.h" 22#include "SemaUtil.h" 23using namespace clang; 24 25/// CheckFunctionCall - Check a direct function call for various correctness 26/// and safety properties not strictly enforced by the C type system. 27Action::ExprResult 28Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 29 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 30 // Get the IdentifierInfo* for the called function. 31 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 32 33 // None of the checks below are needed for functions that don't have 34 // simple names (e.g., C++ conversion functions). 35 if (!FnInfo) 36 return TheCall.take(); 37 38 switch (FnInfo->getBuiltinID()) { 39 case Builtin::BI__builtin___CFStringMakeConstantString: 40 assert(TheCall->getNumArgs() == 1 && 41 "Wrong # arguments to builtin CFStringMakeConstantString"); 42 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 43 return true; 44 return TheCall.take(); 45 case Builtin::BI__builtin_stdarg_start: 46 case Builtin::BI__builtin_va_start: 47 if (SemaBuiltinVAStart(TheCall.get())) 48 return true; 49 return TheCall.take(); 50 case Builtin::BI__builtin_isgreater: 51 case Builtin::BI__builtin_isgreaterequal: 52 case Builtin::BI__builtin_isless: 53 case Builtin::BI__builtin_islessequal: 54 case Builtin::BI__builtin_islessgreater: 55 case Builtin::BI__builtin_isunordered: 56 if (SemaBuiltinUnorderedCompare(TheCall.get())) 57 return true; 58 return TheCall.take(); 59 case Builtin::BI__builtin_return_address: 60 case Builtin::BI__builtin_frame_address: 61 if (SemaBuiltinStackAddress(TheCall.get())) 62 return true; 63 return TheCall.take(); 64 case Builtin::BI__builtin_shufflevector: 65 return SemaBuiltinShuffleVector(TheCall.get()); 66 case Builtin::BI__builtin_prefetch: 67 if (SemaBuiltinPrefetch(TheCall.get())) 68 return true; 69 return TheCall.take(); 70 case Builtin::BI__builtin_object_size: 71 if (SemaBuiltinObjectSize(TheCall.get())) 72 return true; 73 } 74 75 // FIXME: This mechanism should be abstracted to be less fragile and 76 // more efficient. For example, just map function ids to custom 77 // handlers. 78 79 // Search the KnownFunctionIDs for the identifier. 80 unsigned i = 0, e = id_num_known_functions; 81 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 82 if (i == e) return TheCall.take(); 83 84 // Printf checking. 85 if (i <= id_vprintf) { 86 // Retrieve the index of the format string parameter and determine 87 // if the function is passed a va_arg argument. 88 unsigned format_idx = 0; 89 bool HasVAListArg = false; 90 91 switch (i) { 92 default: assert(false && "No format string argument index."); 93 case id_NSLog: format_idx = 0; break; 94 case id_asprintf: format_idx = 1; break; 95 case id_fprintf: format_idx = 1; break; 96 case id_printf: format_idx = 0; break; 97 case id_snprintf: format_idx = 2; break; 98 case id_snprintf_chk: format_idx = 4; break; 99 case id_sprintf: format_idx = 1; break; 100 case id_sprintf_chk: format_idx = 3; break; 101 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 102 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 103 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 104 case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break; 105 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 106 case id_vsprintf_chk: format_idx = 3; HasVAListArg = true; break; 107 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 108 } 109 110 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 111 } 112 113 return TheCall.take(); 114} 115 116/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 117/// CFString constructor is correct 118bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 119 Arg = Arg->IgnoreParenCasts(); 120 121 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 122 123 if (!Literal || Literal->isWide()) { 124 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 125 << Arg->getSourceRange(); 126 return true; 127 } 128 129 const char *Data = Literal->getStrData(); 130 unsigned Length = Literal->getByteLength(); 131 132 for (unsigned i = 0; i < Length; ++i) { 133 if (!isascii(Data[i])) { 134 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 135 diag::warn_cfstring_literal_contains_non_ascii_character) 136 << Arg->getSourceRange(); 137 break; 138 } 139 140 if (!Data[i]) { 141 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 142 diag::warn_cfstring_literal_contains_nul_character) 143 << Arg->getSourceRange(); 144 break; 145 } 146 } 147 148 return false; 149} 150 151/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 152/// Emit an error and return true on failure, return false on success. 153bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 154 Expr *Fn = TheCall->getCallee(); 155 if (TheCall->getNumArgs() > 2) { 156 Diag(TheCall->getArg(2)->getLocStart(), 157 diag::err_typecheck_call_too_many_args) 158 << 0 /*function call*/ << Fn->getSourceRange() 159 << SourceRange(TheCall->getArg(2)->getLocStart(), 160 (*(TheCall->arg_end()-1))->getLocEnd()); 161 return true; 162 } 163 164 if (TheCall->getNumArgs() < 2) { 165 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 166 << 0 /*function call*/; 167 } 168 169 // Determine whether the current function is variadic or not. 170 bool isVariadic; 171 if (getCurFunctionDecl()) { 172 if (FunctionTypeProto* FTP = 173 dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType())) 174 isVariadic = FTP->isVariadic(); 175 else 176 isVariadic = false; 177 } else { 178 isVariadic = getCurMethodDecl()->isVariadic(); 179 } 180 181 if (!isVariadic) { 182 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 183 return true; 184 } 185 186 // Verify that the second argument to the builtin is the last argument of the 187 // current function or method. 188 bool SecondArgIsLastNamedArgument = false; 189 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 190 191 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 192 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 193 // FIXME: This isn't correct for methods (results in bogus warning). 194 // Get the last formal in the current function. 195 const ParmVarDecl *LastArg; 196 if (FunctionDecl *FD = getCurFunctionDecl()) 197 LastArg = *(FD->param_end()-1); 198 else 199 LastArg = *(getCurMethodDecl()->param_end()-1); 200 SecondArgIsLastNamedArgument = PV == LastArg; 201 } 202 } 203 204 if (!SecondArgIsLastNamedArgument) 205 Diag(TheCall->getArg(1)->getLocStart(), 206 diag::warn_second_parameter_of_va_start_not_last_named_argument); 207 return false; 208} 209 210/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 211/// friends. This is declared to take (...), so we have to check everything. 212bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 213 if (TheCall->getNumArgs() < 2) 214 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 215 << 0 /*function call*/; 216 if (TheCall->getNumArgs() > 2) 217 return Diag(TheCall->getArg(2)->getLocStart(), 218 diag::err_typecheck_call_too_many_args) 219 << 0 /*function call*/ 220 << SourceRange(TheCall->getArg(2)->getLocStart(), 221 (*(TheCall->arg_end()-1))->getLocEnd()); 222 223 Expr *OrigArg0 = TheCall->getArg(0); 224 Expr *OrigArg1 = TheCall->getArg(1); 225 226 // Do standard promotions between the two arguments, returning their common 227 // type. 228 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 229 230 // If the common type isn't a real floating type, then the arguments were 231 // invalid for this operation. 232 if (!Res->isRealFloatingType()) 233 return Diag(OrigArg0->getLocStart(), 234 diag::err_typecheck_call_invalid_ordered_compare) 235 << OrigArg0->getType() << OrigArg1->getType() 236 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 237 238 return false; 239} 240 241bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 242 // The signature for these builtins is exact; the only thing we need 243 // to check is that the argument is a constant. 244 SourceLocation Loc; 245 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 246 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 247 248 return false; 249} 250 251/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 252// This is declared to take (...), so we have to check everything. 253Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 254 if (TheCall->getNumArgs() < 3) 255 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 256 << 0 /*function call*/ << TheCall->getSourceRange(); 257 258 QualType FAType = TheCall->getArg(0)->getType(); 259 QualType SAType = TheCall->getArg(1)->getType(); 260 261 if (!FAType->isVectorType() || !SAType->isVectorType()) { 262 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 263 << SourceRange(TheCall->getArg(0)->getLocStart(), 264 TheCall->getArg(1)->getLocEnd()); 265 return true; 266 } 267 268 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 269 Context.getCanonicalType(SAType).getUnqualifiedType()) { 270 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 271 << SourceRange(TheCall->getArg(0)->getLocStart(), 272 TheCall->getArg(1)->getLocEnd()); 273 return true; 274 } 275 276 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 277 if (TheCall->getNumArgs() != numElements+2) { 278 if (TheCall->getNumArgs() < numElements+2) 279 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 280 << 0 /*function call*/ << TheCall->getSourceRange(); 281 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 282 << 0 /*function call*/ << TheCall->getSourceRange(); 283 } 284 285 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 286 llvm::APSInt Result(32); 287 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 288 return Diag(TheCall->getLocStart(), 289 diag::err_shufflevector_nonconstant_argument) 290 << TheCall->getArg(i)->getSourceRange(); 291 292 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 293 return Diag(TheCall->getLocStart(), 294 diag::err_shufflevector_argument_too_large) 295 << TheCall->getArg(i)->getSourceRange(); 296 } 297 298 llvm::SmallVector<Expr*, 32> exprs; 299 300 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 301 exprs.push_back(TheCall->getArg(i)); 302 TheCall->setArg(i, 0); 303 } 304 305 return new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType, 306 TheCall->getCallee()->getLocStart(), 307 TheCall->getRParenLoc()); 308} 309 310/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 311// This is declared to take (const void*, ...) and can take two 312// optional constant int args. 313bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 314 unsigned NumArgs = TheCall->getNumArgs(); 315 316 if (NumArgs > 3) 317 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 318 << 0 /*function call*/ << TheCall->getSourceRange(); 319 320 // Argument 0 is checked for us and the remaining arguments must be 321 // constant integers. 322 for (unsigned i = 1; i != NumArgs; ++i) { 323 Expr *Arg = TheCall->getArg(i); 324 QualType RWType = Arg->getType(); 325 326 const BuiltinType *BT = RWType->getAsBuiltinType(); 327 llvm::APSInt Result; 328 if (!BT || BT->getKind() != BuiltinType::Int || 329 !Arg->isIntegerConstantExpr(Result, Context)) 330 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 331 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 332 333 // FIXME: gcc issues a warning and rewrites these to 0. These 334 // seems especially odd for the third argument since the default 335 // is 3. 336 if (i == 1) { 337 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 338 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 339 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 340 } else { 341 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 342 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 343 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 344 } 345 } 346 347 return false; 348} 349 350/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 351/// int type). This simply type checks that type is one of the defined 352/// constants (0-3). 353bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 354 Expr *Arg = TheCall->getArg(1); 355 QualType ArgType = Arg->getType(); 356 const BuiltinType *BT = ArgType->getAsBuiltinType(); 357 llvm::APSInt Result(32); 358 if (!BT || BT->getKind() != BuiltinType::Int || 359 !Arg->isIntegerConstantExpr(Result, Context)) { 360 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 361 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 362 } 363 364 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 365 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 366 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 367 } 368 369 return false; 370} 371 372/// CheckPrintfArguments - Check calls to printf (and similar functions) for 373/// correct use of format strings. 374/// 375/// HasVAListArg - A predicate indicating whether the printf-like 376/// function is passed an explicit va_arg argument (e.g., vprintf) 377/// 378/// format_idx - The index into Args for the format string. 379/// 380/// Improper format strings to functions in the printf family can be 381/// the source of bizarre bugs and very serious security holes. A 382/// good source of information is available in the following paper 383/// (which includes additional references): 384/// 385/// FormatGuard: Automatic Protection From printf Format String 386/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 387/// 388/// Functionality implemented: 389/// 390/// We can statically check the following properties for string 391/// literal format strings for non v.*printf functions (where the 392/// arguments are passed directly): 393// 394/// (1) Are the number of format conversions equal to the number of 395/// data arguments? 396/// 397/// (2) Does each format conversion correctly match the type of the 398/// corresponding data argument? (TODO) 399/// 400/// Moreover, for all printf functions we can: 401/// 402/// (3) Check for a missing format string (when not caught by type checking). 403/// 404/// (4) Check for no-operation flags; e.g. using "#" with format 405/// conversion 'c' (TODO) 406/// 407/// (5) Check the use of '%n', a major source of security holes. 408/// 409/// (6) Check for malformed format conversions that don't specify anything. 410/// 411/// (7) Check for empty format strings. e.g: printf(""); 412/// 413/// (8) Check that the format string is a wide literal. 414/// 415/// (9) Also check the arguments of functions with the __format__ attribute. 416/// (TODO). 417/// 418/// All of these checks can be done by parsing the format string. 419/// 420/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 421void 422Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 423 unsigned format_idx) { 424 Expr *Fn = TheCall->getCallee(); 425 426 // CHECK: printf-like function is called with no format string. 427 if (format_idx >= TheCall->getNumArgs()) { 428 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 429 << Fn->getSourceRange(); 430 return; 431 } 432 433 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 434 435 // CHECK: format string is not a string literal. 436 // 437 // Dynamically generated format strings are difficult to 438 // automatically vet at compile time. Requiring that format strings 439 // are string literals: (1) permits the checking of format strings by 440 // the compiler and thereby (2) can practically remove the source of 441 // many format string exploits. 442 443 // Format string can be either ObjC string (e.g. @"%d") or 444 // C string (e.g. "%d") 445 // ObjC string uses the same format specifiers as C string, so we can use 446 // the same format string checking logic for both ObjC and C strings. 447 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 448 StringLiteral *FExpr = NULL; 449 450 if(ObjCFExpr != NULL) 451 FExpr = ObjCFExpr->getString(); 452 else 453 FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 454 455 if (FExpr == NULL) { 456 // For vprintf* functions (i.e., HasVAListArg==true), we add a 457 // special check to see if the format string is a function parameter 458 // of the function calling the printf function. If the function 459 // has an attribute indicating it is a printf-like function, then we 460 // should suppress warnings concerning non-literals being used in a call 461 // to a vprintf function. For example: 462 // 463 // void 464 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 465 // va_list ap; 466 // va_start(ap, fmt); 467 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 468 // ... 469 // 470 // 471 // FIXME: We don't have full attribute support yet, so just check to see 472 // if the argument is a DeclRefExpr that references a parameter. We'll 473 // add proper support for checking the attribute later. 474 if (HasVAListArg) 475 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 476 if (isa<ParmVarDecl>(DR->getDecl())) 477 return; 478 479 Diag(TheCall->getArg(format_idx)->getLocStart(), 480 diag::warn_printf_not_string_constant) 481 << OrigFormatExpr->getSourceRange(); 482 return; 483 } 484 485 // CHECK: is the format string a wide literal? 486 if (FExpr->isWide()) { 487 Diag(FExpr->getLocStart(), 488 diag::warn_printf_format_string_is_wide_literal) 489 << OrigFormatExpr->getSourceRange(); 490 return; 491 } 492 493 // Str - The format string. NOTE: this is NOT null-terminated! 494 const char * const Str = FExpr->getStrData(); 495 496 // CHECK: empty format string? 497 const unsigned StrLen = FExpr->getByteLength(); 498 499 if (StrLen == 0) { 500 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 501 << OrigFormatExpr->getSourceRange(); 502 return; 503 } 504 505 // We process the format string using a binary state machine. The 506 // current state is stored in CurrentState. 507 enum { 508 state_OrdChr, 509 state_Conversion 510 } CurrentState = state_OrdChr; 511 512 // numConversions - The number of conversions seen so far. This is 513 // incremented as we traverse the format string. 514 unsigned numConversions = 0; 515 516 // numDataArgs - The number of data arguments after the format 517 // string. This can only be determined for non vprintf-like 518 // functions. For those functions, this value is 1 (the sole 519 // va_arg argument). 520 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 521 522 // Inspect the format string. 523 unsigned StrIdx = 0; 524 525 // LastConversionIdx - Index within the format string where we last saw 526 // a '%' character that starts a new format conversion. 527 unsigned LastConversionIdx = 0; 528 529 for (; StrIdx < StrLen; ++StrIdx) { 530 531 // Is the number of detected conversion conversions greater than 532 // the number of matching data arguments? If so, stop. 533 if (!HasVAListArg && numConversions > numDataArgs) break; 534 535 // Handle "\0" 536 if (Str[StrIdx] == '\0') { 537 // The string returned by getStrData() is not null-terminated, 538 // so the presence of a null character is likely an error. 539 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 540 diag::warn_printf_format_string_contains_null_char) 541 << OrigFormatExpr->getSourceRange(); 542 return; 543 } 544 545 // Ordinary characters (not processing a format conversion). 546 if (CurrentState == state_OrdChr) { 547 if (Str[StrIdx] == '%') { 548 CurrentState = state_Conversion; 549 LastConversionIdx = StrIdx; 550 } 551 continue; 552 } 553 554 // Seen '%'. Now processing a format conversion. 555 switch (Str[StrIdx]) { 556 // Handle dynamic precision or width specifier. 557 case '*': { 558 ++numConversions; 559 560 if (!HasVAListArg && numConversions > numDataArgs) { 561 SourceLocation Loc = FExpr->getLocStart(); 562 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 563 564 if (Str[StrIdx-1] == '.') 565 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 566 << OrigFormatExpr->getSourceRange(); 567 else 568 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 569 << OrigFormatExpr->getSourceRange(); 570 571 // Don't do any more checking. We'll just emit spurious errors. 572 return; 573 } 574 575 // Perform type checking on width/precision specifier. 576 Expr *E = TheCall->getArg(format_idx+numConversions); 577 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 578 if (BT->getKind() == BuiltinType::Int) 579 break; 580 581 SourceLocation Loc = 582 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 583 584 if (Str[StrIdx-1] == '.') 585 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 586 << E->getType() << E->getSourceRange(); 587 else 588 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 589 << E->getType() << E->getSourceRange(); 590 591 break; 592 } 593 594 // Characters which can terminate a format conversion 595 // (e.g. "%d"). Characters that specify length modifiers or 596 // other flags are handled by the default case below. 597 // 598 // FIXME: additional checks will go into the following cases. 599 case 'i': 600 case 'd': 601 case 'o': 602 case 'u': 603 case 'x': 604 case 'X': 605 case 'D': 606 case 'O': 607 case 'U': 608 case 'e': 609 case 'E': 610 case 'f': 611 case 'F': 612 case 'g': 613 case 'G': 614 case 'a': 615 case 'A': 616 case 'c': 617 case 'C': 618 case 'S': 619 case 's': 620 case 'p': 621 ++numConversions; 622 CurrentState = state_OrdChr; 623 break; 624 625 // CHECK: Are we using "%n"? Issue a warning. 626 case 'n': { 627 ++numConversions; 628 CurrentState = state_OrdChr; 629 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 630 LastConversionIdx+1); 631 632 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 633 break; 634 } 635 636 // Handle "%@" 637 case '@': 638 // %@ is allowed in ObjC format strings only. 639 if(ObjCFExpr != NULL) 640 CurrentState = state_OrdChr; 641 else { 642 // Issue a warning: invalid format conversion. 643 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 644 LastConversionIdx+1); 645 646 Diag(Loc, diag::warn_printf_invalid_conversion) 647 << std::string(Str+LastConversionIdx, 648 Str+std::min(LastConversionIdx+2, StrLen)) 649 << OrigFormatExpr->getSourceRange(); 650 } 651 ++numConversions; 652 break; 653 654 // Handle "%%" 655 case '%': 656 // Sanity check: Was the first "%" character the previous one? 657 // If not, we will assume that we have a malformed format 658 // conversion, and that the current "%" character is the start 659 // of a new conversion. 660 if (StrIdx - LastConversionIdx == 1) 661 CurrentState = state_OrdChr; 662 else { 663 // Issue a warning: invalid format conversion. 664 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 665 LastConversionIdx+1); 666 667 Diag(Loc, diag::warn_printf_invalid_conversion) 668 << std::string(Str+LastConversionIdx, Str+StrIdx) 669 << OrigFormatExpr->getSourceRange(); 670 671 // This conversion is broken. Advance to the next format 672 // conversion. 673 LastConversionIdx = StrIdx; 674 ++numConversions; 675 } 676 break; 677 678 default: 679 // This case catches all other characters: flags, widths, etc. 680 // We should eventually process those as well. 681 break; 682 } 683 } 684 685 if (CurrentState == state_Conversion) { 686 // Issue a warning: invalid format conversion. 687 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 688 LastConversionIdx+1); 689 690 Diag(Loc, diag::warn_printf_invalid_conversion) 691 << std::string(Str+LastConversionIdx, 692 Str+std::min(LastConversionIdx+2, StrLen)) 693 << OrigFormatExpr->getSourceRange(); 694 return; 695 } 696 697 if (!HasVAListArg) { 698 // CHECK: Does the number of format conversions exceed the number 699 // of data arguments? 700 if (numConversions > numDataArgs) { 701 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 702 LastConversionIdx); 703 704 Diag(Loc, diag::warn_printf_insufficient_data_args) 705 << OrigFormatExpr->getSourceRange(); 706 } 707 // CHECK: Does the number of data arguments exceed the number of 708 // format conversions in the format string? 709 else if (numConversions < numDataArgs) 710 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 711 diag::warn_printf_too_many_data_args) 712 << OrigFormatExpr->getSourceRange(); 713 } 714} 715 716//===--- CHECK: Return Address of Stack Variable --------------------------===// 717 718static DeclRefExpr* EvalVal(Expr *E); 719static DeclRefExpr* EvalAddr(Expr* E); 720 721/// CheckReturnStackAddr - Check if a return statement returns the address 722/// of a stack variable. 723void 724Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 725 SourceLocation ReturnLoc) { 726 727 // Perform checking for returned stack addresses. 728 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 729 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 730 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 731 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 732 733 // Skip over implicit cast expressions when checking for block expressions. 734 if (ImplicitCastExpr *IcExpr = 735 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 736 RetValExp = IcExpr->getSubExpr(); 737 738 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 739 Diag(C->getLocStart(), diag::err_ret_local_block) 740 << C->getSourceRange(); 741 } 742 // Perform checking for stack values returned by reference. 743 else if (lhsType->isReferenceType()) { 744 // Check for a reference to the stack 745 if (DeclRefExpr *DR = EvalVal(RetValExp)) 746 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 747 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 748 } 749} 750 751/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 752/// check if the expression in a return statement evaluates to an address 753/// to a location on the stack. The recursion is used to traverse the 754/// AST of the return expression, with recursion backtracking when we 755/// encounter a subexpression that (1) clearly does not lead to the address 756/// of a stack variable or (2) is something we cannot determine leads to 757/// the address of a stack variable based on such local checking. 758/// 759/// EvalAddr processes expressions that are pointers that are used as 760/// references (and not L-values). EvalVal handles all other values. 761/// At the base case of the recursion is a check for a DeclRefExpr* in 762/// the refers to a stack variable. 763/// 764/// This implementation handles: 765/// 766/// * pointer-to-pointer casts 767/// * implicit conversions from array references to pointers 768/// * taking the address of fields 769/// * arbitrary interplay between "&" and "*" operators 770/// * pointer arithmetic from an address of a stack variable 771/// * taking the address of an array element where the array is on the stack 772static DeclRefExpr* EvalAddr(Expr *E) { 773 // We should only be called for evaluating pointer expressions. 774 assert((E->getType()->isPointerType() || 775 E->getType()->isBlockPointerType() || 776 E->getType()->isObjCQualifiedIdType()) && 777 "EvalAddr only works on pointers"); 778 779 // Our "symbolic interpreter" is just a dispatch off the currently 780 // viewed AST node. We then recursively traverse the AST by calling 781 // EvalAddr and EvalVal appropriately. 782 switch (E->getStmtClass()) { 783 case Stmt::ParenExprClass: 784 // Ignore parentheses. 785 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 786 787 case Stmt::UnaryOperatorClass: { 788 // The only unary operator that make sense to handle here 789 // is AddrOf. All others don't make sense as pointers. 790 UnaryOperator *U = cast<UnaryOperator>(E); 791 792 if (U->getOpcode() == UnaryOperator::AddrOf) 793 return EvalVal(U->getSubExpr()); 794 else 795 return NULL; 796 } 797 798 case Stmt::BinaryOperatorClass: { 799 // Handle pointer arithmetic. All other binary operators are not valid 800 // in this context. 801 BinaryOperator *B = cast<BinaryOperator>(E); 802 BinaryOperator::Opcode op = B->getOpcode(); 803 804 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 805 return NULL; 806 807 Expr *Base = B->getLHS(); 808 809 // Determine which argument is the real pointer base. It could be 810 // the RHS argument instead of the LHS. 811 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 812 813 assert (Base->getType()->isPointerType()); 814 return EvalAddr(Base); 815 } 816 817 // For conditional operators we need to see if either the LHS or RHS are 818 // valid DeclRefExpr*s. If one of them is valid, we return it. 819 case Stmt::ConditionalOperatorClass: { 820 ConditionalOperator *C = cast<ConditionalOperator>(E); 821 822 // Handle the GNU extension for missing LHS. 823 if (Expr *lhsExpr = C->getLHS()) 824 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 825 return LHS; 826 827 return EvalAddr(C->getRHS()); 828 } 829 830 // For casts, we need to handle conversions from arrays to 831 // pointer values, and pointer-to-pointer conversions. 832 case Stmt::ImplicitCastExprClass: 833 case Stmt::CStyleCastExprClass: 834 case Stmt::CXXFunctionalCastExprClass: { 835 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 836 QualType T = SubExpr->getType(); 837 838 if (SubExpr->getType()->isPointerType() || 839 SubExpr->getType()->isBlockPointerType() || 840 SubExpr->getType()->isObjCQualifiedIdType()) 841 return EvalAddr(SubExpr); 842 else if (T->isArrayType()) 843 return EvalVal(SubExpr); 844 else 845 return 0; 846 } 847 848 // C++ casts. For dynamic casts, static casts, and const casts, we 849 // are always converting from a pointer-to-pointer, so we just blow 850 // through the cast. In the case the dynamic cast doesn't fail (and 851 // return NULL), we take the conservative route and report cases 852 // where we return the address of a stack variable. For Reinterpre 853 // FIXME: The comment about is wrong; we're not always converting 854 // from pointer to pointer. I'm guessing that this code should also 855 // handle references to objects. 856 case Stmt::CXXStaticCastExprClass: 857 case Stmt::CXXDynamicCastExprClass: 858 case Stmt::CXXConstCastExprClass: 859 case Stmt::CXXReinterpretCastExprClass: { 860 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 861 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 862 return EvalAddr(S); 863 else 864 return NULL; 865 } 866 867 // Everything else: we simply don't reason about them. 868 default: 869 return NULL; 870 } 871} 872 873 874/// EvalVal - This function is complements EvalAddr in the mutual recursion. 875/// See the comments for EvalAddr for more details. 876static DeclRefExpr* EvalVal(Expr *E) { 877 878 // We should only be called for evaluating non-pointer expressions, or 879 // expressions with a pointer type that are not used as references but instead 880 // are l-values (e.g., DeclRefExpr with a pointer type). 881 882 // Our "symbolic interpreter" is just a dispatch off the currently 883 // viewed AST node. We then recursively traverse the AST by calling 884 // EvalAddr and EvalVal appropriately. 885 switch (E->getStmtClass()) { 886 case Stmt::DeclRefExprClass: { 887 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 888 // at code that refers to a variable's name. We check if it has local 889 // storage within the function, and if so, return the expression. 890 DeclRefExpr *DR = cast<DeclRefExpr>(E); 891 892 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 893 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 894 895 return NULL; 896 } 897 898 case Stmt::ParenExprClass: 899 // Ignore parentheses. 900 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 901 902 case Stmt::UnaryOperatorClass: { 903 // The only unary operator that make sense to handle here 904 // is Deref. All others don't resolve to a "name." This includes 905 // handling all sorts of rvalues passed to a unary operator. 906 UnaryOperator *U = cast<UnaryOperator>(E); 907 908 if (U->getOpcode() == UnaryOperator::Deref) 909 return EvalAddr(U->getSubExpr()); 910 911 return NULL; 912 } 913 914 case Stmt::ArraySubscriptExprClass: { 915 // Array subscripts are potential references to data on the stack. We 916 // retrieve the DeclRefExpr* for the array variable if it indeed 917 // has local storage. 918 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 919 } 920 921 case Stmt::ConditionalOperatorClass: { 922 // For conditional operators we need to see if either the LHS or RHS are 923 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 924 ConditionalOperator *C = cast<ConditionalOperator>(E); 925 926 // Handle the GNU extension for missing LHS. 927 if (Expr *lhsExpr = C->getLHS()) 928 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 929 return LHS; 930 931 return EvalVal(C->getRHS()); 932 } 933 934 // Accesses to members are potential references to data on the stack. 935 case Stmt::MemberExprClass: { 936 MemberExpr *M = cast<MemberExpr>(E); 937 938 // Check for indirect access. We only want direct field accesses. 939 if (!M->isArrow()) 940 return EvalVal(M->getBase()); 941 else 942 return NULL; 943 } 944 945 // Everything else: we simply don't reason about them. 946 default: 947 return NULL; 948 } 949} 950 951//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 952 953/// Check for comparisons of floating point operands using != and ==. 954/// Issue a warning if these are no self-comparisons, as they are not likely 955/// to do what the programmer intended. 956void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 957 bool EmitWarning = true; 958 959 Expr* LeftExprSansParen = lex->IgnoreParens(); 960 Expr* RightExprSansParen = rex->IgnoreParens(); 961 962 // Special case: check for x == x (which is OK). 963 // Do not emit warnings for such cases. 964 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 965 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 966 if (DRL->getDecl() == DRR->getDecl()) 967 EmitWarning = false; 968 969 970 // Special case: check for comparisons against literals that can be exactly 971 // represented by APFloat. In such cases, do not emit a warning. This 972 // is a heuristic: often comparison against such literals are used to 973 // detect if a value in a variable has not changed. This clearly can 974 // lead to false negatives. 975 if (EmitWarning) { 976 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 977 if (FLL->isExact()) 978 EmitWarning = false; 979 } 980 else 981 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 982 if (FLR->isExact()) 983 EmitWarning = false; 984 } 985 } 986 987 // Check for comparisons with builtin types. 988 if (EmitWarning) 989 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 990 if (isCallBuiltin(CL)) 991 EmitWarning = false; 992 993 if (EmitWarning) 994 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 995 if (isCallBuiltin(CR)) 996 EmitWarning = false; 997 998 // Emit the diagnostic. 999 if (EmitWarning) 1000 Diag(loc, diag::warn_floatingpoint_eq) 1001 << lex->getSourceRange() << rex->getSourceRange(); 1002} 1003