SemaChecking.cpp revision 0835a3cdeefe714b4959d31127ea155e56393125
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The LLVM Compiler Infrastructure 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This file is distributed under the University of Illinois Open Source 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// License. See LICENSE.TXT for details. 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This file implements extra semantic analysis beyond what is enforced 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// by the C type system. 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "Sema.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "clang/AST/ASTContext.h" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21#include "clang/Basic/Diagnostic.h" 22#include "SemaUtil.h" 23using namespace clang; 24 25/// CheckFunctionCall - Check a direct function call for various correctness 26/// and safety properties not strictly enforced by the C type system. 27Action::ExprResult 28Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 29 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 30 // Get the IdentifierInfo* for the called function. 31 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 32 33 switch (FnInfo->getBuiltinID()) { 34 case Builtin::BI__builtin___CFStringMakeConstantString: 35 assert(TheCall->getNumArgs() == 1 && 36 "Wrong # arguments to builtin CFStringMakeConstantString"); 37 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 38 return true; 39 return TheCall.take(); 40 case Builtin::BI__builtin_stdarg_start: 41 case Builtin::BI__builtin_va_start: 42 if (SemaBuiltinVAStart(TheCall.get())) 43 return true; 44 return TheCall.take(); 45 case Builtin::BI__builtin_isgreater: 46 case Builtin::BI__builtin_isgreaterequal: 47 case Builtin::BI__builtin_isless: 48 case Builtin::BI__builtin_islessequal: 49 case Builtin::BI__builtin_islessgreater: 50 case Builtin::BI__builtin_isunordered: 51 if (SemaBuiltinUnorderedCompare(TheCall.get())) 52 return true; 53 return TheCall.take(); 54 case Builtin::BI__builtin_return_address: 55 case Builtin::BI__builtin_frame_address: 56 if (SemaBuiltinStackAddress(TheCall.get())) 57 return true; 58 return TheCall.take(); 59 case Builtin::BI__builtin_shufflevector: 60 return SemaBuiltinShuffleVector(TheCall.get()); 61 case Builtin::BI__builtin_prefetch: 62 if (SemaBuiltinPrefetch(TheCall.get())) 63 return true; 64 return TheCall.take(); 65 } 66 67 // Search the KnownFunctionIDs for the identifier. 68 unsigned i = 0, e = id_num_known_functions; 69 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 70 if (i == e) return TheCall.take(); 71 72 // Printf checking. 73 if (i <= id_vprintf) { 74 // Retrieve the index of the format string parameter and determine 75 // if the function is passed a va_arg argument. 76 unsigned format_idx = 0; 77 bool HasVAListArg = false; 78 79 switch (i) { 80 default: assert(false && "No format string argument index."); 81 case id_printf: format_idx = 0; break; 82 case id_fprintf: format_idx = 1; break; 83 case id_sprintf: format_idx = 1; break; 84 case id_snprintf: format_idx = 2; break; 85 case id_asprintf: format_idx = 1; break; 86 case id_NSLog: format_idx = 0; break; 87 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 88 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 89 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 90 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 91 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 92 } 93 94 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 95 } 96 97 return TheCall.take(); 98} 99 100/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 101/// CFString constructor is correct 102bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 103 Arg = Arg->IgnoreParenCasts(); 104 105 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 106 107 if (!Literal || Literal->isWide()) { 108 Diag(Arg->getLocStart(), 109 diag::err_cfstring_literal_not_string_constant, 110 Arg->getSourceRange()); 111 return true; 112 } 113 114 const char *Data = Literal->getStrData(); 115 unsigned Length = Literal->getByteLength(); 116 117 for (unsigned i = 0; i < Length; ++i) { 118 if (!isascii(Data[i])) { 119 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 120 diag::warn_cfstring_literal_contains_non_ascii_character, 121 Arg->getSourceRange()); 122 break; 123 } 124 125 if (!Data[i]) { 126 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 127 diag::warn_cfstring_literal_contains_nul_character, 128 Arg->getSourceRange()); 129 break; 130 } 131 } 132 133 return false; 134} 135 136/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 137/// Emit an error and return true on failure, return false on success. 138bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 139 Expr *Fn = TheCall->getCallee(); 140 if (TheCall->getNumArgs() > 2) { 141 Diag(TheCall->getArg(2)->getLocStart(), 142 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), 143 SourceRange(TheCall->getArg(2)->getLocStart(), 144 (*(TheCall->arg_end()-1))->getLocEnd())); 145 return true; 146 } 147 148 // Determine whether the current function is variadic or not. 149 bool isVariadic; 150 if (getCurFunctionDecl()) 151 isVariadic = 152 cast<FunctionTypeProto>(getCurFunctionDecl()->getType())->isVariadic(); 153 else 154 isVariadic = getCurMethodDecl()->isVariadic(); 155 156 if (!isVariadic) { 157 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 158 return true; 159 } 160 161 // Verify that the second argument to the builtin is the last argument of the 162 // current function or method. 163 bool SecondArgIsLastNamedArgument = false; 164 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 165 166 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 167 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 168 // FIXME: This isn't correct for methods (results in bogus warning). 169 // Get the last formal in the current function. 170 const ParmVarDecl *LastArg; 171 if (getCurFunctionDecl()) 172 LastArg = *(getCurFunctionDecl()->param_end()-1); 173 else 174 LastArg = *(getCurMethodDecl()->param_end()-1); 175 SecondArgIsLastNamedArgument = PV == LastArg; 176 } 177 } 178 179 if (!SecondArgIsLastNamedArgument) 180 Diag(TheCall->getArg(1)->getLocStart(), 181 diag::warn_second_parameter_of_va_start_not_last_named_argument); 182 return false; 183} 184 185/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 186/// friends. This is declared to take (...), so we have to check everything. 187bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 188 if (TheCall->getNumArgs() < 2) 189 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); 190 if (TheCall->getNumArgs() > 2) 191 return Diag(TheCall->getArg(2)->getLocStart(), 192 diag::err_typecheck_call_too_many_args, 193 SourceRange(TheCall->getArg(2)->getLocStart(), 194 (*(TheCall->arg_end()-1))->getLocEnd())); 195 196 Expr *OrigArg0 = TheCall->getArg(0); 197 Expr *OrigArg1 = TheCall->getArg(1); 198 199 // Do standard promotions between the two arguments, returning their common 200 // type. 201 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 202 203 // If the common type isn't a real floating type, then the arguments were 204 // invalid for this operation. 205 if (!Res->isRealFloatingType()) 206 return Diag(OrigArg0->getLocStart(), 207 diag::err_typecheck_call_invalid_ordered_compare, 208 OrigArg0->getType().getAsString(), 209 OrigArg1->getType().getAsString(), 210 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); 211 212 return false; 213} 214 215bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 216 // The signature for these builtins is exact; the only thing we need 217 // to check is that the argument is a constant. 218 SourceLocation Loc; 219 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 220 return Diag(Loc, diag::err_stack_const_level, TheCall->getSourceRange()); 221 222 return false; 223} 224 225/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 226// This is declared to take (...), so we have to check everything. 227Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 228 if (TheCall->getNumArgs() < 3) 229 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 230 TheCall->getSourceRange()); 231 232 QualType FAType = TheCall->getArg(0)->getType(); 233 QualType SAType = TheCall->getArg(1)->getType(); 234 235 if (!FAType->isVectorType() || !SAType->isVectorType()) { 236 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector, 237 SourceRange(TheCall->getArg(0)->getLocStart(), 238 TheCall->getArg(1)->getLocEnd())); 239 return true; 240 } 241 242 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 243 Context.getCanonicalType(SAType).getUnqualifiedType()) { 244 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector, 245 SourceRange(TheCall->getArg(0)->getLocStart(), 246 TheCall->getArg(1)->getLocEnd())); 247 return true; 248 } 249 250 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 251 if (TheCall->getNumArgs() != numElements+2) { 252 if (TheCall->getNumArgs() < numElements+2) 253 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 254 TheCall->getSourceRange()); 255 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 256 TheCall->getSourceRange()); 257 } 258 259 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 260 llvm::APSInt Result(32); 261 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 262 return Diag(TheCall->getLocStart(), 263 diag::err_shufflevector_nonconstant_argument, 264 TheCall->getArg(i)->getSourceRange()); 265 266 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 267 return Diag(TheCall->getLocStart(), 268 diag::err_shufflevector_argument_too_large, 269 TheCall->getArg(i)->getSourceRange()); 270 } 271 272 llvm::SmallVector<Expr*, 32> exprs; 273 274 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 275 exprs.push_back(TheCall->getArg(i)); 276 TheCall->setArg(i, 0); 277 } 278 279 return new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType, 280 TheCall->getCallee()->getLocStart(), 281 TheCall->getRParenLoc()); 282} 283 284/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 285// This is declared to take (const void*, ...) and can take two 286// optional constant int args. 287bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 288 unsigned numArgs = TheCall->getNumArgs(); 289 bool res = false; 290 291 if (numArgs > 3) { 292 res |= Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 293 TheCall->getSourceRange()); 294 } 295 296 // Argument 0 is checked for us and the remaining arguments must be 297 // constant integers. 298 for (unsigned i=1; i<numArgs; ++i) { 299 Expr *Arg = TheCall->getArg(i); 300 QualType RWType = Arg->getType(); 301 302 const BuiltinType *BT = RWType->getAsBuiltinType(); 303 // FIXME: 32 is wrong, needs to be proper width of Int 304 llvm::APSInt Result(32); 305 if (!BT || BT->getKind() != BuiltinType::Int || 306 !Arg->isIntegerConstantExpr(Result, Context)) { 307 if (Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument, 308 SourceRange(Arg->getLocStart(), Arg->getLocEnd()))) { 309 res = true; 310 continue; 311 } 312 } 313 314 // FIXME: gcc issues a warning and rewrites these to 0. These 315 // seems especially odd for the third argument since the default 316 // is 3. 317 if (i==1) { 318 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 319 res |= Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_range, 320 "0", "1", 321 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 322 } else { 323 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 324 res |= Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_range, 325 "0", "3", 326 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 327 } 328 } 329 330 return res; 331} 332 333/// CheckPrintfArguments - Check calls to printf (and similar functions) for 334/// correct use of format strings. 335/// 336/// HasVAListArg - A predicate indicating whether the printf-like 337/// function is passed an explicit va_arg argument (e.g., vprintf) 338/// 339/// format_idx - The index into Args for the format string. 340/// 341/// Improper format strings to functions in the printf family can be 342/// the source of bizarre bugs and very serious security holes. A 343/// good source of information is available in the following paper 344/// (which includes additional references): 345/// 346/// FormatGuard: Automatic Protection From printf Format String 347/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 348/// 349/// Functionality implemented: 350/// 351/// We can statically check the following properties for string 352/// literal format strings for non v.*printf functions (where the 353/// arguments are passed directly): 354// 355/// (1) Are the number of format conversions equal to the number of 356/// data arguments? 357/// 358/// (2) Does each format conversion correctly match the type of the 359/// corresponding data argument? (TODO) 360/// 361/// Moreover, for all printf functions we can: 362/// 363/// (3) Check for a missing format string (when not caught by type checking). 364/// 365/// (4) Check for no-operation flags; e.g. using "#" with format 366/// conversion 'c' (TODO) 367/// 368/// (5) Check the use of '%n', a major source of security holes. 369/// 370/// (6) Check for malformed format conversions that don't specify anything. 371/// 372/// (7) Check for empty format strings. e.g: printf(""); 373/// 374/// (8) Check that the format string is a wide literal. 375/// 376/// (9) Also check the arguments of functions with the __format__ attribute. 377/// (TODO). 378/// 379/// All of these checks can be done by parsing the format string. 380/// 381/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 382void 383Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 384 unsigned format_idx) { 385 Expr *Fn = TheCall->getCallee(); 386 387 // CHECK: printf-like function is called with no format string. 388 if (format_idx >= TheCall->getNumArgs()) { 389 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, 390 Fn->getSourceRange()); 391 return; 392 } 393 394 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 395 396 // CHECK: format string is not a string literal. 397 // 398 // Dynamically generated format strings are difficult to 399 // automatically vet at compile time. Requiring that format strings 400 // are string literals: (1) permits the checking of format strings by 401 // the compiler and thereby (2) can practically remove the source of 402 // many format string exploits. 403 404 // Format string can be either ObjC string (e.g. @"%d") or 405 // C string (e.g. "%d") 406 // ObjC string uses the same format specifiers as C string, so we can use 407 // the same format string checking logic for both ObjC and C strings. 408 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 409 StringLiteral *FExpr = NULL; 410 411 if(ObjCFExpr != NULL) 412 FExpr = ObjCFExpr->getString(); 413 else 414 FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 415 416 if (FExpr == NULL) { 417 // For vprintf* functions (i.e., HasVAListArg==true), we add a 418 // special check to see if the format string is a function parameter 419 // of the function calling the printf function. If the function 420 // has an attribute indicating it is a printf-like function, then we 421 // should suppress warnings concerning non-literals being used in a call 422 // to a vprintf function. For example: 423 // 424 // void 425 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 426 // va_list ap; 427 // va_start(ap, fmt); 428 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 429 // ... 430 // 431 // 432 // FIXME: We don't have full attribute support yet, so just check to see 433 // if the argument is a DeclRefExpr that references a parameter. We'll 434 // add proper support for checking the attribute later. 435 if (HasVAListArg) 436 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 437 if (isa<ParmVarDecl>(DR->getDecl())) 438 return; 439 440 Diag(TheCall->getArg(format_idx)->getLocStart(), 441 diag::warn_printf_not_string_constant, 442 OrigFormatExpr->getSourceRange()); 443 return; 444 } 445 446 // CHECK: is the format string a wide literal? 447 if (FExpr->isWide()) { 448 Diag(FExpr->getLocStart(), 449 diag::warn_printf_format_string_is_wide_literal, 450 OrigFormatExpr->getSourceRange()); 451 return; 452 } 453 454 // Str - The format string. NOTE: this is NOT null-terminated! 455 const char * const Str = FExpr->getStrData(); 456 457 // CHECK: empty format string? 458 const unsigned StrLen = FExpr->getByteLength(); 459 460 if (StrLen == 0) { 461 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, 462 OrigFormatExpr->getSourceRange()); 463 return; 464 } 465 466 // We process the format string using a binary state machine. The 467 // current state is stored in CurrentState. 468 enum { 469 state_OrdChr, 470 state_Conversion 471 } CurrentState = state_OrdChr; 472 473 // numConversions - The number of conversions seen so far. This is 474 // incremented as we traverse the format string. 475 unsigned numConversions = 0; 476 477 // numDataArgs - The number of data arguments after the format 478 // string. This can only be determined for non vprintf-like 479 // functions. For those functions, this value is 1 (the sole 480 // va_arg argument). 481 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 482 483 // Inspect the format string. 484 unsigned StrIdx = 0; 485 486 // LastConversionIdx - Index within the format string where we last saw 487 // a '%' character that starts a new format conversion. 488 unsigned LastConversionIdx = 0; 489 490 for (; StrIdx < StrLen; ++StrIdx) { 491 492 // Is the number of detected conversion conversions greater than 493 // the number of matching data arguments? If so, stop. 494 if (!HasVAListArg && numConversions > numDataArgs) break; 495 496 // Handle "\0" 497 if (Str[StrIdx] == '\0') { 498 // The string returned by getStrData() is not null-terminated, 499 // so the presence of a null character is likely an error. 500 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 501 diag::warn_printf_format_string_contains_null_char, 502 OrigFormatExpr->getSourceRange()); 503 return; 504 } 505 506 // Ordinary characters (not processing a format conversion). 507 if (CurrentState == state_OrdChr) { 508 if (Str[StrIdx] == '%') { 509 CurrentState = state_Conversion; 510 LastConversionIdx = StrIdx; 511 } 512 continue; 513 } 514 515 // Seen '%'. Now processing a format conversion. 516 switch (Str[StrIdx]) { 517 // Handle dynamic precision or width specifier. 518 case '*': { 519 ++numConversions; 520 521 if (!HasVAListArg && numConversions > numDataArgs) { 522 SourceLocation Loc = FExpr->getLocStart(); 523 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 524 525 if (Str[StrIdx-1] == '.') 526 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, 527 OrigFormatExpr->getSourceRange()); 528 else 529 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, 530 OrigFormatExpr->getSourceRange()); 531 532 // Don't do any more checking. We'll just emit spurious errors. 533 return; 534 } 535 536 // Perform type checking on width/precision specifier. 537 Expr *E = TheCall->getArg(format_idx+numConversions); 538 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 539 if (BT->getKind() == BuiltinType::Int) 540 break; 541 542 SourceLocation Loc = 543 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 544 545 if (Str[StrIdx-1] == '.') 546 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, 547 E->getType().getAsString(), E->getSourceRange()); 548 else 549 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, 550 E->getType().getAsString(), E->getSourceRange()); 551 552 break; 553 } 554 555 // Characters which can terminate a format conversion 556 // (e.g. "%d"). Characters that specify length modifiers or 557 // other flags are handled by the default case below. 558 // 559 // FIXME: additional checks will go into the following cases. 560 case 'i': 561 case 'd': 562 case 'o': 563 case 'u': 564 case 'x': 565 case 'X': 566 case 'D': 567 case 'O': 568 case 'U': 569 case 'e': 570 case 'E': 571 case 'f': 572 case 'F': 573 case 'g': 574 case 'G': 575 case 'a': 576 case 'A': 577 case 'c': 578 case 'C': 579 case 'S': 580 case 's': 581 case 'p': 582 ++numConversions; 583 CurrentState = state_OrdChr; 584 break; 585 586 // CHECK: Are we using "%n"? Issue a warning. 587 case 'n': { 588 ++numConversions; 589 CurrentState = state_OrdChr; 590 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 591 LastConversionIdx+1); 592 593 Diag(Loc, diag::warn_printf_write_back, OrigFormatExpr->getSourceRange()); 594 break; 595 } 596 597 // Handle "%@" 598 case '@': 599 // %@ is allowed in ObjC format strings only. 600 if(ObjCFExpr != NULL) 601 CurrentState = state_OrdChr; 602 else { 603 // Issue a warning: invalid format conversion. 604 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 605 LastConversionIdx+1); 606 607 Diag(Loc, diag::warn_printf_invalid_conversion, 608 std::string(Str+LastConversionIdx, 609 Str+std::min(LastConversionIdx+2, StrLen)), 610 OrigFormatExpr->getSourceRange()); 611 } 612 ++numConversions; 613 break; 614 615 // Handle "%%" 616 case '%': 617 // Sanity check: Was the first "%" character the previous one? 618 // If not, we will assume that we have a malformed format 619 // conversion, and that the current "%" character is the start 620 // of a new conversion. 621 if (StrIdx - LastConversionIdx == 1) 622 CurrentState = state_OrdChr; 623 else { 624 // Issue a warning: invalid format conversion. 625 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 626 LastConversionIdx+1); 627 628 Diag(Loc, diag::warn_printf_invalid_conversion, 629 std::string(Str+LastConversionIdx, Str+StrIdx), 630 OrigFormatExpr->getSourceRange()); 631 632 // This conversion is broken. Advance to the next format 633 // conversion. 634 LastConversionIdx = StrIdx; 635 ++numConversions; 636 } 637 break; 638 639 default: 640 // This case catches all other characters: flags, widths, etc. 641 // We should eventually process those as well. 642 break; 643 } 644 } 645 646 if (CurrentState == state_Conversion) { 647 // Issue a warning: invalid format conversion. 648 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 649 LastConversionIdx+1); 650 651 Diag(Loc, diag::warn_printf_invalid_conversion, 652 std::string(Str+LastConversionIdx, 653 Str+std::min(LastConversionIdx+2, StrLen)), 654 OrigFormatExpr->getSourceRange()); 655 return; 656 } 657 658 if (!HasVAListArg) { 659 // CHECK: Does the number of format conversions exceed the number 660 // of data arguments? 661 if (numConversions > numDataArgs) { 662 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 663 LastConversionIdx); 664 665 Diag(Loc, diag::warn_printf_insufficient_data_args, 666 OrigFormatExpr->getSourceRange()); 667 } 668 // CHECK: Does the number of data arguments exceed the number of 669 // format conversions in the format string? 670 else if (numConversions < numDataArgs) 671 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 672 diag::warn_printf_too_many_data_args, 673 OrigFormatExpr->getSourceRange()); 674 } 675} 676 677//===--- CHECK: Return Address of Stack Variable --------------------------===// 678 679static DeclRefExpr* EvalVal(Expr *E); 680static DeclRefExpr* EvalAddr(Expr* E); 681 682/// CheckReturnStackAddr - Check if a return statement returns the address 683/// of a stack variable. 684void 685Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 686 SourceLocation ReturnLoc) { 687 688 // Perform checking for returned stack addresses. 689 if (lhsType->isPointerType()) { 690 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 691 Diag(DR->getLocStart(), diag::warn_ret_stack_addr, 692 DR->getDecl()->getIdentifier()->getName(), 693 RetValExp->getSourceRange()); 694 } 695 // Perform checking for stack values returned by reference. 696 else if (lhsType->isReferenceType()) { 697 // Check for an implicit cast to a reference. 698 if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp)) 699 if (DeclRefExpr *DR = EvalVal(I->getSubExpr())) 700 Diag(DR->getLocStart(), diag::warn_ret_stack_ref, 701 DR->getDecl()->getIdentifier()->getName(), 702 RetValExp->getSourceRange()); 703 } 704} 705 706/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 707/// check if the expression in a return statement evaluates to an address 708/// to a location on the stack. The recursion is used to traverse the 709/// AST of the return expression, with recursion backtracking when we 710/// encounter a subexpression that (1) clearly does not lead to the address 711/// of a stack variable or (2) is something we cannot determine leads to 712/// the address of a stack variable based on such local checking. 713/// 714/// EvalAddr processes expressions that are pointers that are used as 715/// references (and not L-values). EvalVal handles all other values. 716/// At the base case of the recursion is a check for a DeclRefExpr* in 717/// the refers to a stack variable. 718/// 719/// This implementation handles: 720/// 721/// * pointer-to-pointer casts 722/// * implicit conversions from array references to pointers 723/// * taking the address of fields 724/// * arbitrary interplay between "&" and "*" operators 725/// * pointer arithmetic from an address of a stack variable 726/// * taking the address of an array element where the array is on the stack 727static DeclRefExpr* EvalAddr(Expr *E) { 728 // We should only be called for evaluating pointer expressions. 729 assert((E->getType()->isPointerType() || 730 E->getType()->isObjCQualifiedIdType()) && 731 "EvalAddr only works on pointers"); 732 733 // Our "symbolic interpreter" is just a dispatch off the currently 734 // viewed AST node. We then recursively traverse the AST by calling 735 // EvalAddr and EvalVal appropriately. 736 switch (E->getStmtClass()) { 737 case Stmt::ParenExprClass: 738 // Ignore parentheses. 739 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 740 741 case Stmt::UnaryOperatorClass: { 742 // The only unary operator that make sense to handle here 743 // is AddrOf. All others don't make sense as pointers. 744 UnaryOperator *U = cast<UnaryOperator>(E); 745 746 if (U->getOpcode() == UnaryOperator::AddrOf) 747 return EvalVal(U->getSubExpr()); 748 else 749 return NULL; 750 } 751 752 case Stmt::BinaryOperatorClass: { 753 // Handle pointer arithmetic. All other binary operators are not valid 754 // in this context. 755 BinaryOperator *B = cast<BinaryOperator>(E); 756 BinaryOperator::Opcode op = B->getOpcode(); 757 758 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 759 return NULL; 760 761 Expr *Base = B->getLHS(); 762 763 // Determine which argument is the real pointer base. It could be 764 // the RHS argument instead of the LHS. 765 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 766 767 assert (Base->getType()->isPointerType()); 768 return EvalAddr(Base); 769 } 770 771 // For conditional operators we need to see if either the LHS or RHS are 772 // valid DeclRefExpr*s. If one of them is valid, we return it. 773 case Stmt::ConditionalOperatorClass: { 774 ConditionalOperator *C = cast<ConditionalOperator>(E); 775 776 // Handle the GNU extension for missing LHS. 777 if (Expr *lhsExpr = C->getLHS()) 778 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 779 return LHS; 780 781 return EvalAddr(C->getRHS()); 782 } 783 784 // For casts, we need to handle conversions from arrays to 785 // pointer values, and pointer-to-pointer conversions. 786 case Stmt::ExplicitCastExprClass: 787 case Stmt::ImplicitCastExprClass: { 788 789 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 790 QualType T = SubExpr->getType(); 791 792 if (T->isPointerType() || T->isObjCQualifiedIdType()) 793 return EvalAddr(SubExpr); 794 else if (T->isArrayType()) 795 return EvalVal(SubExpr); 796 else 797 return 0; 798 } 799 800 // C++ casts. For dynamic casts, static casts, and const casts, we 801 // are always converting from a pointer-to-pointer, so we just blow 802 // through the cast. In the case the dynamic cast doesn't fail 803 // (and return NULL), we take the conservative route and report cases 804 // where we return the address of a stack variable. For Reinterpre 805 case Stmt::CXXCastExprClass: { 806 CXXCastExpr *C = cast<CXXCastExpr>(E); 807 808 if (C->getOpcode() == CXXCastExpr::ReinterpretCast) { 809 Expr *S = C->getSubExpr(); 810 if (S->getType()->isPointerType()) 811 return EvalAddr(S); 812 else 813 return NULL; 814 } 815 else 816 return EvalAddr(C->getSubExpr()); 817 } 818 819 // Everything else: we simply don't reason about them. 820 default: 821 return NULL; 822 } 823} 824 825 826/// EvalVal - This function is complements EvalAddr in the mutual recursion. 827/// See the comments for EvalAddr for more details. 828static DeclRefExpr* EvalVal(Expr *E) { 829 830 // We should only be called for evaluating non-pointer expressions, or 831 // expressions with a pointer type that are not used as references but instead 832 // are l-values (e.g., DeclRefExpr with a pointer type). 833 834 // Our "symbolic interpreter" is just a dispatch off the currently 835 // viewed AST node. We then recursively traverse the AST by calling 836 // EvalAddr and EvalVal appropriately. 837 switch (E->getStmtClass()) { 838 case Stmt::DeclRefExprClass: { 839 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 840 // at code that refers to a variable's name. We check if it has local 841 // storage within the function, and if so, return the expression. 842 DeclRefExpr *DR = cast<DeclRefExpr>(E); 843 844 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 845 if(V->hasLocalStorage()) return DR; 846 847 return NULL; 848 } 849 850 case Stmt::ParenExprClass: 851 // Ignore parentheses. 852 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 853 854 case Stmt::UnaryOperatorClass: { 855 // The only unary operator that make sense to handle here 856 // is Deref. All others don't resolve to a "name." This includes 857 // handling all sorts of rvalues passed to a unary operator. 858 UnaryOperator *U = cast<UnaryOperator>(E); 859 860 if (U->getOpcode() == UnaryOperator::Deref) 861 return EvalAddr(U->getSubExpr()); 862 863 return NULL; 864 } 865 866 case Stmt::ArraySubscriptExprClass: { 867 // Array subscripts are potential references to data on the stack. We 868 // retrieve the DeclRefExpr* for the array variable if it indeed 869 // has local storage. 870 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 871 } 872 873 case Stmt::ConditionalOperatorClass: { 874 // For conditional operators we need to see if either the LHS or RHS are 875 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 876 ConditionalOperator *C = cast<ConditionalOperator>(E); 877 878 // Handle the GNU extension for missing LHS. 879 if (Expr *lhsExpr = C->getLHS()) 880 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 881 return LHS; 882 883 return EvalVal(C->getRHS()); 884 } 885 886 // Accesses to members are potential references to data on the stack. 887 case Stmt::MemberExprClass: { 888 MemberExpr *M = cast<MemberExpr>(E); 889 890 // Check for indirect access. We only want direct field accesses. 891 if (!M->isArrow()) 892 return EvalVal(M->getBase()); 893 else 894 return NULL; 895 } 896 897 // Everything else: we simply don't reason about them. 898 default: 899 return NULL; 900 } 901} 902 903//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 904 905/// Check for comparisons of floating point operands using != and ==. 906/// Issue a warning if these are no self-comparisons, as they are not likely 907/// to do what the programmer intended. 908void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 909 bool EmitWarning = true; 910 911 Expr* LeftExprSansParen = lex->IgnoreParens(); 912 Expr* RightExprSansParen = rex->IgnoreParens(); 913 914 // Special case: check for x == x (which is OK). 915 // Do not emit warnings for such cases. 916 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 917 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 918 if (DRL->getDecl() == DRR->getDecl()) 919 EmitWarning = false; 920 921 922 // Special case: check for comparisons against literals that can be exactly 923 // represented by APFloat. In such cases, do not emit a warning. This 924 // is a heuristic: often comparison against such literals are used to 925 // detect if a value in a variable has not changed. This clearly can 926 // lead to false negatives. 927 if (EmitWarning) { 928 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 929 if (FLL->isExact()) 930 EmitWarning = false; 931 } 932 else 933 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 934 if (FLR->isExact()) 935 EmitWarning = false; 936 } 937 } 938 939 // Check for comparisons with builtin types. 940 if (EmitWarning) 941 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 942 if (isCallBuiltin(CL)) 943 EmitWarning = false; 944 945 if (EmitWarning) 946 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 947 if (isCallBuiltin(CR)) 948 EmitWarning = false; 949 950 // Emit the diagnostic. 951 if (EmitWarning) 952 Diag(loc, diag::warn_floatingpoint_eq, 953 lex->getSourceRange(),rex->getSourceRange()); 954} 955