SemaChecking.cpp revision 2c21a073525cdfa68e4439b7af551385dc2796ab
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21#include "clang/Basic/Diagnostic.h" 22#include "SemaUtil.h" 23using namespace clang; 24 25/// CheckFunctionCall - Check a direct function call for various correctness 26/// and safety properties not strictly enforced by the C type system. 27Action::ExprResult 28Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 29 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 30 // Get the IdentifierInfo* for the called function. 31 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 32 33 // None of the checks below are needed for functions that don't have 34 // simple names (e.g., C++ conversion functions). 35 if (!FnInfo) 36 return TheCall.take(); 37 38 switch (FnInfo->getBuiltinID()) { 39 case Builtin::BI__builtin___CFStringMakeConstantString: 40 assert(TheCall->getNumArgs() == 1 && 41 "Wrong # arguments to builtin CFStringMakeConstantString"); 42 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 43 return true; 44 return TheCall.take(); 45 case Builtin::BI__builtin_stdarg_start: 46 case Builtin::BI__builtin_va_start: 47 if (SemaBuiltinVAStart(TheCall.get())) 48 return true; 49 return TheCall.take(); 50 case Builtin::BI__builtin_isgreater: 51 case Builtin::BI__builtin_isgreaterequal: 52 case Builtin::BI__builtin_isless: 53 case Builtin::BI__builtin_islessequal: 54 case Builtin::BI__builtin_islessgreater: 55 case Builtin::BI__builtin_isunordered: 56 if (SemaBuiltinUnorderedCompare(TheCall.get())) 57 return true; 58 return TheCall.take(); 59 case Builtin::BI__builtin_return_address: 60 case Builtin::BI__builtin_frame_address: 61 if (SemaBuiltinStackAddress(TheCall.get())) 62 return true; 63 return TheCall.take(); 64 case Builtin::BI__builtin_shufflevector: 65 return SemaBuiltinShuffleVector(TheCall.get()); 66 case Builtin::BI__builtin_prefetch: 67 if (SemaBuiltinPrefetch(TheCall.get())) 68 return true; 69 return TheCall.take(); 70 case Builtin::BI__builtin_object_size: 71 if (SemaBuiltinObjectSize(TheCall.get())) 72 return true; 73 } 74 75 // FIXME: This mechanism should be abstracted to be less fragile and 76 // more efficient. For example, just map function ids to custom 77 // handlers. 78 79 // Search the KnownFunctionIDs for the identifier. 80 unsigned i = 0, e = id_num_known_functions; 81 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 82 if (i == e) return TheCall.take(); 83 84 // Printf checking. 85 if (i <= id_vprintf) { 86 // Retrieve the index of the format string parameter and determine 87 // if the function is passed a va_arg argument. 88 unsigned format_idx = 0; 89 bool HasVAListArg = false; 90 91 switch (i) { 92 default: assert(false && "No format string argument index."); 93 case id_NSLog: format_idx = 0; break; 94 case id_asprintf: format_idx = 1; break; 95 case id_fprintf: format_idx = 1; break; 96 case id_printf: format_idx = 0; break; 97 case id_snprintf: format_idx = 2; break; 98 case id_snprintf_chk: format_idx = 4; break; 99 case id_sprintf: format_idx = 1; break; 100 case id_sprintf_chk: format_idx = 3; break; 101 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 102 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 103 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 104 case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break; 105 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 106 case id_vsprintf_chk: format_idx = 3; HasVAListArg = true; break; 107 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 108 } 109 110 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 111 } 112 113 return TheCall.take(); 114} 115 116/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 117/// CFString constructor is correct 118bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 119 Arg = Arg->IgnoreParenCasts(); 120 121 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 122 123 if (!Literal || Literal->isWide()) { 124 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 125 << Arg->getSourceRange(); 126 return true; 127 } 128 129 const char *Data = Literal->getStrData(); 130 unsigned Length = Literal->getByteLength(); 131 132 for (unsigned i = 0; i < Length; ++i) { 133 if (!isascii(Data[i])) { 134 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 135 diag::warn_cfstring_literal_contains_non_ascii_character) 136 << Arg->getSourceRange(); 137 break; 138 } 139 140 if (!Data[i]) { 141 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 142 diag::warn_cfstring_literal_contains_nul_character) 143 << Arg->getSourceRange(); 144 break; 145 } 146 } 147 148 return false; 149} 150 151/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 152/// Emit an error and return true on failure, return false on success. 153bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 154 Expr *Fn = TheCall->getCallee(); 155 if (TheCall->getNumArgs() > 2) { 156 Diag(TheCall->getArg(2)->getLocStart(), 157 diag::err_typecheck_call_too_many_args) 158 << 0 /*function call*/ << Fn->getSourceRange() 159 << SourceRange(TheCall->getArg(2)->getLocStart(), 160 (*(TheCall->arg_end()-1))->getLocEnd()); 161 return true; 162 } 163 164 // Determine whether the current function is variadic or not. 165 bool isVariadic; 166 if (getCurFunctionDecl()) 167 isVariadic = 168 cast<FunctionTypeProto>(getCurFunctionDecl()->getType())->isVariadic(); 169 else 170 isVariadic = getCurMethodDecl()->isVariadic(); 171 172 if (!isVariadic) { 173 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 174 return true; 175 } 176 177 // Verify that the second argument to the builtin is the last argument of the 178 // current function or method. 179 bool SecondArgIsLastNamedArgument = false; 180 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 181 182 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 183 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 184 // FIXME: This isn't correct for methods (results in bogus warning). 185 // Get the last formal in the current function. 186 const ParmVarDecl *LastArg; 187 if (getCurFunctionDecl()) 188 LastArg = *(getCurFunctionDecl()->param_end()-1); 189 else 190 LastArg = *(getCurMethodDecl()->param_end()-1); 191 SecondArgIsLastNamedArgument = PV == LastArg; 192 } 193 } 194 195 if (!SecondArgIsLastNamedArgument) 196 Diag(TheCall->getArg(1)->getLocStart(), 197 diag::warn_second_parameter_of_va_start_not_last_named_argument); 198 return false; 199} 200 201/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 202/// friends. This is declared to take (...), so we have to check everything. 203bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 204 if (TheCall->getNumArgs() < 2) 205 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 206 << 0 /*function call*/; 207 if (TheCall->getNumArgs() > 2) 208 return Diag(TheCall->getArg(2)->getLocStart(), 209 diag::err_typecheck_call_too_many_args) 210 << 0 /*function call*/ 211 << SourceRange(TheCall->getArg(2)->getLocStart(), 212 (*(TheCall->arg_end()-1))->getLocEnd()); 213 214 Expr *OrigArg0 = TheCall->getArg(0); 215 Expr *OrigArg1 = TheCall->getArg(1); 216 217 // Do standard promotions between the two arguments, returning their common 218 // type. 219 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 220 221 // If the common type isn't a real floating type, then the arguments were 222 // invalid for this operation. 223 if (!Res->isRealFloatingType()) 224 return Diag(OrigArg0->getLocStart(), 225 diag::err_typecheck_call_invalid_ordered_compare) 226 << OrigArg0->getType().getAsString() << OrigArg1->getType().getAsString() 227 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 228 229 return false; 230} 231 232bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 233 // The signature for these builtins is exact; the only thing we need 234 // to check is that the argument is a constant. 235 SourceLocation Loc; 236 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 237 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 238 239 return false; 240} 241 242/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 243// This is declared to take (...), so we have to check everything. 244Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 245 if (TheCall->getNumArgs() < 3) 246 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 247 << 0 /*function call*/ << TheCall->getSourceRange(); 248 249 QualType FAType = TheCall->getArg(0)->getType(); 250 QualType SAType = TheCall->getArg(1)->getType(); 251 252 if (!FAType->isVectorType() || !SAType->isVectorType()) { 253 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 254 << SourceRange(TheCall->getArg(0)->getLocStart(), 255 TheCall->getArg(1)->getLocEnd()); 256 return true; 257 } 258 259 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 260 Context.getCanonicalType(SAType).getUnqualifiedType()) { 261 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 262 << SourceRange(TheCall->getArg(0)->getLocStart(), 263 TheCall->getArg(1)->getLocEnd()); 264 return true; 265 } 266 267 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 268 if (TheCall->getNumArgs() != numElements+2) { 269 if (TheCall->getNumArgs() < numElements+2) 270 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 271 << 0 /*function call*/ << TheCall->getSourceRange(); 272 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 273 << 0 /*function call*/ << TheCall->getSourceRange(); 274 } 275 276 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 277 llvm::APSInt Result(32); 278 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 279 return Diag(TheCall->getLocStart(), 280 diag::err_shufflevector_nonconstant_argument) 281 << TheCall->getArg(i)->getSourceRange(); 282 283 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 284 return Diag(TheCall->getLocStart(), 285 diag::err_shufflevector_argument_too_large) 286 << TheCall->getArg(i)->getSourceRange(); 287 } 288 289 llvm::SmallVector<Expr*, 32> exprs; 290 291 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 292 exprs.push_back(TheCall->getArg(i)); 293 TheCall->setArg(i, 0); 294 } 295 296 return new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType, 297 TheCall->getCallee()->getLocStart(), 298 TheCall->getRParenLoc()); 299} 300 301/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 302// This is declared to take (const void*, ...) and can take two 303// optional constant int args. 304bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 305 unsigned NumArgs = TheCall->getNumArgs(); 306 307 if (NumArgs > 3) 308 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 309 << 0 /*function call*/ << TheCall->getSourceRange(); 310 311 // Argument 0 is checked for us and the remaining arguments must be 312 // constant integers. 313 for (unsigned i = 1; i != NumArgs; ++i) { 314 Expr *Arg = TheCall->getArg(i); 315 QualType RWType = Arg->getType(); 316 317 const BuiltinType *BT = RWType->getAsBuiltinType(); 318 llvm::APSInt Result; 319 if (!BT || BT->getKind() != BuiltinType::Int || 320 !Arg->isIntegerConstantExpr(Result, Context)) 321 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 322 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 323 324 // FIXME: gcc issues a warning and rewrites these to 0. These 325 // seems especially odd for the third argument since the default 326 // is 3. 327 if (i == 1) { 328 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 329 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 330 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 331 } else { 332 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 333 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 334 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 335 } 336 } 337 338 return false; 339} 340 341/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 342/// int type). This simply type checks that type is one of the defined 343/// constants (0-3). 344bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 345 Expr *Arg = TheCall->getArg(1); 346 QualType ArgType = Arg->getType(); 347 const BuiltinType *BT = ArgType->getAsBuiltinType(); 348 llvm::APSInt Result(32); 349 if (!BT || BT->getKind() != BuiltinType::Int || 350 !Arg->isIntegerConstantExpr(Result, Context)) { 351 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 352 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 353 } 354 355 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 356 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 357 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 358 } 359 360 return false; 361} 362 363/// CheckPrintfArguments - Check calls to printf (and similar functions) for 364/// correct use of format strings. 365/// 366/// HasVAListArg - A predicate indicating whether the printf-like 367/// function is passed an explicit va_arg argument (e.g., vprintf) 368/// 369/// format_idx - The index into Args for the format string. 370/// 371/// Improper format strings to functions in the printf family can be 372/// the source of bizarre bugs and very serious security holes. A 373/// good source of information is available in the following paper 374/// (which includes additional references): 375/// 376/// FormatGuard: Automatic Protection From printf Format String 377/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 378/// 379/// Functionality implemented: 380/// 381/// We can statically check the following properties for string 382/// literal format strings for non v.*printf functions (where the 383/// arguments are passed directly): 384// 385/// (1) Are the number of format conversions equal to the number of 386/// data arguments? 387/// 388/// (2) Does each format conversion correctly match the type of the 389/// corresponding data argument? (TODO) 390/// 391/// Moreover, for all printf functions we can: 392/// 393/// (3) Check for a missing format string (when not caught by type checking). 394/// 395/// (4) Check for no-operation flags; e.g. using "#" with format 396/// conversion 'c' (TODO) 397/// 398/// (5) Check the use of '%n', a major source of security holes. 399/// 400/// (6) Check for malformed format conversions that don't specify anything. 401/// 402/// (7) Check for empty format strings. e.g: printf(""); 403/// 404/// (8) Check that the format string is a wide literal. 405/// 406/// (9) Also check the arguments of functions with the __format__ attribute. 407/// (TODO). 408/// 409/// All of these checks can be done by parsing the format string. 410/// 411/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 412void 413Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 414 unsigned format_idx) { 415 Expr *Fn = TheCall->getCallee(); 416 417 // CHECK: printf-like function is called with no format string. 418 if (format_idx >= TheCall->getNumArgs()) { 419 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 420 << Fn->getSourceRange(); 421 return; 422 } 423 424 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 425 426 // CHECK: format string is not a string literal. 427 // 428 // Dynamically generated format strings are difficult to 429 // automatically vet at compile time. Requiring that format strings 430 // are string literals: (1) permits the checking of format strings by 431 // the compiler and thereby (2) can practically remove the source of 432 // many format string exploits. 433 434 // Format string can be either ObjC string (e.g. @"%d") or 435 // C string (e.g. "%d") 436 // ObjC string uses the same format specifiers as C string, so we can use 437 // the same format string checking logic for both ObjC and C strings. 438 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 439 StringLiteral *FExpr = NULL; 440 441 if(ObjCFExpr != NULL) 442 FExpr = ObjCFExpr->getString(); 443 else 444 FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 445 446 if (FExpr == NULL) { 447 // For vprintf* functions (i.e., HasVAListArg==true), we add a 448 // special check to see if the format string is a function parameter 449 // of the function calling the printf function. If the function 450 // has an attribute indicating it is a printf-like function, then we 451 // should suppress warnings concerning non-literals being used in a call 452 // to a vprintf function. For example: 453 // 454 // void 455 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 456 // va_list ap; 457 // va_start(ap, fmt); 458 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 459 // ... 460 // 461 // 462 // FIXME: We don't have full attribute support yet, so just check to see 463 // if the argument is a DeclRefExpr that references a parameter. We'll 464 // add proper support for checking the attribute later. 465 if (HasVAListArg) 466 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 467 if (isa<ParmVarDecl>(DR->getDecl())) 468 return; 469 470 Diag(TheCall->getArg(format_idx)->getLocStart(), 471 diag::warn_printf_not_string_constant) 472 << OrigFormatExpr->getSourceRange(); 473 return; 474 } 475 476 // CHECK: is the format string a wide literal? 477 if (FExpr->isWide()) { 478 Diag(FExpr->getLocStart(), 479 diag::warn_printf_format_string_is_wide_literal) 480 << OrigFormatExpr->getSourceRange(); 481 return; 482 } 483 484 // Str - The format string. NOTE: this is NOT null-terminated! 485 const char * const Str = FExpr->getStrData(); 486 487 // CHECK: empty format string? 488 const unsigned StrLen = FExpr->getByteLength(); 489 490 if (StrLen == 0) { 491 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 492 << OrigFormatExpr->getSourceRange(); 493 return; 494 } 495 496 // We process the format string using a binary state machine. The 497 // current state is stored in CurrentState. 498 enum { 499 state_OrdChr, 500 state_Conversion 501 } CurrentState = state_OrdChr; 502 503 // numConversions - The number of conversions seen so far. This is 504 // incremented as we traverse the format string. 505 unsigned numConversions = 0; 506 507 // numDataArgs - The number of data arguments after the format 508 // string. This can only be determined for non vprintf-like 509 // functions. For those functions, this value is 1 (the sole 510 // va_arg argument). 511 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 512 513 // Inspect the format string. 514 unsigned StrIdx = 0; 515 516 // LastConversionIdx - Index within the format string where we last saw 517 // a '%' character that starts a new format conversion. 518 unsigned LastConversionIdx = 0; 519 520 for (; StrIdx < StrLen; ++StrIdx) { 521 522 // Is the number of detected conversion conversions greater than 523 // the number of matching data arguments? If so, stop. 524 if (!HasVAListArg && numConversions > numDataArgs) break; 525 526 // Handle "\0" 527 if (Str[StrIdx] == '\0') { 528 // The string returned by getStrData() is not null-terminated, 529 // so the presence of a null character is likely an error. 530 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 531 diag::warn_printf_format_string_contains_null_char) 532 << OrigFormatExpr->getSourceRange(); 533 return; 534 } 535 536 // Ordinary characters (not processing a format conversion). 537 if (CurrentState == state_OrdChr) { 538 if (Str[StrIdx] == '%') { 539 CurrentState = state_Conversion; 540 LastConversionIdx = StrIdx; 541 } 542 continue; 543 } 544 545 // Seen '%'. Now processing a format conversion. 546 switch (Str[StrIdx]) { 547 // Handle dynamic precision or width specifier. 548 case '*': { 549 ++numConversions; 550 551 if (!HasVAListArg && numConversions > numDataArgs) { 552 SourceLocation Loc = FExpr->getLocStart(); 553 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 554 555 if (Str[StrIdx-1] == '.') 556 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 557 << OrigFormatExpr->getSourceRange(); 558 else 559 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 560 << OrigFormatExpr->getSourceRange(); 561 562 // Don't do any more checking. We'll just emit spurious errors. 563 return; 564 } 565 566 // Perform type checking on width/precision specifier. 567 Expr *E = TheCall->getArg(format_idx+numConversions); 568 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 569 if (BT->getKind() == BuiltinType::Int) 570 break; 571 572 SourceLocation Loc = 573 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 574 575 if (Str[StrIdx-1] == '.') 576 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 577 << E->getType().getAsString() << E->getSourceRange(); 578 else 579 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 580 << E->getType().getAsString() << E->getSourceRange(); 581 582 break; 583 } 584 585 // Characters which can terminate a format conversion 586 // (e.g. "%d"). Characters that specify length modifiers or 587 // other flags are handled by the default case below. 588 // 589 // FIXME: additional checks will go into the following cases. 590 case 'i': 591 case 'd': 592 case 'o': 593 case 'u': 594 case 'x': 595 case 'X': 596 case 'D': 597 case 'O': 598 case 'U': 599 case 'e': 600 case 'E': 601 case 'f': 602 case 'F': 603 case 'g': 604 case 'G': 605 case 'a': 606 case 'A': 607 case 'c': 608 case 'C': 609 case 'S': 610 case 's': 611 case 'p': 612 ++numConversions; 613 CurrentState = state_OrdChr; 614 break; 615 616 // CHECK: Are we using "%n"? Issue a warning. 617 case 'n': { 618 ++numConversions; 619 CurrentState = state_OrdChr; 620 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 621 LastConversionIdx+1); 622 623 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 624 break; 625 } 626 627 // Handle "%@" 628 case '@': 629 // %@ is allowed in ObjC format strings only. 630 if(ObjCFExpr != NULL) 631 CurrentState = state_OrdChr; 632 else { 633 // Issue a warning: invalid format conversion. 634 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 635 LastConversionIdx+1); 636 637 Diag(Loc, diag::warn_printf_invalid_conversion) 638 << std::string(Str+LastConversionIdx, 639 Str+std::min(LastConversionIdx+2, StrLen)) 640 << OrigFormatExpr->getSourceRange(); 641 } 642 ++numConversions; 643 break; 644 645 // Handle "%%" 646 case '%': 647 // Sanity check: Was the first "%" character the previous one? 648 // If not, we will assume that we have a malformed format 649 // conversion, and that the current "%" character is the start 650 // of a new conversion. 651 if (StrIdx - LastConversionIdx == 1) 652 CurrentState = state_OrdChr; 653 else { 654 // Issue a warning: invalid format conversion. 655 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 656 LastConversionIdx+1); 657 658 Diag(Loc, diag::warn_printf_invalid_conversion) 659 << std::string(Str+LastConversionIdx, Str+StrIdx) 660 << OrigFormatExpr->getSourceRange(); 661 662 // This conversion is broken. Advance to the next format 663 // conversion. 664 LastConversionIdx = StrIdx; 665 ++numConversions; 666 } 667 break; 668 669 default: 670 // This case catches all other characters: flags, widths, etc. 671 // We should eventually process those as well. 672 break; 673 } 674 } 675 676 if (CurrentState == state_Conversion) { 677 // Issue a warning: invalid format conversion. 678 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 679 LastConversionIdx+1); 680 681 Diag(Loc, diag::warn_printf_invalid_conversion) 682 << std::string(Str+LastConversionIdx, 683 Str+std::min(LastConversionIdx+2, StrLen)) 684 << OrigFormatExpr->getSourceRange(); 685 return; 686 } 687 688 if (!HasVAListArg) { 689 // CHECK: Does the number of format conversions exceed the number 690 // of data arguments? 691 if (numConversions > numDataArgs) { 692 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 693 LastConversionIdx); 694 695 Diag(Loc, diag::warn_printf_insufficient_data_args) 696 << OrigFormatExpr->getSourceRange(); 697 } 698 // CHECK: Does the number of data arguments exceed the number of 699 // format conversions in the format string? 700 else if (numConversions < numDataArgs) 701 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 702 diag::warn_printf_too_many_data_args) 703 << OrigFormatExpr->getSourceRange(); 704 } 705} 706 707//===--- CHECK: Return Address of Stack Variable --------------------------===// 708 709static DeclRefExpr* EvalVal(Expr *E); 710static DeclRefExpr* EvalAddr(Expr* E); 711 712/// CheckReturnStackAddr - Check if a return statement returns the address 713/// of a stack variable. 714void 715Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 716 SourceLocation ReturnLoc) { 717 718 // Perform checking for returned stack addresses. 719 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 720 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 721 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 722 << DR->getDecl()->getIdentifier() << RetValExp->getSourceRange(); 723 724 // Skip over implicit cast expressions when checking for block expressions. 725 if (ImplicitCastExpr *IcExpr = 726 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 727 RetValExp = IcExpr->getSubExpr(); 728 729 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 730 Diag(C->getLocStart(), diag::err_ret_local_block) 731 << C->getSourceRange(); 732 } 733 // Perform checking for stack values returned by reference. 734 else if (lhsType->isReferenceType()) { 735 // Check for a reference to the stack 736 if (DeclRefExpr *DR = EvalVal(RetValExp)) 737 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 738 << DR->getDecl()->getIdentifier() 739 << RetValExp->getSourceRange(); 740 } 741} 742 743/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 744/// check if the expression in a return statement evaluates to an address 745/// to a location on the stack. The recursion is used to traverse the 746/// AST of the return expression, with recursion backtracking when we 747/// encounter a subexpression that (1) clearly does not lead to the address 748/// of a stack variable or (2) is something we cannot determine leads to 749/// the address of a stack variable based on such local checking. 750/// 751/// EvalAddr processes expressions that are pointers that are used as 752/// references (and not L-values). EvalVal handles all other values. 753/// At the base case of the recursion is a check for a DeclRefExpr* in 754/// the refers to a stack variable. 755/// 756/// This implementation handles: 757/// 758/// * pointer-to-pointer casts 759/// * implicit conversions from array references to pointers 760/// * taking the address of fields 761/// * arbitrary interplay between "&" and "*" operators 762/// * pointer arithmetic from an address of a stack variable 763/// * taking the address of an array element where the array is on the stack 764static DeclRefExpr* EvalAddr(Expr *E) { 765 // We should only be called for evaluating pointer expressions. 766 assert((E->getType()->isPointerType() || 767 E->getType()->isBlockPointerType() || 768 E->getType()->isObjCQualifiedIdType()) && 769 "EvalAddr only works on pointers"); 770 771 // Our "symbolic interpreter" is just a dispatch off the currently 772 // viewed AST node. We then recursively traverse the AST by calling 773 // EvalAddr and EvalVal appropriately. 774 switch (E->getStmtClass()) { 775 case Stmt::ParenExprClass: 776 // Ignore parentheses. 777 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 778 779 case Stmt::UnaryOperatorClass: { 780 // The only unary operator that make sense to handle here 781 // is AddrOf. All others don't make sense as pointers. 782 UnaryOperator *U = cast<UnaryOperator>(E); 783 784 if (U->getOpcode() == UnaryOperator::AddrOf) 785 return EvalVal(U->getSubExpr()); 786 else 787 return NULL; 788 } 789 790 case Stmt::BinaryOperatorClass: { 791 // Handle pointer arithmetic. All other binary operators are not valid 792 // in this context. 793 BinaryOperator *B = cast<BinaryOperator>(E); 794 BinaryOperator::Opcode op = B->getOpcode(); 795 796 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 797 return NULL; 798 799 Expr *Base = B->getLHS(); 800 801 // Determine which argument is the real pointer base. It could be 802 // the RHS argument instead of the LHS. 803 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 804 805 assert (Base->getType()->isPointerType()); 806 return EvalAddr(Base); 807 } 808 809 // For conditional operators we need to see if either the LHS or RHS are 810 // valid DeclRefExpr*s. If one of them is valid, we return it. 811 case Stmt::ConditionalOperatorClass: { 812 ConditionalOperator *C = cast<ConditionalOperator>(E); 813 814 // Handle the GNU extension for missing LHS. 815 if (Expr *lhsExpr = C->getLHS()) 816 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 817 return LHS; 818 819 return EvalAddr(C->getRHS()); 820 } 821 822 // For casts, we need to handle conversions from arrays to 823 // pointer values, and pointer-to-pointer conversions. 824 case Stmt::ImplicitCastExprClass: 825 case Stmt::CStyleCastExprClass: 826 case Stmt::CXXFunctionalCastExprClass: { 827 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 828 QualType T = SubExpr->getType(); 829 830 if (SubExpr->getType()->isPointerType() || 831 SubExpr->getType()->isBlockPointerType() || 832 SubExpr->getType()->isObjCQualifiedIdType()) 833 return EvalAddr(SubExpr); 834 else if (T->isArrayType()) 835 return EvalVal(SubExpr); 836 else 837 return 0; 838 } 839 840 // C++ casts. For dynamic casts, static casts, and const casts, we 841 // are always converting from a pointer-to-pointer, so we just blow 842 // through the cast. In the case the dynamic cast doesn't fail (and 843 // return NULL), we take the conservative route and report cases 844 // where we return the address of a stack variable. For Reinterpre 845 // FIXME: The comment about is wrong; we're not always converting 846 // from pointer to pointer. I'm guessing that this code should also 847 // handle references to objects. 848 case Stmt::CXXStaticCastExprClass: 849 case Stmt::CXXDynamicCastExprClass: 850 case Stmt::CXXConstCastExprClass: 851 case Stmt::CXXReinterpretCastExprClass: { 852 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 853 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 854 return EvalAddr(S); 855 else 856 return NULL; 857 } 858 859 // Everything else: we simply don't reason about them. 860 default: 861 return NULL; 862 } 863} 864 865 866/// EvalVal - This function is complements EvalAddr in the mutual recursion. 867/// See the comments for EvalAddr for more details. 868static DeclRefExpr* EvalVal(Expr *E) { 869 870 // We should only be called for evaluating non-pointer expressions, or 871 // expressions with a pointer type that are not used as references but instead 872 // are l-values (e.g., DeclRefExpr with a pointer type). 873 874 // Our "symbolic interpreter" is just a dispatch off the currently 875 // viewed AST node. We then recursively traverse the AST by calling 876 // EvalAddr and EvalVal appropriately. 877 switch (E->getStmtClass()) { 878 case Stmt::DeclRefExprClass: { 879 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 880 // at code that refers to a variable's name. We check if it has local 881 // storage within the function, and if so, return the expression. 882 DeclRefExpr *DR = cast<DeclRefExpr>(E); 883 884 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 885 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 886 887 return NULL; 888 } 889 890 case Stmt::ParenExprClass: 891 // Ignore parentheses. 892 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 893 894 case Stmt::UnaryOperatorClass: { 895 // The only unary operator that make sense to handle here 896 // is Deref. All others don't resolve to a "name." This includes 897 // handling all sorts of rvalues passed to a unary operator. 898 UnaryOperator *U = cast<UnaryOperator>(E); 899 900 if (U->getOpcode() == UnaryOperator::Deref) 901 return EvalAddr(U->getSubExpr()); 902 903 return NULL; 904 } 905 906 case Stmt::ArraySubscriptExprClass: { 907 // Array subscripts are potential references to data on the stack. We 908 // retrieve the DeclRefExpr* for the array variable if it indeed 909 // has local storage. 910 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 911 } 912 913 case Stmt::ConditionalOperatorClass: { 914 // For conditional operators we need to see if either the LHS or RHS are 915 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 916 ConditionalOperator *C = cast<ConditionalOperator>(E); 917 918 // Handle the GNU extension for missing LHS. 919 if (Expr *lhsExpr = C->getLHS()) 920 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 921 return LHS; 922 923 return EvalVal(C->getRHS()); 924 } 925 926 // Accesses to members are potential references to data on the stack. 927 case Stmt::MemberExprClass: { 928 MemberExpr *M = cast<MemberExpr>(E); 929 930 // Check for indirect access. We only want direct field accesses. 931 if (!M->isArrow()) 932 return EvalVal(M->getBase()); 933 else 934 return NULL; 935 } 936 937 // Everything else: we simply don't reason about them. 938 default: 939 return NULL; 940 } 941} 942 943//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 944 945/// Check for comparisons of floating point operands using != and ==. 946/// Issue a warning if these are no self-comparisons, as they are not likely 947/// to do what the programmer intended. 948void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 949 bool EmitWarning = true; 950 951 Expr* LeftExprSansParen = lex->IgnoreParens(); 952 Expr* RightExprSansParen = rex->IgnoreParens(); 953 954 // Special case: check for x == x (which is OK). 955 // Do not emit warnings for such cases. 956 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 957 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 958 if (DRL->getDecl() == DRR->getDecl()) 959 EmitWarning = false; 960 961 962 // Special case: check for comparisons against literals that can be exactly 963 // represented by APFloat. In such cases, do not emit a warning. This 964 // is a heuristic: often comparison against such literals are used to 965 // detect if a value in a variable has not changed. This clearly can 966 // lead to false negatives. 967 if (EmitWarning) { 968 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 969 if (FLL->isExact()) 970 EmitWarning = false; 971 } 972 else 973 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 974 if (FLR->isExact()) 975 EmitWarning = false; 976 } 977 } 978 979 // Check for comparisons with builtin types. 980 if (EmitWarning) 981 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 982 if (isCallBuiltin(CL)) 983 EmitWarning = false; 984 985 if (EmitWarning) 986 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 987 if (isCallBuiltin(CR)) 988 EmitWarning = false; 989 990 // Emit the diagnostic. 991 if (EmitWarning) 992 Diag(loc, diag::warn_floatingpoint_eq) 993 << lex->getSourceRange() << rex->getSourceRange(); 994} 995