// SemaChecking.cpp revision 6eec8e883de118b431e3ead5b1e604a6ac68ff6b
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21#include "clang/Basic/Diagnostic.h" 22#include "SemaUtil.h" 23using namespace clang; 24 25/// CheckFunctionCall - Check a direct function call for various correctness 26/// and safety properties not strictly enforced by the C type system. 27Action::ExprResult 28Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 29 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 30 // Get the IdentifierInfo* for the called function. 
31 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 32 33 switch (FnInfo->getBuiltinID()) { 34 case Builtin::BI__builtin___CFStringMakeConstantString: 35 assert(TheCall->getNumArgs() == 1 && 36 "Wrong # arguments to builtin CFStringMakeConstantString"); 37 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 38 return true; 39 return TheCall.take(); 40 case Builtin::BI__builtin_stdarg_start: 41 case Builtin::BI__builtin_va_start: 42 if (SemaBuiltinVAStart(TheCall.get())) 43 return true; 44 return TheCall.take(); 45 case Builtin::BI__builtin_isgreater: 46 case Builtin::BI__builtin_isgreaterequal: 47 case Builtin::BI__builtin_isless: 48 case Builtin::BI__builtin_islessequal: 49 case Builtin::BI__builtin_islessgreater: 50 case Builtin::BI__builtin_isunordered: 51 if (SemaBuiltinUnorderedCompare(TheCall.get())) 52 return true; 53 return TheCall.take(); 54 case Builtin::BI__builtin_return_address: 55 case Builtin::BI__builtin_frame_address: 56 if (SemaBuiltinStackAddress(TheCall.get())) 57 return true; 58 return TheCall.take(); 59 case Builtin::BI__builtin_shufflevector: 60 return SemaBuiltinShuffleVector(TheCall.get()); 61 case Builtin::BI__builtin_prefetch: 62 if (SemaBuiltinPrefetch(TheCall.get())) 63 return true; 64 return TheCall.take(); 65 case Builtin::BI__builtin_object_size: 66 if (SemaBuiltinObjectSize(TheCall.get())) 67 return true; 68 } 69 70 // FIXME: This mechanism should be abstracted to be less fragile and 71 // more efficient. For example, just map function ids to custom 72 // handlers. 73 74 // Search the KnownFunctionIDs for the identifier. 75 unsigned i = 0, e = id_num_known_functions; 76 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 77 if (i == e) return TheCall.take(); 78 79 // Printf checking. 80 if (i <= id_vprintf) { 81 // Retrieve the index of the format string parameter and determine 82 // if the function is passed a va_arg argument. 
83 unsigned format_idx = 0; 84 bool HasVAListArg = false; 85 86 switch (i) { 87 default: assert(false && "No format string argument index."); 88 case id_NSLog: format_idx = 0; break; 89 case id_asprintf: format_idx = 1; break; 90 case id_fprintf: format_idx = 1; break; 91 case id_printf: format_idx = 0; break; 92 case id_snprintf: format_idx = 2; break; 93 case id_snprintf_chk: format_idx = 4; break; 94 case id_sprintf: format_idx = 1; break; 95 case id_sprintf_chk: format_idx = 3; break; 96 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 97 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 98 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 99 case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break; 100 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 101 case id_vsprintf_chk: format_idx = 3; HasVAListArg = true; break; 102 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 103 } 104 105 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 106 } 107 108 return TheCall.take(); 109} 110 111/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 112/// CFString constructor is correct 113bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 114 Arg = Arg->IgnoreParenCasts(); 115 116 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 117 118 if (!Literal || Literal->isWide()) { 119 Diag(Arg->getLocStart(), 120 diag::err_cfstring_literal_not_string_constant, 121 Arg->getSourceRange()); 122 return true; 123 } 124 125 const char *Data = Literal->getStrData(); 126 unsigned Length = Literal->getByteLength(); 127 128 for (unsigned i = 0; i < Length; ++i) { 129 if (!isascii(Data[i])) { 130 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 131 diag::warn_cfstring_literal_contains_non_ascii_character, 132 Arg->getSourceRange()); 133 break; 134 } 135 136 if (!Data[i]) { 137 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 138 
diag::warn_cfstring_literal_contains_nul_character, 139 Arg->getSourceRange()); 140 break; 141 } 142 } 143 144 return false; 145} 146 147/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 148/// Emit an error and return true on failure, return false on success. 149bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 150 Expr *Fn = TheCall->getCallee(); 151 if (TheCall->getNumArgs() > 2) { 152 Diag(TheCall->getArg(2)->getLocStart(), 153 diag::err_typecheck_call_too_many_args, Fn->getSourceRange(), 154 SourceRange(TheCall->getArg(2)->getLocStart(), 155 (*(TheCall->arg_end()-1))->getLocEnd())); 156 return true; 157 } 158 159 // Determine whether the current function is variadic or not. 160 bool isVariadic; 161 if (getCurFunctionDecl()) 162 isVariadic = 163 cast<FunctionTypeProto>(getCurFunctionDecl()->getType())->isVariadic(); 164 else 165 isVariadic = getCurMethodDecl()->isVariadic(); 166 167 if (!isVariadic) { 168 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 169 return true; 170 } 171 172 // Verify that the second argument to the builtin is the last argument of the 173 // current function or method. 174 bool SecondArgIsLastNamedArgument = false; 175 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 176 177 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 178 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 179 // FIXME: This isn't correct for methods (results in bogus warning). 180 // Get the last formal in the current function. 
181 const ParmVarDecl *LastArg; 182 if (getCurFunctionDecl()) 183 LastArg = *(getCurFunctionDecl()->param_end()-1); 184 else 185 LastArg = *(getCurMethodDecl()->param_end()-1); 186 SecondArgIsLastNamedArgument = PV == LastArg; 187 } 188 } 189 190 if (!SecondArgIsLastNamedArgument) 191 Diag(TheCall->getArg(1)->getLocStart(), 192 diag::warn_second_parameter_of_va_start_not_last_named_argument); 193 return false; 194} 195 196/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 197/// friends. This is declared to take (...), so we have to check everything. 198bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 199 if (TheCall->getNumArgs() < 2) 200 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args); 201 if (TheCall->getNumArgs() > 2) 202 return Diag(TheCall->getArg(2)->getLocStart(), 203 diag::err_typecheck_call_too_many_args, 204 SourceRange(TheCall->getArg(2)->getLocStart(), 205 (*(TheCall->arg_end()-1))->getLocEnd())); 206 207 Expr *OrigArg0 = TheCall->getArg(0); 208 Expr *OrigArg1 = TheCall->getArg(1); 209 210 // Do standard promotions between the two arguments, returning their common 211 // type. 212 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 213 214 // If the common type isn't a real floating type, then the arguments were 215 // invalid for this operation. 216 if (!Res->isRealFloatingType()) 217 return Diag(OrigArg0->getLocStart(), 218 diag::err_typecheck_call_invalid_ordered_compare, 219 OrigArg0->getType().getAsString(), 220 OrigArg1->getType().getAsString(), 221 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd())); 222 223 return false; 224} 225 226bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 227 // The signature for these builtins is exact; the only thing we need 228 // to check is that the argument is a constant. 
229 SourceLocation Loc; 230 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 231 return Diag(Loc, diag::err_stack_const_level, TheCall->getSourceRange()); 232 233 return false; 234} 235 236/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 237// This is declared to take (...), so we have to check everything. 238Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 239 if (TheCall->getNumArgs() < 3) 240 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 241 TheCall->getSourceRange()); 242 243 QualType FAType = TheCall->getArg(0)->getType(); 244 QualType SAType = TheCall->getArg(1)->getType(); 245 246 if (!FAType->isVectorType() || !SAType->isVectorType()) { 247 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector, 248 SourceRange(TheCall->getArg(0)->getLocStart(), 249 TheCall->getArg(1)->getLocEnd())); 250 return true; 251 } 252 253 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 254 Context.getCanonicalType(SAType).getUnqualifiedType()) { 255 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector, 256 SourceRange(TheCall->getArg(0)->getLocStart(), 257 TheCall->getArg(1)->getLocEnd())); 258 return true; 259 } 260 261 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 262 if (TheCall->getNumArgs() != numElements+2) { 263 if (TheCall->getNumArgs() < numElements+2) 264 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args, 265 TheCall->getSourceRange()); 266 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 267 TheCall->getSourceRange()); 268 } 269 270 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 271 llvm::APSInt Result(32); 272 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 273 return Diag(TheCall->getLocStart(), 274 diag::err_shufflevector_nonconstant_argument, 275 TheCall->getArg(i)->getSourceRange()); 276 277 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= 
numElements*2) 278 return Diag(TheCall->getLocStart(), 279 diag::err_shufflevector_argument_too_large, 280 TheCall->getArg(i)->getSourceRange()); 281 } 282 283 llvm::SmallVector<Expr*, 32> exprs; 284 285 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 286 exprs.push_back(TheCall->getArg(i)); 287 TheCall->setArg(i, 0); 288 } 289 290 return new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType, 291 TheCall->getCallee()->getLocStart(), 292 TheCall->getRParenLoc()); 293} 294 295/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 296// This is declared to take (const void*, ...) and can take two 297// optional constant int args. 298bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 299 unsigned numArgs = TheCall->getNumArgs(); 300 bool res = false; 301 302 if (numArgs > 3) { 303 res |= Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args, 304 TheCall->getSourceRange()); 305 } 306 307 // Argument 0 is checked for us and the remaining arguments must be 308 // constant integers. 309 for (unsigned i=1; i<numArgs; ++i) { 310 Expr *Arg = TheCall->getArg(i); 311 QualType RWType = Arg->getType(); 312 313 const BuiltinType *BT = RWType->getAsBuiltinType(); 314 llvm::APSInt Result; 315 if (!BT || BT->getKind() != BuiltinType::Int || 316 !Arg->isIntegerConstantExpr(Result, Context)) { 317 if (Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument, 318 SourceRange(Arg->getLocStart(), Arg->getLocEnd()))) { 319 res = true; 320 continue; 321 } 322 } 323 324 // FIXME: gcc issues a warning and rewrites these to 0. These 325 // seems especially odd for the third argument since the default 326 // is 3. 
327 if (i==1) { 328 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 329 res |= Diag(TheCall->getLocStart(), diag::err_argument_invalid_range, 330 "0", "1", 331 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 332 } else { 333 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 334 res |= Diag(TheCall->getLocStart(), diag::err_argument_invalid_range, 335 "0", "3", 336 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 337 } 338 } 339 340 return res; 341} 342 343/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 344/// int type). This simply type checks that type is one of the defined 345/// constants (0-3). 346bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 347 Expr *Arg = TheCall->getArg(1); 348 QualType ArgType = Arg->getType(); 349 const BuiltinType *BT = ArgType->getAsBuiltinType(); 350 llvm::APSInt Result(32); 351 if (!BT || BT->getKind() != BuiltinType::Int || 352 !Arg->isIntegerConstantExpr(Result, Context)) { 353 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument, 354 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 355 } 356 357 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 358 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range, 359 "0", "3", 360 SourceRange(Arg->getLocStart(), Arg->getLocEnd())); 361 } 362 363 return false; 364} 365 366/// CheckPrintfArguments - Check calls to printf (and similar functions) for 367/// correct use of format strings. 368/// 369/// HasVAListArg - A predicate indicating whether the printf-like 370/// function is passed an explicit va_arg argument (e.g., vprintf) 371/// 372/// format_idx - The index into Args for the format string. 373/// 374/// Improper format strings to functions in the printf family can be 375/// the source of bizarre bugs and very serious security holes. 
A 376/// good source of information is available in the following paper 377/// (which includes additional references): 378/// 379/// FormatGuard: Automatic Protection From printf Format String 380/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 381/// 382/// Functionality implemented: 383/// 384/// We can statically check the following properties for string 385/// literal format strings for non v.*printf functions (where the 386/// arguments are passed directly): 387// 388/// (1) Are the number of format conversions equal to the number of 389/// data arguments? 390/// 391/// (2) Does each format conversion correctly match the type of the 392/// corresponding data argument? (TODO) 393/// 394/// Moreover, for all printf functions we can: 395/// 396/// (3) Check for a missing format string (when not caught by type checking). 397/// 398/// (4) Check for no-operation flags; e.g. using "#" with format 399/// conversion 'c' (TODO) 400/// 401/// (5) Check the use of '%n', a major source of security holes. 402/// 403/// (6) Check for malformed format conversions that don't specify anything. 404/// 405/// (7) Check for empty format strings. e.g: printf(""); 406/// 407/// (8) Check that the format string is a wide literal. 408/// 409/// (9) Also check the arguments of functions with the __format__ attribute. 410/// (TODO). 411/// 412/// All of these checks can be done by parsing the format string. 413/// 414/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 415void 416Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 417 unsigned format_idx) { 418 Expr *Fn = TheCall->getCallee(); 419 420 // CHECK: printf-like function is called with no format string. 
421 if (format_idx >= TheCall->getNumArgs()) { 422 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string, 423 Fn->getSourceRange()); 424 return; 425 } 426 427 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 428 429 // CHECK: format string is not a string literal. 430 // 431 // Dynamically generated format strings are difficult to 432 // automatically vet at compile time. Requiring that format strings 433 // are string literals: (1) permits the checking of format strings by 434 // the compiler and thereby (2) can practically remove the source of 435 // many format string exploits. 436 437 // Format string can be either ObjC string (e.g. @"%d") or 438 // C string (e.g. "%d") 439 // ObjC string uses the same format specifiers as C string, so we can use 440 // the same format string checking logic for both ObjC and C strings. 441 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 442 StringLiteral *FExpr = NULL; 443 444 if(ObjCFExpr != NULL) 445 FExpr = ObjCFExpr->getString(); 446 else 447 FExpr = dyn_cast<StringLiteral>(OrigFormatExpr); 448 449 if (FExpr == NULL) { 450 // For vprintf* functions (i.e., HasVAListArg==true), we add a 451 // special check to see if the format string is a function parameter 452 // of the function calling the printf function. If the function 453 // has an attribute indicating it is a printf-like function, then we 454 // should suppress warnings concerning non-literals being used in a call 455 // to a vprintf function. For example: 456 // 457 // void 458 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 459 // va_list ap; 460 // va_start(ap, fmt); 461 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 462 // ... 463 // 464 // 465 // FIXME: We don't have full attribute support yet, so just check to see 466 // if the argument is a DeclRefExpr that references a parameter. We'll 467 // add proper support for checking the attribute later. 
468 if (HasVAListArg) 469 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 470 if (isa<ParmVarDecl>(DR->getDecl())) 471 return; 472 473 Diag(TheCall->getArg(format_idx)->getLocStart(), 474 diag::warn_printf_not_string_constant, 475 OrigFormatExpr->getSourceRange()); 476 return; 477 } 478 479 // CHECK: is the format string a wide literal? 480 if (FExpr->isWide()) { 481 Diag(FExpr->getLocStart(), 482 diag::warn_printf_format_string_is_wide_literal, 483 OrigFormatExpr->getSourceRange()); 484 return; 485 } 486 487 // Str - The format string. NOTE: this is NOT null-terminated! 488 const char * const Str = FExpr->getStrData(); 489 490 // CHECK: empty format string? 491 const unsigned StrLen = FExpr->getByteLength(); 492 493 if (StrLen == 0) { 494 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string, 495 OrigFormatExpr->getSourceRange()); 496 return; 497 } 498 499 // We process the format string using a binary state machine. The 500 // current state is stored in CurrentState. 501 enum { 502 state_OrdChr, 503 state_Conversion 504 } CurrentState = state_OrdChr; 505 506 // numConversions - The number of conversions seen so far. This is 507 // incremented as we traverse the format string. 508 unsigned numConversions = 0; 509 510 // numDataArgs - The number of data arguments after the format 511 // string. This can only be determined for non vprintf-like 512 // functions. For those functions, this value is 1 (the sole 513 // va_arg argument). 514 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 515 516 // Inspect the format string. 517 unsigned StrIdx = 0; 518 519 // LastConversionIdx - Index within the format string where we last saw 520 // a '%' character that starts a new format conversion. 521 unsigned LastConversionIdx = 0; 522 523 for (; StrIdx < StrLen; ++StrIdx) { 524 525 // Is the number of detected conversion conversions greater than 526 // the number of matching data arguments? If so, stop. 
527 if (!HasVAListArg && numConversions > numDataArgs) break; 528 529 // Handle "\0" 530 if (Str[StrIdx] == '\0') { 531 // The string returned by getStrData() is not null-terminated, 532 // so the presence of a null character is likely an error. 533 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 534 diag::warn_printf_format_string_contains_null_char, 535 OrigFormatExpr->getSourceRange()); 536 return; 537 } 538 539 // Ordinary characters (not processing a format conversion). 540 if (CurrentState == state_OrdChr) { 541 if (Str[StrIdx] == '%') { 542 CurrentState = state_Conversion; 543 LastConversionIdx = StrIdx; 544 } 545 continue; 546 } 547 548 // Seen '%'. Now processing a format conversion. 549 switch (Str[StrIdx]) { 550 // Handle dynamic precision or width specifier. 551 case '*': { 552 ++numConversions; 553 554 if (!HasVAListArg && numConversions > numDataArgs) { 555 SourceLocation Loc = FExpr->getLocStart(); 556 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 557 558 if (Str[StrIdx-1] == '.') 559 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg, 560 OrigFormatExpr->getSourceRange()); 561 else 562 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg, 563 OrigFormatExpr->getSourceRange()); 564 565 // Don't do any more checking. We'll just emit spurious errors. 566 return; 567 } 568 569 // Perform type checking on width/precision specifier. 
570 Expr *E = TheCall->getArg(format_idx+numConversions); 571 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 572 if (BT->getKind() == BuiltinType::Int) 573 break; 574 575 SourceLocation Loc = 576 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 577 578 if (Str[StrIdx-1] == '.') 579 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type, 580 E->getType().getAsString(), E->getSourceRange()); 581 else 582 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type, 583 E->getType().getAsString(), E->getSourceRange()); 584 585 break; 586 } 587 588 // Characters which can terminate a format conversion 589 // (e.g. "%d"). Characters that specify length modifiers or 590 // other flags are handled by the default case below. 591 // 592 // FIXME: additional checks will go into the following cases. 593 case 'i': 594 case 'd': 595 case 'o': 596 case 'u': 597 case 'x': 598 case 'X': 599 case 'D': 600 case 'O': 601 case 'U': 602 case 'e': 603 case 'E': 604 case 'f': 605 case 'F': 606 case 'g': 607 case 'G': 608 case 'a': 609 case 'A': 610 case 'c': 611 case 'C': 612 case 'S': 613 case 's': 614 case 'p': 615 ++numConversions; 616 CurrentState = state_OrdChr; 617 break; 618 619 // CHECK: Are we using "%n"? Issue a warning. 620 case 'n': { 621 ++numConversions; 622 CurrentState = state_OrdChr; 623 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 624 LastConversionIdx+1); 625 626 Diag(Loc, diag::warn_printf_write_back, OrigFormatExpr->getSourceRange()); 627 break; 628 } 629 630 // Handle "%@" 631 case '@': 632 // %@ is allowed in ObjC format strings only. 633 if(ObjCFExpr != NULL) 634 CurrentState = state_OrdChr; 635 else { 636 // Issue a warning: invalid format conversion. 
637 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 638 LastConversionIdx+1); 639 640 Diag(Loc, diag::warn_printf_invalid_conversion, 641 std::string(Str+LastConversionIdx, 642 Str+std::min(LastConversionIdx+2, StrLen)), 643 OrigFormatExpr->getSourceRange()); 644 } 645 ++numConversions; 646 break; 647 648 // Handle "%%" 649 case '%': 650 // Sanity check: Was the first "%" character the previous one? 651 // If not, we will assume that we have a malformed format 652 // conversion, and that the current "%" character is the start 653 // of a new conversion. 654 if (StrIdx - LastConversionIdx == 1) 655 CurrentState = state_OrdChr; 656 else { 657 // Issue a warning: invalid format conversion. 658 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 659 LastConversionIdx+1); 660 661 Diag(Loc, diag::warn_printf_invalid_conversion, 662 std::string(Str+LastConversionIdx, Str+StrIdx), 663 OrigFormatExpr->getSourceRange()); 664 665 // This conversion is broken. Advance to the next format 666 // conversion. 667 LastConversionIdx = StrIdx; 668 ++numConversions; 669 } 670 break; 671 672 default: 673 // This case catches all other characters: flags, widths, etc. 674 // We should eventually process those as well. 675 break; 676 } 677 } 678 679 if (CurrentState == state_Conversion) { 680 // Issue a warning: invalid format conversion. 681 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 682 LastConversionIdx+1); 683 684 Diag(Loc, diag::warn_printf_invalid_conversion, 685 std::string(Str+LastConversionIdx, 686 Str+std::min(LastConversionIdx+2, StrLen)), 687 OrigFormatExpr->getSourceRange()); 688 return; 689 } 690 691 if (!HasVAListArg) { 692 // CHECK: Does the number of format conversions exceed the number 693 // of data arguments? 
694 if (numConversions > numDataArgs) { 695 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 696 LastConversionIdx); 697 698 Diag(Loc, diag::warn_printf_insufficient_data_args, 699 OrigFormatExpr->getSourceRange()); 700 } 701 // CHECK: Does the number of data arguments exceed the number of 702 // format conversions in the format string? 703 else if (numConversions < numDataArgs) 704 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 705 diag::warn_printf_too_many_data_args, 706 OrigFormatExpr->getSourceRange()); 707 } 708} 709 710//===--- CHECK: Return Address of Stack Variable --------------------------===// 711 712static DeclRefExpr* EvalVal(Expr *E); 713static DeclRefExpr* EvalAddr(Expr* E); 714 715/// CheckReturnStackAddr - Check if a return statement returns the address 716/// of a stack variable. 717void 718Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 719 SourceLocation ReturnLoc) { 720 721 // Perform checking for returned stack addresses. 722 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 723 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 724 Diag(DR->getLocStart(), diag::warn_ret_stack_addr, 725 DR->getDecl()->getIdentifier()->getName(), 726 RetValExp->getSourceRange()); 727 728 // Skip over implicit cast expressions when checking for block expressions. 729 if (ImplicitCastExpr *IcExpr = 730 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 731 RetValExp = IcExpr->getSubExpr(); 732 733 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 734 Diag(C->getLocStart(), diag::err_ret_local_block, 735 C->getSourceRange()); 736 } 737 // Perform checking for stack values returned by reference. 
738 else if (lhsType->isReferenceType()) { 739 // Check for a reference to the stack 740 if (DeclRefExpr *DR = EvalVal(RetValExp)) 741 Diag(DR->getLocStart(), diag::warn_ret_stack_ref, 742 DR->getDecl()->getIdentifier()->getName(), 743 RetValExp->getSourceRange()); 744 } 745} 746 747/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 748/// check if the expression in a return statement evaluates to an address 749/// to a location on the stack. The recursion is used to traverse the 750/// AST of the return expression, with recursion backtracking when we 751/// encounter a subexpression that (1) clearly does not lead to the address 752/// of a stack variable or (2) is something we cannot determine leads to 753/// the address of a stack variable based on such local checking. 754/// 755/// EvalAddr processes expressions that are pointers that are used as 756/// references (and not L-values). EvalVal handles all other values. 757/// At the base case of the recursion is a check for a DeclRefExpr* in 758/// the refers to a stack variable. 759/// 760/// This implementation handles: 761/// 762/// * pointer-to-pointer casts 763/// * implicit conversions from array references to pointers 764/// * taking the address of fields 765/// * arbitrary interplay between "&" and "*" operators 766/// * pointer arithmetic from an address of a stack variable 767/// * taking the address of an array element where the array is on the stack 768static DeclRefExpr* EvalAddr(Expr *E) { 769 // We should only be called for evaluating pointer expressions. 770 assert((E->getType()->isPointerType() || 771 E->getType()->isBlockPointerType() || 772 E->getType()->isObjCQualifiedIdType()) && 773 "EvalAddr only works on pointers"); 774 775 // Our "symbolic interpreter" is just a dispatch off the currently 776 // viewed AST node. We then recursively traverse the AST by calling 777 // EvalAddr and EvalVal appropriately. 
778 switch (E->getStmtClass()) { 779 case Stmt::ParenExprClass: 780 // Ignore parentheses. 781 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 782 783 case Stmt::UnaryOperatorClass: { 784 // The only unary operator that make sense to handle here 785 // is AddrOf. All others don't make sense as pointers. 786 UnaryOperator *U = cast<UnaryOperator>(E); 787 788 if (U->getOpcode() == UnaryOperator::AddrOf) 789 return EvalVal(U->getSubExpr()); 790 else 791 return NULL; 792 } 793 794 case Stmt::BinaryOperatorClass: { 795 // Handle pointer arithmetic. All other binary operators are not valid 796 // in this context. 797 BinaryOperator *B = cast<BinaryOperator>(E); 798 BinaryOperator::Opcode op = B->getOpcode(); 799 800 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 801 return NULL; 802 803 Expr *Base = B->getLHS(); 804 805 // Determine which argument is the real pointer base. It could be 806 // the RHS argument instead of the LHS. 807 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 808 809 assert (Base->getType()->isPointerType()); 810 return EvalAddr(Base); 811 } 812 813 // For conditional operators we need to see if either the LHS or RHS are 814 // valid DeclRefExpr*s. If one of them is valid, we return it. 815 case Stmt::ConditionalOperatorClass: { 816 ConditionalOperator *C = cast<ConditionalOperator>(E); 817 818 // Handle the GNU extension for missing LHS. 819 if (Expr *lhsExpr = C->getLHS()) 820 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 821 return LHS; 822 823 return EvalAddr(C->getRHS()); 824 } 825 826 // For casts, we need to handle conversions from arrays to 827 // pointer values, and pointer-to-pointer conversions. 
828 case Stmt::ImplicitCastExprClass: 829 case Stmt::CStyleCastExprClass: 830 case Stmt::CXXFunctionalCastExprClass: { 831 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 832 QualType T = SubExpr->getType(); 833 834 if (SubExpr->getType()->isPointerType() || 835 SubExpr->getType()->isBlockPointerType() || 836 SubExpr->getType()->isObjCQualifiedIdType()) 837 return EvalAddr(SubExpr); 838 else if (T->isArrayType()) 839 return EvalVal(SubExpr); 840 else 841 return 0; 842 } 843 844 // C++ casts. For dynamic casts, static casts, and const casts, we 845 // are always converting from a pointer-to-pointer, so we just blow 846 // through the cast. In the case the dynamic cast doesn't fail (and 847 // return NULL), we take the conservative route and report cases 848 // where we return the address of a stack variable. For Reinterpre 849 // FIXME: The comment about is wrong; we're not always converting 850 // from pointer to pointer. I'm guessing that this code should also 851 // handle references to objects. 852 case Stmt::CXXStaticCastExprClass: 853 case Stmt::CXXDynamicCastExprClass: 854 case Stmt::CXXConstCastExprClass: 855 case Stmt::CXXReinterpretCastExprClass: { 856 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 857 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 858 return EvalAddr(S); 859 else 860 return NULL; 861 } 862 863 // Everything else: we simply don't reason about them. 864 default: 865 return NULL; 866 } 867} 868 869 870/// EvalVal - This function is complements EvalAddr in the mutual recursion. 871/// See the comments for EvalAddr for more details. 872static DeclRefExpr* EvalVal(Expr *E) { 873 874 // We should only be called for evaluating non-pointer expressions, or 875 // expressions with a pointer type that are not used as references but instead 876 // are l-values (e.g., DeclRefExpr with a pointer type). 877 878 // Our "symbolic interpreter" is just a dispatch off the currently 879 // viewed AST node. 
We then recursively traverse the AST by calling 880 // EvalAddr and EvalVal appropriately. 881 switch (E->getStmtClass()) { 882 case Stmt::DeclRefExprClass: { 883 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 884 // at code that refers to a variable's name. We check if it has local 885 // storage within the function, and if so, return the expression. 886 DeclRefExpr *DR = cast<DeclRefExpr>(E); 887 888 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 889 if(V->hasLocalStorage()) return DR; 890 891 return NULL; 892 } 893 894 case Stmt::ParenExprClass: 895 // Ignore parentheses. 896 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 897 898 case Stmt::UnaryOperatorClass: { 899 // The only unary operator that make sense to handle here 900 // is Deref. All others don't resolve to a "name." This includes 901 // handling all sorts of rvalues passed to a unary operator. 902 UnaryOperator *U = cast<UnaryOperator>(E); 903 904 if (U->getOpcode() == UnaryOperator::Deref) 905 return EvalAddr(U->getSubExpr()); 906 907 return NULL; 908 } 909 910 case Stmt::ArraySubscriptExprClass: { 911 // Array subscripts are potential references to data on the stack. We 912 // retrieve the DeclRefExpr* for the array variable if it indeed 913 // has local storage. 914 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 915 } 916 917 case Stmt::ConditionalOperatorClass: { 918 // For conditional operators we need to see if either the LHS or RHS are 919 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 920 ConditionalOperator *C = cast<ConditionalOperator>(E); 921 922 // Handle the GNU extension for missing LHS. 923 if (Expr *lhsExpr = C->getLHS()) 924 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 925 return LHS; 926 927 return EvalVal(C->getRHS()); 928 } 929 930 // Accesses to members are potential references to data on the stack. 931 case Stmt::MemberExprClass: { 932 MemberExpr *M = cast<MemberExpr>(E); 933 934 // Check for indirect access. 
We only want direct field accesses. 935 if (!M->isArrow()) 936 return EvalVal(M->getBase()); 937 else 938 return NULL; 939 } 940 941 // Everything else: we simply don't reason about them. 942 default: 943 return NULL; 944 } 945} 946 947//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 948 949/// Check for comparisons of floating point operands using != and ==. 950/// Issue a warning if these are no self-comparisons, as they are not likely 951/// to do what the programmer intended. 952void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 953 bool EmitWarning = true; 954 955 Expr* LeftExprSansParen = lex->IgnoreParens(); 956 Expr* RightExprSansParen = rex->IgnoreParens(); 957 958 // Special case: check for x == x (which is OK). 959 // Do not emit warnings for such cases. 960 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 961 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 962 if (DRL->getDecl() == DRR->getDecl()) 963 EmitWarning = false; 964 965 966 // Special case: check for comparisons against literals that can be exactly 967 // represented by APFloat. In such cases, do not emit a warning. This 968 // is a heuristic: often comparison against such literals are used to 969 // detect if a value in a variable has not changed. This clearly can 970 // lead to false negatives. 971 if (EmitWarning) { 972 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 973 if (FLL->isExact()) 974 EmitWarning = false; 975 } 976 else 977 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 978 if (FLR->isExact()) 979 EmitWarning = false; 980 } 981 } 982 983 // Check for comparisons with builtin types. 
984 if (EmitWarning) 985 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 986 if (isCallBuiltin(CL)) 987 EmitWarning = false; 988 989 if (EmitWarning) 990 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 991 if (isCallBuiltin(CR)) 992 EmitWarning = false; 993 994 // Emit the diagnostic. 995 if (EmitWarning) 996 Diag(loc, diag::warn_floatingpoint_eq, 997 lex->getSourceRange(),rex->getSourceRange()); 998} 999