SemaChecking.cpp revision 370ab3f1373841d70582feac9e35c3c6b3489f63
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21#include "SemaUtil.h" 22using namespace clang; 23 24/// CheckFunctionCall - Check a direct function call for various correctness 25/// and safety properties not strictly enforced by the C type system. 26Action::OwningExprResult 27Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 28 OwningExprResult TheCallResult(Owned(TheCall)); 29 // Get the IdentifierInfo* for the called function. 30 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 31 32 // None of the checks below are needed for functions that don't have 33 // simple names (e.g., C++ conversion functions). 34 if (!FnInfo) 35 return move(TheCallResult); 36 37 switch (FDecl->getBuiltinID()) { 38 case Builtin::BI__builtin___CFStringMakeConstantString: 39 assert(TheCall->getNumArgs() == 1 && 40 "Wrong # arguments to builtin CFStringMakeConstantString"); 41 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 42 return ExprError(); 43 return move(TheCallResult); 44 case Builtin::BI__builtin_stdarg_start: 45 case Builtin::BI__builtin_va_start: 46 if (SemaBuiltinVAStart(TheCall)) 47 return ExprError(); 48 return move(TheCallResult); 49 case Builtin::BI__builtin_isgreater: 50 case Builtin::BI__builtin_isgreaterequal: 51 case Builtin::BI__builtin_isless: 52 case Builtin::BI__builtin_islessequal: 53 case Builtin::BI__builtin_islessgreater: 54 case Builtin::BI__builtin_isunordered: 55 if (SemaBuiltinUnorderedCompare(TheCall)) 56 return ExprError(); 57 return move(TheCallResult); 58 case Builtin::BI__builtin_return_address: 59 case Builtin::BI__builtin_frame_address: 60 if (SemaBuiltinStackAddress(TheCall)) 61 return ExprError(); 62 return move(TheCallResult); 63 case Builtin::BI__builtin_shufflevector: 64 return SemaBuiltinShuffleVector(TheCall); 65 // TheCall will be freed by the smart pointer here, but that's fine, since 66 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 67 case Builtin::BI__builtin_prefetch: 68 if (SemaBuiltinPrefetch(TheCall)) 69 return ExprError(); 70 return move(TheCallResult); 71 case Builtin::BI__builtin_object_size: 72 if (SemaBuiltinObjectSize(TheCall)) 73 return ExprError(); 74 } 75 76 // FIXME: This mechanism should be abstracted to be less fragile and 77 // more efficient. For example, just map function ids to custom 78 // handlers. 79 80 // Printf checking. 81 unsigned format_idx = 0; 82 bool HasVAListArg = false; 83 if (FDecl->getBuiltinID() && 84 Context.BuiltinInfo.isPrintfLike(FDecl->getBuiltinID(), format_idx, 85 HasVAListArg)) { 86 // Found a printf builtin. 87 } else if (FnInfo == KnownFunctionIDs[id_NSLog]) { 88 format_idx = 0; 89 HasVAListArg = false; 90 } else if (FnInfo == KnownFunctionIDs[id_asprintf]) { 91 format_idx = 1; 92 HasVAListArg = false; 93 } else if (FnInfo == KnownFunctionIDs[id_vasprintf]) { 94 format_idx = 1; 95 HasVAListArg = true; 96 } else { 97 return move(TheCallResult); 98 } 99 100 CheckPrintfArguments(TheCall, HasVAListArg, format_idx); 101 102 return move(TheCallResult); 103} 104 105/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 106/// CFString constructor is correct 107bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 108 Arg = Arg->IgnoreParenCasts(); 109 110 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 111 112 if (!Literal || Literal->isWide()) { 113 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 114 << Arg->getSourceRange(); 115 return true; 116 } 117 118 const char *Data = Literal->getStrData(); 119 unsigned Length = Literal->getByteLength(); 120 121 for (unsigned i = 0; i < Length; ++i) { 122 if (!isascii(Data[i])) { 123 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 124 diag::warn_cfstring_literal_contains_non_ascii_character) 125 << Arg->getSourceRange(); 126 break; 127 } 128 129 if (!Data[i]) { 130 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 131 diag::warn_cfstring_literal_contains_nul_character) 132 << Arg->getSourceRange(); 133 break; 134 } 135 } 136 137 return false; 138} 139 140/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 141/// Emit an error and return true on failure, return false on success. 142bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 143 Expr *Fn = TheCall->getCallee(); 144 if (TheCall->getNumArgs() > 2) { 145 Diag(TheCall->getArg(2)->getLocStart(), 146 diag::err_typecheck_call_too_many_args) 147 << 0 /*function call*/ << Fn->getSourceRange() 148 << SourceRange(TheCall->getArg(2)->getLocStart(), 149 (*(TheCall->arg_end()-1))->getLocEnd()); 150 return true; 151 } 152 153 if (TheCall->getNumArgs() < 2) { 154 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 155 << 0 /*function call*/; 156 } 157 158 // Determine whether the current function is variadic or not. 159 bool isVariadic; 160 if (getCurFunctionDecl()) { 161 if (FunctionTypeProto* FTP = 162 dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType())) 163 isVariadic = FTP->isVariadic(); 164 else 165 isVariadic = false; 166 } else { 167 isVariadic = getCurMethodDecl()->isVariadic(); 168 } 169 170 if (!isVariadic) { 171 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 172 return true; 173 } 174 175 // Verify that the second argument to the builtin is the last argument of the 176 // current function or method. 177 bool SecondArgIsLastNamedArgument = false; 178 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 179 180 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 181 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 182 // FIXME: This isn't correct for methods (results in bogus warning). 183 // Get the last formal in the current function. 184 const ParmVarDecl *LastArg; 185 if (FunctionDecl *FD = getCurFunctionDecl()) 186 LastArg = *(FD->param_end()-1); 187 else 188 LastArg = *(getCurMethodDecl()->param_end()-1); 189 SecondArgIsLastNamedArgument = PV == LastArg; 190 } 191 } 192 193 if (!SecondArgIsLastNamedArgument) 194 Diag(TheCall->getArg(1)->getLocStart(), 195 diag::warn_second_parameter_of_va_start_not_last_named_argument); 196 return false; 197} 198 199/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 200/// friends. This is declared to take (...), so we have to check everything. 201bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 202 if (TheCall->getNumArgs() < 2) 203 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 204 << 0 /*function call*/; 205 if (TheCall->getNumArgs() > 2) 206 return Diag(TheCall->getArg(2)->getLocStart(), 207 diag::err_typecheck_call_too_many_args) 208 << 0 /*function call*/ 209 << SourceRange(TheCall->getArg(2)->getLocStart(), 210 (*(TheCall->arg_end()-1))->getLocEnd()); 211 212 Expr *OrigArg0 = TheCall->getArg(0); 213 Expr *OrigArg1 = TheCall->getArg(1); 214 215 // Do standard promotions between the two arguments, returning their common 216 // type. 217 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 218 219 // If the common type isn't a real floating type, then the arguments were 220 // invalid for this operation. 221 if (!Res->isRealFloatingType()) 222 return Diag(OrigArg0->getLocStart(), 223 diag::err_typecheck_call_invalid_ordered_compare) 224 << OrigArg0->getType() << OrigArg1->getType() 225 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 226 227 return false; 228} 229 230bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 231 // The signature for these builtins is exact; the only thing we need 232 // to check is that the argument is a constant. 233 SourceLocation Loc; 234 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 235 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 236 237 return false; 238} 239 240/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 241// This is declared to take (...), so we have to check everything. 242Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 243 if (TheCall->getNumArgs() < 3) 244 return ExprError(Diag(TheCall->getLocEnd(), 245 diag::err_typecheck_call_too_few_args) 246 << 0 /*function call*/ << TheCall->getSourceRange()); 247 248 QualType FAType = TheCall->getArg(0)->getType(); 249 QualType SAType = TheCall->getArg(1)->getType(); 250 251 if (!FAType->isVectorType() || !SAType->isVectorType()) { 252 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 253 << SourceRange(TheCall->getArg(0)->getLocStart(), 254 TheCall->getArg(1)->getLocEnd()); 255 return ExprError(); 256 } 257 258 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 259 Context.getCanonicalType(SAType).getUnqualifiedType()) { 260 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 261 << SourceRange(TheCall->getArg(0)->getLocStart(), 262 TheCall->getArg(1)->getLocEnd()); 263 return ExprError(); 264 } 265 266 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 267 if (TheCall->getNumArgs() != numElements+2) { 268 if (TheCall->getNumArgs() < numElements+2) 269 return ExprError(Diag(TheCall->getLocEnd(), 270 diag::err_typecheck_call_too_few_args) 271 << 0 /*function call*/ << TheCall->getSourceRange()); 272 return ExprError(Diag(TheCall->getLocEnd(), 273 diag::err_typecheck_call_too_many_args) 274 << 0 /*function call*/ << TheCall->getSourceRange()); 275 } 276 277 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 278 llvm::APSInt Result(32); 279 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 280 return ExprError(Diag(TheCall->getLocStart(), 281 diag::err_shufflevector_nonconstant_argument) 282 << TheCall->getArg(i)->getSourceRange()); 283 284 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 285 return ExprError(Diag(TheCall->getLocStart(), 286 diag::err_shufflevector_argument_too_large) 287 << TheCall->getArg(i)->getSourceRange()); 288 } 289 290 llvm::SmallVector<Expr*, 32> exprs; 291 292 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 293 exprs.push_back(TheCall->getArg(i)); 294 TheCall->setArg(i, 0); 295 } 296 297 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2, 298 FAType, 299 TheCall->getCallee()->getLocStart(), 300 TheCall->getRParenLoc())); 301} 302 303/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 304// This is declared to take (const void*, ...) and can take two 305// optional constant int args. 306bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 307 unsigned NumArgs = TheCall->getNumArgs(); 308 309 if (NumArgs > 3) 310 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 311 << 0 /*function call*/ << TheCall->getSourceRange(); 312 313 // Argument 0 is checked for us and the remaining arguments must be 314 // constant integers. 315 for (unsigned i = 1; i != NumArgs; ++i) { 316 Expr *Arg = TheCall->getArg(i); 317 QualType RWType = Arg->getType(); 318 319 const BuiltinType *BT = RWType->getAsBuiltinType(); 320 llvm::APSInt Result; 321 if (!BT || BT->getKind() != BuiltinType::Int || 322 !Arg->isIntegerConstantExpr(Result, Context)) 323 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 324 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 325 326 // FIXME: gcc issues a warning and rewrites these to 0. These 327 // seems especially odd for the third argument since the default 328 // is 3. 329 if (i == 1) { 330 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 331 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 332 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 333 } else { 334 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 335 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 336 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 337 } 338 } 339 340 return false; 341} 342 343/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 344/// int type). This simply type checks that type is one of the defined 345/// constants (0-3). 346bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 347 Expr *Arg = TheCall->getArg(1); 348 QualType ArgType = Arg->getType(); 349 const BuiltinType *BT = ArgType->getAsBuiltinType(); 350 llvm::APSInt Result(32); 351 if (!BT || BT->getKind() != BuiltinType::Int || 352 !Arg->isIntegerConstantExpr(Result, Context)) { 353 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 354 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 355 } 356 357 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 358 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 359 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 360 } 361 362 return false; 363} 364 365// Handle i > 1 ? "x" : "y", recursivelly 366bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg, 367 unsigned format_idx) { 368 369 switch (E->getStmtClass()) { 370 case Stmt::ConditionalOperatorClass: { 371 ConditionalOperator *C = cast<ConditionalOperator>(E); 372 return SemaCheckStringLiteral(C->getLHS(), TheCall, 373 HasVAListArg, format_idx) 374 && SemaCheckStringLiteral(C->getRHS(), TheCall, 375 HasVAListArg, format_idx); 376 } 377 378 case Stmt::ImplicitCastExprClass: { 379 ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E); 380 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 381 format_idx); 382 } 383 384 case Stmt::ParenExprClass: { 385 ParenExpr *Expr = dyn_cast<ParenExpr>(E); 386 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 387 format_idx); 388 } 389 390 default: { 391 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E); 392 StringLiteral *StrE = NULL; 393 394 if (ObjCFExpr) 395 StrE = ObjCFExpr->getString(); 396 else 397 StrE = dyn_cast<StringLiteral>(E); 398 399 if (StrE) { 400 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx); 401 return true; 402 } 403 404 return false; 405 } 406 } 407} 408 409 410/// CheckPrintfArguments - Check calls to printf (and similar functions) for 411/// correct use of format strings. 412/// 413/// HasVAListArg - A predicate indicating whether the printf-like 414/// function is passed an explicit va_arg argument (e.g., vprintf) 415/// 416/// format_idx - The index into Args for the format string. 417/// 418/// Improper format strings to functions in the printf family can be 419/// the source of bizarre bugs and very serious security holes. A 420/// good source of information is available in the following paper 421/// (which includes additional references): 422/// 423/// FormatGuard: Automatic Protection From printf Format String 424/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 425/// 426/// Functionality implemented: 427/// 428/// We can statically check the following properties for string 429/// literal format strings for non v.*printf functions (where the 430/// arguments are passed directly): 431// 432/// (1) Are the number of format conversions equal to the number of 433/// data arguments? 434/// 435/// (2) Does each format conversion correctly match the type of the 436/// corresponding data argument? (TODO) 437/// 438/// Moreover, for all printf functions we can: 439/// 440/// (3) Check for a missing format string (when not caught by type checking). 441/// 442/// (4) Check for no-operation flags; e.g. using "#" with format 443/// conversion 'c' (TODO) 444/// 445/// (5) Check the use of '%n', a major source of security holes. 446/// 447/// (6) Check for malformed format conversions that don't specify anything. 448/// 449/// (7) Check for empty format strings. e.g: printf(""); 450/// 451/// (8) Check that the format string is a wide literal. 452/// 453/// (9) Also check the arguments of functions with the __format__ attribute. 454/// (TODO). 455/// 456/// All of these checks can be done by parsing the format string. 457/// 458/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 459void 460Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 461 unsigned format_idx) { 462 Expr *Fn = TheCall->getCallee(); 463 464 // CHECK: printf-like function is called with no format string. 465 if (format_idx >= TheCall->getNumArgs()) { 466 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 467 << Fn->getSourceRange(); 468 return; 469 } 470 471 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 472 473 // CHECK: format string is not a string literal. 474 // 475 // Dynamically generated format strings are difficult to 476 // automatically vet at compile time. Requiring that format strings 477 // are string literals: (1) permits the checking of format strings by 478 // the compiler and thereby (2) can practically remove the source of 479 // many format string exploits. 480 481 // Format string can be either ObjC string (e.g. @"%d") or 482 // C string (e.g. "%d") 483 // ObjC string uses the same format specifiers as C string, so we can use 484 // the same format string checking logic for both ObjC and C strings. 485 bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx); 486 487 if (!isFExpr) { 488 // For vprintf* functions (i.e., HasVAListArg==true), we add a 489 // special check to see if the format string is a function parameter 490 // of the function calling the printf function. If the function 491 // has an attribute indicating it is a printf-like function, then we 492 // should suppress warnings concerning non-literals being used in a call 493 // to a vprintf function. For example: 494 // 495 // void 496 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 497 // va_list ap; 498 // va_start(ap, fmt); 499 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 500 // ... 501 // 502 // 503 // FIXME: We don't have full attribute support yet, so just check to see 504 // if the argument is a DeclRefExpr that references a parameter. We'll 505 // add proper support for checking the attribute later. 506 if (HasVAListArg) 507 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 508 if (isa<ParmVarDecl>(DR->getDecl())) 509 return; 510 511 Diag(TheCall->getArg(format_idx)->getLocStart(), 512 diag::warn_printf_not_string_constant) 513 << OrigFormatExpr->getSourceRange(); 514 return; 515 } 516} 517 518void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, 519 CallExpr *TheCall, bool HasVAListArg, unsigned format_idx) { 520 521 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 522 // CHECK: is the format string a wide literal? 523 if (FExpr->isWide()) { 524 Diag(FExpr->getLocStart(), 525 diag::warn_printf_format_string_is_wide_literal) 526 << OrigFormatExpr->getSourceRange(); 527 return; 528 } 529 530 // Str - The format string. NOTE: this is NOT null-terminated! 531 const char * const Str = FExpr->getStrData(); 532 533 // CHECK: empty format string? 534 const unsigned StrLen = FExpr->getByteLength(); 535 536 if (StrLen == 0) { 537 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 538 << OrigFormatExpr->getSourceRange(); 539 return; 540 } 541 542 // We process the format string using a binary state machine. The 543 // current state is stored in CurrentState. 544 enum { 545 state_OrdChr, 546 state_Conversion 547 } CurrentState = state_OrdChr; 548 549 // numConversions - The number of conversions seen so far. This is 550 // incremented as we traverse the format string. 551 unsigned numConversions = 0; 552 553 // numDataArgs - The number of data arguments after the format 554 // string. This can only be determined for non vprintf-like 555 // functions. For those functions, this value is 1 (the sole 556 // va_arg argument). 557 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 558 559 // Inspect the format string. 560 unsigned StrIdx = 0; 561 562 // LastConversionIdx - Index within the format string where we last saw 563 // a '%' character that starts a new format conversion. 564 unsigned LastConversionIdx = 0; 565 566 for (; StrIdx < StrLen; ++StrIdx) { 567 568 // Is the number of detected conversion conversions greater than 569 // the number of matching data arguments? If so, stop. 570 if (!HasVAListArg && numConversions > numDataArgs) break; 571 572 // Handle "\0" 573 if (Str[StrIdx] == '\0') { 574 // The string returned by getStrData() is not null-terminated, 575 // so the presence of a null character is likely an error. 576 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 577 diag::warn_printf_format_string_contains_null_char) 578 << OrigFormatExpr->getSourceRange(); 579 return; 580 } 581 582 // Ordinary characters (not processing a format conversion). 583 if (CurrentState == state_OrdChr) { 584 if (Str[StrIdx] == '%') { 585 CurrentState = state_Conversion; 586 LastConversionIdx = StrIdx; 587 } 588 continue; 589 } 590 591 // Seen '%'. Now processing a format conversion. 592 switch (Str[StrIdx]) { 593 // Handle dynamic precision or width specifier. 594 case '*': { 595 ++numConversions; 596 597 if (!HasVAListArg && numConversions > numDataArgs) { 598 SourceLocation Loc = FExpr->getLocStart(); 599 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 600 601 if (Str[StrIdx-1] == '.') 602 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 603 << OrigFormatExpr->getSourceRange(); 604 else 605 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 606 << OrigFormatExpr->getSourceRange(); 607 608 // Don't do any more checking. We'll just emit spurious errors. 609 return; 610 } 611 612 // Perform type checking on width/precision specifier. 613 Expr *E = TheCall->getArg(format_idx+numConversions); 614 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 615 if (BT->getKind() == BuiltinType::Int) 616 break; 617 618 SourceLocation Loc = 619 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 620 621 if (Str[StrIdx-1] == '.') 622 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 623 << E->getType() << E->getSourceRange(); 624 else 625 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 626 << E->getType() << E->getSourceRange(); 627 628 break; 629 } 630 631 // Characters which can terminate a format conversion 632 // (e.g. "%d"). Characters that specify length modifiers or 633 // other flags are handled by the default case below. 634 // 635 // FIXME: additional checks will go into the following cases. 636 case 'i': 637 case 'd': 638 case 'o': 639 case 'u': 640 case 'x': 641 case 'X': 642 case 'D': 643 case 'O': 644 case 'U': 645 case 'e': 646 case 'E': 647 case 'f': 648 case 'F': 649 case 'g': 650 case 'G': 651 case 'a': 652 case 'A': 653 case 'c': 654 case 'C': 655 case 'S': 656 case 's': 657 case 'p': 658 ++numConversions; 659 CurrentState = state_OrdChr; 660 break; 661 662 // CHECK: Are we using "%n"? Issue a warning. 663 case 'n': { 664 ++numConversions; 665 CurrentState = state_OrdChr; 666 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 667 LastConversionIdx+1); 668 669 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 670 break; 671 } 672 673 // Handle "%@" 674 case '@': 675 // %@ is allowed in ObjC format strings only. 676 if(ObjCFExpr != NULL) 677 CurrentState = state_OrdChr; 678 else { 679 // Issue a warning: invalid format conversion. 680 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 681 LastConversionIdx+1); 682 683 Diag(Loc, diag::warn_printf_invalid_conversion) 684 << std::string(Str+LastConversionIdx, 685 Str+std::min(LastConversionIdx+2, StrLen)) 686 << OrigFormatExpr->getSourceRange(); 687 } 688 ++numConversions; 689 break; 690 691 // Handle "%%" 692 case '%': 693 // Sanity check: Was the first "%" character the previous one? 694 // If not, we will assume that we have a malformed format 695 // conversion, and that the current "%" character is the start 696 // of a new conversion. 697 if (StrIdx - LastConversionIdx == 1) 698 CurrentState = state_OrdChr; 699 else { 700 // Issue a warning: invalid format conversion. 701 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 702 LastConversionIdx+1); 703 704 Diag(Loc, diag::warn_printf_invalid_conversion) 705 << std::string(Str+LastConversionIdx, Str+StrIdx) 706 << OrigFormatExpr->getSourceRange(); 707 708 // This conversion is broken. Advance to the next format 709 // conversion. 710 LastConversionIdx = StrIdx; 711 ++numConversions; 712 } 713 break; 714 715 default: 716 // This case catches all other characters: flags, widths, etc. 717 // We should eventually process those as well. 718 break; 719 } 720 } 721 722 if (CurrentState == state_Conversion) { 723 // Issue a warning: invalid format conversion. 724 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 725 LastConversionIdx+1); 726 727 Diag(Loc, diag::warn_printf_invalid_conversion) 728 << std::string(Str+LastConversionIdx, 729 Str+std::min(LastConversionIdx+2, StrLen)) 730 << OrigFormatExpr->getSourceRange(); 731 return; 732 } 733 734 if (!HasVAListArg) { 735 // CHECK: Does the number of format conversions exceed the number 736 // of data arguments? 737 if (numConversions > numDataArgs) { 738 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 739 LastConversionIdx); 740 741 Diag(Loc, diag::warn_printf_insufficient_data_args) 742 << OrigFormatExpr->getSourceRange(); 743 } 744 // CHECK: Does the number of data arguments exceed the number of 745 // format conversions in the format string? 746 else if (numConversions < numDataArgs) 747 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 748 diag::warn_printf_too_many_data_args) 749 << OrigFormatExpr->getSourceRange(); 750 } 751} 752 753//===--- CHECK: Return Address of Stack Variable --------------------------===// 754 755static DeclRefExpr* EvalVal(Expr *E); 756static DeclRefExpr* EvalAddr(Expr* E); 757 758/// CheckReturnStackAddr - Check if a return statement returns the address 759/// of a stack variable. 760void 761Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 762 SourceLocation ReturnLoc) { 763 764 // Perform checking for returned stack addresses. 765 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 766 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 767 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 768 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 769 770 // Skip over implicit cast expressions when checking for block expressions. 771 if (ImplicitCastExpr *IcExpr = 772 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 773 RetValExp = IcExpr->getSubExpr(); 774 775 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 776 Diag(C->getLocStart(), diag::err_ret_local_block) 777 << C->getSourceRange(); 778 } 779 // Perform checking for stack values returned by reference. 780 else if (lhsType->isReferenceType()) { 781 // Check for a reference to the stack 782 if (DeclRefExpr *DR = EvalVal(RetValExp)) 783 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 784 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 785 } 786} 787 788/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 789/// check if the expression in a return statement evaluates to an address 790/// to a location on the stack. The recursion is used to traverse the 791/// AST of the return expression, with recursion backtracking when we 792/// encounter a subexpression that (1) clearly does not lead to the address 793/// of a stack variable or (2) is something we cannot determine leads to 794/// the address of a stack variable based on such local checking. 795/// 796/// EvalAddr processes expressions that are pointers that are used as 797/// references (and not L-values). EvalVal handles all other values. 798/// At the base case of the recursion is a check for a DeclRefExpr* in 799/// the refers to a stack variable. 800/// 801/// This implementation handles: 802/// 803/// * pointer-to-pointer casts 804/// * implicit conversions from array references to pointers 805/// * taking the address of fields 806/// * arbitrary interplay between "&" and "*" operators 807/// * pointer arithmetic from an address of a stack variable 808/// * taking the address of an array element where the array is on the stack 809static DeclRefExpr* EvalAddr(Expr *E) { 810 // We should only be called for evaluating pointer expressions. 811 assert((E->getType()->isPointerType() || 812 E->getType()->isBlockPointerType() || 813 E->getType()->isObjCQualifiedIdType()) && 814 "EvalAddr only works on pointers"); 815 816 // Our "symbolic interpreter" is just a dispatch off the currently 817 // viewed AST node. We then recursively traverse the AST by calling 818 // EvalAddr and EvalVal appropriately. 819 switch (E->getStmtClass()) { 820 case Stmt::ParenExprClass: 821 // Ignore parentheses. 822 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 823 824 case Stmt::UnaryOperatorClass: { 825 // The only unary operator that make sense to handle here 826 // is AddrOf. All others don't make sense as pointers. 827 UnaryOperator *U = cast<UnaryOperator>(E); 828 829 if (U->getOpcode() == UnaryOperator::AddrOf) 830 return EvalVal(U->getSubExpr()); 831 else 832 return NULL; 833 } 834 835 case Stmt::BinaryOperatorClass: { 836 // Handle pointer arithmetic. All other binary operators are not valid 837 // in this context. 838 BinaryOperator *B = cast<BinaryOperator>(E); 839 BinaryOperator::Opcode op = B->getOpcode(); 840 841 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 842 return NULL; 843 844 Expr *Base = B->getLHS(); 845 846 // Determine which argument is the real pointer base. It could be 847 // the RHS argument instead of the LHS. 848 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 849 850 assert (Base->getType()->isPointerType()); 851 return EvalAddr(Base); 852 } 853 854 // For conditional operators we need to see if either the LHS or RHS are 855 // valid DeclRefExpr*s. If one of them is valid, we return it. 856 case Stmt::ConditionalOperatorClass: { 857 ConditionalOperator *C = cast<ConditionalOperator>(E); 858 859 // Handle the GNU extension for missing LHS. 860 if (Expr *lhsExpr = C->getLHS()) 861 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 862 return LHS; 863 864 return EvalAddr(C->getRHS()); 865 } 866 867 // For casts, we need to handle conversions from arrays to 868 // pointer values, and pointer-to-pointer conversions. 869 case Stmt::ImplicitCastExprClass: 870 case Stmt::CStyleCastExprClass: 871 case Stmt::CXXFunctionalCastExprClass: { 872 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 873 QualType T = SubExpr->getType(); 874 875 if (SubExpr->getType()->isPointerType() || 876 SubExpr->getType()->isBlockPointerType() || 877 SubExpr->getType()->isObjCQualifiedIdType()) 878 return EvalAddr(SubExpr); 879 else if (T->isArrayType()) 880 return EvalVal(SubExpr); 881 else 882 return 0; 883 } 884 885 // C++ casts. For dynamic casts, static casts, and const casts, we 886 // are always converting from a pointer-to-pointer, so we just blow 887 // through the cast. In the case the dynamic cast doesn't fail (and 888 // return NULL), we take the conservative route and report cases 889 // where we return the address of a stack variable. For Reinterpre 890 // FIXME: The comment about is wrong; we're not always converting 891 // from pointer to pointer. I'm guessing that this code should also 892 // handle references to objects. 893 case Stmt::CXXStaticCastExprClass: 894 case Stmt::CXXDynamicCastExprClass: 895 case Stmt::CXXConstCastExprClass: 896 case Stmt::CXXReinterpretCastExprClass: { 897 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 898 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 899 return EvalAddr(S); 900 else 901 return NULL; 902 } 903 904 // Everything else: we simply don't reason about them. 905 default: 906 return NULL; 907 } 908} 909 910 911/// EvalVal - This function is complements EvalAddr in the mutual recursion. 912/// See the comments for EvalAddr for more details. 913static DeclRefExpr* EvalVal(Expr *E) { 914 915 // We should only be called for evaluating non-pointer expressions, or 916 // expressions with a pointer type that are not used as references but instead 917 // are l-values (e.g., DeclRefExpr with a pointer type). 918 919 // Our "symbolic interpreter" is just a dispatch off the currently 920 // viewed AST node. We then recursively traverse the AST by calling 921 // EvalAddr and EvalVal appropriately. 922 switch (E->getStmtClass()) { 923 case Stmt::DeclRefExprClass: 924 case Stmt::QualifiedDeclRefExprClass: { 925 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 926 // at code that refers to a variable's name. We check if it has local 927 // storage within the function, and if so, return the expression. 928 DeclRefExpr *DR = cast<DeclRefExpr>(E); 929 930 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 931 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 932 933 return NULL; 934 } 935 936 case Stmt::ParenExprClass: 937 // Ignore parentheses. 938 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 939 940 case Stmt::UnaryOperatorClass: { 941 // The only unary operator that make sense to handle here 942 // is Deref. All others don't resolve to a "name." This includes 943 // handling all sorts of rvalues passed to a unary operator. 944 UnaryOperator *U = cast<UnaryOperator>(E); 945 946 if (U->getOpcode() == UnaryOperator::Deref) 947 return EvalAddr(U->getSubExpr()); 948 949 return NULL; 950 } 951 952 case Stmt::ArraySubscriptExprClass: { 953 // Array subscripts are potential references to data on the stack. We 954 // retrieve the DeclRefExpr* for the array variable if it indeed 955 // has local storage. 956 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 957 } 958 959 case Stmt::ConditionalOperatorClass: { 960 // For conditional operators we need to see if either the LHS or RHS are 961 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 962 ConditionalOperator *C = cast<ConditionalOperator>(E); 963 964 // Handle the GNU extension for missing LHS. 965 if (Expr *lhsExpr = C->getLHS()) 966 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 967 return LHS; 968 969 return EvalVal(C->getRHS()); 970 } 971 972 // Accesses to members are potential references to data on the stack. 973 case Stmt::MemberExprClass: { 974 MemberExpr *M = cast<MemberExpr>(E); 975 976 // Check for indirect access. We only want direct field accesses. 977 if (!M->isArrow()) 978 return EvalVal(M->getBase()); 979 else 980 return NULL; 981 } 982 983 // Everything else: we simply don't reason about them. 984 default: 985 return NULL; 986 } 987} 988 989//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 990 991/// Check for comparisons of floating point operands using != and ==. 992/// Issue a warning if these are no self-comparisons, as they are not likely 993/// to do what the programmer intended. 994void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 995 bool EmitWarning = true; 996 997 Expr* LeftExprSansParen = lex->IgnoreParens(); 998 Expr* RightExprSansParen = rex->IgnoreParens(); 999 1000 // Special case: check for x == x (which is OK). 1001 // Do not emit warnings for such cases. 1002 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1003 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1004 if (DRL->getDecl() == DRR->getDecl()) 1005 EmitWarning = false; 1006 1007 1008 // Special case: check for comparisons against literals that can be exactly 1009 // represented by APFloat. In such cases, do not emit a warning. This 1010 // is a heuristic: often comparison against such literals are used to 1011 // detect if a value in a variable has not changed. This clearly can 1012 // lead to false negatives. 1013 if (EmitWarning) { 1014 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1015 if (FLL->isExact()) 1016 EmitWarning = false; 1017 } 1018 else 1019 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1020 if (FLR->isExact()) 1021 EmitWarning = false; 1022 } 1023 } 1024 1025 // Check for comparisons with builtin types. 1026 if (EmitWarning) 1027 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1028 if (isCallBuiltin(CL)) 1029 EmitWarning = false; 1030 1031 if (EmitWarning) 1032 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1033 if (isCallBuiltin(CR)) 1034 EmitWarning = false; 1035 1036 // Emit the diagnostic. 1037 if (EmitWarning) 1038 Diag(loc, diag::warn_floatingpoint_eq) 1039 << lex->getSourceRange() << rex->getSourceRange(); 1040} 1041