SemaChecking.cpp revision d30ef87f34015d18bde20b9632032d0063d761aa
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21#include "clang/Basic/Diagnostic.h" 22#include "SemaUtil.h" 23using namespace clang; 24 25/// CheckFunctionCall - Check a direct function call for various correctness 26/// and safety properties not strictly enforced by the C type system. 27Action::ExprResult 28Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) { 29 llvm::OwningPtr<CallExpr> TheCall(TheCallRaw); 30 // Get the IdentifierInfo* for the called function. 31 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 32 33 // None of the checks below are needed for functions that don't have 34 // simple names (e.g., C++ conversion functions). 35 if (!FnInfo) 36 return TheCall.take(); 37 38 switch (FnInfo->getBuiltinID()) { 39 case Builtin::BI__builtin___CFStringMakeConstantString: 40 assert(TheCall->getNumArgs() == 1 && 41 "Wrong # arguments to builtin CFStringMakeConstantString"); 42 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 43 return true; 44 return TheCall.take(); 45 case Builtin::BI__builtin_stdarg_start: 46 case Builtin::BI__builtin_va_start: 47 if (SemaBuiltinVAStart(TheCall.get())) 48 return true; 49 return TheCall.take(); 50 case Builtin::BI__builtin_isgreater: 51 case Builtin::BI__builtin_isgreaterequal: 52 case Builtin::BI__builtin_isless: 53 case Builtin::BI__builtin_islessequal: 54 case Builtin::BI__builtin_islessgreater: 55 case Builtin::BI__builtin_isunordered: 56 if (SemaBuiltinUnorderedCompare(TheCall.get())) 57 return true; 58 return TheCall.take(); 59 case Builtin::BI__builtin_return_address: 60 case Builtin::BI__builtin_frame_address: 61 if (SemaBuiltinStackAddress(TheCall.get())) 62 return true; 63 return TheCall.take(); 64 case Builtin::BI__builtin_shufflevector: 65 return SemaBuiltinShuffleVector(TheCall.get()); 66 case Builtin::BI__builtin_prefetch: 67 if (SemaBuiltinPrefetch(TheCall.get())) 68 return true; 69 return TheCall.take(); 70 case Builtin::BI__builtin_object_size: 71 if (SemaBuiltinObjectSize(TheCall.get())) 72 return true; 73 } 74 75 // FIXME: This mechanism should be abstracted to be less fragile and 76 // more efficient. For example, just map function ids to custom 77 // handlers. 78 79 // Search the KnownFunctionIDs for the identifier. 80 unsigned i = 0, e = id_num_known_functions; 81 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 82 if (i == e) return TheCall.take(); 83 84 // Printf checking. 85 if (i <= id_vprintf) { 86 // Retrieve the index of the format string parameter and determine 87 // if the function is passed a va_arg argument. 88 unsigned format_idx = 0; 89 bool HasVAListArg = false; 90 91 switch (i) { 92 default: assert(false && "No format string argument index."); 93 case id_NSLog: format_idx = 0; break; 94 case id_asprintf: format_idx = 1; break; 95 case id_fprintf: format_idx = 1; break; 96 case id_printf: format_idx = 0; break; 97 case id_snprintf: format_idx = 2; break; 98 case id_snprintf_chk: format_idx = 4; break; 99 case id_sprintf: format_idx = 1; break; 100 case id_sprintf_chk: format_idx = 3; break; 101 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 102 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 103 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 104 case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break; 105 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 106 case id_vsprintf_chk: format_idx = 3; HasVAListArg = true; break; 107 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 108 } 109 110 CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx); 111 } 112 113 return TheCall.take(); 114} 115 116/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 117/// CFString constructor is correct 118bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 119 Arg = Arg->IgnoreParenCasts(); 120 121 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 122 123 if (!Literal || Literal->isWide()) { 124 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 125 << Arg->getSourceRange(); 126 return true; 127 } 128 129 const char *Data = Literal->getStrData(); 130 unsigned Length = Literal->getByteLength(); 131 132 for (unsigned i = 0; i < Length; ++i) { 133 if (!isascii(Data[i])) { 134 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 135 diag::warn_cfstring_literal_contains_non_ascii_character) 136 << Arg->getSourceRange(); 137 break; 138 } 139 140 if (!Data[i]) { 141 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 142 diag::warn_cfstring_literal_contains_nul_character) 143 << Arg->getSourceRange(); 144 break; 145 } 146 } 147 148 return false; 149} 150 151/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 152/// Emit an error and return true on failure, return false on success. 153bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 154 Expr *Fn = TheCall->getCallee(); 155 if (TheCall->getNumArgs() > 2) { 156 Diag(TheCall->getArg(2)->getLocStart(), 157 diag::err_typecheck_call_too_many_args) 158 << 0 /*function call*/ << Fn->getSourceRange() 159 << SourceRange(TheCall->getArg(2)->getLocStart(), 160 (*(TheCall->arg_end()-1))->getLocEnd()); 161 return true; 162 } 163 164 if (TheCall->getNumArgs() < 2) { 165 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 166 << 0 /*function call*/; 167 } 168 169 // Determine whether the current function is variadic or not. 170 bool isVariadic; 171 if (getCurFunctionDecl()) { 172 if (FunctionTypeProto* FTP = 173 dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType())) 174 isVariadic = FTP->isVariadic(); 175 else 176 isVariadic = false; 177 } else { 178 isVariadic = getCurMethodDecl()->isVariadic(); 179 } 180 181 if (!isVariadic) { 182 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 183 return true; 184 } 185 186 // Verify that the second argument to the builtin is the last argument of the 187 // current function or method. 188 bool SecondArgIsLastNamedArgument = false; 189 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 190 191 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 192 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 193 // FIXME: This isn't correct for methods (results in bogus warning). 194 // Get the last formal in the current function. 195 const ParmVarDecl *LastArg; 196 if (FunctionDecl *FD = getCurFunctionDecl()) 197 LastArg = *(FD->param_end()-1); 198 else 199 LastArg = *(getCurMethodDecl()->param_end()-1); 200 SecondArgIsLastNamedArgument = PV == LastArg; 201 } 202 } 203 204 if (!SecondArgIsLastNamedArgument) 205 Diag(TheCall->getArg(1)->getLocStart(), 206 diag::warn_second_parameter_of_va_start_not_last_named_argument); 207 return false; 208} 209 210/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 211/// friends. This is declared to take (...), so we have to check everything. 212bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 213 if (TheCall->getNumArgs() < 2) 214 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 215 << 0 /*function call*/; 216 if (TheCall->getNumArgs() > 2) 217 return Diag(TheCall->getArg(2)->getLocStart(), 218 diag::err_typecheck_call_too_many_args) 219 << 0 /*function call*/ 220 << SourceRange(TheCall->getArg(2)->getLocStart(), 221 (*(TheCall->arg_end()-1))->getLocEnd()); 222 223 Expr *OrigArg0 = TheCall->getArg(0); 224 Expr *OrigArg1 = TheCall->getArg(1); 225 226 // Do standard promotions between the two arguments, returning their common 227 // type. 228 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 229 230 // If the common type isn't a real floating type, then the arguments were 231 // invalid for this operation. 232 if (!Res->isRealFloatingType()) 233 return Diag(OrigArg0->getLocStart(), 234 diag::err_typecheck_call_invalid_ordered_compare) 235 << OrigArg0->getType() << OrigArg1->getType() 236 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 237 238 return false; 239} 240 241bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 242 // The signature for these builtins is exact; the only thing we need 243 // to check is that the argument is a constant. 244 SourceLocation Loc; 245 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 246 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 247 248 return false; 249} 250 251/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 252// This is declared to take (...), so we have to check everything. 253Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 254 if (TheCall->getNumArgs() < 3) 255 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 256 << 0 /*function call*/ << TheCall->getSourceRange(); 257 258 QualType FAType = TheCall->getArg(0)->getType(); 259 QualType SAType = TheCall->getArg(1)->getType(); 260 261 if (!FAType->isVectorType() || !SAType->isVectorType()) { 262 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 263 << SourceRange(TheCall->getArg(0)->getLocStart(), 264 TheCall->getArg(1)->getLocEnd()); 265 return true; 266 } 267 268 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 269 Context.getCanonicalType(SAType).getUnqualifiedType()) { 270 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 271 << SourceRange(TheCall->getArg(0)->getLocStart(), 272 TheCall->getArg(1)->getLocEnd()); 273 return true; 274 } 275 276 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 277 if (TheCall->getNumArgs() != numElements+2) { 278 if (TheCall->getNumArgs() < numElements+2) 279 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 280 << 0 /*function call*/ << TheCall->getSourceRange(); 281 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 282 << 0 /*function call*/ << TheCall->getSourceRange(); 283 } 284 285 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 286 llvm::APSInt Result(32); 287 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 288 return Diag(TheCall->getLocStart(), 289 diag::err_shufflevector_nonconstant_argument) 290 << TheCall->getArg(i)->getSourceRange(); 291 292 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 293 return Diag(TheCall->getLocStart(), 294 diag::err_shufflevector_argument_too_large) 295 << TheCall->getArg(i)->getSourceRange(); 296 } 297 298 llvm::SmallVector<Expr*, 32> exprs; 299 300 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 301 exprs.push_back(TheCall->getArg(i)); 302 TheCall->setArg(i, 0); 303 } 304 305 return new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType, 306 TheCall->getCallee()->getLocStart(), 307 TheCall->getRParenLoc()); 308} 309 310/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 311// This is declared to take (const void*, ...) and can take two 312// optional constant int args. 313bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 314 unsigned NumArgs = TheCall->getNumArgs(); 315 316 if (NumArgs > 3) 317 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 318 << 0 /*function call*/ << TheCall->getSourceRange(); 319 320 // Argument 0 is checked for us and the remaining arguments must be 321 // constant integers. 322 for (unsigned i = 1; i != NumArgs; ++i) { 323 Expr *Arg = TheCall->getArg(i); 324 QualType RWType = Arg->getType(); 325 326 const BuiltinType *BT = RWType->getAsBuiltinType(); 327 llvm::APSInt Result; 328 if (!BT || BT->getKind() != BuiltinType::Int || 329 !Arg->isIntegerConstantExpr(Result, Context)) 330 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 331 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 332 333 // FIXME: gcc issues a warning and rewrites these to 0. These 334 // seems especially odd for the third argument since the default 335 // is 3. 336 if (i == 1) { 337 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 338 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 339 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 340 } else { 341 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 342 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 343 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 344 } 345 } 346 347 return false; 348} 349 350/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 351/// int type). This simply type checks that type is one of the defined 352/// constants (0-3). 353bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 354 Expr *Arg = TheCall->getArg(1); 355 QualType ArgType = Arg->getType(); 356 const BuiltinType *BT = ArgType->getAsBuiltinType(); 357 llvm::APSInt Result(32); 358 if (!BT || BT->getKind() != BuiltinType::Int || 359 !Arg->isIntegerConstantExpr(Result, Context)) { 360 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 361 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 362 } 363 364 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 365 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 366 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 367 } 368 369 return false; 370} 371 372// Handle i > 1 ? "x" : "y", recursivelly 373bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg, 374 unsigned format_idx) { 375 376 switch (E->getStmtClass()) { 377 case Stmt::ConditionalOperatorClass: { 378 ConditionalOperator *C = cast<ConditionalOperator>(E); 379 return SemaCheckStringLiteral(C->getLHS(), TheCall, 380 HasVAListArg, format_idx) 381 && SemaCheckStringLiteral(C->getRHS(), TheCall, 382 HasVAListArg, format_idx); 383 } 384 385 case Stmt::ImplicitCastExprClass: { 386 ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E); 387 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 388 format_idx); 389 } 390 391 case Stmt::ParenExprClass: { 392 ParenExpr *Expr = dyn_cast<ParenExpr>(E); 393 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 394 format_idx); 395 } 396 397 default: { 398 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E); 399 StringLiteral *StrE = NULL; 400 401 if (ObjCFExpr) 402 StrE = ObjCFExpr->getString(); 403 else 404 StrE = dyn_cast<StringLiteral>(E); 405 406 if (StrE) { 407 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx); 408 return true; 409 } 410 411 return false; 412 } 413 } 414} 415 416 417/// CheckPrintfArguments - Check calls to printf (and similar functions) for 418/// correct use of format strings. 419/// 420/// HasVAListArg - A predicate indicating whether the printf-like 421/// function is passed an explicit va_arg argument (e.g., vprintf) 422/// 423/// format_idx - The index into Args for the format string. 424/// 425/// Improper format strings to functions in the printf family can be 426/// the source of bizarre bugs and very serious security holes. A 427/// good source of information is available in the following paper 428/// (which includes additional references): 429/// 430/// FormatGuard: Automatic Protection From printf Format String 431/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 432/// 433/// Functionality implemented: 434/// 435/// We can statically check the following properties for string 436/// literal format strings for non v.*printf functions (where the 437/// arguments are passed directly): 438// 439/// (1) Are the number of format conversions equal to the number of 440/// data arguments? 441/// 442/// (2) Does each format conversion correctly match the type of the 443/// corresponding data argument? (TODO) 444/// 445/// Moreover, for all printf functions we can: 446/// 447/// (3) Check for a missing format string (when not caught by type checking). 448/// 449/// (4) Check for no-operation flags; e.g. using "#" with format 450/// conversion 'c' (TODO) 451/// 452/// (5) Check the use of '%n', a major source of security holes. 453/// 454/// (6) Check for malformed format conversions that don't specify anything. 455/// 456/// (7) Check for empty format strings. e.g: printf(""); 457/// 458/// (8) Check that the format string is a wide literal. 459/// 460/// (9) Also check the arguments of functions with the __format__ attribute. 461/// (TODO). 462/// 463/// All of these checks can be done by parsing the format string. 464/// 465/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 466void 467Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 468 unsigned format_idx) { 469 Expr *Fn = TheCall->getCallee(); 470 471 // CHECK: printf-like function is called with no format string. 472 if (format_idx >= TheCall->getNumArgs()) { 473 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 474 << Fn->getSourceRange(); 475 return; 476 } 477 478 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 479 480 // CHECK: format string is not a string literal. 481 // 482 // Dynamically generated format strings are difficult to 483 // automatically vet at compile time. Requiring that format strings 484 // are string literals: (1) permits the checking of format strings by 485 // the compiler and thereby (2) can practically remove the source of 486 // many format string exploits. 487 488 // Format string can be either ObjC string (e.g. @"%d") or 489 // C string (e.g. "%d") 490 // ObjC string uses the same format specifiers as C string, so we can use 491 // the same format string checking logic for both ObjC and C strings. 492 bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx); 493 494 if (!isFExpr) { 495 // For vprintf* functions (i.e., HasVAListArg==true), we add a 496 // special check to see if the format string is a function parameter 497 // of the function calling the printf function. If the function 498 // has an attribute indicating it is a printf-like function, then we 499 // should suppress warnings concerning non-literals being used in a call 500 // to a vprintf function. For example: 501 // 502 // void 503 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 504 // va_list ap; 505 // va_start(ap, fmt); 506 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 507 // ... 508 // 509 // 510 // FIXME: We don't have full attribute support yet, so just check to see 511 // if the argument is a DeclRefExpr that references a parameter. We'll 512 // add proper support for checking the attribute later. 513 if (HasVAListArg) 514 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 515 if (isa<ParmVarDecl>(DR->getDecl())) 516 return; 517 518 Diag(TheCall->getArg(format_idx)->getLocStart(), 519 diag::warn_printf_not_string_constant) 520 << OrigFormatExpr->getSourceRange(); 521 return; 522 } 523} 524 525void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, 526 CallExpr *TheCall, bool HasVAListArg, unsigned format_idx) { 527 528 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 529 // CHECK: is the format string a wide literal? 530 if (FExpr->isWide()) { 531 Diag(FExpr->getLocStart(), 532 diag::warn_printf_format_string_is_wide_literal) 533 << OrigFormatExpr->getSourceRange(); 534 return; 535 } 536 537 // Str - The format string. NOTE: this is NOT null-terminated! 538 const char * const Str = FExpr->getStrData(); 539 540 // CHECK: empty format string? 541 const unsigned StrLen = FExpr->getByteLength(); 542 543 if (StrLen == 0) { 544 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 545 << OrigFormatExpr->getSourceRange(); 546 return; 547 } 548 549 // We process the format string using a binary state machine. The 550 // current state is stored in CurrentState. 551 enum { 552 state_OrdChr, 553 state_Conversion 554 } CurrentState = state_OrdChr; 555 556 // numConversions - The number of conversions seen so far. This is 557 // incremented as we traverse the format string. 558 unsigned numConversions = 0; 559 560 // numDataArgs - The number of data arguments after the format 561 // string. This can only be determined for non vprintf-like 562 // functions. For those functions, this value is 1 (the sole 563 // va_arg argument). 564 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 565 566 // Inspect the format string. 567 unsigned StrIdx = 0; 568 569 // LastConversionIdx - Index within the format string where we last saw 570 // a '%' character that starts a new format conversion. 571 unsigned LastConversionIdx = 0; 572 573 for (; StrIdx < StrLen; ++StrIdx) { 574 575 // Is the number of detected conversion conversions greater than 576 // the number of matching data arguments? If so, stop. 577 if (!HasVAListArg && numConversions > numDataArgs) break; 578 579 // Handle "\0" 580 if (Str[StrIdx] == '\0') { 581 // The string returned by getStrData() is not null-terminated, 582 // so the presence of a null character is likely an error. 583 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 584 diag::warn_printf_format_string_contains_null_char) 585 << OrigFormatExpr->getSourceRange(); 586 return; 587 } 588 589 // Ordinary characters (not processing a format conversion). 590 if (CurrentState == state_OrdChr) { 591 if (Str[StrIdx] == '%') { 592 CurrentState = state_Conversion; 593 LastConversionIdx = StrIdx; 594 } 595 continue; 596 } 597 598 // Seen '%'. Now processing a format conversion. 599 switch (Str[StrIdx]) { 600 // Handle dynamic precision or width specifier. 601 case '*': { 602 ++numConversions; 603 604 if (!HasVAListArg && numConversions > numDataArgs) { 605 SourceLocation Loc = FExpr->getLocStart(); 606 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 607 608 if (Str[StrIdx-1] == '.') 609 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 610 << OrigFormatExpr->getSourceRange(); 611 else 612 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 613 << OrigFormatExpr->getSourceRange(); 614 615 // Don't do any more checking. We'll just emit spurious errors. 616 return; 617 } 618 619 // Perform type checking on width/precision specifier. 620 Expr *E = TheCall->getArg(format_idx+numConversions); 621 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 622 if (BT->getKind() == BuiltinType::Int) 623 break; 624 625 SourceLocation Loc = 626 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 627 628 if (Str[StrIdx-1] == '.') 629 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 630 << E->getType() << E->getSourceRange(); 631 else 632 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 633 << E->getType() << E->getSourceRange(); 634 635 break; 636 } 637 638 // Characters which can terminate a format conversion 639 // (e.g. "%d"). Characters that specify length modifiers or 640 // other flags are handled by the default case below. 641 // 642 // FIXME: additional checks will go into the following cases. 643 case 'i': 644 case 'd': 645 case 'o': 646 case 'u': 647 case 'x': 648 case 'X': 649 case 'D': 650 case 'O': 651 case 'U': 652 case 'e': 653 case 'E': 654 case 'f': 655 case 'F': 656 case 'g': 657 case 'G': 658 case 'a': 659 case 'A': 660 case 'c': 661 case 'C': 662 case 'S': 663 case 's': 664 case 'p': 665 ++numConversions; 666 CurrentState = state_OrdChr; 667 break; 668 669 // CHECK: Are we using "%n"? Issue a warning. 670 case 'n': { 671 ++numConversions; 672 CurrentState = state_OrdChr; 673 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 674 LastConversionIdx+1); 675 676 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 677 break; 678 } 679 680 // Handle "%@" 681 case '@': 682 // %@ is allowed in ObjC format strings only. 683 if(ObjCFExpr != NULL) 684 CurrentState = state_OrdChr; 685 else { 686 // Issue a warning: invalid format conversion. 687 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 688 LastConversionIdx+1); 689 690 Diag(Loc, diag::warn_printf_invalid_conversion) 691 << std::string(Str+LastConversionIdx, 692 Str+std::min(LastConversionIdx+2, StrLen)) 693 << OrigFormatExpr->getSourceRange(); 694 } 695 ++numConversions; 696 break; 697 698 // Handle "%%" 699 case '%': 700 // Sanity check: Was the first "%" character the previous one? 701 // If not, we will assume that we have a malformed format 702 // conversion, and that the current "%" character is the start 703 // of a new conversion. 704 if (StrIdx - LastConversionIdx == 1) 705 CurrentState = state_OrdChr; 706 else { 707 // Issue a warning: invalid format conversion. 708 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 709 LastConversionIdx+1); 710 711 Diag(Loc, diag::warn_printf_invalid_conversion) 712 << std::string(Str+LastConversionIdx, Str+StrIdx) 713 << OrigFormatExpr->getSourceRange(); 714 715 // This conversion is broken. Advance to the next format 716 // conversion. 717 LastConversionIdx = StrIdx; 718 ++numConversions; 719 } 720 break; 721 722 default: 723 // This case catches all other characters: flags, widths, etc. 724 // We should eventually process those as well. 725 break; 726 } 727 } 728 729 if (CurrentState == state_Conversion) { 730 // Issue a warning: invalid format conversion. 731 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 732 LastConversionIdx+1); 733 734 Diag(Loc, diag::warn_printf_invalid_conversion) 735 << std::string(Str+LastConversionIdx, 736 Str+std::min(LastConversionIdx+2, StrLen)) 737 << OrigFormatExpr->getSourceRange(); 738 return; 739 } 740 741 if (!HasVAListArg) { 742 // CHECK: Does the number of format conversions exceed the number 743 // of data arguments? 744 if (numConversions > numDataArgs) { 745 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 746 LastConversionIdx); 747 748 Diag(Loc, diag::warn_printf_insufficient_data_args) 749 << OrigFormatExpr->getSourceRange(); 750 } 751 // CHECK: Does the number of data arguments exceed the number of 752 // format conversions in the format string? 753 else if (numConversions < numDataArgs) 754 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 755 diag::warn_printf_too_many_data_args) 756 << OrigFormatExpr->getSourceRange(); 757 } 758} 759 760//===--- CHECK: Return Address of Stack Variable --------------------------===// 761 762static DeclRefExpr* EvalVal(Expr *E); 763static DeclRefExpr* EvalAddr(Expr* E); 764 765/// CheckReturnStackAddr - Check if a return statement returns the address 766/// of a stack variable. 767void 768Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 769 SourceLocation ReturnLoc) { 770 771 // Perform checking for returned stack addresses. 772 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 773 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 774 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 775 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 776 777 // Skip over implicit cast expressions when checking for block expressions. 778 if (ImplicitCastExpr *IcExpr = 779 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 780 RetValExp = IcExpr->getSubExpr(); 781 782 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 783 Diag(C->getLocStart(), diag::err_ret_local_block) 784 << C->getSourceRange(); 785 } 786 // Perform checking for stack values returned by reference. 787 else if (lhsType->isReferenceType()) { 788 // Check for a reference to the stack 789 if (DeclRefExpr *DR = EvalVal(RetValExp)) 790 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 791 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 792 } 793} 794 795/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 796/// check if the expression in a return statement evaluates to an address 797/// to a location on the stack. The recursion is used to traverse the 798/// AST of the return expression, with recursion backtracking when we 799/// encounter a subexpression that (1) clearly does not lead to the address 800/// of a stack variable or (2) is something we cannot determine leads to 801/// the address of a stack variable based on such local checking. 802/// 803/// EvalAddr processes expressions that are pointers that are used as 804/// references (and not L-values). EvalVal handles all other values. 805/// At the base case of the recursion is a check for a DeclRefExpr* in 806/// the refers to a stack variable. 807/// 808/// This implementation handles: 809/// 810/// * pointer-to-pointer casts 811/// * implicit conversions from array references to pointers 812/// * taking the address of fields 813/// * arbitrary interplay between "&" and "*" operators 814/// * pointer arithmetic from an address of a stack variable 815/// * taking the address of an array element where the array is on the stack 816static DeclRefExpr* EvalAddr(Expr *E) { 817 // We should only be called for evaluating pointer expressions. 818 assert((E->getType()->isPointerType() || 819 E->getType()->isBlockPointerType() || 820 E->getType()->isObjCQualifiedIdType()) && 821 "EvalAddr only works on pointers"); 822 823 // Our "symbolic interpreter" is just a dispatch off the currently 824 // viewed AST node. We then recursively traverse the AST by calling 825 // EvalAddr and EvalVal appropriately. 826 switch (E->getStmtClass()) { 827 case Stmt::ParenExprClass: 828 // Ignore parentheses. 829 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 830 831 case Stmt::UnaryOperatorClass: { 832 // The only unary operator that make sense to handle here 833 // is AddrOf. All others don't make sense as pointers. 834 UnaryOperator *U = cast<UnaryOperator>(E); 835 836 if (U->getOpcode() == UnaryOperator::AddrOf) 837 return EvalVal(U->getSubExpr()); 838 else 839 return NULL; 840 } 841 842 case Stmt::BinaryOperatorClass: { 843 // Handle pointer arithmetic. All other binary operators are not valid 844 // in this context. 845 BinaryOperator *B = cast<BinaryOperator>(E); 846 BinaryOperator::Opcode op = B->getOpcode(); 847 848 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 849 return NULL; 850 851 Expr *Base = B->getLHS(); 852 853 // Determine which argument is the real pointer base. It could be 854 // the RHS argument instead of the LHS. 855 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 856 857 assert (Base->getType()->isPointerType()); 858 return EvalAddr(Base); 859 } 860 861 // For conditional operators we need to see if either the LHS or RHS are 862 // valid DeclRefExpr*s. If one of them is valid, we return it. 863 case Stmt::ConditionalOperatorClass: { 864 ConditionalOperator *C = cast<ConditionalOperator>(E); 865 866 // Handle the GNU extension for missing LHS. 867 if (Expr *lhsExpr = C->getLHS()) 868 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 869 return LHS; 870 871 return EvalAddr(C->getRHS()); 872 } 873 874 // For casts, we need to handle conversions from arrays to 875 // pointer values, and pointer-to-pointer conversions. 876 case Stmt::ImplicitCastExprClass: 877 case Stmt::CStyleCastExprClass: 878 case Stmt::CXXFunctionalCastExprClass: { 879 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 880 QualType T = SubExpr->getType(); 881 882 if (SubExpr->getType()->isPointerType() || 883 SubExpr->getType()->isBlockPointerType() || 884 SubExpr->getType()->isObjCQualifiedIdType()) 885 return EvalAddr(SubExpr); 886 else if (T->isArrayType()) 887 return EvalVal(SubExpr); 888 else 889 return 0; 890 } 891 892 // C++ casts. For dynamic casts, static casts, and const casts, we 893 // are always converting from a pointer-to-pointer, so we just blow 894 // through the cast. In the case the dynamic cast doesn't fail (and 895 // return NULL), we take the conservative route and report cases 896 // where we return the address of a stack variable. For Reinterpre 897 // FIXME: The comment about is wrong; we're not always converting 898 // from pointer to pointer. I'm guessing that this code should also 899 // handle references to objects. 900 case Stmt::CXXStaticCastExprClass: 901 case Stmt::CXXDynamicCastExprClass: 902 case Stmt::CXXConstCastExprClass: 903 case Stmt::CXXReinterpretCastExprClass: { 904 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 905 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 906 return EvalAddr(S); 907 else 908 return NULL; 909 } 910 911 // Everything else: we simply don't reason about them. 912 default: 913 return NULL; 914 } 915} 916 917 918/// EvalVal - This function is complements EvalAddr in the mutual recursion. 919/// See the comments for EvalAddr for more details. 920static DeclRefExpr* EvalVal(Expr *E) { 921 922 // We should only be called for evaluating non-pointer expressions, or 923 // expressions with a pointer type that are not used as references but instead 924 // are l-values (e.g., DeclRefExpr with a pointer type). 925 926 // Our "symbolic interpreter" is just a dispatch off the currently 927 // viewed AST node. We then recursively traverse the AST by calling 928 // EvalAddr and EvalVal appropriately. 929 switch (E->getStmtClass()) { 930 case Stmt::DeclRefExprClass: 931 case Stmt::QualifiedDeclRefExprClass: { 932 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 933 // at code that refers to a variable's name. We check if it has local 934 // storage within the function, and if so, return the expression. 935 DeclRefExpr *DR = cast<DeclRefExpr>(E); 936 937 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 938 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 939 940 return NULL; 941 } 942 943 case Stmt::ParenExprClass: 944 // Ignore parentheses. 945 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 946 947 case Stmt::UnaryOperatorClass: { 948 // The only unary operator that make sense to handle here 949 // is Deref. All others don't resolve to a "name." This includes 950 // handling all sorts of rvalues passed to a unary operator. 951 UnaryOperator *U = cast<UnaryOperator>(E); 952 953 if (U->getOpcode() == UnaryOperator::Deref) 954 return EvalAddr(U->getSubExpr()); 955 956 return NULL; 957 } 958 959 case Stmt::ArraySubscriptExprClass: { 960 // Array subscripts are potential references to data on the stack. We 961 // retrieve the DeclRefExpr* for the array variable if it indeed 962 // has local storage. 963 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 964 } 965 966 case Stmt::ConditionalOperatorClass: { 967 // For conditional operators we need to see if either the LHS or RHS are 968 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 969 ConditionalOperator *C = cast<ConditionalOperator>(E); 970 971 // Handle the GNU extension for missing LHS. 972 if (Expr *lhsExpr = C->getLHS()) 973 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 974 return LHS; 975 976 return EvalVal(C->getRHS()); 977 } 978 979 // Accesses to members are potential references to data on the stack. 980 case Stmt::MemberExprClass: { 981 MemberExpr *M = cast<MemberExpr>(E); 982 983 // Check for indirect access. We only want direct field accesses. 984 if (!M->isArrow()) 985 return EvalVal(M->getBase()); 986 else 987 return NULL; 988 } 989 990 // Everything else: we simply don't reason about them. 991 default: 992 return NULL; 993 } 994} 995 996//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 997 998/// Check for comparisons of floating point operands using != and ==. 999/// Issue a warning if these are no self-comparisons, as they are not likely 1000/// to do what the programmer intended. 1001void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1002 bool EmitWarning = true; 1003 1004 Expr* LeftExprSansParen = lex->IgnoreParens(); 1005 Expr* RightExprSansParen = rex->IgnoreParens(); 1006 1007 // Special case: check for x == x (which is OK). 1008 // Do not emit warnings for such cases. 1009 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1010 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1011 if (DRL->getDecl() == DRR->getDecl()) 1012 EmitWarning = false; 1013 1014 1015 // Special case: check for comparisons against literals that can be exactly 1016 // represented by APFloat. In such cases, do not emit a warning. This 1017 // is a heuristic: often comparison against such literals are used to 1018 // detect if a value in a variable has not changed. This clearly can 1019 // lead to false negatives. 1020 if (EmitWarning) { 1021 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1022 if (FLL->isExact()) 1023 EmitWarning = false; 1024 } 1025 else 1026 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1027 if (FLR->isExact()) 1028 EmitWarning = false; 1029 } 1030 } 1031 1032 // Check for comparisons with builtin types. 1033 if (EmitWarning) 1034 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1035 if (isCallBuiltin(CL)) 1036 EmitWarning = false; 1037 1038 if (EmitWarning) 1039 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1040 if (isCallBuiltin(CR)) 1041 EmitWarning = false; 1042 1043 // Emit the diagnostic. 1044 if (EmitWarning) 1045 Diag(loc, diag::warn_floatingpoint_eq) 1046 << lex->getSourceRange() << rex->getSourceRange(); 1047} 1048