SemaChecking.cpp revision 500d3297d2a21edeac4d46cbcbe21bc2352c2a28
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21#include "SemaUtil.h" 22using namespace clang; 23 24/// CheckFunctionCall - Check a direct function call for various correctness 25/// and safety properties not strictly enforced by the C type system. 26Action::OwningExprResult 27Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 28 OwningExprResult TheCallResult(Owned(TheCall)); 29 // Get the IdentifierInfo* for the called function. 30 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 31 32 // None of the checks below are needed for functions that don't have 33 // simple names (e.g., C++ conversion functions). 34 if (!FnInfo) 35 return move(TheCallResult); 36 37 switch (FnInfo->getBuiltinID()) { 38 case Builtin::BI__builtin___CFStringMakeConstantString: 39 assert(TheCall->getNumArgs() == 1 && 40 "Wrong # arguments to builtin CFStringMakeConstantString"); 41 if (CheckBuiltinCFStringArgument(TheCall->getArg(0))) 42 return ExprError(); 43 return move(TheCallResult); 44 case Builtin::BI__builtin_stdarg_start: 45 case Builtin::BI__builtin_va_start: 46 if (SemaBuiltinVAStart(TheCall)) 47 return ExprError(); 48 return move(TheCallResult); 49 case Builtin::BI__builtin_isgreater: 50 case Builtin::BI__builtin_isgreaterequal: 51 case Builtin::BI__builtin_isless: 52 case Builtin::BI__builtin_islessequal: 53 case Builtin::BI__builtin_islessgreater: 54 case Builtin::BI__builtin_isunordered: 55 if (SemaBuiltinUnorderedCompare(TheCall)) 56 return ExprError(); 57 return move(TheCallResult); 58 case Builtin::BI__builtin_return_address: 59 case Builtin::BI__builtin_frame_address: 60 if (SemaBuiltinStackAddress(TheCall)) 61 return ExprError(); 62 return move(TheCallResult); 63 case Builtin::BI__builtin_shufflevector: 64 return SemaBuiltinShuffleVector(TheCall); 65 // TheCall will be freed by the smart pointer here, but that's fine, since 66 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 67 case Builtin::BI__builtin_prefetch: 68 if (SemaBuiltinPrefetch(TheCall)) 69 return ExprError(); 70 return move(TheCallResult); 71 case Builtin::BI__builtin_object_size: 72 if (SemaBuiltinObjectSize(TheCall)) 73 return ExprError(); 74 } 75 76 // FIXME: This mechanism should be abstracted to be less fragile and 77 // more efficient. For example, just map function ids to custom 78 // handlers. 79 80 // Search the KnownFunctionIDs for the identifier. 81 unsigned i = 0, e = id_num_known_functions; 82 for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } 83 if (i == e) return move(TheCallResult); 84 85 // Printf checking. 86 if (i <= id_vprintf) { 87 // Retrieve the index of the format string parameter and determine 88 // if the function is passed a va_arg argument. 89 unsigned format_idx = 0; 90 bool HasVAListArg = false; 91 92 switch (i) { 93 default: assert(false && "No format string argument index."); 94 case id_NSLog: format_idx = 0; break; 95 case id_asprintf: format_idx = 1; break; 96 case id_fprintf: format_idx = 1; break; 97 case id_printf: format_idx = 0; break; 98 case id_snprintf: format_idx = 2; break; 99 case id_snprintf_chk: format_idx = 4; break; 100 case id_sprintf: format_idx = 1; break; 101 case id_sprintf_chk: format_idx = 3; break; 102 case id_vasprintf: format_idx = 1; HasVAListArg = true; break; 103 case id_vfprintf: format_idx = 1; HasVAListArg = true; break; 104 case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; 105 case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break; 106 case id_vsprintf: format_idx = 1; HasVAListArg = true; break; 107 case id_vsprintf_chk: format_idx = 3; HasVAListArg = true; break; 108 case id_vprintf: format_idx = 0; HasVAListArg = true; break; 109 } 110 111 CheckPrintfArguments(TheCall, HasVAListArg, format_idx); 112 } 113 114 return move(TheCallResult); 115} 116 117/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin 118/// CFString constructor is correct 119bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) { 120 Arg = Arg->IgnoreParenCasts(); 121 122 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 123 124 if (!Literal || Literal->isWide()) { 125 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 126 << Arg->getSourceRange(); 127 return true; 128 } 129 130 const char *Data = Literal->getStrData(); 131 unsigned Length = Literal->getByteLength(); 132 133 for (unsigned i = 0; i < Length; ++i) { 134 if (!isascii(Data[i])) { 135 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 136 diag::warn_cfstring_literal_contains_non_ascii_character) 137 << Arg->getSourceRange(); 138 break; 139 } 140 141 if (!Data[i]) { 142 Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1), 143 diag::warn_cfstring_literal_contains_nul_character) 144 << Arg->getSourceRange(); 145 break; 146 } 147 } 148 149 return false; 150} 151 152/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 153/// Emit an error and return true on failure, return false on success. 154bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 155 Expr *Fn = TheCall->getCallee(); 156 if (TheCall->getNumArgs() > 2) { 157 Diag(TheCall->getArg(2)->getLocStart(), 158 diag::err_typecheck_call_too_many_args) 159 << 0 /*function call*/ << Fn->getSourceRange() 160 << SourceRange(TheCall->getArg(2)->getLocStart(), 161 (*(TheCall->arg_end()-1))->getLocEnd()); 162 return true; 163 } 164 165 if (TheCall->getNumArgs() < 2) { 166 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 167 << 0 /*function call*/; 168 } 169 170 // Determine whether the current function is variadic or not. 171 bool isVariadic; 172 if (getCurFunctionDecl()) { 173 if (FunctionTypeProto* FTP = 174 dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType())) 175 isVariadic = FTP->isVariadic(); 176 else 177 isVariadic = false; 178 } else { 179 isVariadic = getCurMethodDecl()->isVariadic(); 180 } 181 182 if (!isVariadic) { 183 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 184 return true; 185 } 186 187 // Verify that the second argument to the builtin is the last argument of the 188 // current function or method. 189 bool SecondArgIsLastNamedArgument = false; 190 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 191 192 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 193 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 194 // FIXME: This isn't correct for methods (results in bogus warning). 195 // Get the last formal in the current function. 196 const ParmVarDecl *LastArg; 197 if (FunctionDecl *FD = getCurFunctionDecl()) 198 LastArg = *(FD->param_end()-1); 199 else 200 LastArg = *(getCurMethodDecl()->param_end()-1); 201 SecondArgIsLastNamedArgument = PV == LastArg; 202 } 203 } 204 205 if (!SecondArgIsLastNamedArgument) 206 Diag(TheCall->getArg(1)->getLocStart(), 207 diag::warn_second_parameter_of_va_start_not_last_named_argument); 208 return false; 209} 210 211/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 212/// friends. This is declared to take (...), so we have to check everything. 213bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 214 if (TheCall->getNumArgs() < 2) 215 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 216 << 0 /*function call*/; 217 if (TheCall->getNumArgs() > 2) 218 return Diag(TheCall->getArg(2)->getLocStart(), 219 diag::err_typecheck_call_too_many_args) 220 << 0 /*function call*/ 221 << SourceRange(TheCall->getArg(2)->getLocStart(), 222 (*(TheCall->arg_end()-1))->getLocEnd()); 223 224 Expr *OrigArg0 = TheCall->getArg(0); 225 Expr *OrigArg1 = TheCall->getArg(1); 226 227 // Do standard promotions between the two arguments, returning their common 228 // type. 229 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 230 231 // If the common type isn't a real floating type, then the arguments were 232 // invalid for this operation. 233 if (!Res->isRealFloatingType()) 234 return Diag(OrigArg0->getLocStart(), 235 diag::err_typecheck_call_invalid_ordered_compare) 236 << OrigArg0->getType() << OrigArg1->getType() 237 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 238 239 return false; 240} 241 242bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 243 // The signature for these builtins is exact; the only thing we need 244 // to check is that the argument is a constant. 245 SourceLocation Loc; 246 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 247 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 248 249 return false; 250} 251 252/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 253// This is declared to take (...), so we have to check everything. 254Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 255 if (TheCall->getNumArgs() < 3) 256 return ExprError(Diag(TheCall->getLocEnd(), 257 diag::err_typecheck_call_too_few_args) 258 << 0 /*function call*/ << TheCall->getSourceRange()); 259 260 QualType FAType = TheCall->getArg(0)->getType(); 261 QualType SAType = TheCall->getArg(1)->getType(); 262 263 if (!FAType->isVectorType() || !SAType->isVectorType()) { 264 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 265 << SourceRange(TheCall->getArg(0)->getLocStart(), 266 TheCall->getArg(1)->getLocEnd()); 267 return ExprError(); 268 } 269 270 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 271 Context.getCanonicalType(SAType).getUnqualifiedType()) { 272 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 273 << SourceRange(TheCall->getArg(0)->getLocStart(), 274 TheCall->getArg(1)->getLocEnd()); 275 return ExprError(); 276 } 277 278 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 279 if (TheCall->getNumArgs() != numElements+2) { 280 if (TheCall->getNumArgs() < numElements+2) 281 return ExprError(Diag(TheCall->getLocEnd(), 282 diag::err_typecheck_call_too_few_args) 283 << 0 /*function call*/ << TheCall->getSourceRange()); 284 return ExprError(Diag(TheCall->getLocEnd(), 285 diag::err_typecheck_call_too_many_args) 286 << 0 /*function call*/ << TheCall->getSourceRange()); 287 } 288 289 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 290 llvm::APSInt Result(32); 291 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 292 return ExprError(Diag(TheCall->getLocStart(), 293 diag::err_shufflevector_nonconstant_argument) 294 << TheCall->getArg(i)->getSourceRange()); 295 296 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 297 return ExprError(Diag(TheCall->getLocStart(), 298 diag::err_shufflevector_argument_too_large) 299 << TheCall->getArg(i)->getSourceRange()); 300 } 301 302 llvm::SmallVector<Expr*, 32> exprs; 303 304 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 305 exprs.push_back(TheCall->getArg(i)); 306 TheCall->setArg(i, 0); 307 } 308 309 return Owned(new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType, 310 TheCall->getCallee()->getLocStart(), 311 TheCall->getRParenLoc())); 312} 313 314/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 315// This is declared to take (const void*, ...) and can take two 316// optional constant int args. 317bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 318 unsigned NumArgs = TheCall->getNumArgs(); 319 320 if (NumArgs > 3) 321 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 322 << 0 /*function call*/ << TheCall->getSourceRange(); 323 324 // Argument 0 is checked for us and the remaining arguments must be 325 // constant integers. 326 for (unsigned i = 1; i != NumArgs; ++i) { 327 Expr *Arg = TheCall->getArg(i); 328 QualType RWType = Arg->getType(); 329 330 const BuiltinType *BT = RWType->getAsBuiltinType(); 331 llvm::APSInt Result; 332 if (!BT || BT->getKind() != BuiltinType::Int || 333 !Arg->isIntegerConstantExpr(Result, Context)) 334 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 335 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 336 337 // FIXME: gcc issues a warning and rewrites these to 0. These 338 // seems especially odd for the third argument since the default 339 // is 3. 340 if (i == 1) { 341 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 342 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 343 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 344 } else { 345 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 346 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 347 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 348 } 349 } 350 351 return false; 352} 353 354/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 355/// int type). This simply type checks that type is one of the defined 356/// constants (0-3). 357bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 358 Expr *Arg = TheCall->getArg(1); 359 QualType ArgType = Arg->getType(); 360 const BuiltinType *BT = ArgType->getAsBuiltinType(); 361 llvm::APSInt Result(32); 362 if (!BT || BT->getKind() != BuiltinType::Int || 363 !Arg->isIntegerConstantExpr(Result, Context)) { 364 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 365 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 366 } 367 368 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 369 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 370 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 371 } 372 373 return false; 374} 375 376// Handle i > 1 ? "x" : "y", recursivelly 377bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg, 378 unsigned format_idx) { 379 380 switch (E->getStmtClass()) { 381 case Stmt::ConditionalOperatorClass: { 382 ConditionalOperator *C = cast<ConditionalOperator>(E); 383 return SemaCheckStringLiteral(C->getLHS(), TheCall, 384 HasVAListArg, format_idx) 385 && SemaCheckStringLiteral(C->getRHS(), TheCall, 386 HasVAListArg, format_idx); 387 } 388 389 case Stmt::ImplicitCastExprClass: { 390 ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E); 391 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 392 format_idx); 393 } 394 395 case Stmt::ParenExprClass: { 396 ParenExpr *Expr = dyn_cast<ParenExpr>(E); 397 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 398 format_idx); 399 } 400 401 default: { 402 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E); 403 StringLiteral *StrE = NULL; 404 405 if (ObjCFExpr) 406 StrE = ObjCFExpr->getString(); 407 else 408 StrE = dyn_cast<StringLiteral>(E); 409 410 if (StrE) { 411 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx); 412 return true; 413 } 414 415 return false; 416 } 417 } 418} 419 420 421/// CheckPrintfArguments - Check calls to printf (and similar functions) for 422/// correct use of format strings. 423/// 424/// HasVAListArg - A predicate indicating whether the printf-like 425/// function is passed an explicit va_arg argument (e.g., vprintf) 426/// 427/// format_idx - The index into Args for the format string. 428/// 429/// Improper format strings to functions in the printf family can be 430/// the source of bizarre bugs and very serious security holes. A 431/// good source of information is available in the following paper 432/// (which includes additional references): 433/// 434/// FormatGuard: Automatic Protection From printf Format String 435/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 436/// 437/// Functionality implemented: 438/// 439/// We can statically check the following properties for string 440/// literal format strings for non v.*printf functions (where the 441/// arguments are passed directly): 442// 443/// (1) Are the number of format conversions equal to the number of 444/// data arguments? 445/// 446/// (2) Does each format conversion correctly match the type of the 447/// corresponding data argument? (TODO) 448/// 449/// Moreover, for all printf functions we can: 450/// 451/// (3) Check for a missing format string (when not caught by type checking). 452/// 453/// (4) Check for no-operation flags; e.g. using "#" with format 454/// conversion 'c' (TODO) 455/// 456/// (5) Check the use of '%n', a major source of security holes. 457/// 458/// (6) Check for malformed format conversions that don't specify anything. 459/// 460/// (7) Check for empty format strings. e.g: printf(""); 461/// 462/// (8) Check that the format string is a wide literal. 463/// 464/// (9) Also check the arguments of functions with the __format__ attribute. 465/// (TODO). 466/// 467/// All of these checks can be done by parsing the format string. 468/// 469/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 470void 471Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 472 unsigned format_idx) { 473 Expr *Fn = TheCall->getCallee(); 474 475 // CHECK: printf-like function is called with no format string. 476 if (format_idx >= TheCall->getNumArgs()) { 477 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 478 << Fn->getSourceRange(); 479 return; 480 } 481 482 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 483 484 // CHECK: format string is not a string literal. 485 // 486 // Dynamically generated format strings are difficult to 487 // automatically vet at compile time. Requiring that format strings 488 // are string literals: (1) permits the checking of format strings by 489 // the compiler and thereby (2) can practically remove the source of 490 // many format string exploits. 491 492 // Format string can be either ObjC string (e.g. @"%d") or 493 // C string (e.g. "%d") 494 // ObjC string uses the same format specifiers as C string, so we can use 495 // the same format string checking logic for both ObjC and C strings. 496 bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx); 497 498 if (!isFExpr) { 499 // For vprintf* functions (i.e., HasVAListArg==true), we add a 500 // special check to see if the format string is a function parameter 501 // of the function calling the printf function. If the function 502 // has an attribute indicating it is a printf-like function, then we 503 // should suppress warnings concerning non-literals being used in a call 504 // to a vprintf function. For example: 505 // 506 // void 507 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 508 // va_list ap; 509 // va_start(ap, fmt); 510 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 511 // ... 512 // 513 // 514 // FIXME: We don't have full attribute support yet, so just check to see 515 // if the argument is a DeclRefExpr that references a parameter. We'll 516 // add proper support for checking the attribute later. 517 if (HasVAListArg) 518 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 519 if (isa<ParmVarDecl>(DR->getDecl())) 520 return; 521 522 Diag(TheCall->getArg(format_idx)->getLocStart(), 523 diag::warn_printf_not_string_constant) 524 << OrigFormatExpr->getSourceRange(); 525 return; 526 } 527} 528 529void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, 530 CallExpr *TheCall, bool HasVAListArg, unsigned format_idx) { 531 532 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 533 // CHECK: is the format string a wide literal? 534 if (FExpr->isWide()) { 535 Diag(FExpr->getLocStart(), 536 diag::warn_printf_format_string_is_wide_literal) 537 << OrigFormatExpr->getSourceRange(); 538 return; 539 } 540 541 // Str - The format string. NOTE: this is NOT null-terminated! 542 const char * const Str = FExpr->getStrData(); 543 544 // CHECK: empty format string? 545 const unsigned StrLen = FExpr->getByteLength(); 546 547 if (StrLen == 0) { 548 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 549 << OrigFormatExpr->getSourceRange(); 550 return; 551 } 552 553 // We process the format string using a binary state machine. The 554 // current state is stored in CurrentState. 555 enum { 556 state_OrdChr, 557 state_Conversion 558 } CurrentState = state_OrdChr; 559 560 // numConversions - The number of conversions seen so far. This is 561 // incremented as we traverse the format string. 562 unsigned numConversions = 0; 563 564 // numDataArgs - The number of data arguments after the format 565 // string. This can only be determined for non vprintf-like 566 // functions. For those functions, this value is 1 (the sole 567 // va_arg argument). 568 unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1); 569 570 // Inspect the format string. 571 unsigned StrIdx = 0; 572 573 // LastConversionIdx - Index within the format string where we last saw 574 // a '%' character that starts a new format conversion. 575 unsigned LastConversionIdx = 0; 576 577 for (; StrIdx < StrLen; ++StrIdx) { 578 579 // Is the number of detected conversion conversions greater than 580 // the number of matching data arguments? If so, stop. 581 if (!HasVAListArg && numConversions > numDataArgs) break; 582 583 // Handle "\0" 584 if (Str[StrIdx] == '\0') { 585 // The string returned by getStrData() is not null-terminated, 586 // so the presence of a null character is likely an error. 587 Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1), 588 diag::warn_printf_format_string_contains_null_char) 589 << OrigFormatExpr->getSourceRange(); 590 return; 591 } 592 593 // Ordinary characters (not processing a format conversion). 594 if (CurrentState == state_OrdChr) { 595 if (Str[StrIdx] == '%') { 596 CurrentState = state_Conversion; 597 LastConversionIdx = StrIdx; 598 } 599 continue; 600 } 601 602 // Seen '%'. Now processing a format conversion. 603 switch (Str[StrIdx]) { 604 // Handle dynamic precision or width specifier. 605 case '*': { 606 ++numConversions; 607 608 if (!HasVAListArg && numConversions > numDataArgs) { 609 SourceLocation Loc = FExpr->getLocStart(); 610 Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1); 611 612 if (Str[StrIdx-1] == '.') 613 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 614 << OrigFormatExpr->getSourceRange(); 615 else 616 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 617 << OrigFormatExpr->getSourceRange(); 618 619 // Don't do any more checking. We'll just emit spurious errors. 620 return; 621 } 622 623 // Perform type checking on width/precision specifier. 624 Expr *E = TheCall->getArg(format_idx+numConversions); 625 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 626 if (BT->getKind() == BuiltinType::Int) 627 break; 628 629 SourceLocation Loc = 630 PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1); 631 632 if (Str[StrIdx-1] == '.') 633 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 634 << E->getType() << E->getSourceRange(); 635 else 636 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 637 << E->getType() << E->getSourceRange(); 638 639 break; 640 } 641 642 // Characters which can terminate a format conversion 643 // (e.g. "%d"). Characters that specify length modifiers or 644 // other flags are handled by the default case below. 645 // 646 // FIXME: additional checks will go into the following cases. 647 case 'i': 648 case 'd': 649 case 'o': 650 case 'u': 651 case 'x': 652 case 'X': 653 case 'D': 654 case 'O': 655 case 'U': 656 case 'e': 657 case 'E': 658 case 'f': 659 case 'F': 660 case 'g': 661 case 'G': 662 case 'a': 663 case 'A': 664 case 'c': 665 case 'C': 666 case 'S': 667 case 's': 668 case 'p': 669 ++numConversions; 670 CurrentState = state_OrdChr; 671 break; 672 673 // CHECK: Are we using "%n"? Issue a warning. 674 case 'n': { 675 ++numConversions; 676 CurrentState = state_OrdChr; 677 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 678 LastConversionIdx+1); 679 680 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 681 break; 682 } 683 684 // Handle "%@" 685 case '@': 686 // %@ is allowed in ObjC format strings only. 687 if(ObjCFExpr != NULL) 688 CurrentState = state_OrdChr; 689 else { 690 // Issue a warning: invalid format conversion. 691 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 692 LastConversionIdx+1); 693 694 Diag(Loc, diag::warn_printf_invalid_conversion) 695 << std::string(Str+LastConversionIdx, 696 Str+std::min(LastConversionIdx+2, StrLen)) 697 << OrigFormatExpr->getSourceRange(); 698 } 699 ++numConversions; 700 break; 701 702 // Handle "%%" 703 case '%': 704 // Sanity check: Was the first "%" character the previous one? 705 // If not, we will assume that we have a malformed format 706 // conversion, and that the current "%" character is the start 707 // of a new conversion. 708 if (StrIdx - LastConversionIdx == 1) 709 CurrentState = state_OrdChr; 710 else { 711 // Issue a warning: invalid format conversion. 712 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 713 LastConversionIdx+1); 714 715 Diag(Loc, diag::warn_printf_invalid_conversion) 716 << std::string(Str+LastConversionIdx, Str+StrIdx) 717 << OrigFormatExpr->getSourceRange(); 718 719 // This conversion is broken. Advance to the next format 720 // conversion. 721 LastConversionIdx = StrIdx; 722 ++numConversions; 723 } 724 break; 725 726 default: 727 // This case catches all other characters: flags, widths, etc. 728 // We should eventually process those as well. 729 break; 730 } 731 } 732 733 if (CurrentState == state_Conversion) { 734 // Issue a warning: invalid format conversion. 735 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 736 LastConversionIdx+1); 737 738 Diag(Loc, diag::warn_printf_invalid_conversion) 739 << std::string(Str+LastConversionIdx, 740 Str+std::min(LastConversionIdx+2, StrLen)) 741 << OrigFormatExpr->getSourceRange(); 742 return; 743 } 744 745 if (!HasVAListArg) { 746 // CHECK: Does the number of format conversions exceed the number 747 // of data arguments? 748 if (numConversions > numDataArgs) { 749 SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(), 750 LastConversionIdx); 751 752 Diag(Loc, diag::warn_printf_insufficient_data_args) 753 << OrigFormatExpr->getSourceRange(); 754 } 755 // CHECK: Does the number of data arguments exceed the number of 756 // format conversions in the format string? 757 else if (numConversions < numDataArgs) 758 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 759 diag::warn_printf_too_many_data_args) 760 << OrigFormatExpr->getSourceRange(); 761 } 762} 763 764//===--- CHECK: Return Address of Stack Variable --------------------------===// 765 766static DeclRefExpr* EvalVal(Expr *E); 767static DeclRefExpr* EvalAddr(Expr* E); 768 769/// CheckReturnStackAddr - Check if a return statement returns the address 770/// of a stack variable. 771void 772Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 773 SourceLocation ReturnLoc) { 774 775 // Perform checking for returned stack addresses. 776 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 777 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 778 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 779 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 780 781 // Skip over implicit cast expressions when checking for block expressions. 782 if (ImplicitCastExpr *IcExpr = 783 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 784 RetValExp = IcExpr->getSubExpr(); 785 786 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 787 Diag(C->getLocStart(), diag::err_ret_local_block) 788 << C->getSourceRange(); 789 } 790 // Perform checking for stack values returned by reference. 791 else if (lhsType->isReferenceType()) { 792 // Check for a reference to the stack 793 if (DeclRefExpr *DR = EvalVal(RetValExp)) 794 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 795 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 796 } 797} 798 799/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 800/// check if the expression in a return statement evaluates to an address 801/// to a location on the stack. The recursion is used to traverse the 802/// AST of the return expression, with recursion backtracking when we 803/// encounter a subexpression that (1) clearly does not lead to the address 804/// of a stack variable or (2) is something we cannot determine leads to 805/// the address of a stack variable based on such local checking. 806/// 807/// EvalAddr processes expressions that are pointers that are used as 808/// references (and not L-values). EvalVal handles all other values. 809/// At the base case of the recursion is a check for a DeclRefExpr* in 810/// the refers to a stack variable. 811/// 812/// This implementation handles: 813/// 814/// * pointer-to-pointer casts 815/// * implicit conversions from array references to pointers 816/// * taking the address of fields 817/// * arbitrary interplay between "&" and "*" operators 818/// * pointer arithmetic from an address of a stack variable 819/// * taking the address of an array element where the array is on the stack 820static DeclRefExpr* EvalAddr(Expr *E) { 821 // We should only be called for evaluating pointer expressions. 822 assert((E->getType()->isPointerType() || 823 E->getType()->isBlockPointerType() || 824 E->getType()->isObjCQualifiedIdType()) && 825 "EvalAddr only works on pointers"); 826 827 // Our "symbolic interpreter" is just a dispatch off the currently 828 // viewed AST node. We then recursively traverse the AST by calling 829 // EvalAddr and EvalVal appropriately. 830 switch (E->getStmtClass()) { 831 case Stmt::ParenExprClass: 832 // Ignore parentheses. 833 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 834 835 case Stmt::UnaryOperatorClass: { 836 // The only unary operator that make sense to handle here 837 // is AddrOf. All others don't make sense as pointers. 838 UnaryOperator *U = cast<UnaryOperator>(E); 839 840 if (U->getOpcode() == UnaryOperator::AddrOf) 841 return EvalVal(U->getSubExpr()); 842 else 843 return NULL; 844 } 845 846 case Stmt::BinaryOperatorClass: { 847 // Handle pointer arithmetic. All other binary operators are not valid 848 // in this context. 849 BinaryOperator *B = cast<BinaryOperator>(E); 850 BinaryOperator::Opcode op = B->getOpcode(); 851 852 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 853 return NULL; 854 855 Expr *Base = B->getLHS(); 856 857 // Determine which argument is the real pointer base. It could be 858 // the RHS argument instead of the LHS. 859 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 860 861 assert (Base->getType()->isPointerType()); 862 return EvalAddr(Base); 863 } 864 865 // For conditional operators we need to see if either the LHS or RHS are 866 // valid DeclRefExpr*s. If one of them is valid, we return it. 867 case Stmt::ConditionalOperatorClass: { 868 ConditionalOperator *C = cast<ConditionalOperator>(E); 869 870 // Handle the GNU extension for missing LHS. 871 if (Expr *lhsExpr = C->getLHS()) 872 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 873 return LHS; 874 875 return EvalAddr(C->getRHS()); 876 } 877 878 // For casts, we need to handle conversions from arrays to 879 // pointer values, and pointer-to-pointer conversions. 880 case Stmt::ImplicitCastExprClass: 881 case Stmt::CStyleCastExprClass: 882 case Stmt::CXXFunctionalCastExprClass: { 883 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 884 QualType T = SubExpr->getType(); 885 886 if (SubExpr->getType()->isPointerType() || 887 SubExpr->getType()->isBlockPointerType() || 888 SubExpr->getType()->isObjCQualifiedIdType()) 889 return EvalAddr(SubExpr); 890 else if (T->isArrayType()) 891 return EvalVal(SubExpr); 892 else 893 return 0; 894 } 895 896 // C++ casts. For dynamic casts, static casts, and const casts, we 897 // are always converting from a pointer-to-pointer, so we just blow 898 // through the cast. In the case the dynamic cast doesn't fail (and 899 // return NULL), we take the conservative route and report cases 900 // where we return the address of a stack variable. For Reinterpre 901 // FIXME: The comment about is wrong; we're not always converting 902 // from pointer to pointer. I'm guessing that this code should also 903 // handle references to objects. 904 case Stmt::CXXStaticCastExprClass: 905 case Stmt::CXXDynamicCastExprClass: 906 case Stmt::CXXConstCastExprClass: 907 case Stmt::CXXReinterpretCastExprClass: { 908 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 909 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 910 return EvalAddr(S); 911 else 912 return NULL; 913 } 914 915 // Everything else: we simply don't reason about them. 916 default: 917 return NULL; 918 } 919} 920 921 922/// EvalVal - This function is complements EvalAddr in the mutual recursion. 923/// See the comments for EvalAddr for more details. 924static DeclRefExpr* EvalVal(Expr *E) { 925 926 // We should only be called for evaluating non-pointer expressions, or 927 // expressions with a pointer type that are not used as references but instead 928 // are l-values (e.g., DeclRefExpr with a pointer type). 929 930 // Our "symbolic interpreter" is just a dispatch off the currently 931 // viewed AST node. We then recursively traverse the AST by calling 932 // EvalAddr and EvalVal appropriately. 933 switch (E->getStmtClass()) { 934 case Stmt::DeclRefExprClass: 935 case Stmt::QualifiedDeclRefExprClass: { 936 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 937 // at code that refers to a variable's name. We check if it has local 938 // storage within the function, and if so, return the expression. 939 DeclRefExpr *DR = cast<DeclRefExpr>(E); 940 941 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 942 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 943 944 return NULL; 945 } 946 947 case Stmt::ParenExprClass: 948 // Ignore parentheses. 949 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 950 951 case Stmt::UnaryOperatorClass: { 952 // The only unary operator that make sense to handle here 953 // is Deref. All others don't resolve to a "name." This includes 954 // handling all sorts of rvalues passed to a unary operator. 955 UnaryOperator *U = cast<UnaryOperator>(E); 956 957 if (U->getOpcode() == UnaryOperator::Deref) 958 return EvalAddr(U->getSubExpr()); 959 960 return NULL; 961 } 962 963 case Stmt::ArraySubscriptExprClass: { 964 // Array subscripts are potential references to data on the stack. We 965 // retrieve the DeclRefExpr* for the array variable if it indeed 966 // has local storage. 967 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 968 } 969 970 case Stmt::ConditionalOperatorClass: { 971 // For conditional operators we need to see if either the LHS or RHS are 972 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 973 ConditionalOperator *C = cast<ConditionalOperator>(E); 974 975 // Handle the GNU extension for missing LHS. 976 if (Expr *lhsExpr = C->getLHS()) 977 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 978 return LHS; 979 980 return EvalVal(C->getRHS()); 981 } 982 983 // Accesses to members are potential references to data on the stack. 984 case Stmt::MemberExprClass: { 985 MemberExpr *M = cast<MemberExpr>(E); 986 987 // Check for indirect access. We only want direct field accesses. 988 if (!M->isArrow()) 989 return EvalVal(M->getBase()); 990 else 991 return NULL; 992 } 993 994 // Everything else: we simply don't reason about them. 995 default: 996 return NULL; 997 } 998} 999 1000//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 1001 1002/// Check for comparisons of floating point operands using != and ==. 1003/// Issue a warning if these are no self-comparisons, as they are not likely 1004/// to do what the programmer intended. 1005void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1006 bool EmitWarning = true; 1007 1008 Expr* LeftExprSansParen = lex->IgnoreParens(); 1009 Expr* RightExprSansParen = rex->IgnoreParens(); 1010 1011 // Special case: check for x == x (which is OK). 1012 // Do not emit warnings for such cases. 1013 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1014 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1015 if (DRL->getDecl() == DRR->getDecl()) 1016 EmitWarning = false; 1017 1018 1019 // Special case: check for comparisons against literals that can be exactly 1020 // represented by APFloat. In such cases, do not emit a warning. This 1021 // is a heuristic: often comparison against such literals are used to 1022 // detect if a value in a variable has not changed. This clearly can 1023 // lead to false negatives. 1024 if (EmitWarning) { 1025 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1026 if (FLL->isExact()) 1027 EmitWarning = false; 1028 } 1029 else 1030 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1031 if (FLR->isExact()) 1032 EmitWarning = false; 1033 } 1034 } 1035 1036 // Check for comparisons with builtin types. 1037 if (EmitWarning) 1038 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1039 if (isCallBuiltin(CL)) 1040 EmitWarning = false; 1041 1042 if (EmitWarning) 1043 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1044 if (isCallBuiltin(CR)) 1045 EmitWarning = false; 1046 1047 // Emit the diagnostic. 1048 if (EmitWarning) 1049 Diag(loc, diag::warn_floatingpoint_eq) 1050 << lex->getSourceRange() << rex->getSourceRange(); 1051} 1052