SemaChecking.cpp revision 60800081361b0ffc114877b8abbc81cb57b4edf6
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/Preprocessor.h" 21using namespace clang; 22 23/// getLocationOfStringLiteralByte - Return a source location that points to the 24/// specified byte of the specified string literal. 25/// 26/// Strings are amazingly complex. They can be formed from multiple tokens and 27/// can have escape sequences in them in addition to the usual trigraph and 28/// escaped newline business. This routine handles this complexity. 29/// 30SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 31 unsigned ByteNo) const { 32 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 33 34 // Loop over all of the tokens in this string until we find the one that 35 // contains the byte we're looking for. 36 unsigned TokNo = 0; 37 while (1) { 38 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 39 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 40 41 // Get the spelling of the string so that we can get the data that makes up 42 // the string literal, not the identifier for the macro it is potentially 43 // expanded through. 44 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 45 46 // Re-lex the token to get its length and original spelling. 47 std::pair<FileID, unsigned> LocInfo = 48 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 49 std::pair<const char *,const char *> Buffer = 50 SourceMgr.getBufferData(LocInfo.first); 51 const char *StrData = Buffer.first+LocInfo.second; 52 53 // Create a langops struct and enable trigraphs. This is sufficient for 54 // relexing tokens. 55 LangOptions LangOpts; 56 LangOpts.Trigraphs = true; 57 58 // Create a lexer starting at the beginning of this token. 59 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData, 60 Buffer.second); 61 Token TheTok; 62 TheLexer.LexFromRawLexer(TheTok); 63 64 // The length of the string is the token length minus the two quotes. 65 unsigned TokNumBytes = TheTok.getLength()-2; 66 67 // If we found the token we're looking for, return the location. 68 // FIXME: This should consider character escapes! 69 if (ByteNo < TokNumBytes || 70 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 71 // If the original token came from a macro expansion, just return the 72 // start of the token. We don't want to magically jump to the spelling 73 // for a diagnostic. We do the above business in case some tokens come 74 // from a macro expansion but others don't. 75 if (!StrTokLoc.isFileID()) return StrTokLoc; 76 77 // We advance +1 to step over the '"'. 78 return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1); 79 } 80 81 // Move to the next string token. 82 ++TokNo; 83 ByteNo -= TokNumBytes; 84 } 85} 86 87 88/// CheckFunctionCall - Check a direct function call for various correctness 89/// and safety properties not strictly enforced by the C type system. 90Action::OwningExprResult 91Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 92 OwningExprResult TheCallResult(Owned(TheCall)); 93 // Get the IdentifierInfo* for the called function. 94 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 95 96 // None of the checks below are needed for functions that don't have 97 // simple names (e.g., C++ conversion functions). 98 if (!FnInfo) 99 return move(TheCallResult); 100 101 switch (FDecl->getBuiltinID(Context)) { 102 case Builtin::BI__builtin___CFStringMakeConstantString: 103 assert(TheCall->getNumArgs() == 1 && 104 "Wrong # arguments to builtin CFStringMakeConstantString"); 105 if (CheckObjCString(TheCall->getArg(0))) 106 return ExprError(); 107 return move(TheCallResult); 108 case Builtin::BI__builtin_stdarg_start: 109 case Builtin::BI__builtin_va_start: 110 if (SemaBuiltinVAStart(TheCall)) 111 return ExprError(); 112 return move(TheCallResult); 113 case Builtin::BI__builtin_isgreater: 114 case Builtin::BI__builtin_isgreaterequal: 115 case Builtin::BI__builtin_isless: 116 case Builtin::BI__builtin_islessequal: 117 case Builtin::BI__builtin_islessgreater: 118 case Builtin::BI__builtin_isunordered: 119 if (SemaBuiltinUnorderedCompare(TheCall)) 120 return ExprError(); 121 return move(TheCallResult); 122 case Builtin::BI__builtin_return_address: 123 case Builtin::BI__builtin_frame_address: 124 if (SemaBuiltinStackAddress(TheCall)) 125 return ExprError(); 126 return move(TheCallResult); 127 case Builtin::BI__builtin_shufflevector: 128 return SemaBuiltinShuffleVector(TheCall); 129 // TheCall will be freed by the smart pointer here, but that's fine, since 130 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 131 case Builtin::BI__builtin_prefetch: 132 if (SemaBuiltinPrefetch(TheCall)) 133 return ExprError(); 134 return move(TheCallResult); 135 case Builtin::BI__builtin_object_size: 136 if (SemaBuiltinObjectSize(TheCall)) 137 return ExprError(); 138 } 139 140 // FIXME: This mechanism should be abstracted to be less fragile and 141 // more efficient. For example, just map function ids to custom 142 // handlers. 143 144 // Printf checking. 145 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 146 if (Format->getType() == "printf") { 147 bool HasVAListArg = false; 148 if (const FunctionTypeProto *Proto 149 = FDecl->getType()->getAsFunctionTypeProto()) 150 HasVAListArg = !Proto->isVariadic(); 151 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 152 Format->getFirstArg() - 1); 153 } 154 } 155 156 return move(TheCallResult); 157} 158 159/// CheckObjCString - Checks that the argument to the builtin 160/// CFString constructor is correct 161bool Sema::CheckObjCString(Expr *Arg) { 162 Arg = Arg->IgnoreParenCasts(); 163 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 164 165 if (!Literal || Literal->isWide()) { 166 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 167 << Arg->getSourceRange(); 168 return true; 169 } 170 171 const char *Data = Literal->getStrData(); 172 unsigned Length = Literal->getByteLength(); 173 174 for (unsigned i = 0; i < Length; ++i) { 175 if (!isascii(Data[i])) { 176 Diag(getLocationOfStringLiteralByte(Literal, i), 177 diag::warn_cfstring_literal_contains_non_ascii_character) 178 << Arg->getSourceRange(); 179 break; 180 } 181 182 if (!Data[i]) { 183 Diag(getLocationOfStringLiteralByte(Literal, i), 184 diag::warn_cfstring_literal_contains_nul_character) 185 << Arg->getSourceRange(); 186 break; 187 } 188 } 189 190 return false; 191} 192 193/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 194/// Emit an error and return true on failure, return false on success. 195bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 196 Expr *Fn = TheCall->getCallee(); 197 if (TheCall->getNumArgs() > 2) { 198 Diag(TheCall->getArg(2)->getLocStart(), 199 diag::err_typecheck_call_too_many_args) 200 << 0 /*function call*/ << Fn->getSourceRange() 201 << SourceRange(TheCall->getArg(2)->getLocStart(), 202 (*(TheCall->arg_end()-1))->getLocEnd()); 203 return true; 204 } 205 206 if (TheCall->getNumArgs() < 2) { 207 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 208 << 0 /*function call*/; 209 } 210 211 // Determine whether the current function is variadic or not. 212 bool isVariadic; 213 if (getCurFunctionDecl()) { 214 if (FunctionTypeProto* FTP = 215 dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType())) 216 isVariadic = FTP->isVariadic(); 217 else 218 isVariadic = false; 219 } else { 220 isVariadic = getCurMethodDecl()->isVariadic(); 221 } 222 223 if (!isVariadic) { 224 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 225 return true; 226 } 227 228 // Verify that the second argument to the builtin is the last argument of the 229 // current function or method. 230 bool SecondArgIsLastNamedArgument = false; 231 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 232 233 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 234 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 235 // FIXME: This isn't correct for methods (results in bogus warning). 236 // Get the last formal in the current function. 237 const ParmVarDecl *LastArg; 238 if (FunctionDecl *FD = getCurFunctionDecl()) 239 LastArg = *(FD->param_end()-1); 240 else 241 LastArg = *(getCurMethodDecl()->param_end()-1); 242 SecondArgIsLastNamedArgument = PV == LastArg; 243 } 244 } 245 246 if (!SecondArgIsLastNamedArgument) 247 Diag(TheCall->getArg(1)->getLocStart(), 248 diag::warn_second_parameter_of_va_start_not_last_named_argument); 249 return false; 250} 251 252/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 253/// friends. This is declared to take (...), so we have to check everything. 254bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 255 if (TheCall->getNumArgs() < 2) 256 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 257 << 0 /*function call*/; 258 if (TheCall->getNumArgs() > 2) 259 return Diag(TheCall->getArg(2)->getLocStart(), 260 diag::err_typecheck_call_too_many_args) 261 << 0 /*function call*/ 262 << SourceRange(TheCall->getArg(2)->getLocStart(), 263 (*(TheCall->arg_end()-1))->getLocEnd()); 264 265 Expr *OrigArg0 = TheCall->getArg(0); 266 Expr *OrigArg1 = TheCall->getArg(1); 267 268 // Do standard promotions between the two arguments, returning their common 269 // type. 270 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 271 272 // If the common type isn't a real floating type, then the arguments were 273 // invalid for this operation. 274 if (!Res->isRealFloatingType()) 275 return Diag(OrigArg0->getLocStart(), 276 diag::err_typecheck_call_invalid_ordered_compare) 277 << OrigArg0->getType() << OrigArg1->getType() 278 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 279 280 return false; 281} 282 283bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 284 // The signature for these builtins is exact; the only thing we need 285 // to check is that the argument is a constant. 286 SourceLocation Loc; 287 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 288 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 289 290 return false; 291} 292 293/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 294// This is declared to take (...), so we have to check everything. 295Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 296 if (TheCall->getNumArgs() < 3) 297 return ExprError(Diag(TheCall->getLocEnd(), 298 diag::err_typecheck_call_too_few_args) 299 << 0 /*function call*/ << TheCall->getSourceRange()); 300 301 QualType FAType = TheCall->getArg(0)->getType(); 302 QualType SAType = TheCall->getArg(1)->getType(); 303 304 if (!FAType->isVectorType() || !SAType->isVectorType()) { 305 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 306 << SourceRange(TheCall->getArg(0)->getLocStart(), 307 TheCall->getArg(1)->getLocEnd()); 308 return ExprError(); 309 } 310 311 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 312 Context.getCanonicalType(SAType).getUnqualifiedType()) { 313 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 314 << SourceRange(TheCall->getArg(0)->getLocStart(), 315 TheCall->getArg(1)->getLocEnd()); 316 return ExprError(); 317 } 318 319 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 320 if (TheCall->getNumArgs() != numElements+2) { 321 if (TheCall->getNumArgs() < numElements+2) 322 return ExprError(Diag(TheCall->getLocEnd(), 323 diag::err_typecheck_call_too_few_args) 324 << 0 /*function call*/ << TheCall->getSourceRange()); 325 return ExprError(Diag(TheCall->getLocEnd(), 326 diag::err_typecheck_call_too_many_args) 327 << 0 /*function call*/ << TheCall->getSourceRange()); 328 } 329 330 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 331 llvm::APSInt Result(32); 332 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 333 return ExprError(Diag(TheCall->getLocStart(), 334 diag::err_shufflevector_nonconstant_argument) 335 << TheCall->getArg(i)->getSourceRange()); 336 337 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 338 return ExprError(Diag(TheCall->getLocStart(), 339 diag::err_shufflevector_argument_too_large) 340 << TheCall->getArg(i)->getSourceRange()); 341 } 342 343 llvm::SmallVector<Expr*, 32> exprs; 344 345 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 346 exprs.push_back(TheCall->getArg(i)); 347 TheCall->setArg(i, 0); 348 } 349 350 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2, 351 FAType, 352 TheCall->getCallee()->getLocStart(), 353 TheCall->getRParenLoc())); 354} 355 356/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 357// This is declared to take (const void*, ...) and can take two 358// optional constant int args. 359bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 360 unsigned NumArgs = TheCall->getNumArgs(); 361 362 if (NumArgs > 3) 363 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 364 << 0 /*function call*/ << TheCall->getSourceRange(); 365 366 // Argument 0 is checked for us and the remaining arguments must be 367 // constant integers. 368 for (unsigned i = 1; i != NumArgs; ++i) { 369 Expr *Arg = TheCall->getArg(i); 370 QualType RWType = Arg->getType(); 371 372 const BuiltinType *BT = RWType->getAsBuiltinType(); 373 llvm::APSInt Result; 374 if (!BT || BT->getKind() != BuiltinType::Int || 375 !Arg->isIntegerConstantExpr(Result, Context)) 376 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 377 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 378 379 // FIXME: gcc issues a warning and rewrites these to 0. These 380 // seems especially odd for the third argument since the default 381 // is 3. 382 if (i == 1) { 383 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 384 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 385 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 386 } else { 387 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 388 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 389 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 390 } 391 } 392 393 return false; 394} 395 396/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 397/// int type). This simply type checks that type is one of the defined 398/// constants (0-3). 399bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 400 Expr *Arg = TheCall->getArg(1); 401 QualType ArgType = Arg->getType(); 402 const BuiltinType *BT = ArgType->getAsBuiltinType(); 403 llvm::APSInt Result(32); 404 if (!BT || BT->getKind() != BuiltinType::Int || 405 !Arg->isIntegerConstantExpr(Result, Context)) { 406 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 407 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 408 } 409 410 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 411 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 412 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 413 } 414 415 return false; 416} 417 418// Handle i > 1 ? "x" : "y", recursivelly 419bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg, 420 unsigned format_idx, unsigned firstDataArg) { 421 422 switch (E->getStmtClass()) { 423 case Stmt::ConditionalOperatorClass: { 424 ConditionalOperator *C = cast<ConditionalOperator>(E); 425 return SemaCheckStringLiteral(C->getLHS(), TheCall, 426 HasVAListArg, format_idx, firstDataArg) 427 && SemaCheckStringLiteral(C->getRHS(), TheCall, 428 HasVAListArg, format_idx, firstDataArg); 429 } 430 431 case Stmt::ImplicitCastExprClass: { 432 ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E); 433 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 434 format_idx, firstDataArg); 435 } 436 437 case Stmt::ParenExprClass: { 438 ParenExpr *Expr = dyn_cast<ParenExpr>(E); 439 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 440 format_idx, firstDataArg); 441 } 442 443 default: { 444 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E); 445 StringLiteral *StrE = NULL; 446 447 if (ObjCFExpr) 448 StrE = ObjCFExpr->getString(); 449 else 450 StrE = dyn_cast<StringLiteral>(E); 451 452 if (StrE) { 453 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx, 454 firstDataArg); 455 return true; 456 } 457 458 return false; 459 } 460 } 461} 462 463 464/// CheckPrintfArguments - Check calls to printf (and similar functions) for 465/// correct use of format strings. 466/// 467/// HasVAListArg - A predicate indicating whether the printf-like 468/// function is passed an explicit va_arg argument (e.g., vprintf) 469/// 470/// format_idx - The index into Args for the format string. 471/// 472/// Improper format strings to functions in the printf family can be 473/// the source of bizarre bugs and very serious security holes. A 474/// good source of information is available in the following paper 475/// (which includes additional references): 476/// 477/// FormatGuard: Automatic Protection From printf Format String 478/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 479/// 480/// Functionality implemented: 481/// 482/// We can statically check the following properties for string 483/// literal format strings for non v.*printf functions (where the 484/// arguments are passed directly): 485// 486/// (1) Are the number of format conversions equal to the number of 487/// data arguments? 488/// 489/// (2) Does each format conversion correctly match the type of the 490/// corresponding data argument? (TODO) 491/// 492/// Moreover, for all printf functions we can: 493/// 494/// (3) Check for a missing format string (when not caught by type checking). 495/// 496/// (4) Check for no-operation flags; e.g. using "#" with format 497/// conversion 'c' (TODO) 498/// 499/// (5) Check the use of '%n', a major source of security holes. 500/// 501/// (6) Check for malformed format conversions that don't specify anything. 502/// 503/// (7) Check for empty format strings. e.g: printf(""); 504/// 505/// (8) Check that the format string is a wide literal. 506/// 507/// (9) Also check the arguments of functions with the __format__ attribute. 508/// (TODO). 509/// 510/// All of these checks can be done by parsing the format string. 511/// 512/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 513void 514Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg, 515 unsigned format_idx, unsigned firstDataArg) { 516 Expr *Fn = TheCall->getCallee(); 517 518 // CHECK: printf-like function is called with no format string. 519 if (format_idx >= TheCall->getNumArgs()) { 520 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 521 << Fn->getSourceRange(); 522 return; 523 } 524 525 Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 526 527 // CHECK: format string is not a string literal. 528 // 529 // Dynamically generated format strings are difficult to 530 // automatically vet at compile time. Requiring that format strings 531 // are string literals: (1) permits the checking of format strings by 532 // the compiler and thereby (2) can practically remove the source of 533 // many format string exploits. 534 535 // Format string can be either ObjC string (e.g. @"%d") or 536 // C string (e.g. "%d") 537 // ObjC string uses the same format specifiers as C string, so we can use 538 // the same format string checking logic for both ObjC and C strings. 539 bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall, 540 HasVAListArg, format_idx, 541 firstDataArg); 542 543 if (!isFExpr) { 544 // For vprintf* functions (i.e., HasVAListArg==true), we add a 545 // special check to see if the format string is a function parameter 546 // of the function calling the printf function. If the function 547 // has an attribute indicating it is a printf-like function, then we 548 // should suppress warnings concerning non-literals being used in a call 549 // to a vprintf function. For example: 550 // 551 // void 552 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 553 // va_list ap; 554 // va_start(ap, fmt); 555 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 556 // ... 557 // 558 // 559 // FIXME: We don't have full attribute support yet, so just check to see 560 // if the argument is a DeclRefExpr that references a parameter. We'll 561 // add proper support for checking the attribute later. 562 if (HasVAListArg) 563 if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 564 if (isa<ParmVarDecl>(DR->getDecl())) 565 return; 566 567 Diag(TheCall->getArg(format_idx)->getLocStart(), 568 diag::warn_printf_not_string_constant) 569 << OrigFormatExpr->getSourceRange(); 570 return; 571 } 572} 573 574void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr, 575 CallExpr *TheCall, bool HasVAListArg, unsigned format_idx, 576 unsigned firstDataArg) { 577 578 ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 579 // CHECK: is the format string a wide literal? 580 if (FExpr->isWide()) { 581 Diag(FExpr->getLocStart(), 582 diag::warn_printf_format_string_is_wide_literal) 583 << OrigFormatExpr->getSourceRange(); 584 return; 585 } 586 587 // Str - The format string. NOTE: this is NOT null-terminated! 588 const char * const Str = FExpr->getStrData(); 589 590 // CHECK: empty format string? 591 const unsigned StrLen = FExpr->getByteLength(); 592 593 if (StrLen == 0) { 594 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 595 << OrigFormatExpr->getSourceRange(); 596 return; 597 } 598 599 // We process the format string using a binary state machine. The 600 // current state is stored in CurrentState. 601 enum { 602 state_OrdChr, 603 state_Conversion 604 } CurrentState = state_OrdChr; 605 606 // numConversions - The number of conversions seen so far. This is 607 // incremented as we traverse the format string. 608 unsigned numConversions = 0; 609 610 // numDataArgs - The number of data arguments after the format 611 // string. This can only be determined for non vprintf-like 612 // functions. For those functions, this value is 1 (the sole 613 // va_arg argument). 614 unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg; 615 616 // Inspect the format string. 617 unsigned StrIdx = 0; 618 619 // LastConversionIdx - Index within the format string where we last saw 620 // a '%' character that starts a new format conversion. 621 unsigned LastConversionIdx = 0; 622 623 for (; StrIdx < StrLen; ++StrIdx) { 624 625 // Is the number of detected conversion conversions greater than 626 // the number of matching data arguments? If so, stop. 627 if (!HasVAListArg && numConversions > numDataArgs) break; 628 629 // Handle "\0" 630 if (Str[StrIdx] == '\0') { 631 // The string returned by getStrData() is not null-terminated, 632 // so the presence of a null character is likely an error. 633 Diag(getLocationOfStringLiteralByte(FExpr, StrIdx), 634 diag::warn_printf_format_string_contains_null_char) 635 << OrigFormatExpr->getSourceRange(); 636 return; 637 } 638 639 // Ordinary characters (not processing a format conversion). 640 if (CurrentState == state_OrdChr) { 641 if (Str[StrIdx] == '%') { 642 CurrentState = state_Conversion; 643 LastConversionIdx = StrIdx; 644 } 645 continue; 646 } 647 648 // Seen '%'. Now processing a format conversion. 649 switch (Str[StrIdx]) { 650 // Handle dynamic precision or width specifier. 651 case '*': { 652 ++numConversions; 653 654 if (!HasVAListArg && numConversions > numDataArgs) { 655 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 656 657 if (Str[StrIdx-1] == '.') 658 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 659 << OrigFormatExpr->getSourceRange(); 660 else 661 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 662 << OrigFormatExpr->getSourceRange(); 663 664 // Don't do any more checking. We'll just emit spurious errors. 665 return; 666 } 667 668 // Perform type checking on width/precision specifier. 669 Expr *E = TheCall->getArg(format_idx+numConversions); 670 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 671 if (BT->getKind() == BuiltinType::Int) 672 break; 673 674 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 675 676 if (Str[StrIdx-1] == '.') 677 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 678 << E->getType() << E->getSourceRange(); 679 else 680 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 681 << E->getType() << E->getSourceRange(); 682 683 break; 684 } 685 686 // Characters which can terminate a format conversion 687 // (e.g. "%d"). Characters that specify length modifiers or 688 // other flags are handled by the default case below. 689 // 690 // FIXME: additional checks will go into the following cases. 691 case 'i': 692 case 'd': 693 case 'o': 694 case 'u': 695 case 'x': 696 case 'X': 697 case 'D': 698 case 'O': 699 case 'U': 700 case 'e': 701 case 'E': 702 case 'f': 703 case 'F': 704 case 'g': 705 case 'G': 706 case 'a': 707 case 'A': 708 case 'c': 709 case 'C': 710 case 'S': 711 case 's': 712 case 'p': 713 ++numConversions; 714 CurrentState = state_OrdChr; 715 break; 716 717 // CHECK: Are we using "%n"? Issue a warning. 718 case 'n': { 719 ++numConversions; 720 CurrentState = state_OrdChr; 721 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, 722 LastConversionIdx); 723 724 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 725 break; 726 } 727 728 // Handle "%@" 729 case '@': 730 // %@ is allowed in ObjC format strings only. 731 if(ObjCFExpr != NULL) 732 CurrentState = state_OrdChr; 733 else { 734 // Issue a warning: invalid format conversion. 735 SourceLocation Loc = 736 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 737 738 Diag(Loc, diag::warn_printf_invalid_conversion) 739 << std::string(Str+LastConversionIdx, 740 Str+std::min(LastConversionIdx+2, StrLen)) 741 << OrigFormatExpr->getSourceRange(); 742 } 743 ++numConversions; 744 break; 745 746 // Handle "%%" 747 case '%': 748 // Sanity check: Was the first "%" character the previous one? 749 // If not, we will assume that we have a malformed format 750 // conversion, and that the current "%" character is the start 751 // of a new conversion. 752 if (StrIdx - LastConversionIdx == 1) 753 CurrentState = state_OrdChr; 754 else { 755 // Issue a warning: invalid format conversion. 756 SourceLocation Loc = 757 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 758 759 Diag(Loc, diag::warn_printf_invalid_conversion) 760 << std::string(Str+LastConversionIdx, Str+StrIdx) 761 << OrigFormatExpr->getSourceRange(); 762 763 // This conversion is broken. Advance to the next format 764 // conversion. 765 LastConversionIdx = StrIdx; 766 ++numConversions; 767 } 768 break; 769 770 default: 771 // This case catches all other characters: flags, widths, etc. 772 // We should eventually process those as well. 773 break; 774 } 775 } 776 777 if (CurrentState == state_Conversion) { 778 // Issue a warning: invalid format conversion. 779 SourceLocation Loc = 780 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 781 782 Diag(Loc, diag::warn_printf_invalid_conversion) 783 << std::string(Str+LastConversionIdx, 784 Str+std::min(LastConversionIdx+2, StrLen)) 785 << OrigFormatExpr->getSourceRange(); 786 return; 787 } 788 789 if (!HasVAListArg) { 790 // CHECK: Does the number of format conversions exceed the number 791 // of data arguments? 792 if (numConversions > numDataArgs) { 793 SourceLocation Loc = 794 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 795 796 Diag(Loc, diag::warn_printf_insufficient_data_args) 797 << OrigFormatExpr->getSourceRange(); 798 } 799 // CHECK: Does the number of data arguments exceed the number of 800 // format conversions in the format string? 801 else if (numConversions < numDataArgs) 802 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 803 diag::warn_printf_too_many_data_args) 804 << OrigFormatExpr->getSourceRange(); 805 } 806} 807 808//===--- CHECK: Return Address of Stack Variable --------------------------===// 809 810static DeclRefExpr* EvalVal(Expr *E); 811static DeclRefExpr* EvalAddr(Expr* E); 812 813/// CheckReturnStackAddr - Check if a return statement returns the address 814/// of a stack variable. 815void 816Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 817 SourceLocation ReturnLoc) { 818 819 // Perform checking for returned stack addresses. 820 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 821 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 822 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 823 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 824 825 // Skip over implicit cast expressions when checking for block expressions. 826 if (ImplicitCastExpr *IcExpr = 827 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 828 RetValExp = IcExpr->getSubExpr(); 829 830 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 831 Diag(C->getLocStart(), diag::err_ret_local_block) 832 << C->getSourceRange(); 833 } 834 // Perform checking for stack values returned by reference. 835 else if (lhsType->isReferenceType()) { 836 // Check for a reference to the stack 837 if (DeclRefExpr *DR = EvalVal(RetValExp)) 838 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 839 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 840 } 841} 842 843/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 844/// check if the expression in a return statement evaluates to an address 845/// to a location on the stack. The recursion is used to traverse the 846/// AST of the return expression, with recursion backtracking when we 847/// encounter a subexpression that (1) clearly does not lead to the address 848/// of a stack variable or (2) is something we cannot determine leads to 849/// the address of a stack variable based on such local checking. 850/// 851/// EvalAddr processes expressions that are pointers that are used as 852/// references (and not L-values). EvalVal handles all other values. 853/// At the base case of the recursion is a check for a DeclRefExpr* in 854/// the refers to a stack variable. 855/// 856/// This implementation handles: 857/// 858/// * pointer-to-pointer casts 859/// * implicit conversions from array references to pointers 860/// * taking the address of fields 861/// * arbitrary interplay between "&" and "*" operators 862/// * pointer arithmetic from an address of a stack variable 863/// * taking the address of an array element where the array is on the stack 864static DeclRefExpr* EvalAddr(Expr *E) { 865 // We should only be called for evaluating pointer expressions. 866 assert((E->getType()->isPointerType() || 867 E->getType()->isBlockPointerType() || 868 E->getType()->isObjCQualifiedIdType()) && 869 "EvalAddr only works on pointers"); 870 871 // Our "symbolic interpreter" is just a dispatch off the currently 872 // viewed AST node. We then recursively traverse the AST by calling 873 // EvalAddr and EvalVal appropriately. 874 switch (E->getStmtClass()) { 875 case Stmt::ParenExprClass: 876 // Ignore parentheses. 877 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 878 879 case Stmt::UnaryOperatorClass: { 880 // The only unary operator that make sense to handle here 881 // is AddrOf. All others don't make sense as pointers. 882 UnaryOperator *U = cast<UnaryOperator>(E); 883 884 if (U->getOpcode() == UnaryOperator::AddrOf) 885 return EvalVal(U->getSubExpr()); 886 else 887 return NULL; 888 } 889 890 case Stmt::BinaryOperatorClass: { 891 // Handle pointer arithmetic. All other binary operators are not valid 892 // in this context. 893 BinaryOperator *B = cast<BinaryOperator>(E); 894 BinaryOperator::Opcode op = B->getOpcode(); 895 896 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 897 return NULL; 898 899 Expr *Base = B->getLHS(); 900 901 // Determine which argument is the real pointer base. It could be 902 // the RHS argument instead of the LHS. 903 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 904 905 assert (Base->getType()->isPointerType()); 906 return EvalAddr(Base); 907 } 908 909 // For conditional operators we need to see if either the LHS or RHS are 910 // valid DeclRefExpr*s. If one of them is valid, we return it. 911 case Stmt::ConditionalOperatorClass: { 912 ConditionalOperator *C = cast<ConditionalOperator>(E); 913 914 // Handle the GNU extension for missing LHS. 915 if (Expr *lhsExpr = C->getLHS()) 916 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 917 return LHS; 918 919 return EvalAddr(C->getRHS()); 920 } 921 922 // For casts, we need to handle conversions from arrays to 923 // pointer values, and pointer-to-pointer conversions. 924 case Stmt::ImplicitCastExprClass: 925 case Stmt::CStyleCastExprClass: 926 case Stmt::CXXFunctionalCastExprClass: { 927 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 928 QualType T = SubExpr->getType(); 929 930 if (SubExpr->getType()->isPointerType() || 931 SubExpr->getType()->isBlockPointerType() || 932 SubExpr->getType()->isObjCQualifiedIdType()) 933 return EvalAddr(SubExpr); 934 else if (T->isArrayType()) 935 return EvalVal(SubExpr); 936 else 937 return 0; 938 } 939 940 // C++ casts. For dynamic casts, static casts, and const casts, we 941 // are always converting from a pointer-to-pointer, so we just blow 942 // through the cast. In the case the dynamic cast doesn't fail (and 943 // return NULL), we take the conservative route and report cases 944 // where we return the address of a stack variable. For Reinterpre 945 // FIXME: The comment about is wrong; we're not always converting 946 // from pointer to pointer. I'm guessing that this code should also 947 // handle references to objects. 948 case Stmt::CXXStaticCastExprClass: 949 case Stmt::CXXDynamicCastExprClass: 950 case Stmt::CXXConstCastExprClass: 951 case Stmt::CXXReinterpretCastExprClass: { 952 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 953 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 954 return EvalAddr(S); 955 else 956 return NULL; 957 } 958 959 // Everything else: we simply don't reason about them. 960 default: 961 return NULL; 962 } 963} 964 965 966/// EvalVal - This function is complements EvalAddr in the mutual recursion. 967/// See the comments for EvalAddr for more details. 968static DeclRefExpr* EvalVal(Expr *E) { 969 970 // We should only be called for evaluating non-pointer expressions, or 971 // expressions with a pointer type that are not used as references but instead 972 // are l-values (e.g., DeclRefExpr with a pointer type). 973 974 // Our "symbolic interpreter" is just a dispatch off the currently 975 // viewed AST node. We then recursively traverse the AST by calling 976 // EvalAddr and EvalVal appropriately. 977 switch (E->getStmtClass()) { 978 case Stmt::DeclRefExprClass: 979 case Stmt::QualifiedDeclRefExprClass: { 980 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 981 // at code that refers to a variable's name. We check if it has local 982 // storage within the function, and if so, return the expression. 983 DeclRefExpr *DR = cast<DeclRefExpr>(E); 984 985 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 986 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 987 988 return NULL; 989 } 990 991 case Stmt::ParenExprClass: 992 // Ignore parentheses. 993 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 994 995 case Stmt::UnaryOperatorClass: { 996 // The only unary operator that make sense to handle here 997 // is Deref. All others don't resolve to a "name." This includes 998 // handling all sorts of rvalues passed to a unary operator. 999 UnaryOperator *U = cast<UnaryOperator>(E); 1000 1001 if (U->getOpcode() == UnaryOperator::Deref) 1002 return EvalAddr(U->getSubExpr()); 1003 1004 return NULL; 1005 } 1006 1007 case Stmt::ArraySubscriptExprClass: { 1008 // Array subscripts are potential references to data on the stack. We 1009 // retrieve the DeclRefExpr* for the array variable if it indeed 1010 // has local storage. 1011 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 1012 } 1013 1014 case Stmt::ConditionalOperatorClass: { 1015 // For conditional operators we need to see if either the LHS or RHS are 1016 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 1017 ConditionalOperator *C = cast<ConditionalOperator>(E); 1018 1019 // Handle the GNU extension for missing LHS. 1020 if (Expr *lhsExpr = C->getLHS()) 1021 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 1022 return LHS; 1023 1024 return EvalVal(C->getRHS()); 1025 } 1026 1027 // Accesses to members are potential references to data on the stack. 1028 case Stmt::MemberExprClass: { 1029 MemberExpr *M = cast<MemberExpr>(E); 1030 1031 // Check for indirect access. We only want direct field accesses. 1032 if (!M->isArrow()) 1033 return EvalVal(M->getBase()); 1034 else 1035 return NULL; 1036 } 1037 1038 // Everything else: we simply don't reason about them. 1039 default: 1040 return NULL; 1041 } 1042} 1043 1044//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 1045 1046/// Check for comparisons of floating point operands using != and ==. 1047/// Issue a warning if these are no self-comparisons, as they are not likely 1048/// to do what the programmer intended. 1049void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1050 bool EmitWarning = true; 1051 1052 Expr* LeftExprSansParen = lex->IgnoreParens(); 1053 Expr* RightExprSansParen = rex->IgnoreParens(); 1054 1055 // Special case: check for x == x (which is OK). 1056 // Do not emit warnings for such cases. 1057 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1058 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1059 if (DRL->getDecl() == DRR->getDecl()) 1060 EmitWarning = false; 1061 1062 1063 // Special case: check for comparisons against literals that can be exactly 1064 // represented by APFloat. In such cases, do not emit a warning. This 1065 // is a heuristic: often comparison against such literals are used to 1066 // detect if a value in a variable has not changed. This clearly can 1067 // lead to false negatives. 1068 if (EmitWarning) { 1069 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1070 if (FLL->isExact()) 1071 EmitWarning = false; 1072 } 1073 else 1074 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1075 if (FLR->isExact()) 1076 EmitWarning = false; 1077 } 1078 } 1079 1080 // Check for comparisons with builtin types. 1081 if (EmitWarning) 1082 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1083 if (CL->isBuiltinCall(Context)) 1084 EmitWarning = false; 1085 1086 if (EmitWarning) 1087 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1088 if (CR->isBuiltinCall(Context)) 1089 EmitWarning = false; 1090 1091 // Emit the diagnostic. 1092 if (EmitWarning) 1093 Diag(loc, diag::warn_floatingpoint_eq) 1094 << lex->getSourceRange() << rex->getSourceRange(); 1095} 1096