SemaChecking.cpp revision b59212a6e494d2c364b54462f545833902c29158
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/LiteralSupport.h" 21#include "clang/Lex/Preprocessor.h" 22using namespace clang; 23 24/// getLocationOfStringLiteralByte - Return a source location that points to the 25/// specified byte of the specified string literal. 26/// 27/// Strings are amazingly complex. They can be formed from multiple tokens and 28/// can have escape sequences in them in addition to the usual trigraph and 29/// escaped newline business. This routine handles this complexity. 30/// 31SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 32 unsigned ByteNo) const { 33 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 34 35 // Loop over all of the tokens in this string until we find the one that 36 // contains the byte we're looking for. 37 unsigned TokNo = 0; 38 while (1) { 39 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 40 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 41 42 // Get the spelling of the string so that we can get the data that makes up 43 // the string literal, not the identifier for the macro it is potentially 44 // expanded through. 45 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 46 47 // Re-lex the token to get its length and original spelling. 48 std::pair<FileID, unsigned> LocInfo = 49 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 50 std::pair<const char *,const char *> Buffer = 51 SourceMgr.getBufferData(LocInfo.first); 52 const char *StrData = Buffer.first+LocInfo.second; 53 54 // Create a langops struct and enable trigraphs. This is sufficient for 55 // relexing tokens. 56 LangOptions LangOpts; 57 LangOpts.Trigraphs = true; 58 59 // Create a lexer starting at the beginning of this token. 60 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData, 61 Buffer.second); 62 Token TheTok; 63 TheLexer.LexFromRawLexer(TheTok); 64 65 // Use the StringLiteralParser to compute the length of the string in bytes. 66 StringLiteralParser SLP(&TheTok, 1, PP); 67 unsigned TokNumBytes = SLP.GetStringLength(); 68 69 // If the byte is in this token, return the location of the byte. 70 if (ByteNo < TokNumBytes || 71 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 72 unsigned Offset = 73 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP); 74 75 // Now that we know the offset of the token in the spelling, use the 76 // preprocessor to get the offset in the original source. 77 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 78 } 79 80 // Move to the next string token. 81 ++TokNo; 82 ByteNo -= TokNumBytes; 83 } 84} 85 86 87/// CheckFunctionCall - Check a direct function call for various correctness 88/// and safety properties not strictly enforced by the C type system. 89Action::OwningExprResult 90Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 91 OwningExprResult TheCallResult(Owned(TheCall)); 92 // Get the IdentifierInfo* for the called function. 93 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 94 95 // None of the checks below are needed for functions that don't have 96 // simple names (e.g., C++ conversion functions). 97 if (!FnInfo) 98 return move(TheCallResult); 99 100 switch (FDecl->getBuiltinID(Context)) { 101 case Builtin::BI__builtin___CFStringMakeConstantString: 102 assert(TheCall->getNumArgs() == 1 && 103 "Wrong # arguments to builtin CFStringMakeConstantString"); 104 if (CheckObjCString(TheCall->getArg(0))) 105 return ExprError(); 106 return move(TheCallResult); 107 case Builtin::BI__builtin_stdarg_start: 108 case Builtin::BI__builtin_va_start: 109 if (SemaBuiltinVAStart(TheCall)) 110 return ExprError(); 111 return move(TheCallResult); 112 case Builtin::BI__builtin_isgreater: 113 case Builtin::BI__builtin_isgreaterequal: 114 case Builtin::BI__builtin_isless: 115 case Builtin::BI__builtin_islessequal: 116 case Builtin::BI__builtin_islessgreater: 117 case Builtin::BI__builtin_isunordered: 118 if (SemaBuiltinUnorderedCompare(TheCall)) 119 return ExprError(); 120 return move(TheCallResult); 121 case Builtin::BI__builtin_return_address: 122 case Builtin::BI__builtin_frame_address: 123 if (SemaBuiltinStackAddress(TheCall)) 124 return ExprError(); 125 return move(TheCallResult); 126 case Builtin::BI__builtin_shufflevector: 127 return SemaBuiltinShuffleVector(TheCall); 128 // TheCall will be freed by the smart pointer here, but that's fine, since 129 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 130 case Builtin::BI__builtin_prefetch: 131 if (SemaBuiltinPrefetch(TheCall)) 132 return ExprError(); 133 return move(TheCallResult); 134 case Builtin::BI__builtin_object_size: 135 if (SemaBuiltinObjectSize(TheCall)) 136 return ExprError(); 137 } 138 139 // FIXME: This mechanism should be abstracted to be less fragile and 140 // more efficient. For example, just map function ids to custom 141 // handlers. 142 143 // Printf checking. 144 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 145 if (Format->getType() == "printf") { 146 bool HasVAListArg = Format->getFirstArg() == 0; 147 if (!HasVAListArg) { 148 if (const FunctionProtoType *Proto 149 = FDecl->getType()->getAsFunctionProtoType()) 150 HasVAListArg = !Proto->isVariadic(); 151 } 152 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 153 HasVAListArg ? 0 : Format->getFirstArg() - 1); 154 } 155 } 156 157 return move(TheCallResult); 158} 159 160/// CheckObjCString - Checks that the argument to the builtin 161/// CFString constructor is correct 162bool Sema::CheckObjCString(Expr *Arg) { 163 Arg = Arg->IgnoreParenCasts(); 164 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 165 166 if (!Literal || Literal->isWide()) { 167 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 168 << Arg->getSourceRange(); 169 return true; 170 } 171 172 const char *Data = Literal->getStrData(); 173 unsigned Length = Literal->getByteLength(); 174 175 for (unsigned i = 0; i < Length; ++i) { 176 if (!Data[i]) { 177 Diag(getLocationOfStringLiteralByte(Literal, i), 178 diag::warn_cfstring_literal_contains_nul_character) 179 << Arg->getSourceRange(); 180 break; 181 } 182 } 183 184 return false; 185} 186 187/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 188/// Emit an error and return true on failure, return false on success. 189bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 190 Expr *Fn = TheCall->getCallee(); 191 if (TheCall->getNumArgs() > 2) { 192 Diag(TheCall->getArg(2)->getLocStart(), 193 diag::err_typecheck_call_too_many_args) 194 << 0 /*function call*/ << Fn->getSourceRange() 195 << SourceRange(TheCall->getArg(2)->getLocStart(), 196 (*(TheCall->arg_end()-1))->getLocEnd()); 197 return true; 198 } 199 200 if (TheCall->getNumArgs() < 2) { 201 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 202 << 0 /*function call*/; 203 } 204 205 // Determine whether the current function is variadic or not. 206 bool isVariadic; 207 if (getCurFunctionDecl()) { 208 if (FunctionProtoType* FTP = 209 dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType())) 210 isVariadic = FTP->isVariadic(); 211 else 212 isVariadic = false; 213 } else { 214 isVariadic = getCurMethodDecl()->isVariadic(); 215 } 216 217 if (!isVariadic) { 218 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 219 return true; 220 } 221 222 // Verify that the second argument to the builtin is the last argument of the 223 // current function or method. 224 bool SecondArgIsLastNamedArgument = false; 225 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 226 227 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 228 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 229 // FIXME: This isn't correct for methods (results in bogus warning). 230 // Get the last formal in the current function. 231 const ParmVarDecl *LastArg; 232 if (FunctionDecl *FD = getCurFunctionDecl()) 233 LastArg = *(FD->param_end()-1); 234 else 235 LastArg = *(getCurMethodDecl()->param_end()-1); 236 SecondArgIsLastNamedArgument = PV == LastArg; 237 } 238 } 239 240 if (!SecondArgIsLastNamedArgument) 241 Diag(TheCall->getArg(1)->getLocStart(), 242 diag::warn_second_parameter_of_va_start_not_last_named_argument); 243 return false; 244} 245 246/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 247/// friends. This is declared to take (...), so we have to check everything. 248bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 249 if (TheCall->getNumArgs() < 2) 250 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 251 << 0 /*function call*/; 252 if (TheCall->getNumArgs() > 2) 253 return Diag(TheCall->getArg(2)->getLocStart(), 254 diag::err_typecheck_call_too_many_args) 255 << 0 /*function call*/ 256 << SourceRange(TheCall->getArg(2)->getLocStart(), 257 (*(TheCall->arg_end()-1))->getLocEnd()); 258 259 Expr *OrigArg0 = TheCall->getArg(0); 260 Expr *OrigArg1 = TheCall->getArg(1); 261 262 // Do standard promotions between the two arguments, returning their common 263 // type. 264 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 265 266 // Make sure any conversions are pushed back into the call; this is 267 // type safe since unordered compare builtins are declared as "_Bool 268 // foo(...)". 269 TheCall->setArg(0, OrigArg0); 270 TheCall->setArg(1, OrigArg1); 271 272 // If the common type isn't a real floating type, then the arguments were 273 // invalid for this operation. 274 if (!Res->isRealFloatingType()) 275 return Diag(OrigArg0->getLocStart(), 276 diag::err_typecheck_call_invalid_ordered_compare) 277 << OrigArg0->getType() << OrigArg1->getType() 278 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 279 280 return false; 281} 282 283bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 284 // The signature for these builtins is exact; the only thing we need 285 // to check is that the argument is a constant. 286 SourceLocation Loc; 287 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 288 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 289 290 return false; 291} 292 293/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 294// This is declared to take (...), so we have to check everything. 295Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 296 if (TheCall->getNumArgs() < 3) 297 return ExprError(Diag(TheCall->getLocEnd(), 298 diag::err_typecheck_call_too_few_args) 299 << 0 /*function call*/ << TheCall->getSourceRange()); 300 301 QualType FAType = TheCall->getArg(0)->getType(); 302 QualType SAType = TheCall->getArg(1)->getType(); 303 304 if (!FAType->isVectorType() || !SAType->isVectorType()) { 305 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 306 << SourceRange(TheCall->getArg(0)->getLocStart(), 307 TheCall->getArg(1)->getLocEnd()); 308 return ExprError(); 309 } 310 311 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 312 Context.getCanonicalType(SAType).getUnqualifiedType()) { 313 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 314 << SourceRange(TheCall->getArg(0)->getLocStart(), 315 TheCall->getArg(1)->getLocEnd()); 316 return ExprError(); 317 } 318 319 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 320 if (TheCall->getNumArgs() != numElements+2) { 321 if (TheCall->getNumArgs() < numElements+2) 322 return ExprError(Diag(TheCall->getLocEnd(), 323 diag::err_typecheck_call_too_few_args) 324 << 0 /*function call*/ << TheCall->getSourceRange()); 325 return ExprError(Diag(TheCall->getLocEnd(), 326 diag::err_typecheck_call_too_many_args) 327 << 0 /*function call*/ << TheCall->getSourceRange()); 328 } 329 330 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 331 llvm::APSInt Result(32); 332 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 333 return ExprError(Diag(TheCall->getLocStart(), 334 diag::err_shufflevector_nonconstant_argument) 335 << TheCall->getArg(i)->getSourceRange()); 336 337 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 338 return ExprError(Diag(TheCall->getLocStart(), 339 diag::err_shufflevector_argument_too_large) 340 << TheCall->getArg(i)->getSourceRange()); 341 } 342 343 llvm::SmallVector<Expr*, 32> exprs; 344 345 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 346 exprs.push_back(TheCall->getArg(i)); 347 TheCall->setArg(i, 0); 348 } 349 350 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2, 351 FAType, 352 TheCall->getCallee()->getLocStart(), 353 TheCall->getRParenLoc())); 354} 355 356/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 357// This is declared to take (const void*, ...) and can take two 358// optional constant int args. 359bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 360 unsigned NumArgs = TheCall->getNumArgs(); 361 362 if (NumArgs > 3) 363 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 364 << 0 /*function call*/ << TheCall->getSourceRange(); 365 366 // Argument 0 is checked for us and the remaining arguments must be 367 // constant integers. 368 for (unsigned i = 1; i != NumArgs; ++i) { 369 Expr *Arg = TheCall->getArg(i); 370 QualType RWType = Arg->getType(); 371 372 const BuiltinType *BT = RWType->getAsBuiltinType(); 373 llvm::APSInt Result; 374 if (!BT || BT->getKind() != BuiltinType::Int || 375 !Arg->isIntegerConstantExpr(Result, Context)) 376 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 377 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 378 379 // FIXME: gcc issues a warning and rewrites these to 0. These 380 // seems especially odd for the third argument since the default 381 // is 3. 382 if (i == 1) { 383 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 384 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 385 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 386 } else { 387 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 388 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 389 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 390 } 391 } 392 393 return false; 394} 395 396/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 397/// int type). This simply type checks that type is one of the defined 398/// constants (0-3). 399bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 400 Expr *Arg = TheCall->getArg(1); 401 QualType ArgType = Arg->getType(); 402 const BuiltinType *BT = ArgType->getAsBuiltinType(); 403 llvm::APSInt Result(32); 404 if (!BT || BT->getKind() != BuiltinType::Int || 405 !Arg->isIntegerConstantExpr(Result, Context)) { 406 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 407 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 408 } 409 410 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 411 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 412 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 413 } 414 415 return false; 416} 417 418// Handle i > 1 ? "x" : "y", recursivelly 419bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 420 bool HasVAListArg, 421 unsigned format_idx, unsigned firstDataArg) { 422 423 switch (E->getStmtClass()) { 424 case Stmt::ConditionalOperatorClass: { 425 const ConditionalOperator *C = cast<ConditionalOperator>(E); 426 return SemaCheckStringLiteral(C->getLHS(), TheCall, 427 HasVAListArg, format_idx, firstDataArg) 428 && SemaCheckStringLiteral(C->getRHS(), TheCall, 429 HasVAListArg, format_idx, firstDataArg); 430 } 431 432 case Stmt::ImplicitCastExprClass: { 433 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 434 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 435 format_idx, firstDataArg); 436 } 437 438 case Stmt::ParenExprClass: { 439 const ParenExpr *Expr = cast<ParenExpr>(E); 440 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 441 format_idx, firstDataArg); 442 } 443 444 case Stmt::DeclRefExprClass: { 445 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 446 447 // As an exception, do not flag errors for variables binding to 448 // const string literals. 449 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 450 bool isConstant = false; 451 QualType T = DR->getType(); 452 453 if (const ArrayType *AT = Context.getAsArrayType(T)) { 454 isConstant = AT->getElementType().isConstant(Context); 455 } 456 else if (const PointerType *PT = T->getAsPointerType()) { 457 isConstant = T.isConstant(Context) && 458 PT->getPointeeType().isConstant(Context); 459 } 460 461 if (isConstant) { 462 const VarDecl *Def = 0; 463 if (const Expr *Init = VD->getDefinition(Def)) 464 return SemaCheckStringLiteral(Init, TheCall, 465 HasVAListArg, format_idx, firstDataArg); 466 } 467 } 468 469 return false; 470 } 471 472 case Stmt::ObjCStringLiteralClass: 473 case Stmt::StringLiteralClass: { 474 const StringLiteral *StrE = NULL; 475 476 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 477 StrE = ObjCFExpr->getString(); 478 else 479 StrE = cast<StringLiteral>(E); 480 481 if (StrE) { 482 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx, 483 firstDataArg); 484 return true; 485 } 486 487 return false; 488 } 489 490 default: 491 return false; 492 } 493} 494 495 496/// CheckPrintfArguments - Check calls to printf (and similar functions) for 497/// correct use of format strings. 498/// 499/// HasVAListArg - A predicate indicating whether the printf-like 500/// function is passed an explicit va_arg argument (e.g., vprintf) 501/// 502/// format_idx - The index into Args for the format string. 503/// 504/// Improper format strings to functions in the printf family can be 505/// the source of bizarre bugs and very serious security holes. A 506/// good source of information is available in the following paper 507/// (which includes additional references): 508/// 509/// FormatGuard: Automatic Protection From printf Format String 510/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 511/// 512/// Functionality implemented: 513/// 514/// We can statically check the following properties for string 515/// literal format strings for non v.*printf functions (where the 516/// arguments are passed directly): 517// 518/// (1) Are the number of format conversions equal to the number of 519/// data arguments? 520/// 521/// (2) Does each format conversion correctly match the type of the 522/// corresponding data argument? (TODO) 523/// 524/// Moreover, for all printf functions we can: 525/// 526/// (3) Check for a missing format string (when not caught by type checking). 527/// 528/// (4) Check for no-operation flags; e.g. using "#" with format 529/// conversion 'c' (TODO) 530/// 531/// (5) Check the use of '%n', a major source of security holes. 532/// 533/// (6) Check for malformed format conversions that don't specify anything. 534/// 535/// (7) Check for empty format strings. e.g: printf(""); 536/// 537/// (8) Check that the format string is a wide literal. 538/// 539/// (9) Also check the arguments of functions with the __format__ attribute. 540/// (TODO). 541/// 542/// All of these checks can be done by parsing the format string. 543/// 544/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 545void 546Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, 547 unsigned format_idx, unsigned firstDataArg) { 548 const Expr *Fn = TheCall->getCallee(); 549 550 // CHECK: printf-like function is called with no format string. 551 if (format_idx >= TheCall->getNumArgs()) { 552 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 553 << Fn->getSourceRange(); 554 return; 555 } 556 557 const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 558 559 // CHECK: format string is not a string literal. 560 // 561 // Dynamically generated format strings are difficult to 562 // automatically vet at compile time. Requiring that format strings 563 // are string literals: (1) permits the checking of format strings by 564 // the compiler and thereby (2) can practically remove the source of 565 // many format string exploits. 566 567 // Format string can be either ObjC string (e.g. @"%d") or 568 // C string (e.g. "%d") 569 // ObjC string uses the same format specifiers as C string, so we can use 570 // the same format string checking logic for both ObjC and C strings. 571 bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall, 572 HasVAListArg, format_idx, 573 firstDataArg); 574 575 if (!isFExpr) { 576 // For vprintf* functions (i.e., HasVAListArg==true), we add a 577 // special check to see if the format string is a function parameter 578 // of the function calling the printf function. If the function 579 // has an attribute indicating it is a printf-like function, then we 580 // should suppress warnings concerning non-literals being used in a call 581 // to a vprintf function. For example: 582 // 583 // void 584 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 585 // va_list ap; 586 // va_start(ap, fmt); 587 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 588 // ... 589 // 590 // 591 // FIXME: We don't have full attribute support yet, so just check to see 592 // if the argument is a DeclRefExpr that references a parameter. We'll 593 // add proper support for checking the attribute later. 594 if (HasVAListArg) 595 if (const DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 596 if (isa<ParmVarDecl>(DR->getDecl())) 597 return; 598 599 Diag(TheCall->getArg(format_idx)->getLocStart(), 600 diag::warn_printf_not_string_constant) 601 << OrigFormatExpr->getSourceRange(); 602 return; 603 } 604} 605 606void Sema::CheckPrintfString(const StringLiteral *FExpr, 607 const Expr *OrigFormatExpr, 608 const CallExpr *TheCall, bool HasVAListArg, 609 unsigned format_idx, unsigned firstDataArg) { 610 611 const ObjCStringLiteral *ObjCFExpr = 612 dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 613 614 // CHECK: is the format string a wide literal? 615 if (FExpr->isWide()) { 616 Diag(FExpr->getLocStart(), 617 diag::warn_printf_format_string_is_wide_literal) 618 << OrigFormatExpr->getSourceRange(); 619 return; 620 } 621 622 // Str - The format string. NOTE: this is NOT null-terminated! 623 const char * const Str = FExpr->getStrData(); 624 625 // CHECK: empty format string? 626 const unsigned StrLen = FExpr->getByteLength(); 627 628 if (StrLen == 0) { 629 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 630 << OrigFormatExpr->getSourceRange(); 631 return; 632 } 633 634 // We process the format string using a binary state machine. The 635 // current state is stored in CurrentState. 636 enum { 637 state_OrdChr, 638 state_Conversion 639 } CurrentState = state_OrdChr; 640 641 // numConversions - The number of conversions seen so far. This is 642 // incremented as we traverse the format string. 643 unsigned numConversions = 0; 644 645 // numDataArgs - The number of data arguments after the format 646 // string. This can only be determined for non vprintf-like 647 // functions. For those functions, this value is 1 (the sole 648 // va_arg argument). 649 unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg; 650 651 // Inspect the format string. 652 unsigned StrIdx = 0; 653 654 // LastConversionIdx - Index within the format string where we last saw 655 // a '%' character that starts a new format conversion. 656 unsigned LastConversionIdx = 0; 657 658 for (; StrIdx < StrLen; ++StrIdx) { 659 660 // Is the number of detected conversion conversions greater than 661 // the number of matching data arguments? If so, stop. 662 if (!HasVAListArg && numConversions > numDataArgs) break; 663 664 // Handle "\0" 665 if (Str[StrIdx] == '\0') { 666 // The string returned by getStrData() is not null-terminated, 667 // so the presence of a null character is likely an error. 668 Diag(getLocationOfStringLiteralByte(FExpr, StrIdx), 669 diag::warn_printf_format_string_contains_null_char) 670 << OrigFormatExpr->getSourceRange(); 671 return; 672 } 673 674 // Ordinary characters (not processing a format conversion). 675 if (CurrentState == state_OrdChr) { 676 if (Str[StrIdx] == '%') { 677 CurrentState = state_Conversion; 678 LastConversionIdx = StrIdx; 679 } 680 continue; 681 } 682 683 // Seen '%'. Now processing a format conversion. 684 switch (Str[StrIdx]) { 685 // Handle dynamic precision or width specifier. 686 case '*': { 687 ++numConversions; 688 689 if (!HasVAListArg && numConversions > numDataArgs) { 690 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 691 692 if (Str[StrIdx-1] == '.') 693 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 694 << OrigFormatExpr->getSourceRange(); 695 else 696 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 697 << OrigFormatExpr->getSourceRange(); 698 699 // Don't do any more checking. We'll just emit spurious errors. 700 return; 701 } 702 703 // Perform type checking on width/precision specifier. 704 const Expr *E = TheCall->getArg(format_idx+numConversions); 705 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 706 if (BT->getKind() == BuiltinType::Int) 707 break; 708 709 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 710 711 if (Str[StrIdx-1] == '.') 712 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 713 << E->getType() << E->getSourceRange(); 714 else 715 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 716 << E->getType() << E->getSourceRange(); 717 718 break; 719 } 720 721 // Characters which can terminate a format conversion 722 // (e.g. "%d"). Characters that specify length modifiers or 723 // other flags are handled by the default case below. 724 // 725 // FIXME: additional checks will go into the following cases. 726 case 'i': 727 case 'd': 728 case 'o': 729 case 'u': 730 case 'x': 731 case 'X': 732 case 'D': 733 case 'O': 734 case 'U': 735 case 'e': 736 case 'E': 737 case 'f': 738 case 'F': 739 case 'g': 740 case 'G': 741 case 'a': 742 case 'A': 743 case 'c': 744 case 'C': 745 case 'S': 746 case 's': 747 case 'p': 748 ++numConversions; 749 CurrentState = state_OrdChr; 750 break; 751 752 // CHECK: Are we using "%n"? Issue a warning. 753 case 'n': { 754 ++numConversions; 755 CurrentState = state_OrdChr; 756 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, 757 LastConversionIdx); 758 759 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 760 break; 761 } 762 763 // Handle "%@" 764 case '@': 765 // %@ is allowed in ObjC format strings only. 766 if(ObjCFExpr != NULL) 767 CurrentState = state_OrdChr; 768 else { 769 // Issue a warning: invalid format conversion. 770 SourceLocation Loc = 771 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 772 773 Diag(Loc, diag::warn_printf_invalid_conversion) 774 << std::string(Str+LastConversionIdx, 775 Str+std::min(LastConversionIdx+2, StrLen)) 776 << OrigFormatExpr->getSourceRange(); 777 } 778 ++numConversions; 779 break; 780 781 // Handle "%%" 782 case '%': 783 // Sanity check: Was the first "%" character the previous one? 784 // If not, we will assume that we have a malformed format 785 // conversion, and that the current "%" character is the start 786 // of a new conversion. 787 if (StrIdx - LastConversionIdx == 1) 788 CurrentState = state_OrdChr; 789 else { 790 // Issue a warning: invalid format conversion. 791 SourceLocation Loc = 792 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 793 794 Diag(Loc, diag::warn_printf_invalid_conversion) 795 << std::string(Str+LastConversionIdx, Str+StrIdx) 796 << OrigFormatExpr->getSourceRange(); 797 798 // This conversion is broken. Advance to the next format 799 // conversion. 800 LastConversionIdx = StrIdx; 801 ++numConversions; 802 } 803 break; 804 805 default: 806 // This case catches all other characters: flags, widths, etc. 807 // We should eventually process those as well. 808 break; 809 } 810 } 811 812 if (CurrentState == state_Conversion) { 813 // Issue a warning: invalid format conversion. 814 SourceLocation Loc = 815 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 816 817 Diag(Loc, diag::warn_printf_invalid_conversion) 818 << std::string(Str+LastConversionIdx, 819 Str+std::min(LastConversionIdx+2, StrLen)) 820 << OrigFormatExpr->getSourceRange(); 821 return; 822 } 823 824 if (!HasVAListArg) { 825 // CHECK: Does the number of format conversions exceed the number 826 // of data arguments? 827 if (numConversions > numDataArgs) { 828 SourceLocation Loc = 829 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 830 831 Diag(Loc, diag::warn_printf_insufficient_data_args) 832 << OrigFormatExpr->getSourceRange(); 833 } 834 // CHECK: Does the number of data arguments exceed the number of 835 // format conversions in the format string? 836 else if (numConversions < numDataArgs) 837 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 838 diag::warn_printf_too_many_data_args) 839 << OrigFormatExpr->getSourceRange(); 840 } 841} 842 843//===--- CHECK: Return Address of Stack Variable --------------------------===// 844 845static DeclRefExpr* EvalVal(Expr *E); 846static DeclRefExpr* EvalAddr(Expr* E); 847 848/// CheckReturnStackAddr - Check if a return statement returns the address 849/// of a stack variable. 850void 851Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 852 SourceLocation ReturnLoc) { 853 854 // Perform checking for returned stack addresses. 855 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 856 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 857 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 858 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 859 860 // Skip over implicit cast expressions when checking for block expressions. 861 if (ImplicitCastExpr *IcExpr = 862 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 863 RetValExp = IcExpr->getSubExpr(); 864 865 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 866 Diag(C->getLocStart(), diag::err_ret_local_block) 867 << C->getSourceRange(); 868 } 869 // Perform checking for stack values returned by reference. 870 else if (lhsType->isReferenceType()) { 871 // Check for a reference to the stack 872 if (DeclRefExpr *DR = EvalVal(RetValExp)) 873 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 874 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 875 } 876} 877 878/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 879/// check if the expression in a return statement evaluates to an address 880/// to a location on the stack. The recursion is used to traverse the 881/// AST of the return expression, with recursion backtracking when we 882/// encounter a subexpression that (1) clearly does not lead to the address 883/// of a stack variable or (2) is something we cannot determine leads to 884/// the address of a stack variable based on such local checking. 885/// 886/// EvalAddr processes expressions that are pointers that are used as 887/// references (and not L-values). EvalVal handles all other values. 888/// At the base case of the recursion is a check for a DeclRefExpr* in 889/// the refers to a stack variable. 890/// 891/// This implementation handles: 892/// 893/// * pointer-to-pointer casts 894/// * implicit conversions from array references to pointers 895/// * taking the address of fields 896/// * arbitrary interplay between "&" and "*" operators 897/// * pointer arithmetic from an address of a stack variable 898/// * taking the address of an array element where the array is on the stack 899static DeclRefExpr* EvalAddr(Expr *E) { 900 // We should only be called for evaluating pointer expressions. 901 assert((E->getType()->isPointerType() || 902 E->getType()->isBlockPointerType() || 903 E->getType()->isObjCQualifiedIdType()) && 904 "EvalAddr only works on pointers"); 905 906 // Our "symbolic interpreter" is just a dispatch off the currently 907 // viewed AST node. We then recursively traverse the AST by calling 908 // EvalAddr and EvalVal appropriately. 909 switch (E->getStmtClass()) { 910 case Stmt::ParenExprClass: 911 // Ignore parentheses. 912 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 913 914 case Stmt::UnaryOperatorClass: { 915 // The only unary operator that make sense to handle here 916 // is AddrOf. All others don't make sense as pointers. 917 UnaryOperator *U = cast<UnaryOperator>(E); 918 919 if (U->getOpcode() == UnaryOperator::AddrOf) 920 return EvalVal(U->getSubExpr()); 921 else 922 return NULL; 923 } 924 925 case Stmt::BinaryOperatorClass: { 926 // Handle pointer arithmetic. All other binary operators are not valid 927 // in this context. 928 BinaryOperator *B = cast<BinaryOperator>(E); 929 BinaryOperator::Opcode op = B->getOpcode(); 930 931 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 932 return NULL; 933 934 Expr *Base = B->getLHS(); 935 936 // Determine which argument is the real pointer base. It could be 937 // the RHS argument instead of the LHS. 938 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 939 940 assert (Base->getType()->isPointerType()); 941 return EvalAddr(Base); 942 } 943 944 // For conditional operators we need to see if either the LHS or RHS are 945 // valid DeclRefExpr*s. If one of them is valid, we return it. 946 case Stmt::ConditionalOperatorClass: { 947 ConditionalOperator *C = cast<ConditionalOperator>(E); 948 949 // Handle the GNU extension for missing LHS. 950 if (Expr *lhsExpr = C->getLHS()) 951 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 952 return LHS; 953 954 return EvalAddr(C->getRHS()); 955 } 956 957 // For casts, we need to handle conversions from arrays to 958 // pointer values, and pointer-to-pointer conversions. 959 case Stmt::ImplicitCastExprClass: 960 case Stmt::CStyleCastExprClass: 961 case Stmt::CXXFunctionalCastExprClass: { 962 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 963 QualType T = SubExpr->getType(); 964 965 if (SubExpr->getType()->isPointerType() || 966 SubExpr->getType()->isBlockPointerType() || 967 SubExpr->getType()->isObjCQualifiedIdType()) 968 return EvalAddr(SubExpr); 969 else if (T->isArrayType()) 970 return EvalVal(SubExpr); 971 else 972 return 0; 973 } 974 975 // C++ casts. For dynamic casts, static casts, and const casts, we 976 // are always converting from a pointer-to-pointer, so we just blow 977 // through the cast. In the case the dynamic cast doesn't fail (and 978 // return NULL), we take the conservative route and report cases 979 // where we return the address of a stack variable. For Reinterpre 980 // FIXME: The comment about is wrong; we're not always converting 981 // from pointer to pointer. I'm guessing that this code should also 982 // handle references to objects. 983 case Stmt::CXXStaticCastExprClass: 984 case Stmt::CXXDynamicCastExprClass: 985 case Stmt::CXXConstCastExprClass: 986 case Stmt::CXXReinterpretCastExprClass: { 987 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 988 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 989 return EvalAddr(S); 990 else 991 return NULL; 992 } 993 994 // Everything else: we simply don't reason about them. 995 default: 996 return NULL; 997 } 998} 999 1000 1001/// EvalVal - This function is complements EvalAddr in the mutual recursion. 1002/// See the comments for EvalAddr for more details. 1003static DeclRefExpr* EvalVal(Expr *E) { 1004 1005 // We should only be called for evaluating non-pointer expressions, or 1006 // expressions with a pointer type that are not used as references but instead 1007 // are l-values (e.g., DeclRefExpr with a pointer type). 1008 1009 // Our "symbolic interpreter" is just a dispatch off the currently 1010 // viewed AST node. We then recursively traverse the AST by calling 1011 // EvalAddr and EvalVal appropriately. 1012 switch (E->getStmtClass()) { 1013 case Stmt::DeclRefExprClass: 1014 case Stmt::QualifiedDeclRefExprClass: { 1015 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 1016 // at code that refers to a variable's name. We check if it has local 1017 // storage within the function, and if so, return the expression. 1018 DeclRefExpr *DR = cast<DeclRefExpr>(E); 1019 1020 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 1021 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 1022 1023 return NULL; 1024 } 1025 1026 case Stmt::ParenExprClass: 1027 // Ignore parentheses. 1028 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 1029 1030 case Stmt::UnaryOperatorClass: { 1031 // The only unary operator that make sense to handle here 1032 // is Deref. All others don't resolve to a "name." This includes 1033 // handling all sorts of rvalues passed to a unary operator. 1034 UnaryOperator *U = cast<UnaryOperator>(E); 1035 1036 if (U->getOpcode() == UnaryOperator::Deref) 1037 return EvalAddr(U->getSubExpr()); 1038 1039 return NULL; 1040 } 1041 1042 case Stmt::ArraySubscriptExprClass: { 1043 // Array subscripts are potential references to data on the stack. We 1044 // retrieve the DeclRefExpr* for the array variable if it indeed 1045 // has local storage. 1046 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 1047 } 1048 1049 case Stmt::ConditionalOperatorClass: { 1050 // For conditional operators we need to see if either the LHS or RHS are 1051 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 1052 ConditionalOperator *C = cast<ConditionalOperator>(E); 1053 1054 // Handle the GNU extension for missing LHS. 1055 if (Expr *lhsExpr = C->getLHS()) 1056 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 1057 return LHS; 1058 1059 return EvalVal(C->getRHS()); 1060 } 1061 1062 // Accesses to members are potential references to data on the stack. 1063 case Stmt::MemberExprClass: { 1064 MemberExpr *M = cast<MemberExpr>(E); 1065 1066 // Check for indirect access. We only want direct field accesses. 1067 if (!M->isArrow()) 1068 return EvalVal(M->getBase()); 1069 else 1070 return NULL; 1071 } 1072 1073 // Everything else: we simply don't reason about them. 1074 default: 1075 return NULL; 1076 } 1077} 1078 1079//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 1080 1081/// Check for comparisons of floating point operands using != and ==. 1082/// Issue a warning if these are no self-comparisons, as they are not likely 1083/// to do what the programmer intended. 1084void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1085 bool EmitWarning = true; 1086 1087 Expr* LeftExprSansParen = lex->IgnoreParens(); 1088 Expr* RightExprSansParen = rex->IgnoreParens(); 1089 1090 // Special case: check for x == x (which is OK). 1091 // Do not emit warnings for such cases. 1092 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1093 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1094 if (DRL->getDecl() == DRR->getDecl()) 1095 EmitWarning = false; 1096 1097 1098 // Special case: check for comparisons against literals that can be exactly 1099 // represented by APFloat. In such cases, do not emit a warning. This 1100 // is a heuristic: often comparison against such literals are used to 1101 // detect if a value in a variable has not changed. This clearly can 1102 // lead to false negatives. 1103 if (EmitWarning) { 1104 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1105 if (FLL->isExact()) 1106 EmitWarning = false; 1107 } 1108 else 1109 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1110 if (FLR->isExact()) 1111 EmitWarning = false; 1112 } 1113 } 1114 1115 // Check for comparisons with builtin types. 1116 if (EmitWarning) 1117 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1118 if (CL->isBuiltinCall(Context)) 1119 EmitWarning = false; 1120 1121 if (EmitWarning) 1122 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1123 if (CR->isBuiltinCall(Context)) 1124 EmitWarning = false; 1125 1126 // Emit the diagnostic. 1127 if (EmitWarning) 1128 Diag(loc, diag::warn_floatingpoint_eq) 1129 << lex->getSourceRange() << rex->getSourceRange(); 1130} 1131