SemaChecking.cpp revision 397195bf3077fb42789b326f69f7d417227a0588
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/LiteralSupport.h" 21#include "clang/Lex/Preprocessor.h" 22using namespace clang; 23 24/// getLocationOfStringLiteralByte - Return a source location that points to the 25/// specified byte of the specified string literal. 26/// 27/// Strings are amazingly complex. They can be formed from multiple tokens and 28/// can have escape sequences in them in addition to the usual trigraph and 29/// escaped newline business. This routine handles this complexity. 30/// 31SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 32 unsigned ByteNo) const { 33 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 34 35 // Loop over all of the tokens in this string until we find the one that 36 // contains the byte we're looking for. 37 unsigned TokNo = 0; 38 while (1) { 39 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 40 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 41 42 // Get the spelling of the string so that we can get the data that makes up 43 // the string literal, not the identifier for the macro it is potentially 44 // expanded through. 45 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 46 47 // Re-lex the token to get its length and original spelling. 48 std::pair<FileID, unsigned> LocInfo = 49 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 50 std::pair<const char *,const char *> Buffer = 51 SourceMgr.getBufferData(LocInfo.first); 52 const char *StrData = Buffer.first+LocInfo.second; 53 54 // Create a langops struct and enable trigraphs. This is sufficient for 55 // relexing tokens. 56 LangOptions LangOpts; 57 LangOpts.Trigraphs = true; 58 59 // Create a lexer starting at the beginning of this token. 60 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData, 61 Buffer.second); 62 Token TheTok; 63 TheLexer.LexFromRawLexer(TheTok); 64 65 // Use the StringLiteralParser to compute the length of the string in bytes. 66 StringLiteralParser SLP(&TheTok, 1, PP); 67 unsigned TokNumBytes = SLP.GetStringLength(); 68 69 // If the byte is in this token, return the location of the byte. 70 if (ByteNo < TokNumBytes || 71 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 72 unsigned Offset = 73 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP); 74 75 // Now that we know the offset of the token in the spelling, use the 76 // preprocessor to get the offset in the original source. 77 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 78 } 79 80 // Move to the next string token. 81 ++TokNo; 82 ByteNo -= TokNumBytes; 83 } 84} 85 86 87/// CheckFunctionCall - Check a direct function call for various correctness 88/// and safety properties not strictly enforced by the C type system. 89Action::OwningExprResult 90Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 91 OwningExprResult TheCallResult(Owned(TheCall)); 92 // Get the IdentifierInfo* for the called function. 93 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 94 95 // None of the checks below are needed for functions that don't have 96 // simple names (e.g., C++ conversion functions). 97 if (!FnInfo) 98 return move(TheCallResult); 99 100 switch (FDecl->getBuiltinID(Context)) { 101 case Builtin::BI__builtin___CFStringMakeConstantString: 102 assert(TheCall->getNumArgs() == 1 && 103 "Wrong # arguments to builtin CFStringMakeConstantString"); 104 if (CheckObjCString(TheCall->getArg(0))) 105 return ExprError(); 106 return move(TheCallResult); 107 case Builtin::BI__builtin_stdarg_start: 108 case Builtin::BI__builtin_va_start: 109 if (SemaBuiltinVAStart(TheCall)) 110 return ExprError(); 111 return move(TheCallResult); 112 case Builtin::BI__builtin_isgreater: 113 case Builtin::BI__builtin_isgreaterequal: 114 case Builtin::BI__builtin_isless: 115 case Builtin::BI__builtin_islessequal: 116 case Builtin::BI__builtin_islessgreater: 117 case Builtin::BI__builtin_isunordered: 118 if (SemaBuiltinUnorderedCompare(TheCall)) 119 return ExprError(); 120 return move(TheCallResult); 121 case Builtin::BI__builtin_return_address: 122 case Builtin::BI__builtin_frame_address: 123 if (SemaBuiltinStackAddress(TheCall)) 124 return ExprError(); 125 return move(TheCallResult); 126 case Builtin::BI__builtin_shufflevector: 127 return SemaBuiltinShuffleVector(TheCall); 128 // TheCall will be freed by the smart pointer here, but that's fine, since 129 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 130 case Builtin::BI__builtin_prefetch: 131 if (SemaBuiltinPrefetch(TheCall)) 132 return ExprError(); 133 return move(TheCallResult); 134 case Builtin::BI__builtin_object_size: 135 if (SemaBuiltinObjectSize(TheCall)) 136 return ExprError(); 137 } 138 139 // FIXME: This mechanism should be abstracted to be less fragile and 140 // more efficient. For example, just map function ids to custom 141 // handlers. 142 143 // Printf checking. 144 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 145 if (Format->getType() == "printf") { 146 bool HasVAListArg = Format->getFirstArg() == 0; 147 if (!HasVAListArg) { 148 if (const FunctionProtoType *Proto 149 = FDecl->getType()->getAsFunctionProtoType()) 150 HasVAListArg = !Proto->isVariadic(); 151 } 152 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 153 HasVAListArg ? 0 : Format->getFirstArg() - 1); 154 } 155 } 156 157 return move(TheCallResult); 158} 159 160/// CheckObjCString - Checks that the argument to the builtin 161/// CFString constructor is correct 162/// FIXME: GCC currently emits the following warning: 163/// "warning: input conversion stopped due to an input byte that does not 164/// belong to the input codeset UTF-8" 165/// Note: It might also make sense to do the UTF-16 conversion here (would 166/// simplify the backend). 167bool Sema::CheckObjCString(Expr *Arg) { 168 Arg = Arg->IgnoreParenCasts(); 169 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 170 171 if (!Literal || Literal->isWide()) { 172 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 173 << Arg->getSourceRange(); 174 return true; 175 } 176 177 const char *Data = Literal->getStrData(); 178 unsigned Length = Literal->getByteLength(); 179 180 for (unsigned i = 0; i < Length; ++i) { 181 if (!Data[i]) { 182 Diag(getLocationOfStringLiteralByte(Literal, i), 183 diag::warn_cfstring_literal_contains_nul_character) 184 << Arg->getSourceRange(); 185 break; 186 } 187 } 188 189 return false; 190} 191 192/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 193/// Emit an error and return true on failure, return false on success. 194bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 195 Expr *Fn = TheCall->getCallee(); 196 if (TheCall->getNumArgs() > 2) { 197 Diag(TheCall->getArg(2)->getLocStart(), 198 diag::err_typecheck_call_too_many_args) 199 << 0 /*function call*/ << Fn->getSourceRange() 200 << SourceRange(TheCall->getArg(2)->getLocStart(), 201 (*(TheCall->arg_end()-1))->getLocEnd()); 202 return true; 203 } 204 205 if (TheCall->getNumArgs() < 2) { 206 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 207 << 0 /*function call*/; 208 } 209 210 // Determine whether the current function is variadic or not. 211 bool isVariadic; 212 if (CurBlock) 213 isVariadic = CurBlock->isVariadic; 214 else if (getCurFunctionDecl()) { 215 if (FunctionProtoType* FTP = 216 dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType())) 217 isVariadic = FTP->isVariadic(); 218 else 219 isVariadic = false; 220 } else { 221 isVariadic = getCurMethodDecl()->isVariadic(); 222 } 223 224 if (!isVariadic) { 225 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 226 return true; 227 } 228 229 // Verify that the second argument to the builtin is the last argument of the 230 // current function or method. 231 bool SecondArgIsLastNamedArgument = false; 232 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 233 234 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 235 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 236 // FIXME: This isn't correct for methods (results in bogus warning). 237 // Get the last formal in the current function. 238 const ParmVarDecl *LastArg; 239 if (CurBlock) 240 LastArg = *(CurBlock->TheDecl->param_end()-1); 241 else if (FunctionDecl *FD = getCurFunctionDecl()) 242 LastArg = *(FD->param_end()-1); 243 else 244 LastArg = *(getCurMethodDecl()->param_end()-1); 245 SecondArgIsLastNamedArgument = PV == LastArg; 246 } 247 } 248 249 if (!SecondArgIsLastNamedArgument) 250 Diag(TheCall->getArg(1)->getLocStart(), 251 diag::warn_second_parameter_of_va_start_not_last_named_argument); 252 return false; 253} 254 255/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 256/// friends. This is declared to take (...), so we have to check everything. 257bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 258 if (TheCall->getNumArgs() < 2) 259 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 260 << 0 /*function call*/; 261 if (TheCall->getNumArgs() > 2) 262 return Diag(TheCall->getArg(2)->getLocStart(), 263 diag::err_typecheck_call_too_many_args) 264 << 0 /*function call*/ 265 << SourceRange(TheCall->getArg(2)->getLocStart(), 266 (*(TheCall->arg_end()-1))->getLocEnd()); 267 268 Expr *OrigArg0 = TheCall->getArg(0); 269 Expr *OrigArg1 = TheCall->getArg(1); 270 271 // Do standard promotions between the two arguments, returning their common 272 // type. 273 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 274 275 // Make sure any conversions are pushed back into the call; this is 276 // type safe since unordered compare builtins are declared as "_Bool 277 // foo(...)". 278 TheCall->setArg(0, OrigArg0); 279 TheCall->setArg(1, OrigArg1); 280 281 // If the common type isn't a real floating type, then the arguments were 282 // invalid for this operation. 283 if (!Res->isRealFloatingType()) 284 return Diag(OrigArg0->getLocStart(), 285 diag::err_typecheck_call_invalid_ordered_compare) 286 << OrigArg0->getType() << OrigArg1->getType() 287 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 288 289 return false; 290} 291 292bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 293 // The signature for these builtins is exact; the only thing we need 294 // to check is that the argument is a constant. 295 SourceLocation Loc; 296 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 297 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 298 299 return false; 300} 301 302/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 303// This is declared to take (...), so we have to check everything. 304Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 305 if (TheCall->getNumArgs() < 3) 306 return ExprError(Diag(TheCall->getLocEnd(), 307 diag::err_typecheck_call_too_few_args) 308 << 0 /*function call*/ << TheCall->getSourceRange()); 309 310 QualType FAType = TheCall->getArg(0)->getType(); 311 QualType SAType = TheCall->getArg(1)->getType(); 312 313 if (!FAType->isVectorType() || !SAType->isVectorType()) { 314 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 315 << SourceRange(TheCall->getArg(0)->getLocStart(), 316 TheCall->getArg(1)->getLocEnd()); 317 return ExprError(); 318 } 319 320 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 321 Context.getCanonicalType(SAType).getUnqualifiedType()) { 322 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 323 << SourceRange(TheCall->getArg(0)->getLocStart(), 324 TheCall->getArg(1)->getLocEnd()); 325 return ExprError(); 326 } 327 328 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 329 if (TheCall->getNumArgs() != numElements+2) { 330 if (TheCall->getNumArgs() < numElements+2) 331 return ExprError(Diag(TheCall->getLocEnd(), 332 diag::err_typecheck_call_too_few_args) 333 << 0 /*function call*/ << TheCall->getSourceRange()); 334 return ExprError(Diag(TheCall->getLocEnd(), 335 diag::err_typecheck_call_too_many_args) 336 << 0 /*function call*/ << TheCall->getSourceRange()); 337 } 338 339 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 340 llvm::APSInt Result(32); 341 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 342 return ExprError(Diag(TheCall->getLocStart(), 343 diag::err_shufflevector_nonconstant_argument) 344 << TheCall->getArg(i)->getSourceRange()); 345 346 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 347 return ExprError(Diag(TheCall->getLocStart(), 348 diag::err_shufflevector_argument_too_large) 349 << TheCall->getArg(i)->getSourceRange()); 350 } 351 352 llvm::SmallVector<Expr*, 32> exprs; 353 354 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 355 exprs.push_back(TheCall->getArg(i)); 356 TheCall->setArg(i, 0); 357 } 358 359 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2, 360 FAType, 361 TheCall->getCallee()->getLocStart(), 362 TheCall->getRParenLoc())); 363} 364 365/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 366// This is declared to take (const void*, ...) and can take two 367// optional constant int args. 368bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 369 unsigned NumArgs = TheCall->getNumArgs(); 370 371 if (NumArgs > 3) 372 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 373 << 0 /*function call*/ << TheCall->getSourceRange(); 374 375 // Argument 0 is checked for us and the remaining arguments must be 376 // constant integers. 377 for (unsigned i = 1; i != NumArgs; ++i) { 378 Expr *Arg = TheCall->getArg(i); 379 QualType RWType = Arg->getType(); 380 381 const BuiltinType *BT = RWType->getAsBuiltinType(); 382 llvm::APSInt Result; 383 if (!BT || BT->getKind() != BuiltinType::Int || 384 !Arg->isIntegerConstantExpr(Result, Context)) 385 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 386 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 387 388 // FIXME: gcc issues a warning and rewrites these to 0. These 389 // seems especially odd for the third argument since the default 390 // is 3. 391 if (i == 1) { 392 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 393 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 394 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 395 } else { 396 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 397 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 398 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 399 } 400 } 401 402 return false; 403} 404 405/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 406/// int type). This simply type checks that type is one of the defined 407/// constants (0-3). 408bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 409 Expr *Arg = TheCall->getArg(1); 410 QualType ArgType = Arg->getType(); 411 const BuiltinType *BT = ArgType->getAsBuiltinType(); 412 llvm::APSInt Result(32); 413 if (!BT || BT->getKind() != BuiltinType::Int || 414 !Arg->isIntegerConstantExpr(Result, Context)) { 415 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 416 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 417 } 418 419 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 420 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 421 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 422 } 423 424 return false; 425} 426 427// Handle i > 1 ? "x" : "y", recursivelly 428bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 429 bool HasVAListArg, 430 unsigned format_idx, unsigned firstDataArg) { 431 432 switch (E->getStmtClass()) { 433 case Stmt::ConditionalOperatorClass: { 434 const ConditionalOperator *C = cast<ConditionalOperator>(E); 435 return SemaCheckStringLiteral(C->getLHS(), TheCall, 436 HasVAListArg, format_idx, firstDataArg) 437 && SemaCheckStringLiteral(C->getRHS(), TheCall, 438 HasVAListArg, format_idx, firstDataArg); 439 } 440 441 case Stmt::ImplicitCastExprClass: { 442 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 443 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 444 format_idx, firstDataArg); 445 } 446 447 case Stmt::ParenExprClass: { 448 const ParenExpr *Expr = cast<ParenExpr>(E); 449 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 450 format_idx, firstDataArg); 451 } 452 453 case Stmt::DeclRefExprClass: { 454 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 455 456 // As an exception, do not flag errors for variables binding to 457 // const string literals. 458 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 459 bool isConstant = false; 460 QualType T = DR->getType(); 461 462 if (const ArrayType *AT = Context.getAsArrayType(T)) { 463 isConstant = AT->getElementType().isConstant(Context); 464 } 465 else if (const PointerType *PT = T->getAsPointerType()) { 466 isConstant = T.isConstant(Context) && 467 PT->getPointeeType().isConstant(Context); 468 } 469 470 if (isConstant) { 471 const VarDecl *Def = 0; 472 if (const Expr *Init = VD->getDefinition(Def)) 473 return SemaCheckStringLiteral(Init, TheCall, 474 HasVAListArg, format_idx, firstDataArg); 475 } 476 } 477 478 return false; 479 } 480 481 case Stmt::ObjCStringLiteralClass: 482 case Stmt::StringLiteralClass: { 483 const StringLiteral *StrE = NULL; 484 485 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 486 StrE = ObjCFExpr->getString(); 487 else 488 StrE = cast<StringLiteral>(E); 489 490 if (StrE) { 491 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx, 492 firstDataArg); 493 return true; 494 } 495 496 return false; 497 } 498 499 default: 500 return false; 501 } 502} 503 504 505/// CheckPrintfArguments - Check calls to printf (and similar functions) for 506/// correct use of format strings. 507/// 508/// HasVAListArg - A predicate indicating whether the printf-like 509/// function is passed an explicit va_arg argument (e.g., vprintf) 510/// 511/// format_idx - The index into Args for the format string. 512/// 513/// Improper format strings to functions in the printf family can be 514/// the source of bizarre bugs and very serious security holes. A 515/// good source of information is available in the following paper 516/// (which includes additional references): 517/// 518/// FormatGuard: Automatic Protection From printf Format String 519/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 520/// 521/// Functionality implemented: 522/// 523/// We can statically check the following properties for string 524/// literal format strings for non v.*printf functions (where the 525/// arguments are passed directly): 526// 527/// (1) Are the number of format conversions equal to the number of 528/// data arguments? 529/// 530/// (2) Does each format conversion correctly match the type of the 531/// corresponding data argument? (TODO) 532/// 533/// Moreover, for all printf functions we can: 534/// 535/// (3) Check for a missing format string (when not caught by type checking). 536/// 537/// (4) Check for no-operation flags; e.g. using "#" with format 538/// conversion 'c' (TODO) 539/// 540/// (5) Check the use of '%n', a major source of security holes. 541/// 542/// (6) Check for malformed format conversions that don't specify anything. 543/// 544/// (7) Check for empty format strings. e.g: printf(""); 545/// 546/// (8) Check that the format string is a wide literal. 547/// 548/// (9) Also check the arguments of functions with the __format__ attribute. 549/// (TODO). 550/// 551/// All of these checks can be done by parsing the format string. 552/// 553/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 554void 555Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, 556 unsigned format_idx, unsigned firstDataArg) { 557 const Expr *Fn = TheCall->getCallee(); 558 559 // CHECK: printf-like function is called with no format string. 560 if (format_idx >= TheCall->getNumArgs()) { 561 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 562 << Fn->getSourceRange(); 563 return; 564 } 565 566 const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 567 568 // CHECK: format string is not a string literal. 569 // 570 // Dynamically generated format strings are difficult to 571 // automatically vet at compile time. Requiring that format strings 572 // are string literals: (1) permits the checking of format strings by 573 // the compiler and thereby (2) can practically remove the source of 574 // many format string exploits. 575 576 // Format string can be either ObjC string (e.g. @"%d") or 577 // C string (e.g. "%d") 578 // ObjC string uses the same format specifiers as C string, so we can use 579 // the same format string checking logic for both ObjC and C strings. 580 bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall, 581 HasVAListArg, format_idx, 582 firstDataArg); 583 584 if (!isFExpr) { 585 // For vprintf* functions (i.e., HasVAListArg==true), we add a 586 // special check to see if the format string is a function parameter 587 // of the function calling the printf function. If the function 588 // has an attribute indicating it is a printf-like function, then we 589 // should suppress warnings concerning non-literals being used in a call 590 // to a vprintf function. For example: 591 // 592 // void 593 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 594 // va_list ap; 595 // va_start(ap, fmt); 596 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 597 // ... 598 // 599 // 600 // FIXME: We don't have full attribute support yet, so just check to see 601 // if the argument is a DeclRefExpr that references a parameter. We'll 602 // add proper support for checking the attribute later. 603 if (HasVAListArg) 604 if (const DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 605 if (isa<ParmVarDecl>(DR->getDecl())) 606 return; 607 608 Diag(TheCall->getArg(format_idx)->getLocStart(), 609 diag::warn_printf_not_string_constant) 610 << OrigFormatExpr->getSourceRange(); 611 return; 612 } 613} 614 615void Sema::CheckPrintfString(const StringLiteral *FExpr, 616 const Expr *OrigFormatExpr, 617 const CallExpr *TheCall, bool HasVAListArg, 618 unsigned format_idx, unsigned firstDataArg) { 619 620 const ObjCStringLiteral *ObjCFExpr = 621 dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 622 623 // CHECK: is the format string a wide literal? 624 if (FExpr->isWide()) { 625 Diag(FExpr->getLocStart(), 626 diag::warn_printf_format_string_is_wide_literal) 627 << OrigFormatExpr->getSourceRange(); 628 return; 629 } 630 631 // Str - The format string. NOTE: this is NOT null-terminated! 632 const char * const Str = FExpr->getStrData(); 633 634 // CHECK: empty format string? 635 const unsigned StrLen = FExpr->getByteLength(); 636 637 if (StrLen == 0) { 638 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 639 << OrigFormatExpr->getSourceRange(); 640 return; 641 } 642 643 // We process the format string using a binary state machine. The 644 // current state is stored in CurrentState. 645 enum { 646 state_OrdChr, 647 state_Conversion 648 } CurrentState = state_OrdChr; 649 650 // numConversions - The number of conversions seen so far. This is 651 // incremented as we traverse the format string. 652 unsigned numConversions = 0; 653 654 // numDataArgs - The number of data arguments after the format 655 // string. This can only be determined for non vprintf-like 656 // functions. For those functions, this value is 1 (the sole 657 // va_arg argument). 658 unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg; 659 660 // Inspect the format string. 661 unsigned StrIdx = 0; 662 663 // LastConversionIdx - Index within the format string where we last saw 664 // a '%' character that starts a new format conversion. 665 unsigned LastConversionIdx = 0; 666 667 for (; StrIdx < StrLen; ++StrIdx) { 668 669 // Is the number of detected conversion conversions greater than 670 // the number of matching data arguments? If so, stop. 671 if (!HasVAListArg && numConversions > numDataArgs) break; 672 673 // Handle "\0" 674 if (Str[StrIdx] == '\0') { 675 // The string returned by getStrData() is not null-terminated, 676 // so the presence of a null character is likely an error. 677 Diag(getLocationOfStringLiteralByte(FExpr, StrIdx), 678 diag::warn_printf_format_string_contains_null_char) 679 << OrigFormatExpr->getSourceRange(); 680 return; 681 } 682 683 // Ordinary characters (not processing a format conversion). 684 if (CurrentState == state_OrdChr) { 685 if (Str[StrIdx] == '%') { 686 CurrentState = state_Conversion; 687 LastConversionIdx = StrIdx; 688 } 689 continue; 690 } 691 692 // Seen '%'. Now processing a format conversion. 693 switch (Str[StrIdx]) { 694 // Handle dynamic precision or width specifier. 695 case '*': { 696 ++numConversions; 697 698 if (!HasVAListArg && numConversions > numDataArgs) { 699 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 700 701 if (Str[StrIdx-1] == '.') 702 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 703 << OrigFormatExpr->getSourceRange(); 704 else 705 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 706 << OrigFormatExpr->getSourceRange(); 707 708 // Don't do any more checking. We'll just emit spurious errors. 709 return; 710 } 711 712 // Perform type checking on width/precision specifier. 713 const Expr *E = TheCall->getArg(format_idx+numConversions); 714 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 715 if (BT->getKind() == BuiltinType::Int) 716 break; 717 718 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 719 720 if (Str[StrIdx-1] == '.') 721 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 722 << E->getType() << E->getSourceRange(); 723 else 724 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 725 << E->getType() << E->getSourceRange(); 726 727 break; 728 } 729 730 // Characters which can terminate a format conversion 731 // (e.g. "%d"). Characters that specify length modifiers or 732 // other flags are handled by the default case below. 733 // 734 // FIXME: additional checks will go into the following cases. 735 case 'i': 736 case 'd': 737 case 'o': 738 case 'u': 739 case 'x': 740 case 'X': 741 case 'D': 742 case 'O': 743 case 'U': 744 case 'e': 745 case 'E': 746 case 'f': 747 case 'F': 748 case 'g': 749 case 'G': 750 case 'a': 751 case 'A': 752 case 'c': 753 case 'C': 754 case 'S': 755 case 's': 756 case 'p': 757 ++numConversions; 758 CurrentState = state_OrdChr; 759 break; 760 761 // CHECK: Are we using "%n"? Issue a warning. 762 case 'n': { 763 ++numConversions; 764 CurrentState = state_OrdChr; 765 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, 766 LastConversionIdx); 767 768 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 769 break; 770 } 771 772 // Handle "%@" 773 case '@': 774 // %@ is allowed in ObjC format strings only. 775 if(ObjCFExpr != NULL) 776 CurrentState = state_OrdChr; 777 else { 778 // Issue a warning: invalid format conversion. 779 SourceLocation Loc = 780 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 781 782 Diag(Loc, diag::warn_printf_invalid_conversion) 783 << std::string(Str+LastConversionIdx, 784 Str+std::min(LastConversionIdx+2, StrLen)) 785 << OrigFormatExpr->getSourceRange(); 786 } 787 ++numConversions; 788 break; 789 790 // Handle "%%" 791 case '%': 792 // Sanity check: Was the first "%" character the previous one? 793 // If not, we will assume that we have a malformed format 794 // conversion, and that the current "%" character is the start 795 // of a new conversion. 796 if (StrIdx - LastConversionIdx == 1) 797 CurrentState = state_OrdChr; 798 else { 799 // Issue a warning: invalid format conversion. 800 SourceLocation Loc = 801 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 802 803 Diag(Loc, diag::warn_printf_invalid_conversion) 804 << std::string(Str+LastConversionIdx, Str+StrIdx) 805 << OrigFormatExpr->getSourceRange(); 806 807 // This conversion is broken. Advance to the next format 808 // conversion. 809 LastConversionIdx = StrIdx; 810 ++numConversions; 811 } 812 break; 813 814 default: 815 // This case catches all other characters: flags, widths, etc. 816 // We should eventually process those as well. 817 break; 818 } 819 } 820 821 if (CurrentState == state_Conversion) { 822 // Issue a warning: invalid format conversion. 823 SourceLocation Loc = 824 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 825 826 Diag(Loc, diag::warn_printf_invalid_conversion) 827 << std::string(Str+LastConversionIdx, 828 Str+std::min(LastConversionIdx+2, StrLen)) 829 << OrigFormatExpr->getSourceRange(); 830 return; 831 } 832 833 if (!HasVAListArg) { 834 // CHECK: Does the number of format conversions exceed the number 835 // of data arguments? 836 if (numConversions > numDataArgs) { 837 SourceLocation Loc = 838 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 839 840 Diag(Loc, diag::warn_printf_insufficient_data_args) 841 << OrigFormatExpr->getSourceRange(); 842 } 843 // CHECK: Does the number of data arguments exceed the number of 844 // format conversions in the format string? 845 else if (numConversions < numDataArgs) 846 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 847 diag::warn_printf_too_many_data_args) 848 << OrigFormatExpr->getSourceRange(); 849 } 850} 851 852//===--- CHECK: Return Address of Stack Variable --------------------------===// 853 854static DeclRefExpr* EvalVal(Expr *E); 855static DeclRefExpr* EvalAddr(Expr* E); 856 857/// CheckReturnStackAddr - Check if a return statement returns the address 858/// of a stack variable. 859void 860Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 861 SourceLocation ReturnLoc) { 862 863 // Perform checking for returned stack addresses. 864 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 865 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 866 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 867 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 868 869 // Skip over implicit cast expressions when checking for block expressions. 870 if (ImplicitCastExpr *IcExpr = 871 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 872 RetValExp = IcExpr->getSubExpr(); 873 874 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 875 if (C->hasBlockDeclRefExprs()) 876 Diag(C->getLocStart(), diag::err_ret_local_block) 877 << C->getSourceRange(); 878 } 879 // Perform checking for stack values returned by reference. 880 else if (lhsType->isReferenceType()) { 881 // Check for a reference to the stack 882 if (DeclRefExpr *DR = EvalVal(RetValExp)) 883 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 884 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 885 } 886} 887 888/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 889/// check if the expression in a return statement evaluates to an address 890/// to a location on the stack. The recursion is used to traverse the 891/// AST of the return expression, with recursion backtracking when we 892/// encounter a subexpression that (1) clearly does not lead to the address 893/// of a stack variable or (2) is something we cannot determine leads to 894/// the address of a stack variable based on such local checking. 895/// 896/// EvalAddr processes expressions that are pointers that are used as 897/// references (and not L-values). EvalVal handles all other values. 898/// At the base case of the recursion is a check for a DeclRefExpr* in 899/// the refers to a stack variable. 900/// 901/// This implementation handles: 902/// 903/// * pointer-to-pointer casts 904/// * implicit conversions from array references to pointers 905/// * taking the address of fields 906/// * arbitrary interplay between "&" and "*" operators 907/// * pointer arithmetic from an address of a stack variable 908/// * taking the address of an array element where the array is on the stack 909static DeclRefExpr* EvalAddr(Expr *E) { 910 // We should only be called for evaluating pointer expressions. 911 assert((E->getType()->isPointerType() || 912 E->getType()->isBlockPointerType() || 913 E->getType()->isObjCQualifiedIdType()) && 914 "EvalAddr only works on pointers"); 915 916 // Our "symbolic interpreter" is just a dispatch off the currently 917 // viewed AST node. We then recursively traverse the AST by calling 918 // EvalAddr and EvalVal appropriately. 919 switch (E->getStmtClass()) { 920 case Stmt::ParenExprClass: 921 // Ignore parentheses. 922 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 923 924 case Stmt::UnaryOperatorClass: { 925 // The only unary operator that make sense to handle here 926 // is AddrOf. All others don't make sense as pointers. 927 UnaryOperator *U = cast<UnaryOperator>(E); 928 929 if (U->getOpcode() == UnaryOperator::AddrOf) 930 return EvalVal(U->getSubExpr()); 931 else 932 return NULL; 933 } 934 935 case Stmt::BinaryOperatorClass: { 936 // Handle pointer arithmetic. All other binary operators are not valid 937 // in this context. 938 BinaryOperator *B = cast<BinaryOperator>(E); 939 BinaryOperator::Opcode op = B->getOpcode(); 940 941 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 942 return NULL; 943 944 Expr *Base = B->getLHS(); 945 946 // Determine which argument is the real pointer base. It could be 947 // the RHS argument instead of the LHS. 948 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 949 950 assert (Base->getType()->isPointerType()); 951 return EvalAddr(Base); 952 } 953 954 // For conditional operators we need to see if either the LHS or RHS are 955 // valid DeclRefExpr*s. If one of them is valid, we return it. 956 case Stmt::ConditionalOperatorClass: { 957 ConditionalOperator *C = cast<ConditionalOperator>(E); 958 959 // Handle the GNU extension for missing LHS. 960 if (Expr *lhsExpr = C->getLHS()) 961 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 962 return LHS; 963 964 return EvalAddr(C->getRHS()); 965 } 966 967 // For casts, we need to handle conversions from arrays to 968 // pointer values, and pointer-to-pointer conversions. 969 case Stmt::ImplicitCastExprClass: 970 case Stmt::CStyleCastExprClass: 971 case Stmt::CXXFunctionalCastExprClass: { 972 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 973 QualType T = SubExpr->getType(); 974 975 if (SubExpr->getType()->isPointerType() || 976 SubExpr->getType()->isBlockPointerType() || 977 SubExpr->getType()->isObjCQualifiedIdType()) 978 return EvalAddr(SubExpr); 979 else if (T->isArrayType()) 980 return EvalVal(SubExpr); 981 else 982 return 0; 983 } 984 985 // C++ casts. For dynamic casts, static casts, and const casts, we 986 // are always converting from a pointer-to-pointer, so we just blow 987 // through the cast. In the case the dynamic cast doesn't fail (and 988 // return NULL), we take the conservative route and report cases 989 // where we return the address of a stack variable. For Reinterpre 990 // FIXME: The comment about is wrong; we're not always converting 991 // from pointer to pointer. I'm guessing that this code should also 992 // handle references to objects. 993 case Stmt::CXXStaticCastExprClass: 994 case Stmt::CXXDynamicCastExprClass: 995 case Stmt::CXXConstCastExprClass: 996 case Stmt::CXXReinterpretCastExprClass: { 997 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 998 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 999 return EvalAddr(S); 1000 else 1001 return NULL; 1002 } 1003 1004 // Everything else: we simply don't reason about them. 1005 default: 1006 return NULL; 1007 } 1008} 1009 1010 1011/// EvalVal - This function is complements EvalAddr in the mutual recursion. 1012/// See the comments for EvalAddr for more details. 1013static DeclRefExpr* EvalVal(Expr *E) { 1014 1015 // We should only be called for evaluating non-pointer expressions, or 1016 // expressions with a pointer type that are not used as references but instead 1017 // are l-values (e.g., DeclRefExpr with a pointer type). 1018 1019 // Our "symbolic interpreter" is just a dispatch off the currently 1020 // viewed AST node. We then recursively traverse the AST by calling 1021 // EvalAddr and EvalVal appropriately. 1022 switch (E->getStmtClass()) { 1023 case Stmt::DeclRefExprClass: 1024 case Stmt::QualifiedDeclRefExprClass: { 1025 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 1026 // at code that refers to a variable's name. We check if it has local 1027 // storage within the function, and if so, return the expression. 1028 DeclRefExpr *DR = cast<DeclRefExpr>(E); 1029 1030 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 1031 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 1032 1033 return NULL; 1034 } 1035 1036 case Stmt::ParenExprClass: 1037 // Ignore parentheses. 1038 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 1039 1040 case Stmt::UnaryOperatorClass: { 1041 // The only unary operator that make sense to handle here 1042 // is Deref. All others don't resolve to a "name." This includes 1043 // handling all sorts of rvalues passed to a unary operator. 1044 UnaryOperator *U = cast<UnaryOperator>(E); 1045 1046 if (U->getOpcode() == UnaryOperator::Deref) 1047 return EvalAddr(U->getSubExpr()); 1048 1049 return NULL; 1050 } 1051 1052 case Stmt::ArraySubscriptExprClass: { 1053 // Array subscripts are potential references to data on the stack. We 1054 // retrieve the DeclRefExpr* for the array variable if it indeed 1055 // has local storage. 1056 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 1057 } 1058 1059 case Stmt::ConditionalOperatorClass: { 1060 // For conditional operators we need to see if either the LHS or RHS are 1061 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 1062 ConditionalOperator *C = cast<ConditionalOperator>(E); 1063 1064 // Handle the GNU extension for missing LHS. 1065 if (Expr *lhsExpr = C->getLHS()) 1066 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 1067 return LHS; 1068 1069 return EvalVal(C->getRHS()); 1070 } 1071 1072 // Accesses to members are potential references to data on the stack. 1073 case Stmt::MemberExprClass: { 1074 MemberExpr *M = cast<MemberExpr>(E); 1075 1076 // Check for indirect access. We only want direct field accesses. 1077 if (!M->isArrow()) 1078 return EvalVal(M->getBase()); 1079 else 1080 return NULL; 1081 } 1082 1083 // Everything else: we simply don't reason about them. 1084 default: 1085 return NULL; 1086 } 1087} 1088 1089//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 1090 1091/// Check for comparisons of floating point operands using != and ==. 1092/// Issue a warning if these are no self-comparisons, as they are not likely 1093/// to do what the programmer intended. 1094void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1095 bool EmitWarning = true; 1096 1097 Expr* LeftExprSansParen = lex->IgnoreParens(); 1098 Expr* RightExprSansParen = rex->IgnoreParens(); 1099 1100 // Special case: check for x == x (which is OK). 1101 // Do not emit warnings for such cases. 1102 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1103 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1104 if (DRL->getDecl() == DRR->getDecl()) 1105 EmitWarning = false; 1106 1107 1108 // Special case: check for comparisons against literals that can be exactly 1109 // represented by APFloat. In such cases, do not emit a warning. This 1110 // is a heuristic: often comparison against such literals are used to 1111 // detect if a value in a variable has not changed. This clearly can 1112 // lead to false negatives. 1113 if (EmitWarning) { 1114 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1115 if (FLL->isExact()) 1116 EmitWarning = false; 1117 } 1118 else 1119 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1120 if (FLR->isExact()) 1121 EmitWarning = false; 1122 } 1123 } 1124 1125 // Check for comparisons with builtin types. 1126 if (EmitWarning) 1127 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1128 if (CL->isBuiltinCall(Context)) 1129 EmitWarning = false; 1130 1131 if (EmitWarning) 1132 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1133 if (CR->isBuiltinCall(Context)) 1134 EmitWarning = false; 1135 1136 // Emit the diagnostic. 1137 if (EmitWarning) 1138 Diag(loc, diag::warn_floatingpoint_eq) 1139 << lex->getSourceRange() << rex->getSourceRange(); 1140} 1141