SemaChecking.cpp revision 586d6a81428da2d1ce70bcb98df29d749361cbf3
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/LiteralSupport.h" 21#include "clang/Lex/Preprocessor.h" 22using namespace clang; 23 24/// getLocationOfStringLiteralByte - Return a source location that points to the 25/// specified byte of the specified string literal. 26/// 27/// Strings are amazingly complex. They can be formed from multiple tokens and 28/// can have escape sequences in them in addition to the usual trigraph and 29/// escaped newline business. This routine handles this complexity. 30/// 31SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 32 unsigned ByteNo) const { 33 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 34 35 // Loop over all of the tokens in this string until we find the one that 36 // contains the byte we're looking for. 37 unsigned TokNo = 0; 38 while (1) { 39 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 40 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 41 42 // Get the spelling of the string so that we can get the data that makes up 43 // the string literal, not the identifier for the macro it is potentially 44 // expanded through. 45 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 46 47 // Re-lex the token to get its length and original spelling. 48 std::pair<FileID, unsigned> LocInfo = 49 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 50 std::pair<const char *,const char *> Buffer = 51 SourceMgr.getBufferData(LocInfo.first); 52 const char *StrData = Buffer.first+LocInfo.second; 53 54 // Create a langops struct and enable trigraphs. This is sufficient for 55 // relexing tokens. 56 LangOptions LangOpts; 57 LangOpts.Trigraphs = true; 58 59 // Create a lexer starting at the beginning of this token. 60 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData, 61 Buffer.second); 62 Token TheTok; 63 TheLexer.LexFromRawLexer(TheTok); 64 65 // Use the StringLiteralParser to compute the length of the string in bytes. 66 StringLiteralParser SLP(&TheTok, 1, PP); 67 unsigned TokNumBytes = SLP.GetStringLength(); 68 69 // If the byte is in this token, return the location of the byte. 70 if (ByteNo < TokNumBytes || 71 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 72 unsigned Offset = 73 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP); 74 75 // Now that we know the offset of the token in the spelling, use the 76 // preprocessor to get the offset in the original source. 77 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 78 } 79 80 // Move to the next string token. 81 ++TokNo; 82 ByteNo -= TokNumBytes; 83 } 84} 85 86 87/// CheckFunctionCall - Check a direct function call for various correctness 88/// and safety properties not strictly enforced by the C type system. 89Action::OwningExprResult 90Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 91 OwningExprResult TheCallResult(Owned(TheCall)); 92 // Get the IdentifierInfo* for the called function. 93 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 94 95 // None of the checks below are needed for functions that don't have 96 // simple names (e.g., C++ conversion functions). 97 if (!FnInfo) 98 return move(TheCallResult); 99 100 switch (FDecl->getBuiltinID(Context)) { 101 case Builtin::BI__builtin___CFStringMakeConstantString: 102 assert(TheCall->getNumArgs() == 1 && 103 "Wrong # arguments to builtin CFStringMakeConstantString"); 104 if (CheckObjCString(TheCall->getArg(0))) 105 return ExprError(); 106 return move(TheCallResult); 107 case Builtin::BI__builtin_stdarg_start: 108 case Builtin::BI__builtin_va_start: 109 if (SemaBuiltinVAStart(TheCall)) 110 return ExprError(); 111 return move(TheCallResult); 112 case Builtin::BI__builtin_isgreater: 113 case Builtin::BI__builtin_isgreaterequal: 114 case Builtin::BI__builtin_isless: 115 case Builtin::BI__builtin_islessequal: 116 case Builtin::BI__builtin_islessgreater: 117 case Builtin::BI__builtin_isunordered: 118 if (SemaBuiltinUnorderedCompare(TheCall)) 119 return ExprError(); 120 return move(TheCallResult); 121 case Builtin::BI__builtin_return_address: 122 case Builtin::BI__builtin_frame_address: 123 if (SemaBuiltinStackAddress(TheCall)) 124 return ExprError(); 125 return move(TheCallResult); 126 case Builtin::BI__builtin_shufflevector: 127 return SemaBuiltinShuffleVector(TheCall); 128 // TheCall will be freed by the smart pointer here, but that's fine, since 129 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 130 case Builtin::BI__builtin_prefetch: 131 if (SemaBuiltinPrefetch(TheCall)) 132 return ExprError(); 133 return move(TheCallResult); 134 case Builtin::BI__builtin_object_size: 135 if (SemaBuiltinObjectSize(TheCall)) 136 return ExprError(); 137 return move(TheCallResult); 138 case Builtin::BI__builtin_longjmp: 139 if (SemaBuiltinLongjmp(TheCall)) 140 return ExprError(); 141 return move(TheCallResult); 142 } 143 144 // FIXME: This mechanism should be abstracted to be less fragile and 145 // more efficient. For example, just map function ids to custom 146 // handlers. 147 148 // Printf checking. 149 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 150 if (Format->getType() == "printf") { 151 bool HasVAListArg = Format->getFirstArg() == 0; 152 if (!HasVAListArg) { 153 if (const FunctionProtoType *Proto 154 = FDecl->getType()->getAsFunctionProtoType()) 155 HasVAListArg = !Proto->isVariadic(); 156 } 157 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 158 HasVAListArg ? 0 : Format->getFirstArg() - 1); 159 } 160 } 161 162 return move(TheCallResult); 163} 164 165/// CheckObjCString - Checks that the argument to the builtin 166/// CFString constructor is correct 167/// FIXME: GCC currently emits the following warning: 168/// "warning: input conversion stopped due to an input byte that does not 169/// belong to the input codeset UTF-8" 170/// Note: It might also make sense to do the UTF-16 conversion here (would 171/// simplify the backend). 172bool Sema::CheckObjCString(Expr *Arg) { 173 Arg = Arg->IgnoreParenCasts(); 174 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 175 176 if (!Literal || Literal->isWide()) { 177 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 178 << Arg->getSourceRange(); 179 return true; 180 } 181 182 const char *Data = Literal->getStrData(); 183 unsigned Length = Literal->getByteLength(); 184 185 for (unsigned i = 0; i < Length; ++i) { 186 if (!Data[i]) { 187 Diag(getLocationOfStringLiteralByte(Literal, i), 188 diag::warn_cfstring_literal_contains_nul_character) 189 << Arg->getSourceRange(); 190 break; 191 } 192 } 193 194 return false; 195} 196 197/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 198/// Emit an error and return true on failure, return false on success. 199bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 200 Expr *Fn = TheCall->getCallee(); 201 if (TheCall->getNumArgs() > 2) { 202 Diag(TheCall->getArg(2)->getLocStart(), 203 diag::err_typecheck_call_too_many_args) 204 << 0 /*function call*/ << Fn->getSourceRange() 205 << SourceRange(TheCall->getArg(2)->getLocStart(), 206 (*(TheCall->arg_end()-1))->getLocEnd()); 207 return true; 208 } 209 210 if (TheCall->getNumArgs() < 2) { 211 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 212 << 0 /*function call*/; 213 } 214 215 // Determine whether the current function is variadic or not. 216 bool isVariadic; 217 if (CurBlock) 218 isVariadic = CurBlock->isVariadic; 219 else if (getCurFunctionDecl()) { 220 if (FunctionProtoType* FTP = 221 dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType())) 222 isVariadic = FTP->isVariadic(); 223 else 224 isVariadic = false; 225 } else { 226 isVariadic = getCurMethodDecl()->isVariadic(); 227 } 228 229 if (!isVariadic) { 230 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 231 return true; 232 } 233 234 // Verify that the second argument to the builtin is the last argument of the 235 // current function or method. 236 bool SecondArgIsLastNamedArgument = false; 237 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 238 239 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 240 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 241 // FIXME: This isn't correct for methods (results in bogus warning). 242 // Get the last formal in the current function. 243 const ParmVarDecl *LastArg; 244 if (CurBlock) 245 LastArg = *(CurBlock->TheDecl->param_end()-1); 246 else if (FunctionDecl *FD = getCurFunctionDecl()) 247 LastArg = *(FD->param_end()-1); 248 else 249 LastArg = *(getCurMethodDecl()->param_end()-1); 250 SecondArgIsLastNamedArgument = PV == LastArg; 251 } 252 } 253 254 if (!SecondArgIsLastNamedArgument) 255 Diag(TheCall->getArg(1)->getLocStart(), 256 diag::warn_second_parameter_of_va_start_not_last_named_argument); 257 return false; 258} 259 260/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 261/// friends. This is declared to take (...), so we have to check everything. 262bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 263 if (TheCall->getNumArgs() < 2) 264 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 265 << 0 /*function call*/; 266 if (TheCall->getNumArgs() > 2) 267 return Diag(TheCall->getArg(2)->getLocStart(), 268 diag::err_typecheck_call_too_many_args) 269 << 0 /*function call*/ 270 << SourceRange(TheCall->getArg(2)->getLocStart(), 271 (*(TheCall->arg_end()-1))->getLocEnd()); 272 273 Expr *OrigArg0 = TheCall->getArg(0); 274 Expr *OrigArg1 = TheCall->getArg(1); 275 276 // Do standard promotions between the two arguments, returning their common 277 // type. 278 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 279 280 // Make sure any conversions are pushed back into the call; this is 281 // type safe since unordered compare builtins are declared as "_Bool 282 // foo(...)". 283 TheCall->setArg(0, OrigArg0); 284 TheCall->setArg(1, OrigArg1); 285 286 // If the common type isn't a real floating type, then the arguments were 287 // invalid for this operation. 288 if (!Res->isRealFloatingType()) 289 return Diag(OrigArg0->getLocStart(), 290 diag::err_typecheck_call_invalid_ordered_compare) 291 << OrigArg0->getType() << OrigArg1->getType() 292 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 293 294 return false; 295} 296 297bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 298 // The signature for these builtins is exact; the only thing we need 299 // to check is that the argument is a constant. 300 SourceLocation Loc; 301 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 302 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 303 304 return false; 305} 306 307/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 308// This is declared to take (...), so we have to check everything. 309Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 310 if (TheCall->getNumArgs() < 3) 311 return ExprError(Diag(TheCall->getLocEnd(), 312 diag::err_typecheck_call_too_few_args) 313 << 0 /*function call*/ << TheCall->getSourceRange()); 314 315 QualType FAType = TheCall->getArg(0)->getType(); 316 QualType SAType = TheCall->getArg(1)->getType(); 317 318 if (!FAType->isVectorType() || !SAType->isVectorType()) { 319 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 320 << SourceRange(TheCall->getArg(0)->getLocStart(), 321 TheCall->getArg(1)->getLocEnd()); 322 return ExprError(); 323 } 324 325 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 326 Context.getCanonicalType(SAType).getUnqualifiedType()) { 327 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 328 << SourceRange(TheCall->getArg(0)->getLocStart(), 329 TheCall->getArg(1)->getLocEnd()); 330 return ExprError(); 331 } 332 333 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 334 if (TheCall->getNumArgs() != numElements+2) { 335 if (TheCall->getNumArgs() < numElements+2) 336 return ExprError(Diag(TheCall->getLocEnd(), 337 diag::err_typecheck_call_too_few_args) 338 << 0 /*function call*/ << TheCall->getSourceRange()); 339 return ExprError(Diag(TheCall->getLocEnd(), 340 diag::err_typecheck_call_too_many_args) 341 << 0 /*function call*/ << TheCall->getSourceRange()); 342 } 343 344 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 345 llvm::APSInt Result(32); 346 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 347 return ExprError(Diag(TheCall->getLocStart(), 348 diag::err_shufflevector_nonconstant_argument) 349 << TheCall->getArg(i)->getSourceRange()); 350 351 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 352 return ExprError(Diag(TheCall->getLocStart(), 353 diag::err_shufflevector_argument_too_large) 354 << TheCall->getArg(i)->getSourceRange()); 355 } 356 357 llvm::SmallVector<Expr*, 32> exprs; 358 359 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 360 exprs.push_back(TheCall->getArg(i)); 361 TheCall->setArg(i, 0); 362 } 363 364 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2, 365 FAType, 366 TheCall->getCallee()->getLocStart(), 367 TheCall->getRParenLoc())); 368} 369 370/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 371// This is declared to take (const void*, ...) and can take two 372// optional constant int args. 373bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 374 unsigned NumArgs = TheCall->getNumArgs(); 375 376 if (NumArgs > 3) 377 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 378 << 0 /*function call*/ << TheCall->getSourceRange(); 379 380 // Argument 0 is checked for us and the remaining arguments must be 381 // constant integers. 382 for (unsigned i = 1; i != NumArgs; ++i) { 383 Expr *Arg = TheCall->getArg(i); 384 QualType RWType = Arg->getType(); 385 386 const BuiltinType *BT = RWType->getAsBuiltinType(); 387 llvm::APSInt Result; 388 if (!BT || BT->getKind() != BuiltinType::Int || 389 !Arg->isIntegerConstantExpr(Result, Context)) 390 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 391 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 392 393 // FIXME: gcc issues a warning and rewrites these to 0. These 394 // seems especially odd for the third argument since the default 395 // is 3. 396 if (i == 1) { 397 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 398 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 399 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 400 } else { 401 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 402 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 403 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 404 } 405 } 406 407 return false; 408} 409 410/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 411/// int type). This simply type checks that type is one of the defined 412/// constants (0-3). 413bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 414 Expr *Arg = TheCall->getArg(1); 415 QualType ArgType = Arg->getType(); 416 const BuiltinType *BT = ArgType->getAsBuiltinType(); 417 llvm::APSInt Result(32); 418 if (!BT || BT->getKind() != BuiltinType::Int || 419 !Arg->isIntegerConstantExpr(Result, Context)) { 420 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 421 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 422 } 423 424 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 425 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 426 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 427 } 428 429 return false; 430} 431 432/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). 433/// This checks that val is a constant 1. 434bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 435 Expr *Arg = TheCall->getArg(1); 436 llvm::APSInt Result(32); 437 if (!Arg->isIntegerConstantExpr(Result, Context) || Result != 1) 438 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 439 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 440 441 return false; 442} 443 444// Handle i > 1 ? "x" : "y", recursivelly 445bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 446 bool HasVAListArg, 447 unsigned format_idx, unsigned firstDataArg) { 448 449 switch (E->getStmtClass()) { 450 case Stmt::ConditionalOperatorClass: { 451 const ConditionalOperator *C = cast<ConditionalOperator>(E); 452 return SemaCheckStringLiteral(C->getLHS(), TheCall, 453 HasVAListArg, format_idx, firstDataArg) 454 && SemaCheckStringLiteral(C->getRHS(), TheCall, 455 HasVAListArg, format_idx, firstDataArg); 456 } 457 458 case Stmt::ImplicitCastExprClass: { 459 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 460 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 461 format_idx, firstDataArg); 462 } 463 464 case Stmt::ParenExprClass: { 465 const ParenExpr *Expr = cast<ParenExpr>(E); 466 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 467 format_idx, firstDataArg); 468 } 469 470 case Stmt::DeclRefExprClass: { 471 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 472 473 // As an exception, do not flag errors for variables binding to 474 // const string literals. 475 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 476 bool isConstant = false; 477 QualType T = DR->getType(); 478 479 if (const ArrayType *AT = Context.getAsArrayType(T)) { 480 isConstant = AT->getElementType().isConstant(Context); 481 } 482 else if (const PointerType *PT = T->getAsPointerType()) { 483 isConstant = T.isConstant(Context) && 484 PT->getPointeeType().isConstant(Context); 485 } 486 487 if (isConstant) { 488 const VarDecl *Def = 0; 489 if (const Expr *Init = VD->getDefinition(Def)) 490 return SemaCheckStringLiteral(Init, TheCall, 491 HasVAListArg, format_idx, firstDataArg); 492 } 493 } 494 495 return false; 496 } 497 498 case Stmt::ObjCStringLiteralClass: 499 case Stmt::StringLiteralClass: { 500 const StringLiteral *StrE = NULL; 501 502 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 503 StrE = ObjCFExpr->getString(); 504 else 505 StrE = cast<StringLiteral>(E); 506 507 if (StrE) { 508 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx, 509 firstDataArg); 510 return true; 511 } 512 513 return false; 514 } 515 516 default: 517 return false; 518 } 519} 520 521 522/// CheckPrintfArguments - Check calls to printf (and similar functions) for 523/// correct use of format strings. 524/// 525/// HasVAListArg - A predicate indicating whether the printf-like 526/// function is passed an explicit va_arg argument (e.g., vprintf) 527/// 528/// format_idx - The index into Args for the format string. 529/// 530/// Improper format strings to functions in the printf family can be 531/// the source of bizarre bugs and very serious security holes. A 532/// good source of information is available in the following paper 533/// (which includes additional references): 534/// 535/// FormatGuard: Automatic Protection From printf Format String 536/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 537/// 538/// Functionality implemented: 539/// 540/// We can statically check the following properties for string 541/// literal format strings for non v.*printf functions (where the 542/// arguments are passed directly): 543// 544/// (1) Are the number of format conversions equal to the number of 545/// data arguments? 546/// 547/// (2) Does each format conversion correctly match the type of the 548/// corresponding data argument? (TODO) 549/// 550/// Moreover, for all printf functions we can: 551/// 552/// (3) Check for a missing format string (when not caught by type checking). 553/// 554/// (4) Check for no-operation flags; e.g. using "#" with format 555/// conversion 'c' (TODO) 556/// 557/// (5) Check the use of '%n', a major source of security holes. 558/// 559/// (6) Check for malformed format conversions that don't specify anything. 560/// 561/// (7) Check for empty format strings. e.g: printf(""); 562/// 563/// (8) Check that the format string is a wide literal. 564/// 565/// (9) Also check the arguments of functions with the __format__ attribute. 566/// (TODO). 567/// 568/// All of these checks can be done by parsing the format string. 569/// 570/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 571void 572Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, 573 unsigned format_idx, unsigned firstDataArg) { 574 const Expr *Fn = TheCall->getCallee(); 575 576 // CHECK: printf-like function is called with no format string. 577 if (format_idx >= TheCall->getNumArgs()) { 578 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 579 << Fn->getSourceRange(); 580 return; 581 } 582 583 const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 584 585 // CHECK: format string is not a string literal. 586 // 587 // Dynamically generated format strings are difficult to 588 // automatically vet at compile time. Requiring that format strings 589 // are string literals: (1) permits the checking of format strings by 590 // the compiler and thereby (2) can practically remove the source of 591 // many format string exploits. 592 593 // Format string can be either ObjC string (e.g. @"%d") or 594 // C string (e.g. "%d") 595 // ObjC string uses the same format specifiers as C string, so we can use 596 // the same format string checking logic for both ObjC and C strings. 597 if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx, 598 firstDataArg)) 599 return; // Literal format string found, check done! 600 601 // For vprintf* functions (i.e., HasVAListArg==true), we add a 602 // special check to see if the format string is a function parameter 603 // of the function calling the printf function. If the function 604 // has an attribute indicating it is a printf-like function, then we 605 // should suppress warnings concerning non-literals being used in a call 606 // to a vprintf function. For example: 607 // 608 // void 609 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 610 // va_list ap; 611 // va_start(ap, fmt); 612 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 613 // ... 614 // 615 // 616 // FIXME: We don't have full attribute support yet, so just check to see 617 // if the argument is a DeclRefExpr that references a parameter. We'll 618 // add proper support for checking the attribute later. 619 if (HasVAListArg) 620 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 621 if (isa<ParmVarDecl>(DR->getDecl())) 622 return; 623 624 // If there are no arguments specified, warn with -Wformat-security, otherwise 625 // warn only with -Wformat-nonliteral. 626 if (TheCall->getNumArgs() == format_idx+1) 627 Diag(TheCall->getArg(format_idx)->getLocStart(), 628 diag::warn_printf_nonliteral_noargs) 629 << OrigFormatExpr->getSourceRange(); 630 else 631 Diag(TheCall->getArg(format_idx)->getLocStart(), 632 diag::warn_printf_nonliteral) 633 << OrigFormatExpr->getSourceRange(); 634} 635 636void Sema::CheckPrintfString(const StringLiteral *FExpr, 637 const Expr *OrigFormatExpr, 638 const CallExpr *TheCall, bool HasVAListArg, 639 unsigned format_idx, unsigned firstDataArg) { 640 641 const ObjCStringLiteral *ObjCFExpr = 642 dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 643 644 // CHECK: is the format string a wide literal? 645 if (FExpr->isWide()) { 646 Diag(FExpr->getLocStart(), 647 diag::warn_printf_format_string_is_wide_literal) 648 << OrigFormatExpr->getSourceRange(); 649 return; 650 } 651 652 // Str - The format string. NOTE: this is NOT null-terminated! 653 const char *Str = FExpr->getStrData(); 654 655 // CHECK: empty format string? 656 unsigned StrLen = FExpr->getByteLength(); 657 658 if (StrLen == 0) { 659 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 660 << OrigFormatExpr->getSourceRange(); 661 return; 662 } 663 664 // We process the format string using a binary state machine. The 665 // current state is stored in CurrentState. 666 enum { 667 state_OrdChr, 668 state_Conversion 669 } CurrentState = state_OrdChr; 670 671 // numConversions - The number of conversions seen so far. This is 672 // incremented as we traverse the format string. 673 unsigned numConversions = 0; 674 675 // numDataArgs - The number of data arguments after the format 676 // string. This can only be determined for non vprintf-like 677 // functions. For those functions, this value is 1 (the sole 678 // va_arg argument). 679 unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg; 680 681 // Inspect the format string. 682 unsigned StrIdx = 0; 683 684 // LastConversionIdx - Index within the format string where we last saw 685 // a '%' character that starts a new format conversion. 686 unsigned LastConversionIdx = 0; 687 688 for (; StrIdx < StrLen; ++StrIdx) { 689 690 // Is the number of detected conversion conversions greater than 691 // the number of matching data arguments? If so, stop. 692 if (!HasVAListArg && numConversions > numDataArgs) break; 693 694 // Handle "\0" 695 if (Str[StrIdx] == '\0') { 696 // The string returned by getStrData() is not null-terminated, 697 // so the presence of a null character is likely an error. 698 Diag(getLocationOfStringLiteralByte(FExpr, StrIdx), 699 diag::warn_printf_format_string_contains_null_char) 700 << OrigFormatExpr->getSourceRange(); 701 return; 702 } 703 704 // Ordinary characters (not processing a format conversion). 705 if (CurrentState == state_OrdChr) { 706 if (Str[StrIdx] == '%') { 707 CurrentState = state_Conversion; 708 LastConversionIdx = StrIdx; 709 } 710 continue; 711 } 712 713 // Seen '%'. Now processing a format conversion. 714 switch (Str[StrIdx]) { 715 // Handle dynamic precision or width specifier. 716 case '*': { 717 ++numConversions; 718 719 if (!HasVAListArg && numConversions > numDataArgs) { 720 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 721 722 if (Str[StrIdx-1] == '.') 723 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 724 << OrigFormatExpr->getSourceRange(); 725 else 726 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 727 << OrigFormatExpr->getSourceRange(); 728 729 // Don't do any more checking. We'll just emit spurious errors. 730 return; 731 } 732 733 // Perform type checking on width/precision specifier. 734 const Expr *E = TheCall->getArg(format_idx+numConversions); 735 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 736 if (BT->getKind() == BuiltinType::Int) 737 break; 738 739 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 740 741 if (Str[StrIdx-1] == '.') 742 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 743 << E->getType() << E->getSourceRange(); 744 else 745 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 746 << E->getType() << E->getSourceRange(); 747 748 break; 749 } 750 751 // Characters which can terminate a format conversion 752 // (e.g. "%d"). Characters that specify length modifiers or 753 // other flags are handled by the default case below. 754 // 755 // FIXME: additional checks will go into the following cases. 756 case 'i': 757 case 'd': 758 case 'o': 759 case 'u': 760 case 'x': 761 case 'X': 762 case 'D': 763 case 'O': 764 case 'U': 765 case 'e': 766 case 'E': 767 case 'f': 768 case 'F': 769 case 'g': 770 case 'G': 771 case 'a': 772 case 'A': 773 case 'c': 774 case 'C': 775 case 'S': 776 case 's': 777 case 'p': 778 ++numConversions; 779 CurrentState = state_OrdChr; 780 break; 781 782 // CHECK: Are we using "%n"? Issue a warning. 783 case 'n': { 784 ++numConversions; 785 CurrentState = state_OrdChr; 786 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, 787 LastConversionIdx); 788 789 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 790 break; 791 } 792 793 // Handle "%@" 794 case '@': 795 // %@ is allowed in ObjC format strings only. 796 if(ObjCFExpr != NULL) 797 CurrentState = state_OrdChr; 798 else { 799 // Issue a warning: invalid format conversion. 800 SourceLocation Loc = 801 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 802 803 Diag(Loc, diag::warn_printf_invalid_conversion) 804 << std::string(Str+LastConversionIdx, 805 Str+std::min(LastConversionIdx+2, StrLen)) 806 << OrigFormatExpr->getSourceRange(); 807 } 808 ++numConversions; 809 break; 810 811 // Handle "%%" 812 case '%': 813 // Sanity check: Was the first "%" character the previous one? 814 // If not, we will assume that we have a malformed format 815 // conversion, and that the current "%" character is the start 816 // of a new conversion. 817 if (StrIdx - LastConversionIdx == 1) 818 CurrentState = state_OrdChr; 819 else { 820 // Issue a warning: invalid format conversion. 821 SourceLocation Loc = 822 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 823 824 Diag(Loc, diag::warn_printf_invalid_conversion) 825 << std::string(Str+LastConversionIdx, Str+StrIdx) 826 << OrigFormatExpr->getSourceRange(); 827 828 // This conversion is broken. Advance to the next format 829 // conversion. 830 LastConversionIdx = StrIdx; 831 ++numConversions; 832 } 833 break; 834 835 default: 836 // This case catches all other characters: flags, widths, etc. 837 // We should eventually process those as well. 838 break; 839 } 840 } 841 842 if (CurrentState == state_Conversion) { 843 // Issue a warning: invalid format conversion. 844 SourceLocation Loc = 845 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 846 847 Diag(Loc, diag::warn_printf_invalid_conversion) 848 << std::string(Str+LastConversionIdx, 849 Str+std::min(LastConversionIdx+2, StrLen)) 850 << OrigFormatExpr->getSourceRange(); 851 return; 852 } 853 854 if (!HasVAListArg) { 855 // CHECK: Does the number of format conversions exceed the number 856 // of data arguments? 857 if (numConversions > numDataArgs) { 858 SourceLocation Loc = 859 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 860 861 Diag(Loc, diag::warn_printf_insufficient_data_args) 862 << OrigFormatExpr->getSourceRange(); 863 } 864 // CHECK: Does the number of data arguments exceed the number of 865 // format conversions in the format string? 866 else if (numConversions < numDataArgs) 867 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 868 diag::warn_printf_too_many_data_args) 869 << OrigFormatExpr->getSourceRange(); 870 } 871} 872 873//===--- CHECK: Return Address of Stack Variable --------------------------===// 874 875static DeclRefExpr* EvalVal(Expr *E); 876static DeclRefExpr* EvalAddr(Expr* E); 877 878/// CheckReturnStackAddr - Check if a return statement returns the address 879/// of a stack variable. 880void 881Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 882 SourceLocation ReturnLoc) { 883 884 // Perform checking for returned stack addresses. 885 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 886 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 887 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 888 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 889 890 // Skip over implicit cast expressions when checking for block expressions. 891 if (ImplicitCastExpr *IcExpr = 892 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 893 RetValExp = IcExpr->getSubExpr(); 894 895 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 896 if (C->hasBlockDeclRefExprs()) 897 Diag(C->getLocStart(), diag::err_ret_local_block) 898 << C->getSourceRange(); 899 } 900 // Perform checking for stack values returned by reference. 901 else if (lhsType->isReferenceType()) { 902 // Check for a reference to the stack 903 if (DeclRefExpr *DR = EvalVal(RetValExp)) 904 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 905 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 906 } 907} 908 909/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 910/// check if the expression in a return statement evaluates to an address 911/// to a location on the stack. The recursion is used to traverse the 912/// AST of the return expression, with recursion backtracking when we 913/// encounter a subexpression that (1) clearly does not lead to the address 914/// of a stack variable or (2) is something we cannot determine leads to 915/// the address of a stack variable based on such local checking. 916/// 917/// EvalAddr processes expressions that are pointers that are used as 918/// references (and not L-values). EvalVal handles all other values. 919/// At the base case of the recursion is a check for a DeclRefExpr* in 920/// the refers to a stack variable. 921/// 922/// This implementation handles: 923/// 924/// * pointer-to-pointer casts 925/// * implicit conversions from array references to pointers 926/// * taking the address of fields 927/// * arbitrary interplay between "&" and "*" operators 928/// * pointer arithmetic from an address of a stack variable 929/// * taking the address of an array element where the array is on the stack 930static DeclRefExpr* EvalAddr(Expr *E) { 931 // We should only be called for evaluating pointer expressions. 932 assert((E->getType()->isPointerType() || 933 E->getType()->isBlockPointerType() || 934 E->getType()->isObjCQualifiedIdType()) && 935 "EvalAddr only works on pointers"); 936 937 // Our "symbolic interpreter" is just a dispatch off the currently 938 // viewed AST node. We then recursively traverse the AST by calling 939 // EvalAddr and EvalVal appropriately. 940 switch (E->getStmtClass()) { 941 case Stmt::ParenExprClass: 942 // Ignore parentheses. 943 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 944 945 case Stmt::UnaryOperatorClass: { 946 // The only unary operator that make sense to handle here 947 // is AddrOf. All others don't make sense as pointers. 948 UnaryOperator *U = cast<UnaryOperator>(E); 949 950 if (U->getOpcode() == UnaryOperator::AddrOf) 951 return EvalVal(U->getSubExpr()); 952 else 953 return NULL; 954 } 955 956 case Stmt::BinaryOperatorClass: { 957 // Handle pointer arithmetic. All other binary operators are not valid 958 // in this context. 959 BinaryOperator *B = cast<BinaryOperator>(E); 960 BinaryOperator::Opcode op = B->getOpcode(); 961 962 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 963 return NULL; 964 965 Expr *Base = B->getLHS(); 966 967 // Determine which argument is the real pointer base. It could be 968 // the RHS argument instead of the LHS. 969 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 970 971 assert (Base->getType()->isPointerType()); 972 return EvalAddr(Base); 973 } 974 975 // For conditional operators we need to see if either the LHS or RHS are 976 // valid DeclRefExpr*s. If one of them is valid, we return it. 977 case Stmt::ConditionalOperatorClass: { 978 ConditionalOperator *C = cast<ConditionalOperator>(E); 979 980 // Handle the GNU extension for missing LHS. 981 if (Expr *lhsExpr = C->getLHS()) 982 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 983 return LHS; 984 985 return EvalAddr(C->getRHS()); 986 } 987 988 // For casts, we need to handle conversions from arrays to 989 // pointer values, and pointer-to-pointer conversions. 990 case Stmt::ImplicitCastExprClass: 991 case Stmt::CStyleCastExprClass: 992 case Stmt::CXXFunctionalCastExprClass: { 993 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 994 QualType T = SubExpr->getType(); 995 996 if (SubExpr->getType()->isPointerType() || 997 SubExpr->getType()->isBlockPointerType() || 998 SubExpr->getType()->isObjCQualifiedIdType()) 999 return EvalAddr(SubExpr); 1000 else if (T->isArrayType()) 1001 return EvalVal(SubExpr); 1002 else 1003 return 0; 1004 } 1005 1006 // C++ casts. For dynamic casts, static casts, and const casts, we 1007 // are always converting from a pointer-to-pointer, so we just blow 1008 // through the cast. In the case the dynamic cast doesn't fail (and 1009 // return NULL), we take the conservative route and report cases 1010 // where we return the address of a stack variable. For Reinterpre 1011 // FIXME: The comment about is wrong; we're not always converting 1012 // from pointer to pointer. I'm guessing that this code should also 1013 // handle references to objects. 1014 case Stmt::CXXStaticCastExprClass: 1015 case Stmt::CXXDynamicCastExprClass: 1016 case Stmt::CXXConstCastExprClass: 1017 case Stmt::CXXReinterpretCastExprClass: { 1018 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 1019 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 1020 return EvalAddr(S); 1021 else 1022 return NULL; 1023 } 1024 1025 // Everything else: we simply don't reason about them. 1026 default: 1027 return NULL; 1028 } 1029} 1030 1031 1032/// EvalVal - This function is complements EvalAddr in the mutual recursion. 1033/// See the comments for EvalAddr for more details. 1034static DeclRefExpr* EvalVal(Expr *E) { 1035 1036 // We should only be called for evaluating non-pointer expressions, or 1037 // expressions with a pointer type that are not used as references but instead 1038 // are l-values (e.g., DeclRefExpr with a pointer type). 1039 1040 // Our "symbolic interpreter" is just a dispatch off the currently 1041 // viewed AST node. We then recursively traverse the AST by calling 1042 // EvalAddr and EvalVal appropriately. 1043 switch (E->getStmtClass()) { 1044 case Stmt::DeclRefExprClass: 1045 case Stmt::QualifiedDeclRefExprClass: { 1046 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 1047 // at code that refers to a variable's name. We check if it has local 1048 // storage within the function, and if so, return the expression. 1049 DeclRefExpr *DR = cast<DeclRefExpr>(E); 1050 1051 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 1052 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 1053 1054 return NULL; 1055 } 1056 1057 case Stmt::ParenExprClass: 1058 // Ignore parentheses. 1059 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 1060 1061 case Stmt::UnaryOperatorClass: { 1062 // The only unary operator that make sense to handle here 1063 // is Deref. All others don't resolve to a "name." This includes 1064 // handling all sorts of rvalues passed to a unary operator. 1065 UnaryOperator *U = cast<UnaryOperator>(E); 1066 1067 if (U->getOpcode() == UnaryOperator::Deref) 1068 return EvalAddr(U->getSubExpr()); 1069 1070 return NULL; 1071 } 1072 1073 case Stmt::ArraySubscriptExprClass: { 1074 // Array subscripts are potential references to data on the stack. We 1075 // retrieve the DeclRefExpr* for the array variable if it indeed 1076 // has local storage. 1077 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 1078 } 1079 1080 case Stmt::ConditionalOperatorClass: { 1081 // For conditional operators we need to see if either the LHS or RHS are 1082 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 1083 ConditionalOperator *C = cast<ConditionalOperator>(E); 1084 1085 // Handle the GNU extension for missing LHS. 1086 if (Expr *lhsExpr = C->getLHS()) 1087 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 1088 return LHS; 1089 1090 return EvalVal(C->getRHS()); 1091 } 1092 1093 // Accesses to members are potential references to data on the stack. 1094 case Stmt::MemberExprClass: { 1095 MemberExpr *M = cast<MemberExpr>(E); 1096 1097 // Check for indirect access. We only want direct field accesses. 1098 if (!M->isArrow()) 1099 return EvalVal(M->getBase()); 1100 else 1101 return NULL; 1102 } 1103 1104 // Everything else: we simply don't reason about them. 1105 default: 1106 return NULL; 1107 } 1108} 1109 1110//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 1111 1112/// Check for comparisons of floating point operands using != and ==. 1113/// Issue a warning if these are no self-comparisons, as they are not likely 1114/// to do what the programmer intended. 1115void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1116 bool EmitWarning = true; 1117 1118 Expr* LeftExprSansParen = lex->IgnoreParens(); 1119 Expr* RightExprSansParen = rex->IgnoreParens(); 1120 1121 // Special case: check for x == x (which is OK). 1122 // Do not emit warnings for such cases. 1123 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1124 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1125 if (DRL->getDecl() == DRR->getDecl()) 1126 EmitWarning = false; 1127 1128 1129 // Special case: check for comparisons against literals that can be exactly 1130 // represented by APFloat. In such cases, do not emit a warning. This 1131 // is a heuristic: often comparison against such literals are used to 1132 // detect if a value in a variable has not changed. This clearly can 1133 // lead to false negatives. 1134 if (EmitWarning) { 1135 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1136 if (FLL->isExact()) 1137 EmitWarning = false; 1138 } 1139 else 1140 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1141 if (FLR->isExact()) 1142 EmitWarning = false; 1143 } 1144 } 1145 1146 // Check for comparisons with builtin types. 1147 if (EmitWarning) 1148 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1149 if (CL->isBuiltinCall(Context)) 1150 EmitWarning = false; 1151 1152 if (EmitWarning) 1153 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1154 if (CR->isBuiltinCall(Context)) 1155 EmitWarning = false; 1156 1157 // Emit the diagnostic. 1158 if (EmitWarning) 1159 Diag(loc, diag::warn_floatingpoint_eq) 1160 << lex->getSourceRange() << rex->getSourceRange(); 1161} 1162