SemaChecking.cpp revision 6277e4095e523d8a20d1eff82df66191ecd37487
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/LiteralSupport.h" 21#include "clang/Lex/Preprocessor.h" 22using namespace clang; 23 24/// getLocationOfStringLiteralByte - Return a source location that points to the 25/// specified byte of the specified string literal. 26/// 27/// Strings are amazingly complex. They can be formed from multiple tokens and 28/// can have escape sequences in them in addition to the usual trigraph and 29/// escaped newline business. This routine handles this complexity. 30/// 31SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 32 unsigned ByteNo) const { 33 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 34 35 // Loop over all of the tokens in this string until we find the one that 36 // contains the byte we're looking for. 37 unsigned TokNo = 0; 38 while (1) { 39 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 40 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 41 42 // Get the spelling of the string so that we can get the data that makes up 43 // the string literal, not the identifier for the macro it is potentially 44 // expanded through. 45 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 46 47 // Re-lex the token to get its length and original spelling. 48 std::pair<FileID, unsigned> LocInfo = 49 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 50 std::pair<const char *,const char *> Buffer = 51 SourceMgr.getBufferData(LocInfo.first); 52 const char *StrData = Buffer.first+LocInfo.second; 53 54 // Create a langops struct and enable trigraphs. This is sufficient for 55 // relexing tokens. 56 LangOptions LangOpts; 57 LangOpts.Trigraphs = true; 58 59 // Create a lexer starting at the beginning of this token. 60 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData, 61 Buffer.second); 62 Token TheTok; 63 TheLexer.LexFromRawLexer(TheTok); 64 65 // Use the StringLiteralParser to compute the length of the string in bytes. 66 StringLiteralParser SLP(&TheTok, 1, PP); 67 unsigned TokNumBytes = SLP.GetStringLength(); 68 69 // If the byte is in this token, return the location of the byte. 70 if (ByteNo < TokNumBytes || 71 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 72 unsigned Offset = 73 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP); 74 75 // Now that we know the offset of the token in the spelling, use the 76 // preprocessor to get the offset in the original source. 77 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 78 } 79 80 // Move to the next string token. 81 ++TokNo; 82 ByteNo -= TokNumBytes; 83 } 84} 85 86 87/// CheckFunctionCall - Check a direct function call for various correctness 88/// and safety properties not strictly enforced by the C type system. 89Action::OwningExprResult 90Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 91 OwningExprResult TheCallResult(Owned(TheCall)); 92 // Get the IdentifierInfo* for the called function. 93 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 94 95 // None of the checks below are needed for functions that don't have 96 // simple names (e.g., C++ conversion functions). 97 if (!FnInfo) 98 return move(TheCallResult); 99 100 switch (FDecl->getBuiltinID(Context)) { 101 case Builtin::BI__builtin___CFStringMakeConstantString: 102 assert(TheCall->getNumArgs() == 1 && 103 "Wrong # arguments to builtin CFStringMakeConstantString"); 104 if (CheckObjCString(TheCall->getArg(0))) 105 return ExprError(); 106 return move(TheCallResult); 107 case Builtin::BI__builtin_stdarg_start: 108 case Builtin::BI__builtin_va_start: 109 if (SemaBuiltinVAStart(TheCall)) 110 return ExprError(); 111 return move(TheCallResult); 112 case Builtin::BI__builtin_isgreater: 113 case Builtin::BI__builtin_isgreaterequal: 114 case Builtin::BI__builtin_isless: 115 case Builtin::BI__builtin_islessequal: 116 case Builtin::BI__builtin_islessgreater: 117 case Builtin::BI__builtin_isunordered: 118 if (SemaBuiltinUnorderedCompare(TheCall)) 119 return ExprError(); 120 return move(TheCallResult); 121 case Builtin::BI__builtin_return_address: 122 case Builtin::BI__builtin_frame_address: 123 if (SemaBuiltinStackAddress(TheCall)) 124 return ExprError(); 125 return move(TheCallResult); 126 case Builtin::BI__builtin_shufflevector: 127 return SemaBuiltinShuffleVector(TheCall); 128 // TheCall will be freed by the smart pointer here, but that's fine, since 129 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 130 case Builtin::BI__builtin_prefetch: 131 if (SemaBuiltinPrefetch(TheCall)) 132 return ExprError(); 133 return move(TheCallResult); 134 case Builtin::BI__builtin_object_size: 135 if (SemaBuiltinObjectSize(TheCall)) 136 return ExprError(); 137 case Builtin::BI__builtin_longjmp: 138 if (SemaBuiltinLongjmp(TheCall)) 139 return ExprError(); 140 } 141 142 // FIXME: This mechanism should be abstracted to be less fragile and 143 // more efficient. For example, just map function ids to custom 144 // handlers. 145 146 // Printf checking. 147 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 148 if (Format->getType() == "printf") { 149 bool HasVAListArg = Format->getFirstArg() == 0; 150 if (!HasVAListArg) { 151 if (const FunctionProtoType *Proto 152 = FDecl->getType()->getAsFunctionProtoType()) 153 HasVAListArg = !Proto->isVariadic(); 154 } 155 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 156 HasVAListArg ? 0 : Format->getFirstArg() - 1); 157 } 158 } 159 160 return move(TheCallResult); 161} 162 163/// CheckObjCString - Checks that the argument to the builtin 164/// CFString constructor is correct 165/// FIXME: GCC currently emits the following warning: 166/// "warning: input conversion stopped due to an input byte that does not 167/// belong to the input codeset UTF-8" 168/// Note: It might also make sense to do the UTF-16 conversion here (would 169/// simplify the backend). 170bool Sema::CheckObjCString(Expr *Arg) { 171 Arg = Arg->IgnoreParenCasts(); 172 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 173 174 if (!Literal || Literal->isWide()) { 175 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 176 << Arg->getSourceRange(); 177 return true; 178 } 179 180 const char *Data = Literal->getStrData(); 181 unsigned Length = Literal->getByteLength(); 182 183 for (unsigned i = 0; i < Length; ++i) { 184 if (!Data[i]) { 185 Diag(getLocationOfStringLiteralByte(Literal, i), 186 diag::warn_cfstring_literal_contains_nul_character) 187 << Arg->getSourceRange(); 188 break; 189 } 190 } 191 192 return false; 193} 194 195/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 196/// Emit an error and return true on failure, return false on success. 197bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 198 Expr *Fn = TheCall->getCallee(); 199 if (TheCall->getNumArgs() > 2) { 200 Diag(TheCall->getArg(2)->getLocStart(), 201 diag::err_typecheck_call_too_many_args) 202 << 0 /*function call*/ << Fn->getSourceRange() 203 << SourceRange(TheCall->getArg(2)->getLocStart(), 204 (*(TheCall->arg_end()-1))->getLocEnd()); 205 return true; 206 } 207 208 if (TheCall->getNumArgs() < 2) { 209 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 210 << 0 /*function call*/; 211 } 212 213 // Determine whether the current function is variadic or not. 214 bool isVariadic; 215 if (CurBlock) 216 isVariadic = CurBlock->isVariadic; 217 else if (getCurFunctionDecl()) { 218 if (FunctionProtoType* FTP = 219 dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType())) 220 isVariadic = FTP->isVariadic(); 221 else 222 isVariadic = false; 223 } else { 224 isVariadic = getCurMethodDecl()->isVariadic(); 225 } 226 227 if (!isVariadic) { 228 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 229 return true; 230 } 231 232 // Verify that the second argument to the builtin is the last argument of the 233 // current function or method. 234 bool SecondArgIsLastNamedArgument = false; 235 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 236 237 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 238 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 239 // FIXME: This isn't correct for methods (results in bogus warning). 240 // Get the last formal in the current function. 241 const ParmVarDecl *LastArg; 242 if (CurBlock) 243 LastArg = *(CurBlock->TheDecl->param_end()-1); 244 else if (FunctionDecl *FD = getCurFunctionDecl()) 245 LastArg = *(FD->param_end()-1); 246 else 247 LastArg = *(getCurMethodDecl()->param_end()-1); 248 SecondArgIsLastNamedArgument = PV == LastArg; 249 } 250 } 251 252 if (!SecondArgIsLastNamedArgument) 253 Diag(TheCall->getArg(1)->getLocStart(), 254 diag::warn_second_parameter_of_va_start_not_last_named_argument); 255 return false; 256} 257 258/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 259/// friends. This is declared to take (...), so we have to check everything. 260bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 261 if (TheCall->getNumArgs() < 2) 262 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 263 << 0 /*function call*/; 264 if (TheCall->getNumArgs() > 2) 265 return Diag(TheCall->getArg(2)->getLocStart(), 266 diag::err_typecheck_call_too_many_args) 267 << 0 /*function call*/ 268 << SourceRange(TheCall->getArg(2)->getLocStart(), 269 (*(TheCall->arg_end()-1))->getLocEnd()); 270 271 Expr *OrigArg0 = TheCall->getArg(0); 272 Expr *OrigArg1 = TheCall->getArg(1); 273 274 // Do standard promotions between the two arguments, returning their common 275 // type. 276 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 277 278 // Make sure any conversions are pushed back into the call; this is 279 // type safe since unordered compare builtins are declared as "_Bool 280 // foo(...)". 281 TheCall->setArg(0, OrigArg0); 282 TheCall->setArg(1, OrigArg1); 283 284 // If the common type isn't a real floating type, then the arguments were 285 // invalid for this operation. 286 if (!Res->isRealFloatingType()) 287 return Diag(OrigArg0->getLocStart(), 288 diag::err_typecheck_call_invalid_ordered_compare) 289 << OrigArg0->getType() << OrigArg1->getType() 290 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 291 292 return false; 293} 294 295bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 296 // The signature for these builtins is exact; the only thing we need 297 // to check is that the argument is a constant. 298 SourceLocation Loc; 299 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 300 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 301 302 return false; 303} 304 305/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 306// This is declared to take (...), so we have to check everything. 307Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 308 if (TheCall->getNumArgs() < 3) 309 return ExprError(Diag(TheCall->getLocEnd(), 310 diag::err_typecheck_call_too_few_args) 311 << 0 /*function call*/ << TheCall->getSourceRange()); 312 313 QualType FAType = TheCall->getArg(0)->getType(); 314 QualType SAType = TheCall->getArg(1)->getType(); 315 316 if (!FAType->isVectorType() || !SAType->isVectorType()) { 317 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 318 << SourceRange(TheCall->getArg(0)->getLocStart(), 319 TheCall->getArg(1)->getLocEnd()); 320 return ExprError(); 321 } 322 323 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 324 Context.getCanonicalType(SAType).getUnqualifiedType()) { 325 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 326 << SourceRange(TheCall->getArg(0)->getLocStart(), 327 TheCall->getArg(1)->getLocEnd()); 328 return ExprError(); 329 } 330 331 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 332 if (TheCall->getNumArgs() != numElements+2) { 333 if (TheCall->getNumArgs() < numElements+2) 334 return ExprError(Diag(TheCall->getLocEnd(), 335 diag::err_typecheck_call_too_few_args) 336 << 0 /*function call*/ << TheCall->getSourceRange()); 337 return ExprError(Diag(TheCall->getLocEnd(), 338 diag::err_typecheck_call_too_many_args) 339 << 0 /*function call*/ << TheCall->getSourceRange()); 340 } 341 342 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 343 llvm::APSInt Result(32); 344 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 345 return ExprError(Diag(TheCall->getLocStart(), 346 diag::err_shufflevector_nonconstant_argument) 347 << TheCall->getArg(i)->getSourceRange()); 348 349 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 350 return ExprError(Diag(TheCall->getLocStart(), 351 diag::err_shufflevector_argument_too_large) 352 << TheCall->getArg(i)->getSourceRange()); 353 } 354 355 llvm::SmallVector<Expr*, 32> exprs; 356 357 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 358 exprs.push_back(TheCall->getArg(i)); 359 TheCall->setArg(i, 0); 360 } 361 362 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2, 363 FAType, 364 TheCall->getCallee()->getLocStart(), 365 TheCall->getRParenLoc())); 366} 367 368/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 369// This is declared to take (const void*, ...) and can take two 370// optional constant int args. 371bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 372 unsigned NumArgs = TheCall->getNumArgs(); 373 374 if (NumArgs > 3) 375 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 376 << 0 /*function call*/ << TheCall->getSourceRange(); 377 378 // Argument 0 is checked for us and the remaining arguments must be 379 // constant integers. 380 for (unsigned i = 1; i != NumArgs; ++i) { 381 Expr *Arg = TheCall->getArg(i); 382 QualType RWType = Arg->getType(); 383 384 const BuiltinType *BT = RWType->getAsBuiltinType(); 385 llvm::APSInt Result; 386 if (!BT || BT->getKind() != BuiltinType::Int || 387 !Arg->isIntegerConstantExpr(Result, Context)) 388 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 389 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 390 391 // FIXME: gcc issues a warning and rewrites these to 0. These 392 // seems especially odd for the third argument since the default 393 // is 3. 394 if (i == 1) { 395 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 396 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 397 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 398 } else { 399 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 400 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 401 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 402 } 403 } 404 405 return false; 406} 407 408/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 409/// int type). This simply type checks that type is one of the defined 410/// constants (0-3). 411bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 412 Expr *Arg = TheCall->getArg(1); 413 QualType ArgType = Arg->getType(); 414 const BuiltinType *BT = ArgType->getAsBuiltinType(); 415 llvm::APSInt Result(32); 416 if (!BT || BT->getKind() != BuiltinType::Int || 417 !Arg->isIntegerConstantExpr(Result, Context)) { 418 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 419 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 420 } 421 422 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 423 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 424 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 425 } 426 427 return false; 428} 429 430/// SemaBuiltinObjectSize - Handle __builtin_longjmp(void *env[5], int val). 431/// This checks that val is a constant 1. 432bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 433 Expr *Arg = TheCall->getArg(1); 434 llvm::APSInt Result(32); 435 if (!Arg->isIntegerConstantExpr(Result, Context) || Result != 1) 436 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 437 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 438 439 return false; 440} 441 442// Handle i > 1 ? "x" : "y", recursivelly 443bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 444 bool HasVAListArg, 445 unsigned format_idx, unsigned firstDataArg) { 446 447 switch (E->getStmtClass()) { 448 case Stmt::ConditionalOperatorClass: { 449 const ConditionalOperator *C = cast<ConditionalOperator>(E); 450 return SemaCheckStringLiteral(C->getLHS(), TheCall, 451 HasVAListArg, format_idx, firstDataArg) 452 && SemaCheckStringLiteral(C->getRHS(), TheCall, 453 HasVAListArg, format_idx, firstDataArg); 454 } 455 456 case Stmt::ImplicitCastExprClass: { 457 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 458 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 459 format_idx, firstDataArg); 460 } 461 462 case Stmt::ParenExprClass: { 463 const ParenExpr *Expr = cast<ParenExpr>(E); 464 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 465 format_idx, firstDataArg); 466 } 467 468 case Stmt::DeclRefExprClass: { 469 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 470 471 // As an exception, do not flag errors for variables binding to 472 // const string literals. 473 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 474 bool isConstant = false; 475 QualType T = DR->getType(); 476 477 if (const ArrayType *AT = Context.getAsArrayType(T)) { 478 isConstant = AT->getElementType().isConstant(Context); 479 } 480 else if (const PointerType *PT = T->getAsPointerType()) { 481 isConstant = T.isConstant(Context) && 482 PT->getPointeeType().isConstant(Context); 483 } 484 485 if (isConstant) { 486 const VarDecl *Def = 0; 487 if (const Expr *Init = VD->getDefinition(Def)) 488 return SemaCheckStringLiteral(Init, TheCall, 489 HasVAListArg, format_idx, firstDataArg); 490 } 491 } 492 493 return false; 494 } 495 496 case Stmt::ObjCStringLiteralClass: 497 case Stmt::StringLiteralClass: { 498 const StringLiteral *StrE = NULL; 499 500 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 501 StrE = ObjCFExpr->getString(); 502 else 503 StrE = cast<StringLiteral>(E); 504 505 if (StrE) { 506 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx, 507 firstDataArg); 508 return true; 509 } 510 511 return false; 512 } 513 514 default: 515 return false; 516 } 517} 518 519 520/// CheckPrintfArguments - Check calls to printf (and similar functions) for 521/// correct use of format strings. 522/// 523/// HasVAListArg - A predicate indicating whether the printf-like 524/// function is passed an explicit va_arg argument (e.g., vprintf) 525/// 526/// format_idx - The index into Args for the format string. 527/// 528/// Improper format strings to functions in the printf family can be 529/// the source of bizarre bugs and very serious security holes. A 530/// good source of information is available in the following paper 531/// (which includes additional references): 532/// 533/// FormatGuard: Automatic Protection From printf Format String 534/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 535/// 536/// Functionality implemented: 537/// 538/// We can statically check the following properties for string 539/// literal format strings for non v.*printf functions (where the 540/// arguments are passed directly): 541// 542/// (1) Are the number of format conversions equal to the number of 543/// data arguments? 544/// 545/// (2) Does each format conversion correctly match the type of the 546/// corresponding data argument? (TODO) 547/// 548/// Moreover, for all printf functions we can: 549/// 550/// (3) Check for a missing format string (when not caught by type checking). 551/// 552/// (4) Check for no-operation flags; e.g. using "#" with format 553/// conversion 'c' (TODO) 554/// 555/// (5) Check the use of '%n', a major source of security holes. 556/// 557/// (6) Check for malformed format conversions that don't specify anything. 558/// 559/// (7) Check for empty format strings. e.g: printf(""); 560/// 561/// (8) Check that the format string is a wide literal. 562/// 563/// (9) Also check the arguments of functions with the __format__ attribute. 564/// (TODO). 565/// 566/// All of these checks can be done by parsing the format string. 567/// 568/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 569void 570Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, 571 unsigned format_idx, unsigned firstDataArg) { 572 const Expr *Fn = TheCall->getCallee(); 573 574 // CHECK: printf-like function is called with no format string. 575 if (format_idx >= TheCall->getNumArgs()) { 576 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 577 << Fn->getSourceRange(); 578 return; 579 } 580 581 const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 582 583 // CHECK: format string is not a string literal. 584 // 585 // Dynamically generated format strings are difficult to 586 // automatically vet at compile time. Requiring that format strings 587 // are string literals: (1) permits the checking of format strings by 588 // the compiler and thereby (2) can practically remove the source of 589 // many format string exploits. 590 591 // Format string can be either ObjC string (e.g. @"%d") or 592 // C string (e.g. "%d") 593 // ObjC string uses the same format specifiers as C string, so we can use 594 // the same format string checking logic for both ObjC and C strings. 595 if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx, 596 firstDataArg)) 597 return; // Literal format string found, check done! 598 599 // For vprintf* functions (i.e., HasVAListArg==true), we add a 600 // special check to see if the format string is a function parameter 601 // of the function calling the printf function. If the function 602 // has an attribute indicating it is a printf-like function, then we 603 // should suppress warnings concerning non-literals being used in a call 604 // to a vprintf function. For example: 605 // 606 // void 607 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 608 // va_list ap; 609 // va_start(ap, fmt); 610 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 611 // ... 612 // 613 // 614 // FIXME: We don't have full attribute support yet, so just check to see 615 // if the argument is a DeclRefExpr that references a parameter. We'll 616 // add proper support for checking the attribute later. 617 if (HasVAListArg) 618 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 619 if (isa<ParmVarDecl>(DR->getDecl())) 620 return; 621 622 // If there are no arguments specified, warn with -Wformat-security, otherwise 623 // warn only with -Wformat-nonliteral. 624 if (TheCall->getNumArgs() == format_idx+1) 625 Diag(TheCall->getArg(format_idx)->getLocStart(), 626 diag::warn_printf_nonliteral_noargs) 627 << OrigFormatExpr->getSourceRange(); 628 else 629 Diag(TheCall->getArg(format_idx)->getLocStart(), 630 diag::warn_printf_nonliteral) 631 << OrigFormatExpr->getSourceRange(); 632} 633 634void Sema::CheckPrintfString(const StringLiteral *FExpr, 635 const Expr *OrigFormatExpr, 636 const CallExpr *TheCall, bool HasVAListArg, 637 unsigned format_idx, unsigned firstDataArg) { 638 639 const ObjCStringLiteral *ObjCFExpr = 640 dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 641 642 // CHECK: is the format string a wide literal? 643 if (FExpr->isWide()) { 644 Diag(FExpr->getLocStart(), 645 diag::warn_printf_format_string_is_wide_literal) 646 << OrigFormatExpr->getSourceRange(); 647 return; 648 } 649 650 // Str - The format string. NOTE: this is NOT null-terminated! 651 const char *Str = FExpr->getStrData(); 652 653 // CHECK: empty format string? 654 unsigned StrLen = FExpr->getByteLength(); 655 656 if (StrLen == 0) { 657 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 658 << OrigFormatExpr->getSourceRange(); 659 return; 660 } 661 662 // We process the format string using a binary state machine. The 663 // current state is stored in CurrentState. 664 enum { 665 state_OrdChr, 666 state_Conversion 667 } CurrentState = state_OrdChr; 668 669 // numConversions - The number of conversions seen so far. This is 670 // incremented as we traverse the format string. 671 unsigned numConversions = 0; 672 673 // numDataArgs - The number of data arguments after the format 674 // string. This can only be determined for non vprintf-like 675 // functions. For those functions, this value is 1 (the sole 676 // va_arg argument). 677 unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg; 678 679 // Inspect the format string. 680 unsigned StrIdx = 0; 681 682 // LastConversionIdx - Index within the format string where we last saw 683 // a '%' character that starts a new format conversion. 684 unsigned LastConversionIdx = 0; 685 686 for (; StrIdx < StrLen; ++StrIdx) { 687 688 // Is the number of detected conversion conversions greater than 689 // the number of matching data arguments? If so, stop. 690 if (!HasVAListArg && numConversions > numDataArgs) break; 691 692 // Handle "\0" 693 if (Str[StrIdx] == '\0') { 694 // The string returned by getStrData() is not null-terminated, 695 // so the presence of a null character is likely an error. 696 Diag(getLocationOfStringLiteralByte(FExpr, StrIdx), 697 diag::warn_printf_format_string_contains_null_char) 698 << OrigFormatExpr->getSourceRange(); 699 return; 700 } 701 702 // Ordinary characters (not processing a format conversion). 703 if (CurrentState == state_OrdChr) { 704 if (Str[StrIdx] == '%') { 705 CurrentState = state_Conversion; 706 LastConversionIdx = StrIdx; 707 } 708 continue; 709 } 710 711 // Seen '%'. Now processing a format conversion. 712 switch (Str[StrIdx]) { 713 // Handle dynamic precision or width specifier. 714 case '*': { 715 ++numConversions; 716 717 if (!HasVAListArg && numConversions > numDataArgs) { 718 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 719 720 if (Str[StrIdx-1] == '.') 721 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 722 << OrigFormatExpr->getSourceRange(); 723 else 724 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 725 << OrigFormatExpr->getSourceRange(); 726 727 // Don't do any more checking. We'll just emit spurious errors. 728 return; 729 } 730 731 // Perform type checking on width/precision specifier. 732 const Expr *E = TheCall->getArg(format_idx+numConversions); 733 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 734 if (BT->getKind() == BuiltinType::Int) 735 break; 736 737 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 738 739 if (Str[StrIdx-1] == '.') 740 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 741 << E->getType() << E->getSourceRange(); 742 else 743 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 744 << E->getType() << E->getSourceRange(); 745 746 break; 747 } 748 749 // Characters which can terminate a format conversion 750 // (e.g. "%d"). Characters that specify length modifiers or 751 // other flags are handled by the default case below. 752 // 753 // FIXME: additional checks will go into the following cases. 754 case 'i': 755 case 'd': 756 case 'o': 757 case 'u': 758 case 'x': 759 case 'X': 760 case 'D': 761 case 'O': 762 case 'U': 763 case 'e': 764 case 'E': 765 case 'f': 766 case 'F': 767 case 'g': 768 case 'G': 769 case 'a': 770 case 'A': 771 case 'c': 772 case 'C': 773 case 'S': 774 case 's': 775 case 'p': 776 ++numConversions; 777 CurrentState = state_OrdChr; 778 break; 779 780 // CHECK: Are we using "%n"? Issue a warning. 781 case 'n': { 782 ++numConversions; 783 CurrentState = state_OrdChr; 784 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, 785 LastConversionIdx); 786 787 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 788 break; 789 } 790 791 // Handle "%@" 792 case '@': 793 // %@ is allowed in ObjC format strings only. 794 if(ObjCFExpr != NULL) 795 CurrentState = state_OrdChr; 796 else { 797 // Issue a warning: invalid format conversion. 798 SourceLocation Loc = 799 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 800 801 Diag(Loc, diag::warn_printf_invalid_conversion) 802 << std::string(Str+LastConversionIdx, 803 Str+std::min(LastConversionIdx+2, StrLen)) 804 << OrigFormatExpr->getSourceRange(); 805 } 806 ++numConversions; 807 break; 808 809 // Handle "%%" 810 case '%': 811 // Sanity check: Was the first "%" character the previous one? 812 // If not, we will assume that we have a malformed format 813 // conversion, and that the current "%" character is the start 814 // of a new conversion. 815 if (StrIdx - LastConversionIdx == 1) 816 CurrentState = state_OrdChr; 817 else { 818 // Issue a warning: invalid format conversion. 819 SourceLocation Loc = 820 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 821 822 Diag(Loc, diag::warn_printf_invalid_conversion) 823 << std::string(Str+LastConversionIdx, Str+StrIdx) 824 << OrigFormatExpr->getSourceRange(); 825 826 // This conversion is broken. Advance to the next format 827 // conversion. 828 LastConversionIdx = StrIdx; 829 ++numConversions; 830 } 831 break; 832 833 default: 834 // This case catches all other characters: flags, widths, etc. 835 // We should eventually process those as well. 836 break; 837 } 838 } 839 840 if (CurrentState == state_Conversion) { 841 // Issue a warning: invalid format conversion. 842 SourceLocation Loc = 843 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 844 845 Diag(Loc, diag::warn_printf_invalid_conversion) 846 << std::string(Str+LastConversionIdx, 847 Str+std::min(LastConversionIdx+2, StrLen)) 848 << OrigFormatExpr->getSourceRange(); 849 return; 850 } 851 852 if (!HasVAListArg) { 853 // CHECK: Does the number of format conversions exceed the number 854 // of data arguments? 855 if (numConversions > numDataArgs) { 856 SourceLocation Loc = 857 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 858 859 Diag(Loc, diag::warn_printf_insufficient_data_args) 860 << OrigFormatExpr->getSourceRange(); 861 } 862 // CHECK: Does the number of data arguments exceed the number of 863 // format conversions in the format string? 864 else if (numConversions < numDataArgs) 865 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 866 diag::warn_printf_too_many_data_args) 867 << OrigFormatExpr->getSourceRange(); 868 } 869} 870 871//===--- CHECK: Return Address of Stack Variable --------------------------===// 872 873static DeclRefExpr* EvalVal(Expr *E); 874static DeclRefExpr* EvalAddr(Expr* E); 875 876/// CheckReturnStackAddr - Check if a return statement returns the address 877/// of a stack variable. 878void 879Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 880 SourceLocation ReturnLoc) { 881 882 // Perform checking for returned stack addresses. 883 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 884 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 885 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 886 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 887 888 // Skip over implicit cast expressions when checking for block expressions. 889 if (ImplicitCastExpr *IcExpr = 890 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 891 RetValExp = IcExpr->getSubExpr(); 892 893 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 894 if (C->hasBlockDeclRefExprs()) 895 Diag(C->getLocStart(), diag::err_ret_local_block) 896 << C->getSourceRange(); 897 } 898 // Perform checking for stack values returned by reference. 899 else if (lhsType->isReferenceType()) { 900 // Check for a reference to the stack 901 if (DeclRefExpr *DR = EvalVal(RetValExp)) 902 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 903 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 904 } 905} 906 907/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 908/// check if the expression in a return statement evaluates to an address 909/// to a location on the stack. The recursion is used to traverse the 910/// AST of the return expression, with recursion backtracking when we 911/// encounter a subexpression that (1) clearly does not lead to the address 912/// of a stack variable or (2) is something we cannot determine leads to 913/// the address of a stack variable based on such local checking. 914/// 915/// EvalAddr processes expressions that are pointers that are used as 916/// references (and not L-values). EvalVal handles all other values. 917/// At the base case of the recursion is a check for a DeclRefExpr* in 918/// the refers to a stack variable. 919/// 920/// This implementation handles: 921/// 922/// * pointer-to-pointer casts 923/// * implicit conversions from array references to pointers 924/// * taking the address of fields 925/// * arbitrary interplay between "&" and "*" operators 926/// * pointer arithmetic from an address of a stack variable 927/// * taking the address of an array element where the array is on the stack 928static DeclRefExpr* EvalAddr(Expr *E) { 929 // We should only be called for evaluating pointer expressions. 930 assert((E->getType()->isPointerType() || 931 E->getType()->isBlockPointerType() || 932 E->getType()->isObjCQualifiedIdType()) && 933 "EvalAddr only works on pointers"); 934 935 // Our "symbolic interpreter" is just a dispatch off the currently 936 // viewed AST node. We then recursively traverse the AST by calling 937 // EvalAddr and EvalVal appropriately. 938 switch (E->getStmtClass()) { 939 case Stmt::ParenExprClass: 940 // Ignore parentheses. 941 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 942 943 case Stmt::UnaryOperatorClass: { 944 // The only unary operator that make sense to handle here 945 // is AddrOf. All others don't make sense as pointers. 946 UnaryOperator *U = cast<UnaryOperator>(E); 947 948 if (U->getOpcode() == UnaryOperator::AddrOf) 949 return EvalVal(U->getSubExpr()); 950 else 951 return NULL; 952 } 953 954 case Stmt::BinaryOperatorClass: { 955 // Handle pointer arithmetic. All other binary operators are not valid 956 // in this context. 957 BinaryOperator *B = cast<BinaryOperator>(E); 958 BinaryOperator::Opcode op = B->getOpcode(); 959 960 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 961 return NULL; 962 963 Expr *Base = B->getLHS(); 964 965 // Determine which argument is the real pointer base. It could be 966 // the RHS argument instead of the LHS. 967 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 968 969 assert (Base->getType()->isPointerType()); 970 return EvalAddr(Base); 971 } 972 973 // For conditional operators we need to see if either the LHS or RHS are 974 // valid DeclRefExpr*s. If one of them is valid, we return it. 975 case Stmt::ConditionalOperatorClass: { 976 ConditionalOperator *C = cast<ConditionalOperator>(E); 977 978 // Handle the GNU extension for missing LHS. 979 if (Expr *lhsExpr = C->getLHS()) 980 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 981 return LHS; 982 983 return EvalAddr(C->getRHS()); 984 } 985 986 // For casts, we need to handle conversions from arrays to 987 // pointer values, and pointer-to-pointer conversions. 988 case Stmt::ImplicitCastExprClass: 989 case Stmt::CStyleCastExprClass: 990 case Stmt::CXXFunctionalCastExprClass: { 991 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 992 QualType T = SubExpr->getType(); 993 994 if (SubExpr->getType()->isPointerType() || 995 SubExpr->getType()->isBlockPointerType() || 996 SubExpr->getType()->isObjCQualifiedIdType()) 997 return EvalAddr(SubExpr); 998 else if (T->isArrayType()) 999 return EvalVal(SubExpr); 1000 else 1001 return 0; 1002 } 1003 1004 // C++ casts. For dynamic casts, static casts, and const casts, we 1005 // are always converting from a pointer-to-pointer, so we just blow 1006 // through the cast. In the case the dynamic cast doesn't fail (and 1007 // return NULL), we take the conservative route and report cases 1008 // where we return the address of a stack variable. For Reinterpre 1009 // FIXME: The comment about is wrong; we're not always converting 1010 // from pointer to pointer. I'm guessing that this code should also 1011 // handle references to objects. 1012 case Stmt::CXXStaticCastExprClass: 1013 case Stmt::CXXDynamicCastExprClass: 1014 case Stmt::CXXConstCastExprClass: 1015 case Stmt::CXXReinterpretCastExprClass: { 1016 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 1017 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 1018 return EvalAddr(S); 1019 else 1020 return NULL; 1021 } 1022 1023 // Everything else: we simply don't reason about them. 1024 default: 1025 return NULL; 1026 } 1027} 1028 1029 1030/// EvalVal - This function is complements EvalAddr in the mutual recursion. 1031/// See the comments for EvalAddr for more details. 1032static DeclRefExpr* EvalVal(Expr *E) { 1033 1034 // We should only be called for evaluating non-pointer expressions, or 1035 // expressions with a pointer type that are not used as references but instead 1036 // are l-values (e.g., DeclRefExpr with a pointer type). 1037 1038 // Our "symbolic interpreter" is just a dispatch off the currently 1039 // viewed AST node. We then recursively traverse the AST by calling 1040 // EvalAddr and EvalVal appropriately. 1041 switch (E->getStmtClass()) { 1042 case Stmt::DeclRefExprClass: 1043 case Stmt::QualifiedDeclRefExprClass: { 1044 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 1045 // at code that refers to a variable's name. We check if it has local 1046 // storage within the function, and if so, return the expression. 1047 DeclRefExpr *DR = cast<DeclRefExpr>(E); 1048 1049 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 1050 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 1051 1052 return NULL; 1053 } 1054 1055 case Stmt::ParenExprClass: 1056 // Ignore parentheses. 1057 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 1058 1059 case Stmt::UnaryOperatorClass: { 1060 // The only unary operator that make sense to handle here 1061 // is Deref. All others don't resolve to a "name." This includes 1062 // handling all sorts of rvalues passed to a unary operator. 1063 UnaryOperator *U = cast<UnaryOperator>(E); 1064 1065 if (U->getOpcode() == UnaryOperator::Deref) 1066 return EvalAddr(U->getSubExpr()); 1067 1068 return NULL; 1069 } 1070 1071 case Stmt::ArraySubscriptExprClass: { 1072 // Array subscripts are potential references to data on the stack. We 1073 // retrieve the DeclRefExpr* for the array variable if it indeed 1074 // has local storage. 1075 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 1076 } 1077 1078 case Stmt::ConditionalOperatorClass: { 1079 // For conditional operators we need to see if either the LHS or RHS are 1080 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 1081 ConditionalOperator *C = cast<ConditionalOperator>(E); 1082 1083 // Handle the GNU extension for missing LHS. 1084 if (Expr *lhsExpr = C->getLHS()) 1085 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 1086 return LHS; 1087 1088 return EvalVal(C->getRHS()); 1089 } 1090 1091 // Accesses to members are potential references to data on the stack. 1092 case Stmt::MemberExprClass: { 1093 MemberExpr *M = cast<MemberExpr>(E); 1094 1095 // Check for indirect access. We only want direct field accesses. 1096 if (!M->isArrow()) 1097 return EvalVal(M->getBase()); 1098 else 1099 return NULL; 1100 } 1101 1102 // Everything else: we simply don't reason about them. 1103 default: 1104 return NULL; 1105 } 1106} 1107 1108//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 1109 1110/// Check for comparisons of floating point operands using != and ==. 1111/// Issue a warning if these are no self-comparisons, as they are not likely 1112/// to do what the programmer intended. 1113void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1114 bool EmitWarning = true; 1115 1116 Expr* LeftExprSansParen = lex->IgnoreParens(); 1117 Expr* RightExprSansParen = rex->IgnoreParens(); 1118 1119 // Special case: check for x == x (which is OK). 1120 // Do not emit warnings for such cases. 1121 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1122 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1123 if (DRL->getDecl() == DRR->getDecl()) 1124 EmitWarning = false; 1125 1126 1127 // Special case: check for comparisons against literals that can be exactly 1128 // represented by APFloat. In such cases, do not emit a warning. This 1129 // is a heuristic: often comparison against such literals are used to 1130 // detect if a value in a variable has not changed. This clearly can 1131 // lead to false negatives. 1132 if (EmitWarning) { 1133 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1134 if (FLL->isExact()) 1135 EmitWarning = false; 1136 } 1137 else 1138 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1139 if (FLR->isExact()) 1140 EmitWarning = false; 1141 } 1142 } 1143 1144 // Check for comparisons with builtin types. 1145 if (EmitWarning) 1146 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1147 if (CL->isBuiltinCall(Context)) 1148 EmitWarning = false; 1149 1150 if (EmitWarning) 1151 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1152 if (CR->isBuiltinCall(Context)) 1153 EmitWarning = false; 1154 1155 // Emit the diagnostic. 1156 if (EmitWarning) 1157 Diag(loc, diag::warn_floatingpoint_eq) 1158 << lex->getSourceRange() << rex->getSourceRange(); 1159} 1160