SemaChecking.cpp revision 96827eb52405a71c65c200949f3e644368e86454
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/Analysis/Analyses/FormatString.h" 17#include "clang/AST/ASTContext.h" 18#include "clang/AST/CharUnits.h" 19#include "clang/AST/DeclObjC.h" 20#include "clang/AST/ExprCXX.h" 21#include "clang/AST/ExprObjC.h" 22#include "clang/AST/DeclObjC.h" 23#include "clang/AST/StmtCXX.h" 24#include "clang/AST/StmtObjC.h" 25#include "clang/Lex/LiteralSupport.h" 26#include "clang/Lex/Preprocessor.h" 27#include "llvm/ADT/BitVector.h" 28#include "llvm/ADT/STLExtras.h" 29#include "llvm/ADT/StringExtras.h" 30#include "llvm/Support/raw_ostream.h" 31#include "clang/Basic/TargetBuiltins.h" 32#include "clang/Basic/TargetInfo.h" 33#include <limits> 34using namespace clang; 35 36/// getLocationOfStringLiteralByte - Return a source location that points to the 37/// specified byte of the specified string literal. 38/// 39/// Strings are amazingly complex. They can be formed from multiple tokens and 40/// can have escape sequences in them in addition to the usual trigraph and 41/// escaped newline business. This routine handles this complexity. 42/// 43SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 44 unsigned ByteNo) const { 45 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 46 47 // Loop over all of the tokens in this string until we find the one that 48 // contains the byte we're looking for. 
49 unsigned TokNo = 0; 50 while (1) { 51 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 52 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 53 54 // Get the spelling of the string so that we can get the data that makes up 55 // the string literal, not the identifier for the macro it is potentially 56 // expanded through. 57 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 58 59 // Re-lex the token to get its length and original spelling. 60 std::pair<FileID, unsigned> LocInfo = 61 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 62 bool Invalid = false; 63 llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid); 64 if (Invalid) 65 return StrTokSpellingLoc; 66 67 const char *StrData = Buffer.data()+LocInfo.second; 68 69 // Create a langops struct and enable trigraphs. This is sufficient for 70 // relexing tokens. 71 LangOptions LangOpts; 72 LangOpts.Trigraphs = true; 73 74 // Create a lexer starting at the beginning of this token. 75 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData, 76 Buffer.end()); 77 Token TheTok; 78 TheLexer.LexFromRawLexer(TheTok); 79 80 // Use the StringLiteralParser to compute the length of the string in bytes. 81 StringLiteralParser SLP(&TheTok, 1, PP, /*Complain=*/false); 82 unsigned TokNumBytes = SLP.GetStringLength(); 83 84 // If the byte is in this token, return the location of the byte. 85 if (ByteNo < TokNumBytes || 86 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 87 unsigned Offset = 88 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP, 89 /*Complain=*/false); 90 91 // Now that we know the offset of the token in the spelling, use the 92 // preprocessor to get the offset in the original source. 93 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 94 } 95 96 // Move to the next string token. 
97 ++TokNo; 98 ByteNo -= TokNumBytes; 99 } 100} 101 102/// CheckablePrintfAttr - does a function call have a "printf" attribute 103/// and arguments that merit checking? 104bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) { 105 if (Format->getType() == "printf") return true; 106 if (Format->getType() == "printf0") { 107 // printf0 allows null "format" string; if so don't check format/args 108 unsigned format_idx = Format->getFormatIdx() - 1; 109 // Does the index refer to the implicit object argument? 110 if (isa<CXXMemberCallExpr>(TheCall)) { 111 if (format_idx == 0) 112 return false; 113 --format_idx; 114 } 115 if (format_idx < TheCall->getNumArgs()) { 116 Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts(); 117 if (!Format->isNullPointerConstant(Context, 118 Expr::NPC_ValueDependentIsNull)) 119 return true; 120 } 121 } 122 return false; 123} 124 125Action::OwningExprResult 126Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 127 OwningExprResult TheCallResult(Owned(TheCall)); 128 129 switch (BuiltinID) { 130 case Builtin::BI__builtin___CFStringMakeConstantString: 131 assert(TheCall->getNumArgs() == 1 && 132 "Wrong # arguments to builtin CFStringMakeConstantString"); 133 if (CheckObjCString(TheCall->getArg(0))) 134 return ExprError(); 135 break; 136 case Builtin::BI__builtin_stdarg_start: 137 case Builtin::BI__builtin_va_start: 138 if (SemaBuiltinVAStart(TheCall)) 139 return ExprError(); 140 break; 141 case Builtin::BI__builtin_isgreater: 142 case Builtin::BI__builtin_isgreaterequal: 143 case Builtin::BI__builtin_isless: 144 case Builtin::BI__builtin_islessequal: 145 case Builtin::BI__builtin_islessgreater: 146 case Builtin::BI__builtin_isunordered: 147 if (SemaBuiltinUnorderedCompare(TheCall)) 148 return ExprError(); 149 break; 150 case Builtin::BI__builtin_fpclassify: 151 if (SemaBuiltinFPClassification(TheCall, 6)) 152 return ExprError(); 153 break; 154 case Builtin::BI__builtin_isfinite: 155 case 
Builtin::BI__builtin_isinf: 156 case Builtin::BI__builtin_isinf_sign: 157 case Builtin::BI__builtin_isnan: 158 case Builtin::BI__builtin_isnormal: 159 if (SemaBuiltinFPClassification(TheCall, 1)) 160 return ExprError(); 161 break; 162 case Builtin::BI__builtin_return_address: 163 case Builtin::BI__builtin_frame_address: { 164 llvm::APSInt Result; 165 if (SemaBuiltinConstantArg(TheCall, 0, Result)) 166 return ExprError(); 167 break; 168 } 169 case Builtin::BI__builtin_eh_return_data_regno: { 170 llvm::APSInt Result; 171 if (SemaBuiltinConstantArg(TheCall, 0, Result)) 172 return ExprError(); 173 break; 174 } 175 case Builtin::BI__builtin_shufflevector: 176 return SemaBuiltinShuffleVector(TheCall); 177 // TheCall will be freed by the smart pointer here, but that's fine, since 178 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 179 case Builtin::BI__builtin_prefetch: 180 if (SemaBuiltinPrefetch(TheCall)) 181 return ExprError(); 182 break; 183 case Builtin::BI__builtin_object_size: 184 if (SemaBuiltinObjectSize(TheCall)) 185 return ExprError(); 186 break; 187 case Builtin::BI__builtin_longjmp: 188 if (SemaBuiltinLongjmp(TheCall)) 189 return ExprError(); 190 break; 191 case Builtin::BI__sync_fetch_and_add: 192 case Builtin::BI__sync_fetch_and_sub: 193 case Builtin::BI__sync_fetch_and_or: 194 case Builtin::BI__sync_fetch_and_and: 195 case Builtin::BI__sync_fetch_and_xor: 196 case Builtin::BI__sync_add_and_fetch: 197 case Builtin::BI__sync_sub_and_fetch: 198 case Builtin::BI__sync_and_and_fetch: 199 case Builtin::BI__sync_or_and_fetch: 200 case Builtin::BI__sync_xor_and_fetch: 201 case Builtin::BI__sync_val_compare_and_swap: 202 case Builtin::BI__sync_bool_compare_and_swap: 203 case Builtin::BI__sync_lock_test_and_set: 204 case Builtin::BI__sync_lock_release: 205 return SemaBuiltinAtomicOverloaded(move(TheCallResult)); 206 } 207 208 // Since the target specific builtins for each arch overlap, only check those 209 // of the arch we are compiling for. 
210 if (BuiltinID >= Builtin::FirstTSBuiltin) { 211 switch (Context.Target.getTriple().getArch()) { 212 case llvm::Triple::arm: 213 case llvm::Triple::thumb: 214 if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall)) 215 return ExprError(); 216 break; 217 case llvm::Triple::x86: 218 case llvm::Triple::x86_64: 219 if (CheckX86BuiltinFunctionCall(BuiltinID, TheCall)) 220 return ExprError(); 221 break; 222 default: 223 break; 224 } 225 } 226 227 return move(TheCallResult); 228} 229 230bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 231 switch (BuiltinID) { 232 case X86::BI__builtin_ia32_palignr128: 233 case X86::BI__builtin_ia32_palignr: { 234 llvm::APSInt Result; 235 if (SemaBuiltinConstantArg(TheCall, 2, Result)) 236 return true; 237 break; 238 } 239 } 240 return false; 241} 242 243// Get the valid immediate range for the specified NEON type code. 244static unsigned RFT(unsigned t, bool shift = false) { 245 bool quad = t & 0x10; 246 247 switch (t & 0x7) { 248 case 0: // i8 249 return shift ? 7 : (8 << (int)quad) - 1; 250 case 1: // i16 251 return shift ? 15 : (4 << (int)quad) - 1; 252 case 2: // i32 253 return shift ? 31 : (2 << (int)quad) - 1; 254 case 3: // i64 255 return shift ? 
63 : (1 << (int)quad) - 1; 256 case 4: // f32 257 assert(!shift && "cannot shift float types!"); 258 return (2 << (int)quad) - 1; 259 case 5: // poly8 260 assert(!shift && "cannot shift polynomial types!"); 261 return (8 << (int)quad) - 1; 262 case 6: // poly16 263 assert(!shift && "cannot shift polynomial types!"); 264 return (4 << (int)quad) - 1; 265 case 7: // float16 266 assert(!shift && "cannot shift float types!"); 267 return (4 << (int)quad) - 1; 268 } 269 return 0; 270} 271 272bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 273 llvm::APSInt Result; 274 275 unsigned mask = 0; 276 unsigned TV = 0; 277 switch (BuiltinID) { 278#define GET_NEON_OVERLOAD_CHECK 279#include "clang/Basic/arm_neon.inc" 280#undef GET_NEON_OVERLOAD_CHECK 281 } 282 283 // For NEON intrinsics which are overloaded on vector element type, validate 284 // the immediate which specifies which variant to emit. 285 if (mask) { 286 unsigned ArgNo = TheCall->getNumArgs()-1; 287 if (SemaBuiltinConstantArg(TheCall, ArgNo, Result)) 288 return true; 289 290 TV = Result.getLimitedValue(32); 291 if ((TV > 31) || (mask & (1 << TV)) == 0) 292 return Diag(TheCall->getLocStart(), diag::err_invalid_neon_type_code) 293 << TheCall->getArg(ArgNo)->getSourceRange(); 294 } 295 296 // For NEON intrinsics which take an immediate value as part of the 297 // instruction, range check them here. 298 unsigned i = 0, l = 0, u = 0; 299 switch (BuiltinID) { 300 default: return false; 301#define GET_NEON_IMMEDIATE_CHECK 302#include "clang/Basic/arm_neon.inc" 303#undef GET_NEON_IMMEDIATE_CHECK 304 }; 305 306 // Check that the immediate argument is actually a constant. 307 if (SemaBuiltinConstantArg(TheCall, i, Result)) 308 return true; 309 310 // Range check against the upper/lower values for this isntruction. 
311 unsigned Val = Result.getZExtValue(); 312 if (Val < l || Val > (u + l)) 313 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 314 << llvm::utostr(l) << llvm::utostr(u+l) 315 << TheCall->getArg(i)->getSourceRange(); 316 317 return false; 318} 319 320/// CheckFunctionCall - Check a direct function call for various correctness 321/// and safety properties not strictly enforced by the C type system. 322bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 323 // Get the IdentifierInfo* for the called function. 324 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 325 326 // None of the checks below are needed for functions that don't have 327 // simple names (e.g., C++ conversion functions). 328 if (!FnInfo) 329 return false; 330 331 // FIXME: This mechanism should be abstracted to be less fragile and 332 // more efficient. For example, just map function ids to custom 333 // handlers. 334 335 // Printf checking. 336 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 337 const bool b = Format->getType() == "scanf"; 338 if (b || CheckablePrintfAttr(Format, TheCall)) { 339 bool HasVAListArg = Format->getFirstArg() == 0; 340 CheckPrintfScanfArguments(TheCall, HasVAListArg, 341 Format->getFormatIdx() - 1, 342 HasVAListArg ? 0 : Format->getFirstArg() - 1, 343 !b); 344 } 345 } 346 347 for (const NonNullAttr *NonNull = FDecl->getAttr<NonNullAttr>(); NonNull; 348 NonNull = NonNull->getNext<NonNullAttr>()) 349 CheckNonNullArguments(NonNull, TheCall); 350 351 return false; 352} 353 354bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { 355 // Printf checking. 
356 const FormatAttr *Format = NDecl->getAttr<FormatAttr>(); 357 if (!Format) 358 return false; 359 360 const VarDecl *V = dyn_cast<VarDecl>(NDecl); 361 if (!V) 362 return false; 363 364 QualType Ty = V->getType(); 365 if (!Ty->isBlockPointerType()) 366 return false; 367 368 const bool b = Format->getType() == "scanf"; 369 if (!b && !CheckablePrintfAttr(Format, TheCall)) 370 return false; 371 372 bool HasVAListArg = Format->getFirstArg() == 0; 373 CheckPrintfScanfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 374 HasVAListArg ? 0 : Format->getFirstArg() - 1, !b); 375 376 return false; 377} 378 379/// SemaBuiltinAtomicOverloaded - We have a call to a function like 380/// __sync_fetch_and_add, which is an overloaded function based on the pointer 381/// type of its first argument. The main ActOnCallExpr routines have already 382/// promoted the types of arguments because all of these calls are prototyped as 383/// void(...). 384/// 385/// This function goes through and does final semantic checking for these 386/// builtins, 387Sema::OwningExprResult 388Sema::SemaBuiltinAtomicOverloaded(OwningExprResult TheCallResult) { 389 CallExpr *TheCall = (CallExpr *)TheCallResult.get(); 390 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 391 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 392 393 // Ensure that we have at least one argument to do type inference from. 394 if (TheCall->getNumArgs() < 1) { 395 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) 396 << 0 << 1 << TheCall->getNumArgs() 397 << TheCall->getCallee()->getSourceRange(); 398 return ExprError(); 399 } 400 401 // Inspect the first argument of the atomic builtin. This should always be 402 // a pointer type, whose element is an integral scalar or pointer type. 403 // Because it is a pointer type, we don't have to worry about any implicit 404 // casts here. 405 // FIXME: We don't allow floating point scalars as input. 
406 Expr *FirstArg = TheCall->getArg(0); 407 if (!FirstArg->getType()->isPointerType()) { 408 Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer) 409 << FirstArg->getType() << FirstArg->getSourceRange(); 410 return ExprError(); 411 } 412 413 QualType ValType = 414 FirstArg->getType()->getAs<PointerType>()->getPointeeType(); 415 if (!ValType->isIntegerType() && !ValType->isPointerType() && 416 !ValType->isBlockPointerType()) { 417 Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer_intptr) 418 << FirstArg->getType() << FirstArg->getSourceRange(); 419 return ExprError(); 420 } 421 422 // The majority of builtins return a value, but a few have special return 423 // types, so allow them to override appropriately below. 424 QualType ResultType = ValType; 425 426 // We need to figure out which concrete builtin this maps onto. For example, 427 // __sync_fetch_and_add with a 2 byte object turns into 428 // __sync_fetch_and_add_2. 429#define BUILTIN_ROW(x) \ 430 { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \ 431 Builtin::BI##x##_8, Builtin::BI##x##_16 } 432 433 static const unsigned BuiltinIndices[][5] = { 434 BUILTIN_ROW(__sync_fetch_and_add), 435 BUILTIN_ROW(__sync_fetch_and_sub), 436 BUILTIN_ROW(__sync_fetch_and_or), 437 BUILTIN_ROW(__sync_fetch_and_and), 438 BUILTIN_ROW(__sync_fetch_and_xor), 439 440 BUILTIN_ROW(__sync_add_and_fetch), 441 BUILTIN_ROW(__sync_sub_and_fetch), 442 BUILTIN_ROW(__sync_and_and_fetch), 443 BUILTIN_ROW(__sync_or_and_fetch), 444 BUILTIN_ROW(__sync_xor_and_fetch), 445 446 BUILTIN_ROW(__sync_val_compare_and_swap), 447 BUILTIN_ROW(__sync_bool_compare_and_swap), 448 BUILTIN_ROW(__sync_lock_test_and_set), 449 BUILTIN_ROW(__sync_lock_release) 450 }; 451#undef BUILTIN_ROW 452 453 // Determine the index of the size. 
454 unsigned SizeIndex; 455 switch (Context.getTypeSizeInChars(ValType).getQuantity()) { 456 case 1: SizeIndex = 0; break; 457 case 2: SizeIndex = 1; break; 458 case 4: SizeIndex = 2; break; 459 case 8: SizeIndex = 3; break; 460 case 16: SizeIndex = 4; break; 461 default: 462 Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size) 463 << FirstArg->getType() << FirstArg->getSourceRange(); 464 return ExprError(); 465 } 466 467 // Each of these builtins has one pointer argument, followed by some number of 468 // values (0, 1 or 2) followed by a potentially empty varags list of stuff 469 // that we ignore. Find out which row of BuiltinIndices to read from as well 470 // as the number of fixed args. 471 unsigned BuiltinID = FDecl->getBuiltinID(); 472 unsigned BuiltinIndex, NumFixed = 1; 473 switch (BuiltinID) { 474 default: assert(0 && "Unknown overloaded atomic builtin!"); 475 case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break; 476 case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break; 477 case Builtin::BI__sync_fetch_and_or: BuiltinIndex = 2; break; 478 case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break; 479 case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break; 480 481 case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 5; break; 482 case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 6; break; 483 case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 7; break; 484 case Builtin::BI__sync_or_and_fetch: BuiltinIndex = 8; break; 485 case Builtin::BI__sync_xor_and_fetch: BuiltinIndex = 9; break; 486 487 case Builtin::BI__sync_val_compare_and_swap: 488 BuiltinIndex = 10; 489 NumFixed = 2; 490 break; 491 case Builtin::BI__sync_bool_compare_and_swap: 492 BuiltinIndex = 11; 493 NumFixed = 2; 494 ResultType = Context.BoolTy; 495 break; 496 case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 12; break; 497 case Builtin::BI__sync_lock_release: 498 BuiltinIndex = 13; 499 NumFixed = 0; 500 ResultType = Context.VoidTy; 501 break; 
502 } 503 504 // Now that we know how many fixed arguments we expect, first check that we 505 // have at least that many. 506 if (TheCall->getNumArgs() < 1+NumFixed) { 507 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) 508 << 0 << 1+NumFixed << TheCall->getNumArgs() 509 << TheCall->getCallee()->getSourceRange(); 510 return ExprError(); 511 } 512 513 // Get the decl for the concrete builtin from this, we can tell what the 514 // concrete integer type we should convert to is. 515 unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex]; 516 const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID); 517 IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName); 518 FunctionDecl *NewBuiltinDecl = 519 cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID, 520 TUScope, false, DRE->getLocStart())); 521 522 // The first argument is by definition correct, we use it's type as the type 523 // of the entire operation. Walk the remaining arguments promoting them to 524 // the deduced value type. 525 for (unsigned i = 0; i != NumFixed; ++i) { 526 Expr *Arg = TheCall->getArg(i+1); 527 528 // If the argument is an implicit cast, then there was a promotion due to 529 // "...", just remove it now. 530 if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) { 531 Arg = ICE->getSubExpr(); 532 ICE->setSubExpr(0); 533 TheCall->setArg(i+1, Arg); 534 } 535 536 // GCC does an implicit conversion to the pointer or integer ValType. This 537 // can fail in some cases (1i -> int**), check for this error case now. 538 CastExpr::CastKind Kind = CastExpr::CK_Unknown; 539 CXXBaseSpecifierArray BasePath; 540 if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg, Kind, BasePath)) 541 return ExprError(); 542 543 // Okay, we have something that *can* be converted to the right type. Check 544 // to see if there is a potentially weird extension going on here. 
This can 545 // happen when you do an atomic operation on something like an char* and 546 // pass in 42. The 42 gets converted to char. This is even more strange 547 // for things like 45.123 -> char, etc. 548 // FIXME: Do this check. 549 ImpCastExprToType(Arg, ValType, Kind); 550 TheCall->setArg(i+1, Arg); 551 } 552 553 // Switch the DeclRefExpr to refer to the new decl. 554 DRE->setDecl(NewBuiltinDecl); 555 DRE->setType(NewBuiltinDecl->getType()); 556 557 // Set the callee in the CallExpr. 558 // FIXME: This leaks the original parens and implicit casts. 559 Expr *PromotedCall = DRE; 560 UsualUnaryConversions(PromotedCall); 561 TheCall->setCallee(PromotedCall); 562 563 // Change the result type of the call to match the original value type. This 564 // is arbitrary, but the codegen for these builtins ins design to handle it 565 // gracefully. 566 TheCall->setType(ResultType); 567 568 return move(TheCallResult); 569} 570 571 572/// CheckObjCString - Checks that the argument to the builtin 573/// CFString constructor is correct 574/// FIXME: GCC currently emits the following warning: 575/// "warning: input conversion stopped due to an input byte that does not 576/// belong to the input codeset UTF-8" 577/// Note: It might also make sense to do the UTF-16 conversion here (would 578/// simplify the backend). 
579bool Sema::CheckObjCString(Expr *Arg) { 580 Arg = Arg->IgnoreParenCasts(); 581 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 582 583 if (!Literal || Literal->isWide()) { 584 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 585 << Arg->getSourceRange(); 586 return true; 587 } 588 589 const char *Data = Literal->getStrData(); 590 unsigned Length = Literal->getByteLength(); 591 592 for (unsigned i = 0; i < Length; ++i) { 593 if (!Data[i]) { 594 Diag(getLocationOfStringLiteralByte(Literal, i), 595 diag::warn_cfstring_literal_contains_nul_character) 596 << Arg->getSourceRange(); 597 break; 598 } 599 } 600 601 return false; 602} 603 604/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 605/// Emit an error and return true on failure, return false on success. 606bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 607 Expr *Fn = TheCall->getCallee(); 608 if (TheCall->getNumArgs() > 2) { 609 Diag(TheCall->getArg(2)->getLocStart(), 610 diag::err_typecheck_call_too_many_args) 611 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 612 << Fn->getSourceRange() 613 << SourceRange(TheCall->getArg(2)->getLocStart(), 614 (*(TheCall->arg_end()-1))->getLocEnd()); 615 return true; 616 } 617 618 if (TheCall->getNumArgs() < 2) { 619 return Diag(TheCall->getLocEnd(), 620 diag::err_typecheck_call_too_few_args_at_least) 621 << 0 /*function call*/ << 2 << TheCall->getNumArgs(); 622 } 623 624 // Determine whether the current function is variadic or not. 
625 BlockScopeInfo *CurBlock = getCurBlock(); 626 bool isVariadic; 627 if (CurBlock) 628 isVariadic = CurBlock->TheDecl->isVariadic(); 629 else if (FunctionDecl *FD = getCurFunctionDecl()) 630 isVariadic = FD->isVariadic(); 631 else 632 isVariadic = getCurMethodDecl()->isVariadic(); 633 634 if (!isVariadic) { 635 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 636 return true; 637 } 638 639 // Verify that the second argument to the builtin is the last argument of the 640 // current function or method. 641 bool SecondArgIsLastNamedArgument = false; 642 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 643 644 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 645 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 646 // FIXME: This isn't correct for methods (results in bogus warning). 647 // Get the last formal in the current function. 648 const ParmVarDecl *LastArg; 649 if (CurBlock) 650 LastArg = *(CurBlock->TheDecl->param_end()-1); 651 else if (FunctionDecl *FD = getCurFunctionDecl()) 652 LastArg = *(FD->param_end()-1); 653 else 654 LastArg = *(getCurMethodDecl()->param_end()-1); 655 SecondArgIsLastNamedArgument = PV == LastArg; 656 } 657 } 658 659 if (!SecondArgIsLastNamedArgument) 660 Diag(TheCall->getArg(1)->getLocStart(), 661 diag::warn_second_parameter_of_va_start_not_last_named_argument); 662 return false; 663} 664 665/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 666/// friends. This is declared to take (...), so we have to check everything. 
667bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 668 if (TheCall->getNumArgs() < 2) 669 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 670 << 0 << 2 << TheCall->getNumArgs()/*function call*/; 671 if (TheCall->getNumArgs() > 2) 672 return Diag(TheCall->getArg(2)->getLocStart(), 673 diag::err_typecheck_call_too_many_args) 674 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 675 << SourceRange(TheCall->getArg(2)->getLocStart(), 676 (*(TheCall->arg_end()-1))->getLocEnd()); 677 678 Expr *OrigArg0 = TheCall->getArg(0); 679 Expr *OrigArg1 = TheCall->getArg(1); 680 681 // Do standard promotions between the two arguments, returning their common 682 // type. 683 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 684 685 // Make sure any conversions are pushed back into the call; this is 686 // type safe since unordered compare builtins are declared as "_Bool 687 // foo(...)". 688 TheCall->setArg(0, OrigArg0); 689 TheCall->setArg(1, OrigArg1); 690 691 if (OrigArg0->isTypeDependent() || OrigArg1->isTypeDependent()) 692 return false; 693 694 // If the common type isn't a real floating type, then the arguments were 695 // invalid for this operation. 696 if (!Res->isRealFloatingType()) 697 return Diag(OrigArg0->getLocStart(), 698 diag::err_typecheck_call_invalid_ordered_compare) 699 << OrigArg0->getType() << OrigArg1->getType() 700 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 701 702 return false; 703} 704 705/// SemaBuiltinSemaBuiltinFPClassification - Handle functions like 706/// __builtin_isnan and friends. This is declared to take (...), so we have 707/// to check everything. We expect the last argument to be a floating point 708/// value. 
709bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs) { 710 if (TheCall->getNumArgs() < NumArgs) 711 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 712 << 0 << NumArgs << TheCall->getNumArgs()/*function call*/; 713 if (TheCall->getNumArgs() > NumArgs) 714 return Diag(TheCall->getArg(NumArgs)->getLocStart(), 715 diag::err_typecheck_call_too_many_args) 716 << 0 /*function call*/ << NumArgs << TheCall->getNumArgs() 717 << SourceRange(TheCall->getArg(NumArgs)->getLocStart(), 718 (*(TheCall->arg_end()-1))->getLocEnd()); 719 720 Expr *OrigArg = TheCall->getArg(NumArgs-1); 721 722 if (OrigArg->isTypeDependent()) 723 return false; 724 725 // This operation requires a non-_Complex floating-point number. 726 if (!OrigArg->getType()->isRealFloatingType()) 727 return Diag(OrigArg->getLocStart(), 728 diag::err_typecheck_call_invalid_unary_fp) 729 << OrigArg->getType() << OrigArg->getSourceRange(); 730 731 // If this is an implicit conversion from float -> double, remove it. 732 if (ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(OrigArg)) { 733 Expr *CastArg = Cast->getSubExpr(); 734 if (CastArg->getType()->isSpecificBuiltinType(BuiltinType::Float)) { 735 assert(Cast->getType()->isSpecificBuiltinType(BuiltinType::Double) && 736 "promotion from float to double is the only expected cast here"); 737 Cast->setSubExpr(0); 738 TheCall->setArg(NumArgs-1, CastArg); 739 OrigArg = CastArg; 740 } 741 } 742 743 return false; 744} 745 746/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 747// This is declared to take (...), so we have to check everything. 
748Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 749 if (TheCall->getNumArgs() < 2) 750 return ExprError(Diag(TheCall->getLocEnd(), 751 diag::err_typecheck_call_too_few_args_at_least) 752 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 753 << TheCall->getSourceRange()); 754 755 // Determine which of the following types of shufflevector we're checking: 756 // 1) unary, vector mask: (lhs, mask) 757 // 2) binary, vector mask: (lhs, rhs, mask) 758 // 3) binary, scalar mask: (lhs, rhs, index, ..., index) 759 QualType resType = TheCall->getArg(0)->getType(); 760 unsigned numElements = 0; 761 762 if (!TheCall->getArg(0)->isTypeDependent() && 763 !TheCall->getArg(1)->isTypeDependent()) { 764 QualType LHSType = TheCall->getArg(0)->getType(); 765 QualType RHSType = TheCall->getArg(1)->getType(); 766 767 if (!LHSType->isVectorType() || !RHSType->isVectorType()) { 768 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 769 << SourceRange(TheCall->getArg(0)->getLocStart(), 770 TheCall->getArg(1)->getLocEnd()); 771 return ExprError(); 772 } 773 774 numElements = LHSType->getAs<VectorType>()->getNumElements(); 775 unsigned numResElements = TheCall->getNumArgs() - 2; 776 777 // Check to see if we have a call with 2 vector arguments, the unary shuffle 778 // with mask. If so, verify that RHS is an integer vector type with the 779 // same number of elts as lhs. 
780 if (TheCall->getNumArgs() == 2) { 781 if (!RHSType->hasIntegerRepresentation() || 782 RHSType->getAs<VectorType>()->getNumElements() != numElements) 783 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 784 << SourceRange(TheCall->getArg(1)->getLocStart(), 785 TheCall->getArg(1)->getLocEnd()); 786 numResElements = numElements; 787 } 788 else if (!Context.hasSameUnqualifiedType(LHSType, RHSType)) { 789 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 790 << SourceRange(TheCall->getArg(0)->getLocStart(), 791 TheCall->getArg(1)->getLocEnd()); 792 return ExprError(); 793 } else if (numElements != numResElements) { 794 QualType eltType = LHSType->getAs<VectorType>()->getElementType(); 795 resType = Context.getVectorType(eltType, numResElements, 796 VectorType::NotAltiVec); 797 } 798 } 799 800 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 801 if (TheCall->getArg(i)->isTypeDependent() || 802 TheCall->getArg(i)->isValueDependent()) 803 continue; 804 805 llvm::APSInt Result(32); 806 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 807 return ExprError(Diag(TheCall->getLocStart(), 808 diag::err_shufflevector_nonconstant_argument) 809 << TheCall->getArg(i)->getSourceRange()); 810 811 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 812 return ExprError(Diag(TheCall->getLocStart(), 813 diag::err_shufflevector_argument_too_large) 814 << TheCall->getArg(i)->getSourceRange()); 815 } 816 817 llvm::SmallVector<Expr*, 32> exprs; 818 819 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 820 exprs.push_back(TheCall->getArg(i)); 821 TheCall->setArg(i, 0); 822 } 823 824 return Owned(new (Context) ShuffleVectorExpr(Context, exprs.begin(), 825 exprs.size(), resType, 826 TheCall->getCallee()->getLocStart(), 827 TheCall->getRParenLoc())); 828} 829 830/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 831// This is declared to take (const void*, ...) 
and can take two 832// optional constant int args. 833bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 834 unsigned NumArgs = TheCall->getNumArgs(); 835 836 if (NumArgs > 3) 837 return Diag(TheCall->getLocEnd(), 838 diag::err_typecheck_call_too_many_args_at_most) 839 << 0 /*function call*/ << 3 << NumArgs 840 << TheCall->getSourceRange(); 841 842 // Argument 0 is checked for us and the remaining arguments must be 843 // constant integers. 844 for (unsigned i = 1; i != NumArgs; ++i) { 845 Expr *Arg = TheCall->getArg(i); 846 847 llvm::APSInt Result; 848 if (SemaBuiltinConstantArg(TheCall, i, Result)) 849 return true; 850 851 // FIXME: gcc issues a warning and rewrites these to 0. These 852 // seems especially odd for the third argument since the default 853 // is 3. 854 if (i == 1) { 855 if (Result.getLimitedValue() > 1) 856 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 857 << "0" << "1" << Arg->getSourceRange(); 858 } else { 859 if (Result.getLimitedValue() > 3) 860 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 861 << "0" << "3" << Arg->getSourceRange(); 862 } 863 } 864 865 return false; 866} 867 868/// SemaBuiltinConstantArg - Handle a check if argument ArgNum of CallExpr 869/// TheCall is a constant expression. 870bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, 871 llvm::APSInt &Result) { 872 Expr *Arg = TheCall->getArg(ArgNum); 873 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 874 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 875 876 if (Arg->isTypeDependent() || Arg->isValueDependent()) return false; 877 878 if (!Arg->isIntegerConstantExpr(Result, Context)) 879 return Diag(TheCall->getLocStart(), diag::err_constant_integer_arg_type) 880 << FDecl->getDeclName() << Arg->getSourceRange(); 881 882 return false; 883} 884 885/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 886/// int type). 
This simply type checks that type is one of the defined 887/// constants (0-3). 888// For compatability check 0-3, llvm only handles 0 and 2. 889bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 890 llvm::APSInt Result; 891 892 // Check constant-ness first. 893 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 894 return true; 895 896 Expr *Arg = TheCall->getArg(1); 897 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 898 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 899 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 900 } 901 902 return false; 903} 904 905/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). 906/// This checks that val is a constant 1. 907bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 908 Expr *Arg = TheCall->getArg(1); 909 llvm::APSInt Result; 910 911 // TODO: This is less than ideal. Overload this to take a value. 912 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 913 return true; 914 915 if (Result != 1) 916 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 917 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 918 919 return false; 920} 921 922// Handle i > 1 ? 
"x" : "y", recursivelly 923bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 924 bool HasVAListArg, 925 unsigned format_idx, unsigned firstDataArg, 926 bool isPrintf) { 927 928 if (E->isTypeDependent() || E->isValueDependent()) 929 return false; 930 931 switch (E->getStmtClass()) { 932 case Stmt::ConditionalOperatorClass: { 933 const ConditionalOperator *C = cast<ConditionalOperator>(E); 934 return SemaCheckStringLiteral(C->getTrueExpr(), TheCall, HasVAListArg, 935 format_idx, firstDataArg, isPrintf) 936 && SemaCheckStringLiteral(C->getRHS(), TheCall, HasVAListArg, 937 format_idx, firstDataArg, isPrintf); 938 } 939 940 case Stmt::ImplicitCastExprClass: { 941 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 942 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 943 format_idx, firstDataArg, isPrintf); 944 } 945 946 case Stmt::ParenExprClass: { 947 const ParenExpr *Expr = cast<ParenExpr>(E); 948 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 949 format_idx, firstDataArg, isPrintf); 950 } 951 952 case Stmt::DeclRefExprClass: { 953 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 954 955 // As an exception, do not flag errors for variables binding to 956 // const string literals. 
957 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 958 bool isConstant = false; 959 QualType T = DR->getType(); 960 961 if (const ArrayType *AT = Context.getAsArrayType(T)) { 962 isConstant = AT->getElementType().isConstant(Context); 963 } else if (const PointerType *PT = T->getAs<PointerType>()) { 964 isConstant = T.isConstant(Context) && 965 PT->getPointeeType().isConstant(Context); 966 } 967 968 if (isConstant) { 969 if (const Expr *Init = VD->getAnyInitializer()) 970 return SemaCheckStringLiteral(Init, TheCall, 971 HasVAListArg, format_idx, firstDataArg, 972 isPrintf); 973 } 974 975 // For vprintf* functions (i.e., HasVAListArg==true), we add a 976 // special check to see if the format string is a function parameter 977 // of the function calling the printf function. If the function 978 // has an attribute indicating it is a printf-like function, then we 979 // should suppress warnings concerning non-literals being used in a call 980 // to a vprintf function. For example: 981 // 982 // void 983 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){ 984 // va_list ap; 985 // va_start(ap, fmt); 986 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 987 // ... 988 // 989 // 990 // FIXME: We don't have full attribute support yet, so just check to see 991 // if the argument is a DeclRefExpr that references a parameter. We'll 992 // add proper support for checking the attribute later. 
993 if (HasVAListArg) 994 if (isa<ParmVarDecl>(VD)) 995 return true; 996 } 997 998 return false; 999 } 1000 1001 case Stmt::CallExprClass: { 1002 const CallExpr *CE = cast<CallExpr>(E); 1003 if (const ImplicitCastExpr *ICE 1004 = dyn_cast<ImplicitCastExpr>(CE->getCallee())) { 1005 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) { 1006 if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) { 1007 if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) { 1008 unsigned ArgIndex = FA->getFormatIdx(); 1009 const Expr *Arg = CE->getArg(ArgIndex - 1); 1010 1011 return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg, 1012 format_idx, firstDataArg, isPrintf); 1013 } 1014 } 1015 } 1016 } 1017 1018 return false; 1019 } 1020 case Stmt::ObjCStringLiteralClass: 1021 case Stmt::StringLiteralClass: { 1022 const StringLiteral *StrE = NULL; 1023 1024 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 1025 StrE = ObjCFExpr->getString(); 1026 else 1027 StrE = cast<StringLiteral>(E); 1028 1029 if (StrE) { 1030 CheckFormatString(StrE, E, TheCall, HasVAListArg, format_idx, 1031 firstDataArg, isPrintf); 1032 return true; 1033 } 1034 1035 return false; 1036 } 1037 1038 default: 1039 return false; 1040 } 1041} 1042 1043void 1044Sema::CheckNonNullArguments(const NonNullAttr *NonNull, 1045 const CallExpr *TheCall) { 1046 for (NonNullAttr::iterator i = NonNull->begin(), e = NonNull->end(); 1047 i != e; ++i) { 1048 const Expr *ArgExpr = TheCall->getArg(*i); 1049 if (ArgExpr->isNullPointerConstant(Context, 1050 Expr::NPC_ValueDependentIsNotNull)) 1051 Diag(TheCall->getCallee()->getLocStart(), diag::warn_null_arg) 1052 << ArgExpr->getSourceRange(); 1053 } 1054} 1055 1056/// CheckPrintfScanfArguments - Check calls to printf and scanf (and similar 1057/// functions) for correct use of format strings. 
///
/// format_idx is the index into TheCall's arguments of the format string and
/// firstDataArg is the index of the first data argument; both are adjusted
/// below for C++ member calls.  isPrintf selects printf checking, otherwise
/// scanf checking is used (see CheckFormatString).
void
Sema::CheckPrintfScanfArguments(const CallExpr *TheCall, bool HasVAListArg,
                                unsigned format_idx, unsigned firstDataArg,
                                bool isPrintf) {

  const Expr *Fn = TheCall->getCallee();

  // The way the format attribute works in GCC, the implicit this argument
  // of member functions is counted. However, it doesn't appear in our own
  // lists, so decrement format_idx in that case.
  if (isa<CXXMemberCallExpr>(TheCall)) {
    // Catch a format attribute mistakenly referring to the object argument.
    if (format_idx == 0)
      return;
    --format_idx;
    if(firstDataArg != 0)
      --firstDataArg;
  }

  // CHECK: printf/scanf-like function is called with no format string.
  if (format_idx >= TheCall->getNumArgs()) {
    Diag(TheCall->getRParenLoc(), diag::warn_missing_format_string)
      << Fn->getSourceRange();
    return;
  }

  const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();

  // CHECK: format string is not a string literal.
  //
  // Dynamically generated format strings are difficult to
  // automatically vet at compile time.  Requiring that format strings
  // are string literals: (1) permits the checking of format strings by
  // the compiler and thereby (2) can practically remove the source of
  // many format string exploits.

  // Format string can be either ObjC string (e.g. @"%d") or
  // C string (e.g. "%d")
  // ObjC string uses the same format specifiers as C string, so we can use
  // the same format string checking logic for both ObjC and C strings.
  if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx,
                             firstDataArg, isPrintf))
    return;  // Literal format string found, check done!

  // If there are no arguments specified, warn with -Wformat-security, otherwise
  // warn only with -Wformat-nonliteral.
  if (TheCall->getNumArgs() == format_idx+1)
    Diag(TheCall->getArg(format_idx)->getLocStart(),
         diag::warn_format_nonliteral_noargs)
      << OrigFormatExpr->getSourceRange();
  else
    Diag(TheCall->getArg(format_idx)->getLocStart(),
         diag::warn_format_nonliteral)
           << OrigFormatExpr->getSourceRange();
}

namespace {
/// CheckFormatHandler - State and diagnostics shared by the printf and scanf
/// format-string checkers below.  It records which data arguments of the
/// call have been referenced by the format string and maps byte positions in
/// the format string back to source locations.
class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
protected:
  Sema &S;
  const StringLiteral *FExpr;
  const Expr *OrigFormatExpr;
  const unsigned FirstDataArg;
  const unsigned NumDataArgs;
  const bool IsObjCLiteral;
  const char *Beg; // Start of format string.
  const bool HasVAListArg;
  const CallExpr *TheCall;
  unsigned FormatIdx;
  // One bit per data argument; a bit is set once a specifier (or a '*'
  // amount) has referenced that argument.
  llvm::BitVector CoveredArgs;
  // Whether the first argument-consuming specifier used positional syntax;
  // only meaningful once atFirstArg has been cleared.
  bool usesPositionalArgs;
  bool atFirstArg;
public:
  CheckFormatHandler(Sema &s, const StringLiteral *fexpr,
                     const Expr *origFormatExpr, unsigned firstDataArg,
                     unsigned numDataArgs, bool isObjCLiteral,
                     const char *beg, bool hasVAListArg,
                     const CallExpr *theCall, unsigned formatIdx)
    : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
      FirstDataArg(firstDataArg),
      NumDataArgs(numDataArgs),
      IsObjCLiteral(isObjCLiteral), Beg(beg),
      HasVAListArg(hasVAListArg),
      TheCall(theCall), FormatIdx(formatIdx),
      usesPositionalArgs(false), atFirstArg(true) {
        // Start with every data argument marked as not yet covered.
        CoveredArgs.resize(numDataArgs);
        CoveredArgs.reset();
      }

  void DoneProcessing();

  void HandleIncompleteSpecifier(const char *startSpecifier,
                                 unsigned specifierLen);

  virtual void HandleInvalidPosition(const char *startSpecifier,
                                     unsigned specifierLen,
                                     analyze_format_string::PositionContext p);

  virtual void HandleZeroPosition(const char *startPos, unsigned posLen);

  void HandleNullChar(const char *nullCharacter);

protected:
  bool HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc,
                                        const char *startSpec,
                                        unsigned specifierLen,
                                        const char *csStart, unsigned csLen);

  SourceRange getFormatStringRange();
  CharSourceRange getSpecifierRange(const char *startSpecifier,
                                    unsigned specifierLen);
  SourceLocation getLocationOfByte(const char *x);

  const Expr *getDataArg(unsigned i) const;

  bool CheckNumArgs(const analyze_format_string::FormatSpecifier &FS,
                    const analyze_format_string::ConversionSpecifier &CS,
                    const char *startSpecifier, unsigned specifierLen,
                    unsigned argIndex);
};
}

// Source range of the format-string expression as written at the call site.
SourceRange CheckFormatHandler::getFormatStringRange() {
  return OrigFormatExpr->getSourceRange();
}

// Character range covering specifierLen bytes of the format string starting
// at startSpecifier.
CharSourceRange CheckFormatHandler::
getSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
  SourceLocation Start = getLocationOfByte(startSpecifier);
  SourceLocation End   = getLocationOfByte(startSpecifier + specifierLen - 1);

  // Advance the end SourceLocation by one due to half-open ranges.
  End = End.getFileLocWithOffset(1);

  return CharSourceRange::getCharRange(Start, End);
}

// Translate a pointer into the format string's data (relative to Beg) into a
// source location inside the string literal.
SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) {
  return S.getLocationOfStringLiteralByte(FExpr, x - Beg);
}

// Report a specifier that ends before it is complete.
void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier,
                                                   unsigned specifierLen){
  SourceLocation Loc = getLocationOfByte(startSpecifier);
  S.Diag(Loc, diag::warn_printf_incomplete_specifier)
    << getSpecifierRange(startSpecifier, specifierLen);
}

// Report an invalid positional specifier; p identifies the context in which
// the position appeared and is passed through to the diagnostic.
void
CheckFormatHandler::HandleInvalidPosition(const char *startPos, unsigned posLen,
                                     analyze_format_string::PositionContext p) {
  SourceLocation Loc = getLocationOfByte(startPos);
  S.Diag(Loc, diag::warn_format_invalid_positional_specifier)
    << (unsigned) p << getSpecifierRange(startPos, posLen);
}

// Report a positional specifier whose position is zero.
void CheckFormatHandler::HandleZeroPosition(const char *startPos,
                                            unsigned posLen) {
  SourceLocation Loc = getLocationOfByte(startPos);
  S.Diag(Loc, diag::warn_format_zero_positional_specifier)
    << getSpecifierRange(startPos, posLen);
}

void CheckFormatHandler::HandleNullChar(const char *nullCharacter) {
  // The presence of a null character is likely an error.
  S.Diag(getLocationOfByte(nullCharacter),
         diag::warn_printf_format_string_contains_null_char)
    << getFormatStringRange();
}

// Return the i-th data argument of the call (i is relative to FirstDataArg).
const Expr *CheckFormatHandler::getDataArg(unsigned i) const {
  return TheCall->getArg(FirstDataArg + i);
}

// Called once the whole format string has been processed; warns about the
// first data argument that no specifier referenced.
void CheckFormatHandler::DoneProcessing() {
    // Does the number of data arguments exceed the number of
    // format conversions in the format string?
  if (!HasVAListArg) {
      // Find any arguments that weren't covered.
    // After the flip, set bits mark the arguments that were NOT covered.
    CoveredArgs.flip();
    signed notCoveredArg = CoveredArgs.find_first();
    if (notCoveredArg >= 0) {
      assert((unsigned)notCoveredArg < NumDataArgs);
      S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
             diag::warn_printf_data_arg_not_used)
      << getFormatStringRange();
    }
  }
}

// Diagnose an invalid conversion specifier.  Returns whether processing of
// the format string should continue.
bool
CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
                                                     SourceLocation Loc,
                                                     const char *startSpec,
                                                     unsigned specifierLen,
                                                     const char *csStart,
                                                     unsigned csLen) {

  bool keepGoing = true;
  if (argIndex < NumDataArgs) {
    // Consider the argument covered, even though the specifier doesn't
    // make sense.
    CoveredArgs.set(argIndex);
  }
  else {
    // If argIndex exceeds the number of data arguments we
    // don't issue a warning because that is just a cascade of warnings (and
    // they may have intended '%%' anyway). We don't want to continue processing
    // the format string after this point, however, as we will likely just get
    // gibberish when trying to match arguments.
    keepGoing = false;
  }

  S.Diag(Loc, diag::warn_format_invalid_conversion)
    << llvm::StringRef(csStart, csLen)
    << getSpecifierRange(startSpec, specifierLen);

  return keepGoing;
}

// Verify that argIndex refers to an existing data argument.  Emits a
// diagnostic and returns false if it does not; returns true otherwise.
bool
CheckFormatHandler::CheckNumArgs(
  const analyze_format_string::FormatSpecifier &FS,
  const analyze_format_string::ConversionSpecifier &CS,
  const char *startSpecifier, unsigned specifierLen, unsigned argIndex) {

  if (argIndex >= NumDataArgs) {
    if (FS.usesPositionalArg())  {
      // argIndex is zero-based; the diagnostic reports it one-based.
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_positional_arg_exceeds_data_args)
      << (argIndex+1) << NumDataArgs
      << getSpecifierRange(startSpecifier, specifierLen);
    }
    else {
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_insufficient_data_args)
      << getSpecifierRange(startSpecifier, specifierLen);
    }

    return false;
  }
  return true;
}

//===--- CHECK: Printf format string checking ------------------------------===//

namespace {
/// CheckPrintfHandler - Format-string handler for printf-style format
/// strings, built on the shared CheckFormatHandler state.
class CheckPrintfHandler : public CheckFormatHandler {
public:
  CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
                     const Expr *origFormatExpr, unsigned firstDataArg,
                     unsigned numDataArgs, bool isObjCLiteral,
                     const char *beg, bool hasVAListArg,
                     const CallExpr *theCall, unsigned formatIdx)
  : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
                       numDataArgs, isObjCLiteral, beg, hasVAListArg,
                       theCall, formatIdx) {}


  bool HandleInvalidPrintfConversionSpecifier(
                                      const analyze_printf::PrintfSpecifier &FS,
                                      const char *startSpecifier,
                                      unsigned specifierLen);

  bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
                             const char *startSpecifier,
                             unsigned specifierLen);

  bool HandleAmount(const analyze_format_string::OptionalAmount &Amt, unsigned k,
                    const char *startSpecifier, unsigned specifierLen);
  void HandleInvalidAmount(const analyze_printf::PrintfSpecifier &FS,
                           const analyze_printf::OptionalAmount &Amt,
                           unsigned type,
                           const char *startSpecifier, unsigned specifierLen);
  void HandleFlag(const analyze_printf::PrintfSpecifier &FS,
                  const analyze_printf::OptionalFlag &flag,
                  const char *startSpecifier, unsigned specifierLen);
  void HandleIgnoredFlag(const analyze_printf::PrintfSpecifier &FS,
                         const analyze_printf::OptionalFlag &ignoredFlag,
                         const analyze_printf::OptionalFlag &flag,
                         const char *startSpecifier, unsigned specifierLen);
};
}

// Forward an invalid printf conversion specifier to the shared handler.
bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
                                      const analyze_printf::PrintfSpecifier &FS,
                                      const char *startSpecifier,
                                      unsigned specifierLen) {
  const analyze_printf::PrintfConversionSpecifier &CS =
    FS.getConversionSpecifier();

  return HandleInvalidConversionSpecifier(FS.getArgIndex(),
                                          getLocationOfByte(CS.getStart()),
                                          startSpecifier, specifierLen,
                                          CS.getStart(), CS.getLength());
}

// Check a field width (k == 0) or precision (k == 1) that takes its value
// from a data argument.  Returns false when checking of this specifier
// should stop.
bool CheckPrintfHandler::HandleAmount(
                               const analyze_format_string::OptionalAmount &Amt,
                               unsigned k, const char *startSpecifier,
                               unsigned specifierLen) {

  if (Amt.hasDataArgument()) {
    if (!HasVAListArg) {
      unsigned argIndex = Amt.getArgIndex();
      if (argIndex >= NumDataArgs) {
        S.Diag(getLocationOfByte(Amt.getStart()),
               diag::warn_printf_asterisk_missing_arg)
          << k << getSpecifierRange(startSpecifier, specifierLen);
        // Don't do any more checking.  We will just emit
        // spurious errors.
        return false;
      }

      // Type check the data argument.  It should be an 'int'.
      // Although not in conformance with C99, we also allow the argument to be
      // an 'unsigned int' as that is a reasonably safe case.  GCC also
      // doesn't emit a warning for that case.
      CoveredArgs.set(argIndex);
      const Expr *Arg = getDataArg(argIndex);
      QualType T = Arg->getType();

      const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context);
      assert(ATR.isValid());

      if (!ATR.matchesType(S.Context, T)) {
        S.Diag(getLocationOfByte(Amt.getStart()),
               diag::warn_printf_asterisk_wrong_type)
          << k
          << ATR.getRepresentativeType(S.Context) << T
          << getSpecifierRange(startSpecifier, specifierLen)
          << Arg->getSourceRange();
        // Don't do any more checking.  We will just emit
        // spurious errors.
        return false;
      }
    }
  }
  return true;
}

// Warn about a field width or precision that makes no sense for the
// conversion; a removal fix-it is attached only for constant amounts.
void CheckPrintfHandler::HandleInvalidAmount(
                                      const analyze_printf::PrintfSpecifier &FS,
                                      const analyze_printf::OptionalAmount &Amt,
                                      unsigned type,
                                      const char *startSpecifier,
                                      unsigned specifierLen) {
  const analyze_printf::PrintfConversionSpecifier &CS =
    FS.getConversionSpecifier();
  switch (Amt.getHowSpecified()) {
  case analyze_printf::OptionalAmount::Constant:
    S.Diag(getLocationOfByte(Amt.getStart()),
        diag::warn_printf_nonsensical_optional_amount)
      << type
      << CS.toString()
      << getSpecifierRange(startSpecifier, specifierLen)
      << FixItHint::CreateRemoval(getSpecifierRange(Amt.getStart(),
          Amt.getConstantLength()));
    break;

  default:
    S.Diag(getLocationOfByte(Amt.getStart()),
        diag::warn_printf_nonsensical_optional_amount)
      << type
      << CS.toString()
      << getSpecifierRange(startSpecifier, specifierLen);
    break;
  }
}

void CheckPrintfHandler::HandleFlag(const analyze_printf::PrintfSpecifier &FS,
                                    const analyze_printf::OptionalFlag &flag,
                                    const char *startSpecifier,
                                    unsigned specifierLen) {
  // Warn about pointless flag with a fixit removal.
  const analyze_printf::PrintfConversionSpecifier &CS =
    FS.getConversionSpecifier();
  S.Diag(getLocationOfByte(flag.getPosition()),
      diag::warn_printf_nonsensical_flag)
    << flag.toString() << CS.toString()
    << getSpecifierRange(startSpecifier, specifierLen)
    << FixItHint::CreateRemoval(getSpecifierRange(flag.getPosition(), 1));
}

void CheckPrintfHandler::HandleIgnoredFlag(
                                const analyze_printf::PrintfSpecifier &FS,
                                const analyze_printf::OptionalFlag &ignoredFlag,
                                const analyze_printf::OptionalFlag &flag,
                                const char *startSpecifier,
                                unsigned specifierLen) {
  // Warn about ignored flag with a fixit removal.
  S.Diag(getLocationOfByte(ignoredFlag.getPosition()),
      diag::warn_printf_ignored_flag)
    << ignoredFlag.toString() << flag.toString()
    << getSpecifierRange(startSpecifier, specifierLen)
    << FixItHint::CreateRemoval(getSpecifierRange(
        ignoredFlag.getPosition(), 1));
}

// Check one printf conversion specification against the call's arguments.
// Returns false when processing of the format string should stop.
bool
CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
                                            &FS,
                                          const char *startSpecifier,
                                          unsigned specifierLen) {

  using namespace analyze_format_string;
  using namespace analyze_printf;
  const PrintfConversionSpecifier &CS = FS.getConversionSpecifier();

  if (FS.consumesDataArgument()) {
    if (atFirstArg) {
        // Remember which argument style the first consuming specifier uses.
        atFirstArg = false;
        usesPositionalArgs = FS.usesPositionalArg();
    }
    else if (usesPositionalArgs != FS.usesPositionalArg()) {
      // Cannot mix-and-match positional and non-positional arguments.
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_format_mix_positional_nonpositional_args)
        << getSpecifierRange(startSpecifier, specifierLen);
      return false;
    }
  }

  // First check if the field width, precision, and conversion specifier
  // have matching data arguments.
  if (!HandleAmount(FS.getFieldWidth(), /* field width */ 0,
                    startSpecifier, specifierLen)) {
    return false;
  }

  if (!HandleAmount(FS.getPrecision(), /* precision */ 1,
                    startSpecifier, specifierLen)) {
    return false;
  }

  if (!CS.consumesDataArgument()) {
    // FIXME: Technically specifying a precision or field width here
    // makes no sense.  Worth issuing a warning at some point.
    return true;
  }

  // Consume the argument.
  unsigned argIndex = FS.getArgIndex();
  if (argIndex < NumDataArgs) {
    // The check to see if the argIndex is valid will come later.
    // We set the bit here because we may exit early from this
    // function if we encounter some other error.
    CoveredArgs.set(argIndex);
  }

  // Check for using an Objective-C specific conversion specifier
  // in a non-ObjC literal.
  if (!IsObjCLiteral && CS.isObjCArg()) {
    return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier,
                                                  specifierLen);
  }

  // Check for invalid use of field width
  if (!FS.hasValidFieldWidth()) {
    HandleInvalidAmount(FS, FS.getFieldWidth(), /* field width */ 0,
        startSpecifier, specifierLen);
  }

  // Check for invalid use of precision
  if (!FS.hasValidPrecision()) {
    HandleInvalidAmount(FS, FS.getPrecision(), /* precision */ 1,
        startSpecifier, specifierLen);
  }

  // Check each flag does not conflict with any other component.
  if (!FS.hasValidLeadingZeros())
    HandleFlag(FS, FS.hasLeadingZeros(), startSpecifier, specifierLen);
  if (!FS.hasValidPlusPrefix())
    HandleFlag(FS, FS.hasPlusPrefix(), startSpecifier, specifierLen);
  if (!FS.hasValidSpacePrefix())
    HandleFlag(FS, FS.hasSpacePrefix(), startSpecifier, specifierLen);
  if (!FS.hasValidAlternativeForm())
    HandleFlag(FS, FS.hasAlternativeForm(), startSpecifier, specifierLen);
  if (!FS.hasValidLeftJustified())
    HandleFlag(FS, FS.isLeftJustified(), startSpecifier, specifierLen);

  // Check that flags are not ignored by another flag
  if (FS.hasSpacePrefix() && FS.hasPlusPrefix()) // ' ' ignored by '+'
    HandleIgnoredFlag(FS, FS.hasSpacePrefix(), FS.hasPlusPrefix(),
        startSpecifier, specifierLen);
  if (FS.hasLeadingZeros() && FS.isLeftJustified()) // '0' ignored by '-'
    HandleIgnoredFlag(FS, FS.hasLeadingZeros(), FS.isLeftJustified(),
            startSpecifier, specifierLen);

  // Check the length modifier is valid with the given conversion specifier.
  const LengthModifier &LM = FS.getLengthModifier();
  if (!FS.hasValidLengthModifier())
    S.Diag(getLocationOfByte(LM.getStart()),
        diag::warn_format_nonsensical_length)
      << LM.toString() << CS.toString()
      << getSpecifierRange(startSpecifier, specifierLen)
      << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(),
          LM.getLength()));

  // Are we using '%n'?
  if (CS.getKind() == ConversionSpecifier::nArg) {
    // Issue a warning about this being a possible security issue.
    S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back)
      << getSpecifierRange(startSpecifier, specifierLen);
    // Continue checking the other format specifiers.
    return true;
  }

  // The remaining checks depend on the data arguments.
  if (HasVAListArg)
    return true;

  if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
    return false;

  // Now type check the data expression that matches the
  // format specifier.
  const Expr *Ex = getDataArg(argIndex);
  const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context);
  if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) {
    // Check if we didn't match because of an implicit cast from a 'char'
    // or 'short' to an 'int'.  This is done because printf is a varargs
    // function.
    if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Ex))
      if (ICE->getType() == S.Context.IntTy)
        if (ATR.matchesType(S.Context, ICE->getSubExpr()->getType()))
          return true;

    // We may be able to offer a FixItHint if it is a supported type.
    PrintfSpecifier fixedFS = FS;
    bool success = fixedFS.fixType(Ex->getType());

    if (success) {
      // Get the fix string from the fixed format specifier
      llvm::SmallString<128> buf;
      llvm::raw_svector_ostream os(buf);
      fixedFS.toString(os);

      S.Diag(getLocationOfByte(CS.getStart()),
          diag::warn_printf_conversion_argument_type_mismatch)
        << ATR.getRepresentativeType(S.Context) << Ex->getType()
        << getSpecifierRange(startSpecifier, specifierLen)
        << Ex->getSourceRange()
        << FixItHint::CreateReplacement(
            getSpecifierRange(startSpecifier, specifierLen),
            os.str());
    }
    else {
      // No fix-it available; emit the same warning without one.
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_conversion_argument_type_mismatch)
        << ATR.getRepresentativeType(S.Context) << Ex->getType()
        << getSpecifierRange(startSpecifier, specifierLen)
        << Ex->getSourceRange();
    }
  }

  return true;
}

//===--- CHECK: Scanf format string checking ------------------------------===//

namespace {
/// CheckScanfHandler - Format-string handler for scanf-style format strings,
/// built on the shared CheckFormatHandler state.
class CheckScanfHandler : public CheckFormatHandler {
public:
  CheckScanfHandler(Sema &s, const StringLiteral *fexpr,
                    const Expr *origFormatExpr, unsigned firstDataArg,
                    unsigned numDataArgs, bool isObjCLiteral,
                    const char *beg, bool hasVAListArg,
                    const CallExpr *theCall, unsigned formatIdx)
  : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
                       numDataArgs, isObjCLiteral, beg, hasVAListArg,
                       theCall, formatIdx) {}

  bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
                            const char *startSpecifier,
                            unsigned specifierLen);

  bool HandleInvalidScanfConversionSpecifier(
          const analyze_scanf::ScanfSpecifier &FS,
          const char *startSpecifier,
          unsigned specifierLen);

  void HandleIncompleteScanList(const char *start, const char *end);
};
}

// Report a scan list that is not terminated; the diagnostic is placed at
// 'end' and highlights the bytes from 'start' up to 'end'.
void CheckScanfHandler::HandleIncompleteScanList(const char *start,
                                                 const char *end) {
  S.Diag(getLocationOfByte(end), diag::warn_scanf_scanlist_incomplete)
    << getSpecifierRange(start, end - start);
}

// Forward an invalid scanf conversion specifier to the shared handler.
bool CheckScanfHandler::HandleInvalidScanfConversionSpecifier(
                                        const analyze_scanf::ScanfSpecifier &FS,
                                        const char *startSpecifier,
                                        unsigned specifierLen) {

  const analyze_scanf::ScanfConversionSpecifier &CS =
    FS.getConversionSpecifier();

  return HandleInvalidConversionSpecifier(FS.getArgIndex(),
                                          getLocationOfByte(CS.getStart()),
                                          startSpecifier, specifierLen,
                                          CS.getStart(), CS.getLength());
}

// Check one scanf conversion specification against the call's arguments.
// Returns false when processing of the format string should stop.
bool CheckScanfHandler::HandleScanfSpecifier(
                                       const analyze_scanf::ScanfSpecifier &FS,
                                       const char *startSpecifier,
                                       unsigned specifierLen) {

  using namespace analyze_scanf;
  using namespace analyze_format_string;

  const ScanfConversionSpecifier &CS = FS.getConversionSpecifier();

  // Handle case where '%' and '*' don't consume an argument.  These shouldn't
  // be used to decide if we are using positional arguments consistently.
  if (FS.consumesDataArgument()) {
    if (atFirstArg) {
      atFirstArg = false;
      usesPositionalArgs = FS.usesPositionalArg();
    }
    else if (usesPositionalArgs != FS.usesPositionalArg()) {
      // Cannot mix-and-match positional and non-positional arguments.
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_format_mix_positional_nonpositional_args)
        << getSpecifierRange(startSpecifier, specifierLen);
      return false;
    }
  }

  // Check that the field width, if given as a constant, is non-zero.
  const OptionalAmount &Amt = FS.getFieldWidth();
  if (Amt.getHowSpecified() == OptionalAmount::Constant) {
    if (Amt.getConstantAmount() == 0) {
      const CharSourceRange &R = getSpecifierRange(Amt.getStart(),
                                                   Amt.getConstantLength());
      S.Diag(getLocationOfByte(Amt.getStart()),
             diag::warn_scanf_nonzero_width)
        << R << FixItHint::CreateRemoval(R);
    }
  }

  if (!FS.consumesDataArgument()) {
    // FIXME: Technically specifying a precision or field width here
    // makes no sense.  Worth issuing a warning at some point.
    return true;
  }

  // Consume the argument.
  unsigned argIndex = FS.getArgIndex();
  if (argIndex < NumDataArgs) {
      // The check to see if the argIndex is valid will come later.
      // We set the bit here because we may exit early from this
      // function if we encounter some other error.
    CoveredArgs.set(argIndex);
  }

  // Check the length modifier is valid with the given conversion specifier.
  const LengthModifier &LM = FS.getLengthModifier();
  if (!FS.hasValidLengthModifier()) {
    S.Diag(getLocationOfByte(LM.getStart()),
           diag::warn_format_nonsensical_length)
      << LM.toString() << CS.toString()
      << getSpecifierRange(startSpecifier, specifierLen)
      << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(),
                                                    LM.getLength()));
  }

  // The remaining checks depend on the data arguments.
  if (HasVAListArg)
    return true;

  if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
    return false;

  // FIXME: Check that the argument type matches the format specifier.

  return true;
}

// CheckFormatString - Check the literal format string FExpr of TheCall.
// Wide and empty literals are diagnosed and not analysed further; otherwise
// the string is parsed with a printf or scanf handler depending on isPrintf.
void Sema::CheckFormatString(const StringLiteral *FExpr,
                             const Expr *OrigFormatExpr,
                             const CallExpr *TheCall, bool HasVAListArg,
                             unsigned format_idx, unsigned firstDataArg,
                             bool isPrintf) {

  // CHECK: is the format string a wide literal?
  if (FExpr->isWide()) {
    Diag(FExpr->getLocStart(),
         diag::warn_format_string_is_wide_literal)
    << OrigFormatExpr->getSourceRange();
    return;
  }

  // Str - The format string.  NOTE: this is NOT null-terminated!
  const char *Str = FExpr->getStrData();

  // CHECK: empty format string?
  unsigned StrLen = FExpr->getByteLength();

  if (StrLen == 0) {
    Diag(FExpr->getLocStart(), diag::warn_empty_format_string)
    << OrigFormatExpr->getSourceRange();
    return;
  }

  if (isPrintf) {
    // The number of data arguments is everything from firstDataArg onwards.
    CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
                         TheCall->getNumArgs() - firstDataArg,
                         isa<ObjCStringLiteral>(OrigFormatExpr), Str,
                         HasVAListArg, TheCall, format_idx);

    // DoneProcessing runs only when the parser returns false.
    if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen))
      H.DoneProcessing();
  }
  else {
    CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
                        TheCall->getNumArgs() - firstDataArg,
                        isa<ObjCStringLiteral>(OrigFormatExpr), Str,
                        HasVAListArg, TheCall, format_idx);

    // DoneProcessing runs only when the parser returns false.
    if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen))
      H.DoneProcessing();
  }
}

//===--- CHECK: Return Address of Stack Variable --------------------------===//

static DeclRefExpr* EvalVal(Expr *E);
static DeclRefExpr* EvalAddr(Expr* E);

/// CheckReturnStackAddr - Check if a return statement returns the address
///   of a
stack variable. 1793void 1794Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 1795 SourceLocation ReturnLoc) { 1796 1797 // Perform checking for returned stack addresses. 1798 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 1799 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 1800 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 1801 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1802 1803 // Skip over implicit cast expressions when checking for block expressions. 1804 RetValExp = RetValExp->IgnoreParenCasts(); 1805 1806 if (BlockExpr *C = dyn_cast<BlockExpr>(RetValExp)) 1807 if (C->hasBlockDeclRefExprs()) 1808 Diag(C->getLocStart(), diag::err_ret_local_block) 1809 << C->getSourceRange(); 1810 1811 if (AddrLabelExpr *ALE = dyn_cast<AddrLabelExpr>(RetValExp)) 1812 Diag(ALE->getLocStart(), diag::warn_ret_addr_label) 1813 << ALE->getSourceRange(); 1814 1815 } else if (lhsType->isReferenceType()) { 1816 // Perform checking for stack values returned by reference. 1817 // Check for a reference to the stack 1818 if (DeclRefExpr *DR = EvalVal(RetValExp)) 1819 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 1820 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1821 } 1822} 1823 1824/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 1825/// check if the expression in a return statement evaluates to an address 1826/// to a location on the stack. The recursion is used to traverse the 1827/// AST of the return expression, with recursion backtracking when we 1828/// encounter a subexpression that (1) clearly does not lead to the address 1829/// of a stack variable or (2) is something we cannot determine leads to 1830/// the address of a stack variable based on such local checking. 1831/// 1832/// EvalAddr processes expressions that are pointers that are used as 1833/// references (and not L-values). EvalVal handles all other values. 
/// At the base case of the recursion is a check for a DeclRefExpr* that
/// refers to a stack variable.
///
/// This implementation handles:
///
///   * pointer-to-pointer casts
///   * implicit conversions from array references to pointers
///   * taking the address of fields
///   * arbitrary interplay between "&" and "*" operators
///   * pointer arithmetic from an address of a stack variable
///   * taking the address of an array element where the array is on the stack
///
/// Returns the DeclRefExpr found at the base of the expression, or null when
/// the expression is not (or cannot be shown to be) derived from one.
static DeclRefExpr* EvalAddr(Expr *E) {
  // We should only be called for evaluating pointer expressions.
  assert((E->getType()->isAnyPointerType() ||
          E->getType()->isBlockPointerType() ||
          E->getType()->isObjCQualifiedIdType()) &&
         "EvalAddr only works on pointers");

  // Our "symbolic interpreter" is just a dispatch off the currently
  // viewed AST node.  We then recursively traverse the AST by calling
  // EvalAddr and EvalVal appropriately.
  switch (E->getStmtClass()) {
  case Stmt::ParenExprClass:
    // Ignore parentheses.
    return EvalAddr(cast<ParenExpr>(E)->getSubExpr());

  case Stmt::UnaryOperatorClass: {
    // The only unary operator that makes sense to handle here
    // is AddrOf.  All others don't make sense as pointers.
    UnaryOperator *U = cast<UnaryOperator>(E);

    if (U->getOpcode() == UnaryOperator::AddrOf)
      return EvalVal(U->getSubExpr());
    else
      return NULL;
  }

  case Stmt::BinaryOperatorClass: {
    // Handle pointer arithmetic.  All other binary operators are not valid
    // in this context.
    BinaryOperator *B = cast<BinaryOperator>(E);
    BinaryOperator::Opcode op = B->getOpcode();

    if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
      return NULL;

    Expr *Base = B->getLHS();

    // Determine which argument is the real pointer base.  It could be
    // the RHS argument instead of the LHS.
    if (!Base->getType()->isPointerType()) Base = B->getRHS();

    assert (Base->getType()->isPointerType());
    return EvalAddr(Base);
  }

  // For conditional operators we need to see if either the LHS or RHS are
  // valid DeclRefExpr*s.  If one of them is valid, we return it.  The LHS
  // is tried first, so it wins when both sides would match.
  case Stmt::ConditionalOperatorClass: {
    ConditionalOperator *C = cast<ConditionalOperator>(E);

    // Handle the GNU extension for missing LHS.
    if (Expr *lhsExpr = C->getLHS())
      if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
        return LHS;

    return EvalAddr(C->getRHS());
  }

  // For casts, we need to handle conversions from arrays to
  // pointer values, and pointer-to-pointer conversions.
  case Stmt::ImplicitCastExprClass:
  case Stmt::CStyleCastExprClass:
  case Stmt::CXXFunctionalCastExprClass: {
    Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
    QualType T = SubExpr->getType();

    if (SubExpr->getType()->isPointerType() ||
        SubExpr->getType()->isBlockPointerType() ||
        SubExpr->getType()->isObjCQualifiedIdType())
      return EvalAddr(SubExpr);
    else if (T->isArrayType())
      // Array-to-pointer conversion: the array itself is evaluated as a value.
      return EvalVal(SubExpr);
    else
      return 0;
  }

  // C++ casts.  For dynamic casts, static casts, and const casts, we
  // are always converting from a pointer-to-pointer, so we just blow
  // through the cast.  In the case the dynamic cast doesn't fail (and
  // return NULL), we take the conservative route and report cases
  // where we return the address of a stack variable.  reinterpret_cast
  // is given the same treatment.
  // FIXME: The comment above is wrong; we're not always converting
  //   from pointer to pointer. I'm guessing that this code should also
  //   handle references to objects.
  case Stmt::CXXStaticCastExprClass:
  case Stmt::CXXDynamicCastExprClass:
  case Stmt::CXXConstCastExprClass:
  case Stmt::CXXReinterpretCastExprClass: {
      Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
      if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
        return EvalAddr(S);
      else
        return NULL;
  }

  // Everything else: we simply don't reason about them.
  default:
    return NULL;
  }
}


/// EvalVal - This function complements EvalAddr in the mutual recursion.
///   See the comments for EvalAddr for more details.
///
/// Returns the DeclRefExpr naming a non-reference variable with local
/// storage that the expression resolves to, or null otherwise.
static DeclRefExpr* EvalVal(Expr *E) {

  // We should only be called for evaluating non-pointer expressions, or
  // expressions with a pointer type that are not used as references but instead
  // are l-values (e.g., DeclRefExpr with a pointer type).

  // Our "symbolic interpreter" is just a dispatch off the currently
  // viewed AST node.  We then recursively traverse the AST by calling
  // EvalAddr and EvalVal appropriately.
  switch (E->getStmtClass()) {
  case Stmt::DeclRefExprClass: {
    // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
    //  at code that refers to a variable's name.  We check if it has local
    //  storage within the function, and if so, return the expression.
    //  Variables of reference type are excluded.
    DeclRefExpr *DR = cast<DeclRefExpr>(E);

    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
      if (V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;

    return NULL;
  }

  case Stmt::ParenExprClass:
    // Ignore parentheses.
    return EvalVal(cast<ParenExpr>(E)->getSubExpr());

  case Stmt::UnaryOperatorClass: {
    // The only unary operator that makes sense to handle here
    // is Deref.  All others don't resolve to a "name."  This includes
    // handling all sorts of rvalues passed to a unary operator.
    UnaryOperator *U = cast<UnaryOperator>(E);

    if (U->getOpcode() == UnaryOperator::Deref)
      return EvalAddr(U->getSubExpr());

    return NULL;
  }

  case Stmt::ArraySubscriptExprClass: {
    // Array subscripts are potential references to data on the stack.  We
    // retrieve the DeclRefExpr* for the array variable if it indeed
    // has local storage.
    // NOTE(review): EvalAddr asserts that its argument has pointer type;
    // this relies on getBase() always being a pointer expression -- confirm.
    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
  }

  case Stmt::ConditionalOperatorClass: {
    // For conditional operators we need to see if either the LHS or RHS are
    // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
    ConditionalOperator *C = cast<ConditionalOperator>(E);

    // Handle the GNU extension for missing LHS.
    if (Expr *lhsExpr = C->getLHS())
      if (DeclRefExpr *LHS = EvalVal(lhsExpr))
        return LHS;

    return EvalVal(C->getRHS());
  }

  // Accesses to members are potential references to data on the stack.
  case Stmt::MemberExprClass: {
    MemberExpr *M = cast<MemberExpr>(E);

    // Check for indirect access.  We only want direct field accesses.
    if (!M->isArrow())
      return EvalVal(M->getBase());
    else
      return NULL;
  }

  // Everything else: we simply don't reason about them.
  default:
    return NULL;
  }
}

//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//

/// Check for comparisons of floating point operands using != and ==.
/// Issue a warning if these are no self-comparisons, as they are not likely
/// to do what the programmer intended.
///
/// The warning is suppressed when both operands name the same declaration,
/// when the first floating literal found (left operand first) is exact, or
/// when either operand is a call to a builtin.
void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
  bool EmitWarning = true;

  Expr* LeftExprSansParen = lex->IgnoreParens();
  Expr* RightExprSansParen = rex->IgnoreParens();

  // Special case: check for x == x (which is OK).
  // Do not emit warnings for such cases.
  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
      if (DRL->getDecl() == DRR->getDecl())
        EmitWarning = false;


  // Special case: check for comparisons against literals that can be exactly
  //  represented by APFloat.  In such cases, do not emit a warning.  This
  //  is a heuristic: often comparison against such literals are used to
  //  detect if a value in a variable has not changed.  This clearly can
  //  lead to false negatives.
  //
  // Note that the right operand is only examined when the left operand is
  // not a floating literal at all.
  if (EmitWarning) {
    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
      if (FLL->isExact())
        EmitWarning = false;
    } else
      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
        if (FLR->isExact())
          EmitWarning = false;
    }
  }

  // Check for comparisons against calls to builtins.
  if (EmitWarning)
    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
      if (CL->isBuiltinCall(Context))
        EmitWarning = false;

  if (EmitWarning)
    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
      if (CR->isBuiltinCall(Context))
        EmitWarning = false;

  // Emit the diagnostic.
  if (EmitWarning)
    Diag(loc, diag::warn_floatingpoint_eq)
      << lex->getSourceRange() << rex->getSourceRange();
}

//===--- CHECK: Integer mixed-sign comparisons (-Wsign-compare) --------===//
//===--- CHECK: Lossy implicit conversions (-Wconversion) --------------===//

namespace {

/// Structure recording the 'active' range of an integer-valued
/// expression.
struct IntRange {
  /// The number of bits active in the int.
  unsigned Width;

  /// True if the int is known not to have negative values.
  bool NonNegative;

  // Note: the default constructor leaves both members uninitialized.
  IntRange() {}
  IntRange(unsigned Width, bool NonNegative)
    : Width(Width), NonNegative(NonNegative)
  {}

  // Returns the range of the bool type: one bit, never negative.
  static IntRange forBoolType() {
    return IntRange(1, true);
  }

  // Returns the range of an integral type.
  static IntRange forType(ASTContext &C, QualType T) {
    return forCanonicalType(C, T->getCanonicalTypeInternal().getTypePtr());
  }

  // Returns the range of an integral type based on its canonical
  // representation.
  static IntRange forCanonicalType(ASTContext &C, const Type *T) {
    assert(T->isCanonicalUnqualified());

    // Vectors and complex types are measured by their element type.
    if (const VectorType *VT = dyn_cast<VectorType>(T))
      T = VT->getElementType().getTypePtr();
    if (const ComplexType *CT = dyn_cast<ComplexType>(T))
      T = CT->getElementType().getTypePtr();

    // Enums use the bit counts recorded on the declaration rather than the
    // width of the type itself.
    if (const EnumType *ET = dyn_cast<EnumType>(T)) {
      EnumDecl *Enum = ET->getDecl();
      unsigned NumPositive = Enum->getNumPositiveBits();
      unsigned NumNegative = Enum->getNumNegativeBits();

      return IntRange(std::max(NumPositive, NumNegative), NumNegative == 0);
    }

    // Anything else must be a builtin integer type.
    const BuiltinType *BT = cast<BuiltinType>(T);
    assert(BT->isInteger());

    return IntRange(C.getIntWidth(QualType(T, 0)), BT->isUnsignedInteger());
  }

  // Returns the supremum of two ranges: i.e. their conservative merge.
  // The result is as wide as the wider input and is non-negative only if
  // both inputs are.
  static IntRange join(IntRange L, IntRange R) {
    return IntRange(std::max(L.Width, R.Width),
                    L.NonNegative && R.NonNegative);
  }

  // Returns the infimum of two ranges: i.e. their aggressive merge.
  // The result is as narrow as the narrower input and is non-negative if
  // either input is.
  static IntRange meet(IntRange L, IntRange R) {
    return IntRange(std::min(L.Width, R.Width),
                    L.NonNegative || R.NonNegative);
  }
};

/// Computes the range occupied by a known integer value.
///
/// Negative signed values report getMinSignedBits() and are flagged as
/// possibly negative.  Other values wider than \p MaxWidth are truncated
/// first (the call's result is not used, so trunc() appears to modify
/// \p value in place) and then report their active bit count as
/// non-negative.
IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) {
  if (value.isSigned() && value.isNegative())
    return IntRange(value.getMinSignedBits(), false);

  if (value.getBitWidth() > MaxWidth)
    value.trunc(MaxWidth);

  // isNonNegative() just checks the sign bit without considering
  // signedness.
  return IntRange(value.getActiveBits(), true);
}

/// Computes the range occupied by an evaluated constant, which may be an
/// integer, a vector (elements are joined), a complex integer (real and
/// imaginary parts are joined), or an l-value.
IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty,
                       unsigned MaxWidth) {
  if (result.isInt())
    return GetValueRange(C, result.getInt(), MaxWidth);

  if (result.isVector()) {
    // Element 0 is read unconditionally, so the vector is assumed to be
    // non-empty.
    IntRange R = GetValueRange(C, result.getVectorElt(0), Ty, MaxWidth);
    for (unsigned i = 1, e = result.getVectorLength(); i != e; ++i) {
      IntRange El = GetValueRange(C, result.getVectorElt(i), Ty, MaxWidth);
      R = IntRange::join(R, El);
    }
    return R;
  }

  if (result.isComplexInt()) {
    IntRange R = GetValueRange(C, result.getComplexIntReal(), MaxWidth);
    IntRange I = GetValueRange(C, result.getComplexIntImag(), MaxWidth);
    return IntRange::join(R, I);
  }

  // This can happen with lossless casts to intptr_t of "based" lvalues.
  // Assume it might use arbitrary bits.
  // FIXME: The only reason we need to pass the type in here is to get
  // the sign right on this one case.  It would be nice if APValue
  // preserved this.
  assert(result.isLValue());
  return IntRange(MaxWidth, Ty->isUnsignedIntegerType());
}

/// Pseudo-evaluate the given integer expression, estimating the
/// range of values it might take.
2185/// 2186/// \param MaxWidth - the width to which the value will be truncated 2187IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) { 2188 E = E->IgnoreParens(); 2189 2190 // Try a full evaluation first. 2191 Expr::EvalResult result; 2192 if (E->Evaluate(result, C)) 2193 return GetValueRange(C, result.Val, E->getType(), MaxWidth); 2194 2195 // I think we only want to look through implicit casts here; if the 2196 // user has an explicit widening cast, we should treat the value as 2197 // being of the new, wider type. 2198 if (ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E)) { 2199 if (CE->getCastKind() == CastExpr::CK_NoOp) 2200 return GetExprRange(C, CE->getSubExpr(), MaxWidth); 2201 2202 IntRange OutputTypeRange = IntRange::forType(C, CE->getType()); 2203 2204 bool isIntegerCast = (CE->getCastKind() == CastExpr::CK_IntegralCast); 2205 if (!isIntegerCast && CE->getCastKind() == CastExpr::CK_Unknown) 2206 isIntegerCast = CE->getSubExpr()->getType()->isIntegerType(); 2207 2208 // Assume that non-integer casts can span the full range of the type. 2209 if (!isIntegerCast) 2210 return OutputTypeRange; 2211 2212 IntRange SubRange 2213 = GetExprRange(C, CE->getSubExpr(), 2214 std::min(MaxWidth, OutputTypeRange.Width)); 2215 2216 // Bail out if the subexpr's range is as wide as the cast type. 2217 if (SubRange.Width >= OutputTypeRange.Width) 2218 return OutputTypeRange; 2219 2220 // Otherwise, we take the smaller width, and we're non-negative if 2221 // either the output type or the subexpr is. 2222 return IntRange(SubRange.Width, 2223 SubRange.NonNegative || OutputTypeRange.NonNegative); 2224 } 2225 2226 if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) { 2227 // If we can fold the condition, just take that operand. 2228 bool CondResult; 2229 if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C)) 2230 return GetExprRange(C, CondResult ? 
CO->getTrueExpr() 2231 : CO->getFalseExpr(), 2232 MaxWidth); 2233 2234 // Otherwise, conservatively merge. 2235 IntRange L = GetExprRange(C, CO->getTrueExpr(), MaxWidth); 2236 IntRange R = GetExprRange(C, CO->getFalseExpr(), MaxWidth); 2237 return IntRange::join(L, R); 2238 } 2239 2240 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) { 2241 switch (BO->getOpcode()) { 2242 2243 // Boolean-valued operations are single-bit and positive. 2244 case BinaryOperator::LAnd: 2245 case BinaryOperator::LOr: 2246 case BinaryOperator::LT: 2247 case BinaryOperator::GT: 2248 case BinaryOperator::LE: 2249 case BinaryOperator::GE: 2250 case BinaryOperator::EQ: 2251 case BinaryOperator::NE: 2252 return IntRange::forBoolType(); 2253 2254 // The type of these compound assignments is the type of the LHS, 2255 // so the RHS is not necessarily an integer. 2256 case BinaryOperator::MulAssign: 2257 case BinaryOperator::DivAssign: 2258 case BinaryOperator::RemAssign: 2259 case BinaryOperator::AddAssign: 2260 case BinaryOperator::SubAssign: 2261 return IntRange::forType(C, E->getType()); 2262 2263 // Operations with opaque sources are black-listed. 2264 case BinaryOperator::PtrMemD: 2265 case BinaryOperator::PtrMemI: 2266 return IntRange::forType(C, E->getType()); 2267 2268 // Bitwise-and uses the *infinum* of the two source ranges. 2269 case BinaryOperator::And: 2270 case BinaryOperator::AndAssign: 2271 return IntRange::meet(GetExprRange(C, BO->getLHS(), MaxWidth), 2272 GetExprRange(C, BO->getRHS(), MaxWidth)); 2273 2274 // Left shift gets black-listed based on a judgement call. 2275 case BinaryOperator::Shl: 2276 // ...except that we want to treat '1 << (blah)' as logically 2277 // positive. It's an important idiom. 
2278 if (IntegerLiteral *I 2279 = dyn_cast<IntegerLiteral>(BO->getLHS()->IgnoreParenCasts())) { 2280 if (I->getValue() == 1) { 2281 IntRange R = IntRange::forType(C, E->getType()); 2282 return IntRange(R.Width, /*NonNegative*/ true); 2283 } 2284 } 2285 // fallthrough 2286 2287 case BinaryOperator::ShlAssign: 2288 return IntRange::forType(C, E->getType()); 2289 2290 // Right shift by a constant can narrow its left argument. 2291 case BinaryOperator::Shr: 2292 case BinaryOperator::ShrAssign: { 2293 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 2294 2295 // If the shift amount is a positive constant, drop the width by 2296 // that much. 2297 llvm::APSInt shift; 2298 if (BO->getRHS()->isIntegerConstantExpr(shift, C) && 2299 shift.isNonNegative()) { 2300 unsigned zext = shift.getZExtValue(); 2301 if (zext >= L.Width) 2302 L.Width = (L.NonNegative ? 0 : 1); 2303 else 2304 L.Width -= zext; 2305 } 2306 2307 return L; 2308 } 2309 2310 // Comma acts as its right operand. 2311 case BinaryOperator::Comma: 2312 return GetExprRange(C, BO->getRHS(), MaxWidth); 2313 2314 // Black-list pointer subtractions. 2315 case BinaryOperator::Sub: 2316 if (BO->getLHS()->getType()->isPointerType()) 2317 return IntRange::forType(C, E->getType()); 2318 // fallthrough 2319 2320 default: 2321 break; 2322 } 2323 2324 // Treat every other operator as if it were closed on the 2325 // narrowest type that encompasses both operands. 2326 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 2327 IntRange R = GetExprRange(C, BO->getRHS(), MaxWidth); 2328 return IntRange::join(L, R); 2329 } 2330 2331 if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { 2332 switch (UO->getOpcode()) { 2333 // Boolean-valued operations are white-listed. 2334 case UnaryOperator::LNot: 2335 return IntRange::forBoolType(); 2336 2337 // Operations with opaque sources are black-listed. 
2338 case UnaryOperator::Deref: 2339 case UnaryOperator::AddrOf: // should be impossible 2340 case UnaryOperator::OffsetOf: 2341 return IntRange::forType(C, E->getType()); 2342 2343 default: 2344 return GetExprRange(C, UO->getSubExpr(), MaxWidth); 2345 } 2346 } 2347 2348 if (dyn_cast<OffsetOfExpr>(E)) { 2349 IntRange::forType(C, E->getType()); 2350 } 2351 2352 FieldDecl *BitField = E->getBitField(); 2353 if (BitField) { 2354 llvm::APSInt BitWidthAP = BitField->getBitWidth()->EvaluateAsInt(C); 2355 unsigned BitWidth = BitWidthAP.getZExtValue(); 2356 2357 return IntRange(BitWidth, BitField->getType()->isUnsignedIntegerType()); 2358 } 2359 2360 return IntRange::forType(C, E->getType()); 2361} 2362 2363IntRange GetExprRange(ASTContext &C, Expr *E) { 2364 return GetExprRange(C, E, C.getIntWidth(E->getType())); 2365} 2366 2367/// Checks whether the given value, which currently has the given 2368/// source semantics, has the same value when coerced through the 2369/// target semantics. 2370bool IsSameFloatAfterCast(const llvm::APFloat &value, 2371 const llvm::fltSemantics &Src, 2372 const llvm::fltSemantics &Tgt) { 2373 llvm::APFloat truncated = value; 2374 2375 bool ignored; 2376 truncated.convert(Src, llvm::APFloat::rmNearestTiesToEven, &ignored); 2377 truncated.convert(Tgt, llvm::APFloat::rmNearestTiesToEven, &ignored); 2378 2379 return truncated.bitwiseIsEqual(value); 2380} 2381 2382/// Checks whether the given value, which currently has the given 2383/// source semantics, has the same value when coerced through the 2384/// target semantics. 2385/// 2386/// The value might be a vector of floats (or a complex number). 
2387bool IsSameFloatAfterCast(const APValue &value, 2388 const llvm::fltSemantics &Src, 2389 const llvm::fltSemantics &Tgt) { 2390 if (value.isFloat()) 2391 return IsSameFloatAfterCast(value.getFloat(), Src, Tgt); 2392 2393 if (value.isVector()) { 2394 for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i) 2395 if (!IsSameFloatAfterCast(value.getVectorElt(i), Src, Tgt)) 2396 return false; 2397 return true; 2398 } 2399 2400 assert(value.isComplexFloat()); 2401 return (IsSameFloatAfterCast(value.getComplexFloatReal(), Src, Tgt) && 2402 IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt)); 2403} 2404 2405void AnalyzeImplicitConversions(Sema &S, Expr *E); 2406 2407bool IsZero(Sema &S, Expr *E) { 2408 llvm::APSInt Value; 2409 return E->isIntegerConstantExpr(Value, S.Context) && Value == 0; 2410} 2411 2412void CheckTrivialUnsignedComparison(Sema &S, BinaryOperator *E) { 2413 BinaryOperator::Opcode op = E->getOpcode(); 2414 if (op == BinaryOperator::LT && IsZero(S, E->getRHS())) { 2415 S.Diag(E->getOperatorLoc(), diag::warn_lunsigned_always_true_comparison) 2416 << "< 0" << "false" 2417 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2418 } else if (op == BinaryOperator::GE && IsZero(S, E->getRHS())) { 2419 S.Diag(E->getOperatorLoc(), diag::warn_lunsigned_always_true_comparison) 2420 << ">= 0" << "true" 2421 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2422 } else if (op == BinaryOperator::GT && IsZero(S, E->getLHS())) { 2423 S.Diag(E->getOperatorLoc(), diag::warn_runsigned_always_true_comparison) 2424 << "0 >" << "false" 2425 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2426 } else if (op == BinaryOperator::LE && IsZero(S, E->getLHS())) { 2427 S.Diag(E->getOperatorLoc(), diag::warn_runsigned_always_true_comparison) 2428 << "0 <=" << "true" 2429 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2430 } 2431} 2432 2433/// Analyze the operands of the given comparison. 
Implements the 2434/// fallback case from AnalyzeComparison. 2435void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) { 2436 AnalyzeImplicitConversions(S, E->getLHS()); 2437 AnalyzeImplicitConversions(S, E->getRHS()); 2438} 2439 2440/// \brief Implements -Wsign-compare. 2441/// 2442/// \param lex the left-hand expression 2443/// \param rex the right-hand expression 2444/// \param OpLoc the location of the joining operator 2445/// \param BinOpc binary opcode or 0 2446void AnalyzeComparison(Sema &S, BinaryOperator *E) { 2447 // The type the comparison is being performed in. 2448 QualType T = E->getLHS()->getType(); 2449 assert(S.Context.hasSameUnqualifiedType(T, E->getRHS()->getType()) 2450 && "comparison with mismatched types"); 2451 2452 // We don't do anything special if this isn't an unsigned integral 2453 // comparison: we're only interested in integral comparisons, and 2454 // signed comparisons only happen in cases we don't care to warn about. 2455 if (!T->hasUnsignedIntegerRepresentation()) 2456 return AnalyzeImpConvsInComparison(S, E); 2457 2458 Expr *lex = E->getLHS()->IgnoreParenImpCasts(); 2459 Expr *rex = E->getRHS()->IgnoreParenImpCasts(); 2460 2461 // Check to see if one of the (unmodified) operands is of different 2462 // signedness. 2463 Expr *signedOperand, *unsignedOperand; 2464 if (lex->getType()->hasSignedIntegerRepresentation()) { 2465 assert(!rex->getType()->hasSignedIntegerRepresentation() && 2466 "unsigned comparison between two signed integer expressions?"); 2467 signedOperand = lex; 2468 unsignedOperand = rex; 2469 } else if (rex->getType()->hasSignedIntegerRepresentation()) { 2470 signedOperand = rex; 2471 unsignedOperand = lex; 2472 } else { 2473 CheckTrivialUnsignedComparison(S, E); 2474 return AnalyzeImpConvsInComparison(S, E); 2475 } 2476 2477 // Otherwise, calculate the effective range of the signed operand. 
2478 IntRange signedRange = GetExprRange(S.Context, signedOperand); 2479 2480 // Go ahead and analyze implicit conversions in the operands. Note 2481 // that we skip the implicit conversions on both sides. 2482 AnalyzeImplicitConversions(S, lex); 2483 AnalyzeImplicitConversions(S, rex); 2484 2485 // If the signed range is non-negative, -Wsign-compare won't fire, 2486 // but we should still check for comparisons which are always true 2487 // or false. 2488 if (signedRange.NonNegative) 2489 return CheckTrivialUnsignedComparison(S, E); 2490 2491 // For (in)equality comparisons, if the unsigned operand is a 2492 // constant which cannot collide with a overflowed signed operand, 2493 // then reinterpreting the signed operand as unsigned will not 2494 // change the result of the comparison. 2495 if (E->isEqualityOp()) { 2496 unsigned comparisonWidth = S.Context.getIntWidth(T); 2497 IntRange unsignedRange = GetExprRange(S.Context, unsignedOperand); 2498 2499 // We should never be unable to prove that the unsigned operand is 2500 // non-negative. 2501 assert(unsignedRange.NonNegative && "unsigned range includes negative?"); 2502 2503 if (unsignedRange.Width < comparisonWidth) 2504 return; 2505 } 2506 2507 S.Diag(E->getOperatorLoc(), diag::warn_mixed_sign_comparison) 2508 << lex->getType() << rex->getType() 2509 << lex->getSourceRange() << rex->getSourceRange(); 2510} 2511 2512/// Diagnose an implicit cast; purely a helper for CheckImplicitConversion. 
void DiagnoseImpCast(Sema &S, Expr *E, QualType T, unsigned diag) {
  // The diagnostic receives the expression's own type, then the
  // destination type T, then the expression's source range.
  S.Diag(E->getExprLoc(), diag) << E->getType() << T << E->getSourceRange();
}

/// Checks a single implicit conversion of \p E to type \p T and reports it
/// if it looks lossy or sign-changing.
///
/// \param S the semantic analyzer used for type queries and diagnostics
/// \param E the expression being converted; dependent expressions are
///        skipped
/// \param T the destination type of the conversion
/// \param ICContext optional flag owned by the conditional-operator
///        checking code.  When non-null and a signedness change is found,
///        the flag is set to true and the conditional-specific diagnostic
///        ID is used instead of the general one.
void CheckImplicitConversion(Sema &S, Expr *E, QualType T,
                             bool *ICContext = 0) {
  if (E->isTypeDependent() || E->isValueDependent()) return;

  // Work on canonical types so that identical types compare equal by
  // pointer.
  const Type *Source = S.Context.getCanonicalType(E->getType()).getTypePtr();
  const Type *Target = S.Context.getCanonicalType(T).getTypePtr();
  if (Source == Target) return;
  if (Target->isDependentType()) return;

  // Never diagnose implicit casts to bool.
  if (Target->isSpecificBuiltinType(BuiltinType::Bool))
    return;

  // Strip vector types.
  if (isa<VectorType>(Source)) {
    if (!isa<VectorType>(Target))
      return DiagnoseImpCast(S, E, T, diag::warn_impcast_vector_scalar);

    Source = cast<VectorType>(Source)->getElementType().getTypePtr();
    Target = cast<VectorType>(Target)->getElementType().getTypePtr();
  }

  // Strip complex types.
  if (isa<ComplexType>(Source)) {
    if (!isa<ComplexType>(Target))
      return DiagnoseImpCast(S, E, T, diag::warn_impcast_complex_scalar);

    Source = cast<ComplexType>(Source)->getElementType().getTypePtr();
    Target = cast<ComplexType>(Target)->getElementType().getTypePtr();
  }

  const BuiltinType *SourceBT = dyn_cast<BuiltinType>(Source);
  const BuiltinType *TargetBT = dyn_cast<BuiltinType>(Target);

  // If the source is floating point...
  if (SourceBT && SourceBT->isFloatingPoint()) {
    // ...and the target is floating point...
    if (TargetBT && TargetBT->isFloatingPoint()) {
      // ...then warn if we're dropping FP rank.

      // Builtin FP kinds are ordered by increasing FP rank.
      if (SourceBT->getKind() > TargetBT->getKind()) {
        // Don't warn about float constants that are precisely
        // representable in the target type.
        Expr::EvalResult result;
        if (E->Evaluate(result, S.Context)) {
          // Value might be a float, a float vector, or a float complex.
          // The target semantics are passed first and the source semantics
          // second: the helper converts to its first semantics argument,
          // then to its second, and compares against the original value.
          if (IsSameFloatAfterCast(result.Val,
                   S.Context.getFloatTypeSemantics(QualType(TargetBT, 0)),
                   S.Context.getFloatTypeSemantics(QualType(SourceBT, 0))))
            return;
        }

        DiagnoseImpCast(S, E, T, diag::warn_impcast_float_precision);
      }
      return;
    }

    // If the target is integral, always warn.
    if ((TargetBT && TargetBT->isInteger()))
      // TODO: don't warn for integer values?
      DiagnoseImpCast(S, E, T, diag::warn_impcast_float_integer);

    return;
  }

  // From here on only integer-to-integer conversions are considered.
  if (!Source->isIntegerType() || !Target->isIntegerType())
    return;

  // Compare the estimated range of the expression against the range of
  // the destination type.
  IntRange SourceRange = GetExprRange(S.Context, E);
  IntRange TargetRange = IntRange::forCanonicalType(S.Context, Target);

  if (SourceRange.Width > TargetRange.Width) {
    // People want to build with -Wshorten-64-to-32 and not -Wconversion
    // and by god we'll let them.
    if (SourceRange.Width == 64 && TargetRange.Width == 32)
      return DiagnoseImpCast(S, E, T, diag::warn_impcast_integer_64_32);
    return DiagnoseImpCast(S, E, T, diag::warn_impcast_integer_precision);
  }

  // Sign change: either a possibly-negative source going to a non-negative
  // target, or a non-negative source of the same width going to a signed
  // target.
  if ((TargetRange.NonNegative && !SourceRange.NonNegative) ||
      (!TargetRange.NonNegative && SourceRange.NonNegative &&
       SourceRange.Width == TargetRange.Width)) {
    unsigned DiagID = diag::warn_impcast_integer_sign;

    // Traditionally, gcc has warned about this under -Wsign-compare.
    // We also want to warn about it in -Wconversion.
    // So if -Wconversion is off, use a completely identical diagnostic
    // in the sign-compare group.
    // The conditional-checking code will inspect the flag set through
    // ICContext to decide whether that diagnostic still needs emitting.
    if (ICContext) {
      DiagID = diag::warn_impcast_integer_sign_conditional;
      *ICContext = true;
    }

    return DiagnoseImpCast(S, E, T, DiagID);
  }

  return;
}

void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T);

/// Checks one arm of a conditional operator as if it were converted
/// directly to \p T.  Nested conditional operators are handled recursively;
/// \p ICContext is set to true when a signedness change is found.
void CheckConditionalOperand(Sema &S, Expr *E, QualType T,
                             bool &ICContext) {
  E = E->IgnoreParenImpCasts();

  if (isa<ConditionalOperator>(E))
    return CheckConditionalOperator(S, cast<ConditionalOperator>(E), T);

  AnalyzeImplicitConversions(S, E);
  if (E->getType() != T)
    return CheckImplicitConversion(S, E, T, &ICContext);
  return;
}

/// Checks a conditional operator whose result is used as type \p T.  The
/// condition is analyzed on its own; each arm is checked as if it fed
/// directly into \p T.  If a signedness change was seen but not reported
/// under -Wconversion, a -Wsign-compare style diagnostic may be emitted
/// instead.
void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T) {
  AnalyzeImplicitConversions(S, E->getCond());

  bool Suspicious = false;
  CheckConditionalOperand(S, E->getTrueExpr(), T, Suspicious);
  CheckConditionalOperand(S, E->getFalseExpr(), T, Suspicious);

  // If -Wconversion would have warned about either of the candidates
  // for a signedness conversion to the context type...
  if (!Suspicious) return;

  // ...but it's currently ignored...
  // (returns when the level is non-zero; presumably zero is the "ignored"
  // level -- confirm against the diagnostic level enum)
  if (S.Diags.getDiagnosticLevel(diag::warn_impcast_integer_sign_conditional))
    return;

  // ...and -Wsign-compare isn't...
  if (!S.Diags.getDiagnosticLevel(diag::warn_mixed_sign_conditional))
    return;

  // ...then check whether it would have warned about either of the
  // candidates for a signedness conversion to the condition type.
  if (E->getType() != T) {
    Suspicious = false;
    CheckImplicitConversion(S, E->getTrueExpr()->IgnoreParenImpCasts(),
                            E->getType(), &Suspicious);
    if (!Suspicious)
      CheckImplicitConversion(S, E->getFalseExpr()->IgnoreParenImpCasts(),
                              E->getType(), &Suspicious);
    if (!Suspicious)
      return;
  }

  // If so, emit a diagnostic under -Wsign-compare.
  Expr *lex = E->getTrueExpr()->IgnoreParenImpCasts();
  Expr *rex = E->getFalseExpr()->IgnoreParenImpCasts();
  S.Diag(E->getQuestionLoc(), diag::warn_mixed_sign_conditional)
    << lex->getType() << rex->getType()
    << lex->getSourceRange() << rex->getSourceRange();
}

/// AnalyzeImplicitConversions - Find and report any interesting
/// implicit conversions in the given expression.  There are a couple
/// of competing diagnostics here, -Wconversion and -Wsign-compare.
void AnalyzeImplicitConversions(Sema &S, Expr *OrigE) {
  // T is the type after all parentheses and implicit casts; E is the
  // expression underneath them.
  QualType T = OrigE->getType();
  Expr *E = OrigE->IgnoreParenImpCasts();

  // For conditional operators, we analyze the arguments as if they
  // were being fed directly into the output.
  if (isa<ConditionalOperator>(E)) {
    ConditionalOperator *CO = cast<ConditionalOperator>(E);
    CheckConditionalOperator(S, CO, T);
    return;
  }

  // Go ahead and check any implicit conversions we might have skipped.
  // The non-canonical typecheck is just an optimization;
  // CheckImplicitConversion will filter out dead implicit conversions.
  if (E->getType() != T)
    CheckImplicitConversion(S, E, T);

  // Now continue drilling into this expression.

  // Skip past explicit casts.
  if (isa<ExplicitCastExpr>(E)) {
    E = cast<ExplicitCastExpr>(E)->getSubExpr()->IgnoreParenImpCasts();
    return AnalyzeImplicitConversions(S, E);
  }

  // Do a somewhat different check with comparison operators.
  if (isa<BinaryOperator>(E) && cast<BinaryOperator>(E)->isComparisonOp())
    return AnalyzeComparison(S, cast<BinaryOperator>(E));

  // These break the otherwise-useful invariant below.  Fortunately,
  // we don't really need to recurse into them, because any internal
  // expressions should have been analyzed already when they were
  // built into statements.
  if (isa<StmtExpr>(E)) return;

  // Don't descend into unevaluated contexts.
  if (isa<SizeOfAlignOfExpr>(E)) return;

  // Now just recurse over the expression's children.
  // NOTE(review): cast<Expr> assumes every child is a non-null Expr --
  // confirm this holds for all expression kinds that reach this loop.
  for (Stmt::child_iterator I = E->child_begin(), IE = E->child_end();
         I != IE; ++I)
    AnalyzeImplicitConversions(S, cast<Expr>(*I));
}

} // end anonymous namespace

/// Diagnoses "dangerous" implicit conversions within the given
/// expression (which is a full expression).  Implements -Wconversion
/// and -Wsign-compare.
void Sema::CheckImplicitConversions(Expr *E) {
  // Don't diagnose in unevaluated contexts.
  if (ExprEvalContexts.back().Context == Sema::Unevaluated)
    return;

  // Don't diagnose for value- or type-dependent expressions.
  if (E->isTypeDependent() || E->isValueDependent())
    return;

  AnalyzeImplicitConversions(*this, E);
}

/// CheckParmsForFunctionDef - Check that the parameters of the given
/// function are appropriate for the definition of a function. This
/// takes care of any checks that cannot be performed on the
/// declaration itself, e.g., that the types of each of the function
/// parameters are complete.
bool Sema::CheckParmsForFunctionDef(FunctionDecl *FD) {
  bool HasInvalidParm = false;
  for (unsigned p = 0, NumParams = FD->getNumParams(); p < NumParams; ++p) {
    ParmVarDecl *Param = FD->getParamDecl(p);

    // C99 6.7.5.3p4: the parameters in a parameter type list in a
    // function declarator that is part of a function definition of
    // that function shall not have incomplete type.
    //
    // This is also C++ [dcl.fct]p6.
2753 if (!Param->isInvalidDecl() && 2754 RequireCompleteType(Param->getLocation(), Param->getType(), 2755 diag::err_typecheck_decl_incomplete_type)) { 2756 Param->setInvalidDecl(); 2757 HasInvalidParm = true; 2758 } 2759 2760 // C99 6.9.1p5: If the declarator includes a parameter type list, the 2761 // declaration of each parameter shall include an identifier. 2762 if (Param->getIdentifier() == 0 && 2763 !Param->isImplicit() && 2764 !getLangOptions().CPlusPlus) 2765 Diag(Param->getLocation(), diag::err_parameter_name_omitted); 2766 2767 // C99 6.7.5.3p12: 2768 // If the function declarator is not part of a definition of that 2769 // function, parameters may have incomplete type and may use the [*] 2770 // notation in their sequences of declarator specifiers to specify 2771 // variable length array types. 2772 QualType PType = Param->getOriginalType(); 2773 if (const ArrayType *AT = Context.getAsArrayType(PType)) { 2774 if (AT->getSizeModifier() == ArrayType::Star) { 2775 // FIXME: This diagnosic should point the the '[*]' if source-location 2776 // information is added for it. 2777 Diag(Param->getLocation(), diag::err_array_star_in_function_definition); 2778 } 2779 } 2780 } 2781 2782 return HasInvalidParm; 2783} 2784