SemaChecking.cpp revision e737f5041a36d0befb39ffeed8d50ba15916d3da
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Sema/Sema.h" 16#include "clang/Analysis/Analyses/FormatString.h" 17#include "clang/AST/ASTContext.h" 18#include "clang/AST/CharUnits.h" 19#include "clang/AST/DeclObjC.h" 20#include "clang/AST/ExprCXX.h" 21#include "clang/AST/ExprObjC.h" 22#include "clang/AST/DeclObjC.h" 23#include "clang/AST/StmtCXX.h" 24#include "clang/AST/StmtObjC.h" 25#include "clang/Lex/LiteralSupport.h" 26#include "clang/Lex/Preprocessor.h" 27#include "llvm/ADT/BitVector.h" 28#include "llvm/ADT/STLExtras.h" 29#include "llvm/Support/raw_ostream.h" 30#include "clang/Basic/TargetBuiltins.h" 31#include "clang/Basic/TargetInfo.h" 32#include <limits> 33using namespace clang; 34 35/// getLocationOfStringLiteralByte - Return a source location that points to the 36/// specified byte of the specified string literal. 37/// 38/// Strings are amazingly complex. They can be formed from multiple tokens and 39/// can have escape sequences in them in addition to the usual trigraph and 40/// escaped newline business. This routine handles this complexity. 41/// 42SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 43 unsigned ByteNo) const { 44 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 45 46 // Loop over all of the tokens in this string until we find the one that 47 // contains the byte we're looking for. 
48 unsigned TokNo = 0; 49 while (1) { 50 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 51 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 52 53 // Get the spelling of the string so that we can get the data that makes up 54 // the string literal, not the identifier for the macro it is potentially 55 // expanded through. 56 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 57 58 // Re-lex the token to get its length and original spelling. 59 std::pair<FileID, unsigned> LocInfo = 60 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 61 bool Invalid = false; 62 llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid); 63 if (Invalid) 64 return StrTokSpellingLoc; 65 66 const char *StrData = Buffer.data()+LocInfo.second; 67 68 // Create a langops struct and enable trigraphs. This is sufficient for 69 // relexing tokens. 70 LangOptions LangOpts; 71 LangOpts.Trigraphs = true; 72 73 // Create a lexer starting at the beginning of this token. 74 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData, 75 Buffer.end()); 76 Token TheTok; 77 TheLexer.LexFromRawLexer(TheTok); 78 79 // Use the StringLiteralParser to compute the length of the string in bytes. 80 StringLiteralParser SLP(&TheTok, 1, PP, /*Complain=*/false); 81 unsigned TokNumBytes = SLP.GetStringLength(); 82 83 // If the byte is in this token, return the location of the byte. 84 if (ByteNo < TokNumBytes || 85 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 86 unsigned Offset = 87 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP, 88 /*Complain=*/false); 89 90 // Now that we know the offset of the token in the spelling, use the 91 // preprocessor to get the offset in the original source. 92 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 93 } 94 95 // Move to the next string token. 
96 ++TokNo; 97 ByteNo -= TokNumBytes; 98 } 99} 100 101/// CheckablePrintfAttr - does a function call have a "printf" attribute 102/// and arguments that merit checking? 103bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) { 104 if (Format->getType() == "printf") return true; 105 if (Format->getType() == "printf0") { 106 // printf0 allows null "format" string; if so don't check format/args 107 unsigned format_idx = Format->getFormatIdx() - 1; 108 // Does the index refer to the implicit object argument? 109 if (isa<CXXMemberCallExpr>(TheCall)) { 110 if (format_idx == 0) 111 return false; 112 --format_idx; 113 } 114 if (format_idx < TheCall->getNumArgs()) { 115 Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts(); 116 if (!Format->isNullPointerConstant(Context, 117 Expr::NPC_ValueDependentIsNull)) 118 return true; 119 } 120 } 121 return false; 122} 123 124Action::OwningExprResult 125Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 126 OwningExprResult TheCallResult(Owned(TheCall)); 127 128 switch (BuiltinID) { 129 case Builtin::BI__builtin___CFStringMakeConstantString: 130 assert(TheCall->getNumArgs() == 1 && 131 "Wrong # arguments to builtin CFStringMakeConstantString"); 132 if (CheckObjCString(TheCall->getArg(0))) 133 return ExprError(); 134 break; 135 case Builtin::BI__builtin_stdarg_start: 136 case Builtin::BI__builtin_va_start: 137 if (SemaBuiltinVAStart(TheCall)) 138 return ExprError(); 139 break; 140 case Builtin::BI__builtin_isgreater: 141 case Builtin::BI__builtin_isgreaterequal: 142 case Builtin::BI__builtin_isless: 143 case Builtin::BI__builtin_islessequal: 144 case Builtin::BI__builtin_islessgreater: 145 case Builtin::BI__builtin_isunordered: 146 if (SemaBuiltinUnorderedCompare(TheCall)) 147 return ExprError(); 148 break; 149 case Builtin::BI__builtin_fpclassify: 150 if (SemaBuiltinFPClassification(TheCall, 6)) 151 return ExprError(); 152 break; 153 case Builtin::BI__builtin_isfinite: 154 case 
Builtin::BI__builtin_isinf: 155 case Builtin::BI__builtin_isinf_sign: 156 case Builtin::BI__builtin_isnan: 157 case Builtin::BI__builtin_isnormal: 158 if (SemaBuiltinFPClassification(TheCall, 1)) 159 return ExprError(); 160 break; 161 case Builtin::BI__builtin_return_address: 162 case Builtin::BI__builtin_frame_address: { 163 llvm::APSInt Result; 164 if (SemaBuiltinConstantArg(TheCall, 0, Result)) 165 return ExprError(); 166 break; 167 } 168 case Builtin::BI__builtin_eh_return_data_regno: { 169 llvm::APSInt Result; 170 if (SemaBuiltinConstantArg(TheCall, 0, Result)) 171 return ExprError(); 172 break; 173 } 174 case Builtin::BI__builtin_shufflevector: 175 return SemaBuiltinShuffleVector(TheCall); 176 // TheCall will be freed by the smart pointer here, but that's fine, since 177 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 178 case Builtin::BI__builtin_prefetch: 179 if (SemaBuiltinPrefetch(TheCall)) 180 return ExprError(); 181 break; 182 case Builtin::BI__builtin_object_size: 183 if (SemaBuiltinObjectSize(TheCall)) 184 return ExprError(); 185 break; 186 case Builtin::BI__builtin_longjmp: 187 if (SemaBuiltinLongjmp(TheCall)) 188 return ExprError(); 189 break; 190 case Builtin::BI__sync_fetch_and_add: 191 case Builtin::BI__sync_fetch_and_sub: 192 case Builtin::BI__sync_fetch_and_or: 193 case Builtin::BI__sync_fetch_and_and: 194 case Builtin::BI__sync_fetch_and_xor: 195 case Builtin::BI__sync_add_and_fetch: 196 case Builtin::BI__sync_sub_and_fetch: 197 case Builtin::BI__sync_and_and_fetch: 198 case Builtin::BI__sync_or_and_fetch: 199 case Builtin::BI__sync_xor_and_fetch: 200 case Builtin::BI__sync_val_compare_and_swap: 201 case Builtin::BI__sync_bool_compare_and_swap: 202 case Builtin::BI__sync_lock_test_and_set: 203 case Builtin::BI__sync_lock_release: 204 return SemaBuiltinAtomicOverloaded(move(TheCallResult)); 205 } 206 207 // Since the target specific builtins for each arch overlap, only check those 208 // of the arch we are compiling for. 
209 if (BuiltinID >= Builtin::FirstTSBuiltin) { 210 switch (Context.Target.getTriple().getArch()) { 211 case llvm::Triple::arm: 212 case llvm::Triple::thumb: 213 if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall)) 214 return ExprError(); 215 break; 216 case llvm::Triple::x86: 217 case llvm::Triple::x86_64: 218 if (CheckX86BuiltinFunctionCall(BuiltinID, TheCall)) 219 return ExprError(); 220 break; 221 default: 222 break; 223 } 224 } 225 226 return move(TheCallResult); 227} 228 229bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 230 switch (BuiltinID) { 231 case X86::BI__builtin_ia32_palignr128: 232 case X86::BI__builtin_ia32_palignr: { 233 llvm::APSInt Result; 234 if (SemaBuiltinConstantArg(TheCall, 2, Result)) 235 return true; 236 break; 237 } 238 } 239 return false; 240} 241 242// Get the valid immediate range for the specified NEON type code. 243static unsigned RFT(unsigned t, bool shift = false) { 244 bool quad = t & 0x10; 245 246 switch (t & 0x7) { 247 case 0: // i8 248 return shift ? 7 : (8 << (int)quad) - 1; 249 case 1: // i16 250 return shift ? 15 : (4 << (int)quad) - 1; 251 case 2: // i32 252 return shift ? 31 : (2 << (int)quad) - 1; 253 case 3: // i64 254 return shift ? 
63 : (1 << (int)quad) - 1; 255 case 4: // f32 256 assert(!shift && "cannot shift float types!"); 257 return (2 << (int)quad) - 1; 258 case 5: // poly8 259 assert(!shift && "cannot shift polynomial types!"); 260 return (8 << (int)quad) - 1; 261 case 6: // poly16 262 assert(!shift && "cannot shift polynomial types!"); 263 return (4 << (int)quad) - 1; 264 case 7: // float16 265 assert(!shift && "cannot shift float types!"); 266 return (4 << (int)quad) - 1; 267 } 268 return 0; 269} 270 271bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 272 llvm::APSInt Result; 273 274 unsigned mask = 0; 275 unsigned TV = 0; 276 switch (BuiltinID) { 277#define GET_NEON_OVERLOAD_CHECK 278#include "clang/Basic/arm_neon.inc" 279#undef GET_NEON_OVERLOAD_CHECK 280 } 281 282 // For NEON intrinsics which are overloaded on vector element type, validate 283 // the immediate which specifies which variant to emit. 284 if (mask) { 285 unsigned ArgNo = TheCall->getNumArgs()-1; 286 if (SemaBuiltinConstantArg(TheCall, ArgNo, Result)) 287 return true; 288 289 TV = Result.getLimitedValue(32); 290 if ((TV > 31) || (mask & (1 << TV)) == 0) 291 return Diag(TheCall->getLocStart(), diag::err_invalid_neon_type_code) 292 << TheCall->getArg(ArgNo)->getSourceRange(); 293 } 294 295 // For NEON intrinsics which take an immediate value as part of the 296 // instruction, range check them here. 297 unsigned i = 0, l = 0, u = 0; 298 switch (BuiltinID) { 299 default: return false; 300 case ARM::BI__builtin_arm_ssat: i = 1; l = 1; u = 31; break; 301 case ARM::BI__builtin_arm_usat: i = 1; u = 31; break; 302 case ARM::BI__builtin_arm_vcvtr_f: 303 case ARM::BI__builtin_arm_vcvtr_d: i = 1; u = 1; break; 304#define GET_NEON_IMMEDIATE_CHECK 305#include "clang/Basic/arm_neon.inc" 306#undef GET_NEON_IMMEDIATE_CHECK 307 }; 308 309 // Check that the immediate argument is actually a constant. 
310 if (SemaBuiltinConstantArg(TheCall, i, Result)) 311 return true; 312 313 // Range check against the upper/lower values for this isntruction. 314 unsigned Val = Result.getZExtValue(); 315 if (Val < l || Val > (u + l)) 316 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 317 << l << u+l << TheCall->getArg(i)->getSourceRange(); 318 319 // FIXME: VFP Intrinsics should error if VFP not present. 320 return false; 321} 322 323/// CheckFunctionCall - Check a direct function call for various correctness 324/// and safety properties not strictly enforced by the C type system. 325bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 326 // Get the IdentifierInfo* for the called function. 327 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 328 329 // None of the checks below are needed for functions that don't have 330 // simple names (e.g., C++ conversion functions). 331 if (!FnInfo) 332 return false; 333 334 // FIXME: This mechanism should be abstracted to be less fragile and 335 // more efficient. For example, just map function ids to custom 336 // handlers. 337 338 // Printf checking. 339 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 340 const bool b = Format->getType() == "scanf"; 341 if (b || CheckablePrintfAttr(Format, TheCall)) { 342 bool HasVAListArg = Format->getFirstArg() == 0; 343 CheckPrintfScanfArguments(TheCall, HasVAListArg, 344 Format->getFormatIdx() - 1, 345 HasVAListArg ? 0 : Format->getFirstArg() - 1, 346 !b); 347 } 348 } 349 350 for (const NonNullAttr *NonNull = FDecl->getAttr<NonNullAttr>(); NonNull; 351 NonNull = NonNull->getNext<NonNullAttr>()) 352 CheckNonNullArguments(NonNull, TheCall); 353 354 return false; 355} 356 357bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { 358 // Printf checking. 
359 const FormatAttr *Format = NDecl->getAttr<FormatAttr>(); 360 if (!Format) 361 return false; 362 363 const VarDecl *V = dyn_cast<VarDecl>(NDecl); 364 if (!V) 365 return false; 366 367 QualType Ty = V->getType(); 368 if (!Ty->isBlockPointerType()) 369 return false; 370 371 const bool b = Format->getType() == "scanf"; 372 if (!b && !CheckablePrintfAttr(Format, TheCall)) 373 return false; 374 375 bool HasVAListArg = Format->getFirstArg() == 0; 376 CheckPrintfScanfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 377 HasVAListArg ? 0 : Format->getFirstArg() - 1, !b); 378 379 return false; 380} 381 382/// SemaBuiltinAtomicOverloaded - We have a call to a function like 383/// __sync_fetch_and_add, which is an overloaded function based on the pointer 384/// type of its first argument. The main ActOnCallExpr routines have already 385/// promoted the types of arguments because all of these calls are prototyped as 386/// void(...). 387/// 388/// This function goes through and does final semantic checking for these 389/// builtins, 390Sema::OwningExprResult 391Sema::SemaBuiltinAtomicOverloaded(OwningExprResult TheCallResult) { 392 CallExpr *TheCall = (CallExpr *)TheCallResult.get(); 393 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 394 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 395 396 // Ensure that we have at least one argument to do type inference from. 397 if (TheCall->getNumArgs() < 1) { 398 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) 399 << 0 << 1 << TheCall->getNumArgs() 400 << TheCall->getCallee()->getSourceRange(); 401 return ExprError(); 402 } 403 404 // Inspect the first argument of the atomic builtin. This should always be 405 // a pointer type, whose element is an integral scalar or pointer type. 406 // Because it is a pointer type, we don't have to worry about any implicit 407 // casts here. 408 // FIXME: We don't allow floating point scalars as input. 
409 Expr *FirstArg = TheCall->getArg(0); 410 if (!FirstArg->getType()->isPointerType()) { 411 Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer) 412 << FirstArg->getType() << FirstArg->getSourceRange(); 413 return ExprError(); 414 } 415 416 QualType ValType = 417 FirstArg->getType()->getAs<PointerType>()->getPointeeType(); 418 if (!ValType->isIntegerType() && !ValType->isPointerType() && 419 !ValType->isBlockPointerType()) { 420 Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer_intptr) 421 << FirstArg->getType() << FirstArg->getSourceRange(); 422 return ExprError(); 423 } 424 425 // The majority of builtins return a value, but a few have special return 426 // types, so allow them to override appropriately below. 427 QualType ResultType = ValType; 428 429 // We need to figure out which concrete builtin this maps onto. For example, 430 // __sync_fetch_and_add with a 2 byte object turns into 431 // __sync_fetch_and_add_2. 432#define BUILTIN_ROW(x) \ 433 { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \ 434 Builtin::BI##x##_8, Builtin::BI##x##_16 } 435 436 static const unsigned BuiltinIndices[][5] = { 437 BUILTIN_ROW(__sync_fetch_and_add), 438 BUILTIN_ROW(__sync_fetch_and_sub), 439 BUILTIN_ROW(__sync_fetch_and_or), 440 BUILTIN_ROW(__sync_fetch_and_and), 441 BUILTIN_ROW(__sync_fetch_and_xor), 442 443 BUILTIN_ROW(__sync_add_and_fetch), 444 BUILTIN_ROW(__sync_sub_and_fetch), 445 BUILTIN_ROW(__sync_and_and_fetch), 446 BUILTIN_ROW(__sync_or_and_fetch), 447 BUILTIN_ROW(__sync_xor_and_fetch), 448 449 BUILTIN_ROW(__sync_val_compare_and_swap), 450 BUILTIN_ROW(__sync_bool_compare_and_swap), 451 BUILTIN_ROW(__sync_lock_test_and_set), 452 BUILTIN_ROW(__sync_lock_release) 453 }; 454#undef BUILTIN_ROW 455 456 // Determine the index of the size. 
457 unsigned SizeIndex; 458 switch (Context.getTypeSizeInChars(ValType).getQuantity()) { 459 case 1: SizeIndex = 0; break; 460 case 2: SizeIndex = 1; break; 461 case 4: SizeIndex = 2; break; 462 case 8: SizeIndex = 3; break; 463 case 16: SizeIndex = 4; break; 464 default: 465 Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size) 466 << FirstArg->getType() << FirstArg->getSourceRange(); 467 return ExprError(); 468 } 469 470 // Each of these builtins has one pointer argument, followed by some number of 471 // values (0, 1 or 2) followed by a potentially empty varags list of stuff 472 // that we ignore. Find out which row of BuiltinIndices to read from as well 473 // as the number of fixed args. 474 unsigned BuiltinID = FDecl->getBuiltinID(); 475 unsigned BuiltinIndex, NumFixed = 1; 476 switch (BuiltinID) { 477 default: assert(0 && "Unknown overloaded atomic builtin!"); 478 case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break; 479 case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break; 480 case Builtin::BI__sync_fetch_and_or: BuiltinIndex = 2; break; 481 case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break; 482 case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break; 483 484 case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 5; break; 485 case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 6; break; 486 case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 7; break; 487 case Builtin::BI__sync_or_and_fetch: BuiltinIndex = 8; break; 488 case Builtin::BI__sync_xor_and_fetch: BuiltinIndex = 9; break; 489 490 case Builtin::BI__sync_val_compare_and_swap: 491 BuiltinIndex = 10; 492 NumFixed = 2; 493 break; 494 case Builtin::BI__sync_bool_compare_and_swap: 495 BuiltinIndex = 11; 496 NumFixed = 2; 497 ResultType = Context.BoolTy; 498 break; 499 case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 12; break; 500 case Builtin::BI__sync_lock_release: 501 BuiltinIndex = 13; 502 NumFixed = 0; 503 ResultType = Context.VoidTy; 504 break; 
505 } 506 507 // Now that we know how many fixed arguments we expect, first check that we 508 // have at least that many. 509 if (TheCall->getNumArgs() < 1+NumFixed) { 510 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) 511 << 0 << 1+NumFixed << TheCall->getNumArgs() 512 << TheCall->getCallee()->getSourceRange(); 513 return ExprError(); 514 } 515 516 // Get the decl for the concrete builtin from this, we can tell what the 517 // concrete integer type we should convert to is. 518 unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex]; 519 const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID); 520 IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName); 521 FunctionDecl *NewBuiltinDecl = 522 cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID, 523 TUScope, false, DRE->getLocStart())); 524 525 // The first argument --- the pointer --- has a fixed type; we 526 // deduce the types of the rest of the arguments accordingly. Walk 527 // the remaining arguments, converting them to the deduced value type. 528 for (unsigned i = 0; i != NumFixed; ++i) { 529 Expr *Arg = TheCall->getArg(i+1); 530 531 // If the argument is an implicit cast, then there was a promotion due to 532 // "...", just remove it now. 533 if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) { 534 Arg = ICE->getSubExpr(); 535 ICE->setSubExpr(0); 536 TheCall->setArg(i+1, Arg); 537 } 538 539 // GCC does an implicit conversion to the pointer or integer ValType. This 540 // can fail in some cases (1i -> int**), check for this error case now. 541 CastExpr::CastKind Kind = CastExpr::CK_Unknown; 542 CXXCastPath BasePath; 543 if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg, Kind, BasePath)) 544 return ExprError(); 545 546 // Okay, we have something that *can* be converted to the right type. Check 547 // to see if there is a potentially weird extension going on here. 
This can 548 // happen when you do an atomic operation on something like an char* and 549 // pass in 42. The 42 gets converted to char. This is even more strange 550 // for things like 45.123 -> char, etc. 551 // FIXME: Do this check. 552 ImpCastExprToType(Arg, ValType, Kind, ImplicitCastExpr::RValue, &BasePath); 553 TheCall->setArg(i+1, Arg); 554 } 555 556 // Switch the DeclRefExpr to refer to the new decl. 557 DRE->setDecl(NewBuiltinDecl); 558 DRE->setType(NewBuiltinDecl->getType()); 559 560 // Set the callee in the CallExpr. 561 // FIXME: This leaks the original parens and implicit casts. 562 Expr *PromotedCall = DRE; 563 UsualUnaryConversions(PromotedCall); 564 TheCall->setCallee(PromotedCall); 565 566 // Change the result type of the call to match the original value type. This 567 // is arbitrary, but the codegen for these builtins ins design to handle it 568 // gracefully. 569 TheCall->setType(ResultType); 570 571 return move(TheCallResult); 572} 573 574 575/// CheckObjCString - Checks that the argument to the builtin 576/// CFString constructor is correct 577/// FIXME: GCC currently emits the following warning: 578/// "warning: input conversion stopped due to an input byte that does not 579/// belong to the input codeset UTF-8" 580/// Note: It might also make sense to do the UTF-16 conversion here (would 581/// simplify the backend). 
582bool Sema::CheckObjCString(Expr *Arg) { 583 Arg = Arg->IgnoreParenCasts(); 584 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 585 586 if (!Literal || Literal->isWide()) { 587 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 588 << Arg->getSourceRange(); 589 return true; 590 } 591 592 const char *Data = Literal->getStrData(); 593 unsigned Length = Literal->getByteLength(); 594 595 for (unsigned i = 0; i < Length; ++i) { 596 if (!Data[i]) { 597 Diag(getLocationOfStringLiteralByte(Literal, i), 598 diag::warn_cfstring_literal_contains_nul_character) 599 << Arg->getSourceRange(); 600 break; 601 } 602 } 603 604 return false; 605} 606 607/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 608/// Emit an error and return true on failure, return false on success. 609bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 610 Expr *Fn = TheCall->getCallee(); 611 if (TheCall->getNumArgs() > 2) { 612 Diag(TheCall->getArg(2)->getLocStart(), 613 diag::err_typecheck_call_too_many_args) 614 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 615 << Fn->getSourceRange() 616 << SourceRange(TheCall->getArg(2)->getLocStart(), 617 (*(TheCall->arg_end()-1))->getLocEnd()); 618 return true; 619 } 620 621 if (TheCall->getNumArgs() < 2) { 622 return Diag(TheCall->getLocEnd(), 623 diag::err_typecheck_call_too_few_args_at_least) 624 << 0 /*function call*/ << 2 << TheCall->getNumArgs(); 625 } 626 627 // Determine whether the current function is variadic or not. 
628 BlockScopeInfo *CurBlock = getCurBlock(); 629 bool isVariadic; 630 if (CurBlock) 631 isVariadic = CurBlock->TheDecl->isVariadic(); 632 else if (FunctionDecl *FD = getCurFunctionDecl()) 633 isVariadic = FD->isVariadic(); 634 else 635 isVariadic = getCurMethodDecl()->isVariadic(); 636 637 if (!isVariadic) { 638 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 639 return true; 640 } 641 642 // Verify that the second argument to the builtin is the last argument of the 643 // current function or method. 644 bool SecondArgIsLastNamedArgument = false; 645 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 646 647 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 648 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 649 // FIXME: This isn't correct for methods (results in bogus warning). 650 // Get the last formal in the current function. 651 const ParmVarDecl *LastArg; 652 if (CurBlock) 653 LastArg = *(CurBlock->TheDecl->param_end()-1); 654 else if (FunctionDecl *FD = getCurFunctionDecl()) 655 LastArg = *(FD->param_end()-1); 656 else 657 LastArg = *(getCurMethodDecl()->param_end()-1); 658 SecondArgIsLastNamedArgument = PV == LastArg; 659 } 660 } 661 662 if (!SecondArgIsLastNamedArgument) 663 Diag(TheCall->getArg(1)->getLocStart(), 664 diag::warn_second_parameter_of_va_start_not_last_named_argument); 665 return false; 666} 667 668/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 669/// friends. This is declared to take (...), so we have to check everything. 
670bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 671 if (TheCall->getNumArgs() < 2) 672 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 673 << 0 << 2 << TheCall->getNumArgs()/*function call*/; 674 if (TheCall->getNumArgs() > 2) 675 return Diag(TheCall->getArg(2)->getLocStart(), 676 diag::err_typecheck_call_too_many_args) 677 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 678 << SourceRange(TheCall->getArg(2)->getLocStart(), 679 (*(TheCall->arg_end()-1))->getLocEnd()); 680 681 Expr *OrigArg0 = TheCall->getArg(0); 682 Expr *OrigArg1 = TheCall->getArg(1); 683 684 // Do standard promotions between the two arguments, returning their common 685 // type. 686 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 687 688 // Make sure any conversions are pushed back into the call; this is 689 // type safe since unordered compare builtins are declared as "_Bool 690 // foo(...)". 691 TheCall->setArg(0, OrigArg0); 692 TheCall->setArg(1, OrigArg1); 693 694 if (OrigArg0->isTypeDependent() || OrigArg1->isTypeDependent()) 695 return false; 696 697 // If the common type isn't a real floating type, then the arguments were 698 // invalid for this operation. 699 if (!Res->isRealFloatingType()) 700 return Diag(OrigArg0->getLocStart(), 701 diag::err_typecheck_call_invalid_ordered_compare) 702 << OrigArg0->getType() << OrigArg1->getType() 703 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 704 705 return false; 706} 707 708/// SemaBuiltinSemaBuiltinFPClassification - Handle functions like 709/// __builtin_isnan and friends. This is declared to take (...), so we have 710/// to check everything. We expect the last argument to be a floating point 711/// value. 
712bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs) { 713 if (TheCall->getNumArgs() < NumArgs) 714 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 715 << 0 << NumArgs << TheCall->getNumArgs()/*function call*/; 716 if (TheCall->getNumArgs() > NumArgs) 717 return Diag(TheCall->getArg(NumArgs)->getLocStart(), 718 diag::err_typecheck_call_too_many_args) 719 << 0 /*function call*/ << NumArgs << TheCall->getNumArgs() 720 << SourceRange(TheCall->getArg(NumArgs)->getLocStart(), 721 (*(TheCall->arg_end()-1))->getLocEnd()); 722 723 Expr *OrigArg = TheCall->getArg(NumArgs-1); 724 725 if (OrigArg->isTypeDependent()) 726 return false; 727 728 // This operation requires a non-_Complex floating-point number. 729 if (!OrigArg->getType()->isRealFloatingType()) 730 return Diag(OrigArg->getLocStart(), 731 diag::err_typecheck_call_invalid_unary_fp) 732 << OrigArg->getType() << OrigArg->getSourceRange(); 733 734 // If this is an implicit conversion from float -> double, remove it. 735 if (ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(OrigArg)) { 736 Expr *CastArg = Cast->getSubExpr(); 737 if (CastArg->getType()->isSpecificBuiltinType(BuiltinType::Float)) { 738 assert(Cast->getType()->isSpecificBuiltinType(BuiltinType::Double) && 739 "promotion from float to double is the only expected cast here"); 740 Cast->setSubExpr(0); 741 TheCall->setArg(NumArgs-1, CastArg); 742 OrigArg = CastArg; 743 } 744 } 745 746 return false; 747} 748 749/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 750// This is declared to take (...), so we have to check everything. 
751Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 752 if (TheCall->getNumArgs() < 2) 753 return ExprError(Diag(TheCall->getLocEnd(), 754 diag::err_typecheck_call_too_few_args_at_least) 755 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 756 << TheCall->getSourceRange()); 757 758 // Determine which of the following types of shufflevector we're checking: 759 // 1) unary, vector mask: (lhs, mask) 760 // 2) binary, vector mask: (lhs, rhs, mask) 761 // 3) binary, scalar mask: (lhs, rhs, index, ..., index) 762 QualType resType = TheCall->getArg(0)->getType(); 763 unsigned numElements = 0; 764 765 if (!TheCall->getArg(0)->isTypeDependent() && 766 !TheCall->getArg(1)->isTypeDependent()) { 767 QualType LHSType = TheCall->getArg(0)->getType(); 768 QualType RHSType = TheCall->getArg(1)->getType(); 769 770 if (!LHSType->isVectorType() || !RHSType->isVectorType()) { 771 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 772 << SourceRange(TheCall->getArg(0)->getLocStart(), 773 TheCall->getArg(1)->getLocEnd()); 774 return ExprError(); 775 } 776 777 numElements = LHSType->getAs<VectorType>()->getNumElements(); 778 unsigned numResElements = TheCall->getNumArgs() - 2; 779 780 // Check to see if we have a call with 2 vector arguments, the unary shuffle 781 // with mask. If so, verify that RHS is an integer vector type with the 782 // same number of elts as lhs. 
783 if (TheCall->getNumArgs() == 2) { 784 if (!RHSType->hasIntegerRepresentation() || 785 RHSType->getAs<VectorType>()->getNumElements() != numElements) 786 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 787 << SourceRange(TheCall->getArg(1)->getLocStart(), 788 TheCall->getArg(1)->getLocEnd()); 789 numResElements = numElements; 790 } 791 else if (!Context.hasSameUnqualifiedType(LHSType, RHSType)) { 792 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 793 << SourceRange(TheCall->getArg(0)->getLocStart(), 794 TheCall->getArg(1)->getLocEnd()); 795 return ExprError(); 796 } else if (numElements != numResElements) { 797 QualType eltType = LHSType->getAs<VectorType>()->getElementType(); 798 resType = Context.getVectorType(eltType, numResElements, 799 VectorType::NotAltiVec); 800 } 801 } 802 803 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 804 if (TheCall->getArg(i)->isTypeDependent() || 805 TheCall->getArg(i)->isValueDependent()) 806 continue; 807 808 llvm::APSInt Result(32); 809 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 810 return ExprError(Diag(TheCall->getLocStart(), 811 diag::err_shufflevector_nonconstant_argument) 812 << TheCall->getArg(i)->getSourceRange()); 813 814 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 815 return ExprError(Diag(TheCall->getLocStart(), 816 diag::err_shufflevector_argument_too_large) 817 << TheCall->getArg(i)->getSourceRange()); 818 } 819 820 llvm::SmallVector<Expr*, 32> exprs; 821 822 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 823 exprs.push_back(TheCall->getArg(i)); 824 TheCall->setArg(i, 0); 825 } 826 827 return Owned(new (Context) ShuffleVectorExpr(Context, exprs.begin(), 828 exprs.size(), resType, 829 TheCall->getCallee()->getLocStart(), 830 TheCall->getRParenLoc())); 831} 832 833/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 834// This is declared to take (const void*, ...) 
and can take two 835// optional constant int args. 836bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 837 unsigned NumArgs = TheCall->getNumArgs(); 838 839 if (NumArgs > 3) 840 return Diag(TheCall->getLocEnd(), 841 diag::err_typecheck_call_too_many_args_at_most) 842 << 0 /*function call*/ << 3 << NumArgs 843 << TheCall->getSourceRange(); 844 845 // Argument 0 is checked for us and the remaining arguments must be 846 // constant integers. 847 for (unsigned i = 1; i != NumArgs; ++i) { 848 Expr *Arg = TheCall->getArg(i); 849 850 llvm::APSInt Result; 851 if (SemaBuiltinConstantArg(TheCall, i, Result)) 852 return true; 853 854 // FIXME: gcc issues a warning and rewrites these to 0. These 855 // seems especially odd for the third argument since the default 856 // is 3. 857 if (i == 1) { 858 if (Result.getLimitedValue() > 1) 859 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 860 << "0" << "1" << Arg->getSourceRange(); 861 } else { 862 if (Result.getLimitedValue() > 3) 863 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 864 << "0" << "3" << Arg->getSourceRange(); 865 } 866 } 867 868 return false; 869} 870 871/// SemaBuiltinConstantArg - Handle a check if argument ArgNum of CallExpr 872/// TheCall is a constant expression. 873bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, 874 llvm::APSInt &Result) { 875 Expr *Arg = TheCall->getArg(ArgNum); 876 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 877 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 878 879 if (Arg->isTypeDependent() || Arg->isValueDependent()) return false; 880 881 if (!Arg->isIntegerConstantExpr(Result, Context)) 882 return Diag(TheCall->getLocStart(), diag::err_constant_integer_arg_type) 883 << FDecl->getDeclName() << Arg->getSourceRange(); 884 885 return false; 886} 887 888/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 889/// int type). 
This simply type checks that type is one of the defined 890/// constants (0-3). 891// For compatability check 0-3, llvm only handles 0 and 2. 892bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 893 llvm::APSInt Result; 894 895 // Check constant-ness first. 896 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 897 return true; 898 899 Expr *Arg = TheCall->getArg(1); 900 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 901 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 902 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 903 } 904 905 return false; 906} 907 908/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). 909/// This checks that val is a constant 1. 910bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 911 Expr *Arg = TheCall->getArg(1); 912 llvm::APSInt Result; 913 914 // TODO: This is less than ideal. Overload this to take a value. 915 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 916 return true; 917 918 if (Result != 1) 919 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 920 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 921 922 return false; 923} 924 925// Handle i > 1 ? 
"x" : "y", recursivelly 926bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 927 bool HasVAListArg, 928 unsigned format_idx, unsigned firstDataArg, 929 bool isPrintf) { 930 931 if (E->isTypeDependent() || E->isValueDependent()) 932 return false; 933 934 switch (E->getStmtClass()) { 935 case Stmt::ConditionalOperatorClass: { 936 const ConditionalOperator *C = cast<ConditionalOperator>(E); 937 return SemaCheckStringLiteral(C->getTrueExpr(), TheCall, HasVAListArg, 938 format_idx, firstDataArg, isPrintf) 939 && SemaCheckStringLiteral(C->getRHS(), TheCall, HasVAListArg, 940 format_idx, firstDataArg, isPrintf); 941 } 942 943 case Stmt::ImplicitCastExprClass: { 944 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 945 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 946 format_idx, firstDataArg, isPrintf); 947 } 948 949 case Stmt::ParenExprClass: { 950 const ParenExpr *Expr = cast<ParenExpr>(E); 951 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 952 format_idx, firstDataArg, isPrintf); 953 } 954 955 case Stmt::DeclRefExprClass: { 956 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 957 958 // As an exception, do not flag errors for variables binding to 959 // const string literals. 
960 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 961 bool isConstant = false; 962 QualType T = DR->getType(); 963 964 if (const ArrayType *AT = Context.getAsArrayType(T)) { 965 isConstant = AT->getElementType().isConstant(Context); 966 } else if (const PointerType *PT = T->getAs<PointerType>()) { 967 isConstant = T.isConstant(Context) && 968 PT->getPointeeType().isConstant(Context); 969 } 970 971 if (isConstant) { 972 if (const Expr *Init = VD->getAnyInitializer()) 973 return SemaCheckStringLiteral(Init, TheCall, 974 HasVAListArg, format_idx, firstDataArg, 975 isPrintf); 976 } 977 978 // For vprintf* functions (i.e., HasVAListArg==true), we add a 979 // special check to see if the format string is a function parameter 980 // of the function calling the printf function. If the function 981 // has an attribute indicating it is a printf-like function, then we 982 // should suppress warnings concerning non-literals being used in a call 983 // to a vprintf function. For example: 984 // 985 // void 986 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){ 987 // va_list ap; 988 // va_start(ap, fmt); 989 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 990 // ... 991 // 992 // 993 // FIXME: We don't have full attribute support yet, so just check to see 994 // if the argument is a DeclRefExpr that references a parameter. We'll 995 // add proper support for checking the attribute later. 
996 if (HasVAListArg) 997 if (isa<ParmVarDecl>(VD)) 998 return true; 999 } 1000 1001 return false; 1002 } 1003 1004 case Stmt::CallExprClass: { 1005 const CallExpr *CE = cast<CallExpr>(E); 1006 if (const ImplicitCastExpr *ICE 1007 = dyn_cast<ImplicitCastExpr>(CE->getCallee())) { 1008 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) { 1009 if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) { 1010 if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) { 1011 unsigned ArgIndex = FA->getFormatIdx(); 1012 const Expr *Arg = CE->getArg(ArgIndex - 1); 1013 1014 return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg, 1015 format_idx, firstDataArg, isPrintf); 1016 } 1017 } 1018 } 1019 } 1020 1021 return false; 1022 } 1023 case Stmt::ObjCStringLiteralClass: 1024 case Stmt::StringLiteralClass: { 1025 const StringLiteral *StrE = NULL; 1026 1027 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 1028 StrE = ObjCFExpr->getString(); 1029 else 1030 StrE = cast<StringLiteral>(E); 1031 1032 if (StrE) { 1033 CheckFormatString(StrE, E, TheCall, HasVAListArg, format_idx, 1034 firstDataArg, isPrintf); 1035 return true; 1036 } 1037 1038 return false; 1039 } 1040 1041 default: 1042 return false; 1043 } 1044} 1045 1046void 1047Sema::CheckNonNullArguments(const NonNullAttr *NonNull, 1048 const CallExpr *TheCall) { 1049 for (NonNullAttr::iterator i = NonNull->begin(), e = NonNull->end(); 1050 i != e; ++i) { 1051 const Expr *ArgExpr = TheCall->getArg(*i); 1052 if (ArgExpr->isNullPointerConstant(Context, 1053 Expr::NPC_ValueDependentIsNotNull)) 1054 Diag(TheCall->getCallee()->getLocStart(), diag::warn_null_arg) 1055 << ArgExpr->getSourceRange(); 1056 } 1057} 1058 1059/// CheckPrintfScanfArguments - Check calls to printf and scanf (and similar 1060/// functions) for correct use of format strings. 
void
Sema::CheckPrintfScanfArguments(const CallExpr *TheCall, bool HasVAListArg,
                                unsigned format_idx, unsigned firstDataArg,
                                bool isPrintf) {

  const Expr *Fn = TheCall->getCallee();

  // The way the format attribute works in GCC, the implicit this argument
  // of member functions is counted.  However, it doesn't appear in our own
  // lists, so decrement format_idx in that case.
  if (isa<CXXMemberCallExpr>(TheCall)) {
    // Catch a format attribute mistakenly referring to the object argument.
    if (format_idx == 0)
      return;
    --format_idx;
    if(firstDataArg != 0)
      --firstDataArg;
  }

  // CHECK: printf/scanf-like function is called with no format string.
  if (format_idx >= TheCall->getNumArgs()) {
    Diag(TheCall->getRParenLoc(), diag::warn_missing_format_string)
      << Fn->getSourceRange();
    return;
  }

  const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();

  // CHECK: format string is not a string literal.
  //
  // Dynamically generated format strings are difficult to
  // automatically vet at compile time.  Requiring that format strings
  // are string literals: (1) permits the checking of format strings by
  // the compiler and thereby (2) can practically remove the source of
  // many format string exploits.

  // Format string can be either ObjC string (e.g. @"%d") or
  // C string (e.g. "%d")
  // ObjC string uses the same format specifiers as C string, so we can use
  // the same format string checking logic for both ObjC and C strings.
  if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx,
                             firstDataArg, isPrintf))
    return;  // Literal format string found, check done!

  // If there are no arguments specified, warn with -Wformat-security, otherwise
  // warn only with -Wformat-nonliteral.
  if (TheCall->getNumArgs() == format_idx+1)
    Diag(TheCall->getArg(format_idx)->getLocStart(),
         diag::warn_format_nonliteral_noargs)
      << OrigFormatExpr->getSourceRange();
  else
    Diag(TheCall->getArg(format_idx)->getLocStart(),
         diag::warn_format_nonliteral)
           << OrigFormatExpr->getSourceRange();
}

namespace {
/// CheckFormatHandler - State and diagnostics shared by the printf and scanf
/// format-string checkers.  It is driven as a FormatStringHandler while a
/// format string is parsed and records which data arguments the specifiers
/// refer to.
class CheckFormatHandler : public analyze_format_string::FormatStringHandler {
protected:
  Sema &S;
  const StringLiteral *FExpr;      // The format string literal itself.
  const Expr *OrigFormatExpr;      // Expression used for format-string ranges.
  const unsigned FirstDataArg;     // Call-argument index of data argument 0.
  const unsigned NumDataArgs;      // Number of data arguments in the call.
  const bool IsObjCLiteral;
  const char *Beg; // Start of format string.
  const bool HasVAListArg;
  const CallExpr *TheCall;
  unsigned FormatIdx;
  llvm::BitVector CoveredArgs;     // One bit per data argument; set when used.
  bool usesPositionalArgs;         // Style of the first consuming specifier.
  bool atFirstArg;                 // True until a specifier consumes data.
public:
  CheckFormatHandler(Sema &s, const StringLiteral *fexpr,
                     const Expr *origFormatExpr, unsigned firstDataArg,
                     unsigned numDataArgs, bool isObjCLiteral,
                     const char *beg, bool hasVAListArg,
                     const CallExpr *theCall, unsigned formatIdx)
    : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
      FirstDataArg(firstDataArg),
      NumDataArgs(numDataArgs),
      IsObjCLiteral(isObjCLiteral), Beg(beg),
      HasVAListArg(hasVAListArg),
      TheCall(theCall), FormatIdx(formatIdx),
      usesPositionalArgs(false), atFirstArg(true) {
    // Start with every data argument marked as not covered.
    CoveredArgs.resize(numDataArgs);
    CoveredArgs.reset();
  }

  void DoneProcessing();

  void HandleIncompleteSpecifier(const char *startSpecifier,
                                 unsigned specifierLen);

  virtual void HandleInvalidPosition(const char *startSpecifier,
                                     unsigned specifierLen,
                                     analyze_format_string::PositionContext p);

  virtual void HandleZeroPosition(const char *startPos, unsigned posLen);

  void HandleNullChar(const char *nullCharacter);

protected:
  bool HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc,
                                        const char *startSpec,
                                        unsigned specifierLen,
                                        const char *csStart, unsigned csLen);

  SourceRange getFormatStringRange();
  CharSourceRange getSpecifierRange(const char *startSpecifier,
                                    unsigned specifierLen);
  SourceLocation getLocationOfByte(const char *x);

  const Expr *getDataArg(unsigned i) const;

  bool CheckNumArgs(const analyze_format_string::FormatSpecifier &FS,
                    const analyze_format_string::ConversionSpecifier &CS,
                    const char *startSpecifier, unsigned specifierLen,
                    unsigned argIndex);
};
}

/// Source range of the original format expression.
SourceRange CheckFormatHandler::getFormatStringRange() {
  return OrigFormatExpr->getSourceRange();
}

/// Character range covering the specifierLen bytes of the format string that
/// start at startSpecifier.
CharSourceRange CheckFormatHandler::
getSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
  SourceLocation Start = getLocationOfByte(startSpecifier);
  SourceLocation End   = getLocationOfByte(startSpecifier + specifierLen - 1);

  // Advance the end SourceLocation by one due to half-open ranges.
  End = End.getFileLocWithOffset(1);

  return CharSourceRange::getCharRange(Start, End);
}

/// Map a pointer into the format string data to a source location; the byte
/// offset is measured from Beg.
SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) {
  return S.getLocationOfStringLiteralByte(FExpr, x - Beg);
}

/// Emit warn_printf_incomplete_specifier for the given specifier.
void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier,
                                                   unsigned specifierLen){
  SourceLocation Loc = getLocationOfByte(startSpecifier);
  S.Diag(Loc, diag::warn_printf_incomplete_specifier)
    << getSpecifierRange(startSpecifier, specifierLen);
}

/// Emit warn_format_invalid_positional_specifier; p is streamed into the
/// diagnostic as an unsigned value.
void
CheckFormatHandler::HandleInvalidPosition(const char *startPos, unsigned posLen,
                                     analyze_format_string::PositionContext p) {
  SourceLocation Loc = getLocationOfByte(startPos);
  S.Diag(Loc, diag::warn_format_invalid_positional_specifier)
    << (unsigned) p << getSpecifierRange(startPos, posLen);
}

/// Emit warn_format_zero_positional_specifier for the given position text.
void CheckFormatHandler::HandleZeroPosition(const char *startPos,
                                            unsigned posLen) {
  SourceLocation Loc = getLocationOfByte(startPos);
  S.Diag(Loc, diag::warn_format_zero_positional_specifier)
    << getSpecifierRange(startPos, posLen);
}

void CheckFormatHandler::HandleNullChar(const char *nullCharacter) {
  // The presence of a null character is likely an error.
  S.Diag(getLocationOfByte(nullCharacter),
         diag::warn_printf_format_string_contains_null_char)
    << getFormatStringRange();
}

/// Return the call argument that corresponds to data argument i.
const Expr *CheckFormatHandler::getDataArg(unsigned i) const {
  return TheCall->getArg(FirstDataArg + i);
}

/// Called once the whole format string has been processed; reports the first
/// data argument that no specifier covered (skipped for va_list calls).
void CheckFormatHandler::DoneProcessing() {
  // Does the number of data arguments exceed the number of
  // format conversions in the format string?
  if (!HasVAListArg) {
    // Find any arguments that weren't covered.
    // After the flip, set bits mark the arguments that were NOT covered.
    CoveredArgs.flip();
    signed notCoveredArg = CoveredArgs.find_first();
    if (notCoveredArg >= 0) {
      assert((unsigned)notCoveredArg < NumDataArgs);
      S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
             diag::warn_printf_data_arg_not_used)
        << getFormatStringRange();
    }
  }
}

/// Emit warn_format_invalid_conversion for a conversion specifier.  Returns
/// true when processing of the format string should continue, false when it
/// should stop.
bool
CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex,
                                                     SourceLocation Loc,
                                                     const char *startSpec,
                                                     unsigned specifierLen,
                                                     const char *csStart,
                                                     unsigned csLen) {

  bool keepGoing = true;
  if (argIndex < NumDataArgs) {
    // Consider the argument covered, even though the specifier doesn't
    // make sense.
    CoveredArgs.set(argIndex);
  }
  else {
    // If argIndex exceeds the number of data arguments we
    // don't issue a warning because that is just a cascade of warnings (and
    // they may have intended '%%' anyway). We don't want to continue processing
    // the format string after this point, however, as we will likely just get
    // gibberish when trying to match arguments.
    keepGoing = false;
  }

  S.Diag(Loc, diag::warn_format_invalid_conversion)
    << llvm::StringRef(csStart, csLen)
    << getSpecifierRange(startSpec, specifierLen);

  return keepGoing;
}

/// Check that argIndex names an existing data argument.  Emits a diagnostic
/// and returns false when it does not; returns true otherwise.
bool
CheckFormatHandler::CheckNumArgs(
  const analyze_format_string::FormatSpecifier &FS,
  const analyze_format_string::ConversionSpecifier &CS,
  const char *startSpecifier, unsigned specifierLen, unsigned argIndex) {

  if (argIndex >= NumDataArgs) {
    if (FS.usesPositionalArg())  {
      // The diagnostic takes the position as argIndex+1.
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_positional_arg_exceeds_data_args)
        << (argIndex+1) << NumDataArgs
        << getSpecifierRange(startSpecifier, specifierLen);
    }
    else {
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_insufficient_data_args)
        << getSpecifierRange(startSpecifier, specifierLen);
    }

    return false;
  }
  return true;
}

//===--- CHECK: Printf format string checking ------------------------------===//

namespace {
/// CheckPrintfHandler - Format handler implementing the printf-specific
/// checks on top of CheckFormatHandler.
class CheckPrintfHandler : public CheckFormatHandler {
public:
  CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
                     const Expr *origFormatExpr, unsigned firstDataArg,
                     unsigned numDataArgs, bool isObjCLiteral,
                     const char *beg, bool hasVAListArg,
                     const CallExpr *theCall, unsigned formatIdx)
  : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
                       numDataArgs, isObjCLiteral, beg, hasVAListArg,
                       theCall, formatIdx) {}


  bool HandleInvalidPrintfConversionSpecifier(
                                      const analyze_printf::PrintfSpecifier &FS,
                                      const char *startSpecifier,
                                      unsigned specifierLen);

  bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
                             const char *startSpecifier,
                             unsigned specifierLen);

  bool HandleAmount(const analyze_format_string::OptionalAmount &Amt, unsigned k,
                    const char *startSpecifier, unsigned specifierLen);
  void HandleInvalidAmount(const analyze_printf::PrintfSpecifier &FS,
                           const analyze_printf::OptionalAmount &Amt,
                           unsigned type,
                           const char *startSpecifier, unsigned specifierLen);
  void HandleFlag(const analyze_printf::PrintfSpecifier &FS,
                  const analyze_printf::OptionalFlag &flag,
                  const char *startSpecifier, unsigned specifierLen);
  void HandleIgnoredFlag(const analyze_printf::PrintfSpecifier &FS,
                         const analyze_printf::OptionalFlag &ignoredFlag,
                         const analyze_printf::OptionalFlag &flag,
                         const char *startSpecifier, unsigned specifierLen);
};
}

/// Forward an invalid printf conversion specifier to the shared
/// HandleInvalidConversionSpecifier, returning its keep-going result.
bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
                                      const analyze_printf::PrintfSpecifier &FS,
                                      const char *startSpecifier,
                                      unsigned specifierLen) {
  const analyze_printf::PrintfConversionSpecifier &CS =
    FS.getConversionSpecifier();

  return HandleInvalidConversionSpecifier(FS.getArgIndex(),
                                          getLocationOfByte(CS.getStart()),
                                          startSpecifier, specifierLen,
                                          CS.getStart(), CS.getLength());
}

/// Check a field width or precision that takes its value from a data
/// argument.  k is passed to the diagnostics; callers in this file use 0 for
/// the field width and 1 for the precision.  Returns false when checking of
/// the format string should stop.
bool CheckPrintfHandler::HandleAmount(
                               const analyze_format_string::OptionalAmount &Amt,
                               unsigned k, const char *startSpecifier,
                               unsigned specifierLen) {

  if (Amt.hasDataArgument()) {
    if (!HasVAListArg) {
      unsigned argIndex = Amt.getArgIndex();
      if (argIndex >= NumDataArgs) {
        S.Diag(getLocationOfByte(Amt.getStart()),
               diag::warn_printf_asterisk_missing_arg)
          << k << getSpecifierRange(startSpecifier, specifierLen);
        // Don't do any more checking.  We will just emit
        // spurious errors.
        return false;
      }

      // Type check the data argument.  It should be an 'int'.
      // Although not in conformance with C99, we also allow the argument to be
      // an 'unsigned int' as that is a reasonably safe case.  GCC also
      // doesn't emit a warning for that case.
      CoveredArgs.set(argIndex);
      const Expr *Arg = getDataArg(argIndex);
      QualType T = Arg->getType();

      const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context);
      assert(ATR.isValid());

      if (!ATR.matchesType(S.Context, T)) {
        S.Diag(getLocationOfByte(Amt.getStart()),
               diag::warn_printf_asterisk_wrong_type)
          << k
          << ATR.getRepresentativeType(S.Context) << T
          << getSpecifierRange(startSpecifier, specifierLen)
          << Arg->getSourceRange();
        // Don't do any more checking.  We will just emit
        // spurious errors.
        return false;
      }
    }
  }
  return true;
}

/// Warn (warn_printf_nonsensical_optional_amount) about a field width or
/// precision used with the conversion specifier of FS.  A removal fix-it is
/// attached only when the amount was written as a constant.
void CheckPrintfHandler::HandleInvalidAmount(
                                      const analyze_printf::PrintfSpecifier &FS,
                                      const analyze_printf::OptionalAmount &Amt,
                                      unsigned type,
                                      const char *startSpecifier,
                                      unsigned specifierLen) {
  const analyze_printf::PrintfConversionSpecifier &CS =
    FS.getConversionSpecifier();
  switch (Amt.getHowSpecified()) {
  case analyze_printf::OptionalAmount::Constant:
    S.Diag(getLocationOfByte(Amt.getStart()),
        diag::warn_printf_nonsensical_optional_amount)
      << type
      << CS.toString()
      << getSpecifierRange(startSpecifier, specifierLen)
      << FixItHint::CreateRemoval(getSpecifierRange(Amt.getStart(),
          Amt.getConstantLength()));
    break;

  default:
    S.Diag(getLocationOfByte(Amt.getStart()),
        diag::warn_printf_nonsensical_optional_amount)
      << type
      << CS.toString()
      << getSpecifierRange(startSpecifier, specifierLen);
    break;
  }
}

void CheckPrintfHandler::HandleFlag(const analyze_printf::PrintfSpecifier &FS,
                                    const analyze_printf::OptionalFlag &flag,
                                    const char *startSpecifier,
                                    unsigned specifierLen) {
  // Warn about pointless flag with a fixit removal.
  const analyze_printf::PrintfConversionSpecifier &CS =
    FS.getConversionSpecifier();
  S.Diag(getLocationOfByte(flag.getPosition()),
      diag::warn_printf_nonsensical_flag)
    << flag.toString() << CS.toString()
    << getSpecifierRange(startSpecifier, specifierLen)
    << FixItHint::CreateRemoval(getSpecifierRange(flag.getPosition(), 1));
}

void CheckPrintfHandler::HandleIgnoredFlag(
                                const analyze_printf::PrintfSpecifier &FS,
                                const analyze_printf::OptionalFlag &ignoredFlag,
                                const analyze_printf::OptionalFlag &flag,
                                const char *startSpecifier,
                                unsigned specifierLen) {
  // Warn about ignored flag with a fixit removal.
  S.Diag(getLocationOfByte(ignoredFlag.getPosition()),
      diag::warn_printf_ignored_flag)
    << ignoredFlag.toString() << flag.toString()
    << getSpecifierRange(startSpecifier, specifierLen)
    << FixItHint::CreateRemoval(getSpecifierRange(
        ignoredFlag.getPosition(), 1));
}

/// Check one parsed printf specifier: consistent positional/non-positional
/// use, '*' width/precision arguments, flags, length modifier, '%n', and the
/// type of the matching data argument.  Returns false when processing of the
/// format string should stop.
bool
CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
                                            &FS,
                                          const char *startSpecifier,
                                          unsigned specifierLen) {

  using namespace analyze_format_string;
  using namespace analyze_printf;
  const PrintfConversionSpecifier &CS = FS.getConversionSpecifier();

  if (FS.consumesDataArgument()) {
    if (atFirstArg) {
        // The first consuming specifier fixes the style for the whole string.
        atFirstArg = false;
        usesPositionalArgs = FS.usesPositionalArg();
    }
    else if (usesPositionalArgs != FS.usesPositionalArg()) {
      // Cannot mix-and-match positional and non-positional arguments.
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_format_mix_positional_nonpositional_args)
        << getSpecifierRange(startSpecifier, specifierLen);
      return false;
    }
  }

  // First check if the field width, precision, and conversion specifier
  // have matching data arguments.
  if (!HandleAmount(FS.getFieldWidth(), /* field width */ 0,
                    startSpecifier, specifierLen)) {
    return false;
  }

  if (!HandleAmount(FS.getPrecision(), /* precision */ 1,
                    startSpecifier, specifierLen)) {
    return false;
  }

  if (!CS.consumesDataArgument()) {
    // FIXME: Technically specifying a precision or field width here
    // makes no sense.  Worth issuing a warning at some point.
    return true;
  }

  // Consume the argument.
  unsigned argIndex = FS.getArgIndex();
  if (argIndex < NumDataArgs) {
    // The check to see if the argIndex is valid will come later.
    // We set the bit here because we may exit early from this
    // function if we encounter some other error.
    CoveredArgs.set(argIndex);
  }

  // Check for using an Objective-C specific conversion specifier
  // in a non-ObjC literal.
  if (!IsObjCLiteral && CS.isObjCArg()) {
    return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier,
                                                  specifierLen);
  }

  // Check for invalid use of field width
  if (!FS.hasValidFieldWidth()) {
    HandleInvalidAmount(FS, FS.getFieldWidth(), /* field width */ 0,
        startSpecifier, specifierLen);
  }

  // Check for invalid use of precision
  if (!FS.hasValidPrecision()) {
    HandleInvalidAmount(FS, FS.getPrecision(), /* precision */ 1,
        startSpecifier, specifierLen);
  }

  // Check each flag does not conflict with any other component.
  if (!FS.hasValidLeadingZeros())
    HandleFlag(FS, FS.hasLeadingZeros(), startSpecifier, specifierLen);
  if (!FS.hasValidPlusPrefix())
    HandleFlag(FS, FS.hasPlusPrefix(), startSpecifier, specifierLen);
  if (!FS.hasValidSpacePrefix())
    HandleFlag(FS, FS.hasSpacePrefix(), startSpecifier, specifierLen);
  if (!FS.hasValidAlternativeForm())
    HandleFlag(FS, FS.hasAlternativeForm(), startSpecifier, specifierLen);
  if (!FS.hasValidLeftJustified())
    HandleFlag(FS, FS.isLeftJustified(), startSpecifier, specifierLen);

  // Check that flags are not ignored by another flag
  if (FS.hasSpacePrefix() && FS.hasPlusPrefix()) // ' ' ignored by '+'
    HandleIgnoredFlag(FS, FS.hasSpacePrefix(), FS.hasPlusPrefix(),
        startSpecifier, specifierLen);
  if (FS.hasLeadingZeros() && FS.isLeftJustified()) // '0' ignored by '-'
    HandleIgnoredFlag(FS, FS.hasLeadingZeros(), FS.isLeftJustified(),
            startSpecifier, specifierLen);

  // Check the length modifier is valid with the given conversion specifier.
  const LengthModifier &LM = FS.getLengthModifier();
  if (!FS.hasValidLengthModifier())
    S.Diag(getLocationOfByte(LM.getStart()),
        diag::warn_format_nonsensical_length)
      << LM.toString() << CS.toString()
      << getSpecifierRange(startSpecifier, specifierLen)
      << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(),
          LM.getLength()));

  // Are we using '%n'?
  if (CS.getKind() == ConversionSpecifier::nArg) {
    // Issue a warning about this being a possible security issue.
    S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back)
      << getSpecifierRange(startSpecifier, specifierLen);
    // Continue checking the other format specifiers.
    return true;
  }

  // The remaining checks depend on the data arguments.
  if (HasVAListArg)
    return true;

  if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
    return false;

  // Now type check the data expression that matches the
  // format specifier.
  const Expr *Ex = getDataArg(argIndex);
  const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context);
  if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) {
    // Check if we didn't match because of an implicit cast from a 'char'
    // or 'short' to an 'int'.  This is done because printf is a varargs
    // function.
    if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Ex))
      if (ICE->getType() == S.Context.IntTy)
        if (ATR.matchesType(S.Context, ICE->getSubExpr()->getType()))
          return true;

    // We may be able to offer a FixItHint if it is a supported type.
    PrintfSpecifier fixedFS = FS;
    bool success = fixedFS.fixType(Ex->getType());

    if (success) {
      // Get the fix string from the fixed format specifier
      llvm::SmallString<128> buf;
      llvm::raw_svector_ostream os(buf);
      fixedFS.toString(os);

      S.Diag(getLocationOfByte(CS.getStart()),
          diag::warn_printf_conversion_argument_type_mismatch)
        << ATR.getRepresentativeType(S.Context) << Ex->getType()
        << getSpecifierRange(startSpecifier, specifierLen)
        << Ex->getSourceRange()
        << FixItHint::CreateReplacement(
            getSpecifierRange(startSpecifier, specifierLen),
            os.str());
    }
    else {
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_conversion_argument_type_mismatch)
        << ATR.getRepresentativeType(S.Context) << Ex->getType()
        << getSpecifierRange(startSpecifier, specifierLen)
        << Ex->getSourceRange();
    }
  }

  return true;
}

//===--- CHECK: Scanf format string checking ------------------------------===//

namespace {
/// CheckScanfHandler - Format handler implementing the scanf-specific checks
/// on top of CheckFormatHandler.
class CheckScanfHandler : public CheckFormatHandler {
public:
  CheckScanfHandler(Sema &s, const StringLiteral *fexpr,
                    const Expr *origFormatExpr, unsigned firstDataArg,
                    unsigned numDataArgs, bool isObjCLiteral,
                    const char *beg, bool hasVAListArg,
                    const CallExpr *theCall, unsigned formatIdx)
  : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg,
                       numDataArgs, isObjCLiteral, beg, hasVAListArg,
                       theCall, formatIdx) {}

  bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
                            const char *startSpecifier,
                            unsigned specifierLen);

  bool HandleInvalidScanfConversionSpecifier(
          const analyze_scanf::ScanfSpecifier &FS,
          const char *startSpecifier,
          unsigned specifierLen);

  void HandleIncompleteScanList(const char *start, const char *end);
};
}

/// Emit warn_scanf_scanlist_incomplete at 'end', highlighting [start, end).
void CheckScanfHandler::HandleIncompleteScanList(const char *start,
                                                 const char *end) {
  S.Diag(getLocationOfByte(end), diag::warn_scanf_scanlist_incomplete)
    << getSpecifierRange(start, end - start);
}

/// Forward an invalid scanf conversion specifier to the shared
/// HandleInvalidConversionSpecifier, returning its keep-going result.
bool CheckScanfHandler::HandleInvalidScanfConversionSpecifier(
                                        const analyze_scanf::ScanfSpecifier &FS,
                                        const char *startSpecifier,
                                        unsigned specifierLen) {

  const analyze_scanf::ScanfConversionSpecifier &CS =
    FS.getConversionSpecifier();

  return HandleInvalidConversionSpecifier(FS.getArgIndex(),
                                          getLocationOfByte(CS.getStart()),
                                          startSpecifier, specifierLen,
                                          CS.getStart(), CS.getLength());
}

/// Check one parsed scanf specifier: consistent positional/non-positional
/// use, a zero field width, the length modifier, and the number of data
/// arguments.  Returns false when processing of the format string should
/// stop.
bool CheckScanfHandler::HandleScanfSpecifier(
                                       const analyze_scanf::ScanfSpecifier &FS,
                                       const char *startSpecifier,
                                       unsigned specifierLen) {

  using namespace analyze_scanf;
  using namespace analyze_format_string;

  const ScanfConversionSpecifier &CS = FS.getConversionSpecifier();

  // Handle case where '%' and '*' don't consume an argument.  These shouldn't
  // be used to decide if we are using positional arguments consistently.
  if (FS.consumesDataArgument()) {
    if (atFirstArg) {
      atFirstArg = false;
      usesPositionalArgs = FS.usesPositionalArg();
    }
    else if (usesPositionalArgs != FS.usesPositionalArg()) {
      // Cannot mix-and-match positional and non-positional arguments.
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_format_mix_positional_nonpositional_args)
        << getSpecifierRange(startSpecifier, specifierLen);
      return false;
    }
  }

  // Check that the field width is non-zero; warn when a constant width of
  // zero was written.
  const OptionalAmount &Amt = FS.getFieldWidth();
  if (Amt.getHowSpecified() == OptionalAmount::Constant) {
    if (Amt.getConstantAmount() == 0) {
      const CharSourceRange &R = getSpecifierRange(Amt.getStart(),
                                                   Amt.getConstantLength());
      S.Diag(getLocationOfByte(Amt.getStart()),
             diag::warn_scanf_nonzero_width)
        << R << FixItHint::CreateRemoval(R);
    }
  }

  if (!FS.consumesDataArgument()) {
    // FIXME: Technically specifying a precision or field width here
    // makes no sense.  Worth issuing a warning at some point.
    return true;
  }

  // Consume the argument.
  unsigned argIndex = FS.getArgIndex();
  if (argIndex < NumDataArgs) {
      // The check to see if the argIndex is valid will come later.
      // We set the bit here because we may exit early from this
      // function if we encounter some other error.
    CoveredArgs.set(argIndex);
  }

  // Check the length modifier is valid with the given conversion specifier.
  const LengthModifier &LM = FS.getLengthModifier();
  if (!FS.hasValidLengthModifier()) {
    S.Diag(getLocationOfByte(LM.getStart()),
           diag::warn_format_nonsensical_length)
      << LM.toString() << CS.toString()
      << getSpecifierRange(startSpecifier, specifierLen)
      << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(),
                                                    LM.getLength()));
  }

  // The remaining checks depend on the data arguments.
  if (HasVAListArg)
    return true;

  if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex))
    return false;

  // FIXME: Check that the argument type matches the format specifier.

  return true;
}

/// CheckFormatString - Check the literal format string FExpr used by TheCall.
/// Wide and empty format strings are diagnosed and not analysed further;
/// otherwise the string is parsed with a printf or scanf handler, depending
/// on isPrintf.
void Sema::CheckFormatString(const StringLiteral *FExpr,
                             const Expr *OrigFormatExpr,
                             const CallExpr *TheCall, bool HasVAListArg,
                             unsigned format_idx, unsigned firstDataArg,
                             bool isPrintf) {

  // CHECK: is the format string a wide literal?
  if (FExpr->isWide()) {
    Diag(FExpr->getLocStart(),
         diag::warn_format_string_is_wide_literal)
    << OrigFormatExpr->getSourceRange();
    return;
  }

  // Str - The format string.  NOTE: this is NOT null-terminated!
  const char *Str = FExpr->getStrData();

  // CHECK: empty format string?
  unsigned StrLen = FExpr->getByteLength();

  if (StrLen == 0) {
    Diag(FExpr->getLocStart(), diag::warn_empty_format_string)
    << OrigFormatExpr->getSourceRange();
    return;
  }

  // DoneProcessing() is only invoked when the parse call returns false.
  if (isPrintf) {
    CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
                         TheCall->getNumArgs() - firstDataArg,
                         isa<ObjCStringLiteral>(OrigFormatExpr), Str,
                         HasVAListArg, TheCall, format_idx);

    if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen))
      H.DoneProcessing();
  }
  else {
    CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
                        TheCall->getNumArgs() - firstDataArg,
                        isa<ObjCStringLiteral>(OrigFormatExpr), Str,
                        HasVAListArg, TheCall, format_idx);

    if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen))
      H.DoneProcessing();
  }
}

//===--- CHECK: Return Address of Stack Variable --------------------------===//

static DeclRefExpr* EvalVal(Expr *E);
static DeclRefExpr* EvalAddr(Expr* E);

/// CheckReturnStackAddr - Check if a return statement returns the address
/// of a
stack variable. 1796void 1797Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 1798 SourceLocation ReturnLoc) { 1799 1800 // Perform checking for returned stack addresses. 1801 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 1802 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 1803 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 1804 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1805 1806 // Skip over implicit cast expressions when checking for block expressions. 1807 RetValExp = RetValExp->IgnoreParenCasts(); 1808 1809 if (BlockExpr *C = dyn_cast<BlockExpr>(RetValExp)) 1810 if (C->hasBlockDeclRefExprs()) 1811 Diag(C->getLocStart(), diag::err_ret_local_block) 1812 << C->getSourceRange(); 1813 1814 if (AddrLabelExpr *ALE = dyn_cast<AddrLabelExpr>(RetValExp)) 1815 Diag(ALE->getLocStart(), diag::warn_ret_addr_label) 1816 << ALE->getSourceRange(); 1817 1818 } else if (lhsType->isReferenceType()) { 1819 // Perform checking for stack values returned by reference. 1820 // Check for a reference to the stack 1821 if (DeclRefExpr *DR = EvalVal(RetValExp)) 1822 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 1823 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1824 } 1825} 1826 1827/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 1828/// check if the expression in a return statement evaluates to an address 1829/// to a location on the stack. The recursion is used to traverse the 1830/// AST of the return expression, with recursion backtracking when we 1831/// encounter a subexpression that (1) clearly does not lead to the address 1832/// of a stack variable or (2) is something we cannot determine leads to 1833/// the address of a stack variable based on such local checking. 1834/// 1835/// EvalAddr processes expressions that are pointers that are used as 1836/// references (and not L-values). EvalVal handles all other values. 
1837/// At the base case of the recursion is a check for a DeclRefExpr* in 1838/// the refers to a stack variable. 1839/// 1840/// This implementation handles: 1841/// 1842/// * pointer-to-pointer casts 1843/// * implicit conversions from array references to pointers 1844/// * taking the address of fields 1845/// * arbitrary interplay between "&" and "*" operators 1846/// * pointer arithmetic from an address of a stack variable 1847/// * taking the address of an array element where the array is on the stack 1848static DeclRefExpr* EvalAddr(Expr *E) { 1849 // We should only be called for evaluating pointer expressions. 1850 assert((E->getType()->isAnyPointerType() || 1851 E->getType()->isBlockPointerType() || 1852 E->getType()->isObjCQualifiedIdType()) && 1853 "EvalAddr only works on pointers"); 1854 1855 // Our "symbolic interpreter" is just a dispatch off the currently 1856 // viewed AST node. We then recursively traverse the AST by calling 1857 // EvalAddr and EvalVal appropriately. 1858 switch (E->getStmtClass()) { 1859 case Stmt::ParenExprClass: 1860 // Ignore parentheses. 1861 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 1862 1863 case Stmt::UnaryOperatorClass: { 1864 // The only unary operator that make sense to handle here 1865 // is AddrOf. All others don't make sense as pointers. 1866 UnaryOperator *U = cast<UnaryOperator>(E); 1867 1868 if (U->getOpcode() == UnaryOperator::AddrOf) 1869 return EvalVal(U->getSubExpr()); 1870 else 1871 return NULL; 1872 } 1873 1874 case Stmt::BinaryOperatorClass: { 1875 // Handle pointer arithmetic. All other binary operators are not valid 1876 // in this context. 1877 BinaryOperator *B = cast<BinaryOperator>(E); 1878 BinaryOperator::Opcode op = B->getOpcode(); 1879 1880 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 1881 return NULL; 1882 1883 Expr *Base = B->getLHS(); 1884 1885 // Determine which argument is the real pointer base. It could be 1886 // the RHS argument instead of the LHS. 
1887 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 1888 1889 assert (Base->getType()->isPointerType()); 1890 return EvalAddr(Base); 1891 } 1892 1893 // For conditional operators we need to see if either the LHS or RHS are 1894 // valid DeclRefExpr*s. If one of them is valid, we return it. 1895 case Stmt::ConditionalOperatorClass: { 1896 ConditionalOperator *C = cast<ConditionalOperator>(E); 1897 1898 // Handle the GNU extension for missing LHS. 1899 if (Expr *lhsExpr = C->getLHS()) 1900 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 1901 return LHS; 1902 1903 return EvalAddr(C->getRHS()); 1904 } 1905 1906 // For casts, we need to handle conversions from arrays to 1907 // pointer values, and pointer-to-pointer conversions. 1908 case Stmt::ImplicitCastExprClass: 1909 case Stmt::CStyleCastExprClass: 1910 case Stmt::CXXFunctionalCastExprClass: { 1911 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 1912 QualType T = SubExpr->getType(); 1913 1914 if (SubExpr->getType()->isPointerType() || 1915 SubExpr->getType()->isBlockPointerType() || 1916 SubExpr->getType()->isObjCQualifiedIdType()) 1917 return EvalAddr(SubExpr); 1918 else if (T->isArrayType()) 1919 return EvalVal(SubExpr); 1920 else 1921 return 0; 1922 } 1923 1924 // C++ casts. For dynamic casts, static casts, and const casts, we 1925 // are always converting from a pointer-to-pointer, so we just blow 1926 // through the cast. In the case the dynamic cast doesn't fail (and 1927 // return NULL), we take the conservative route and report cases 1928 // where we return the address of a stack variable. For Reinterpre 1929 // FIXME: The comment about is wrong; we're not always converting 1930 // from pointer to pointer. I'm guessing that this code should also 1931 // handle references to objects. 
1932 case Stmt::CXXStaticCastExprClass: 1933 case Stmt::CXXDynamicCastExprClass: 1934 case Stmt::CXXConstCastExprClass: 1935 case Stmt::CXXReinterpretCastExprClass: { 1936 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 1937 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 1938 return EvalAddr(S); 1939 else 1940 return NULL; 1941 } 1942 1943 // Everything else: we simply don't reason about them. 1944 default: 1945 return NULL; 1946 } 1947} 1948 1949 1950/// EvalVal - This function is complements EvalAddr in the mutual recursion. 1951/// See the comments for EvalAddr for more details. 1952static DeclRefExpr* EvalVal(Expr *E) { 1953do { 1954 // We should only be called for evaluating non-pointer expressions, or 1955 // expressions with a pointer type that are not used as references but instead 1956 // are l-values (e.g., DeclRefExpr with a pointer type). 1957 1958 // Our "symbolic interpreter" is just a dispatch off the currently 1959 // viewed AST node. We then recursively traverse the AST by calling 1960 // EvalAddr and EvalVal appropriately. 1961 switch (E->getStmtClass()) { 1962 case Stmt::ImplicitCastExprClass: { 1963 ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E); 1964 if (IE->getCategory() == ImplicitCastExpr::LValue) { 1965 E = IE->getSubExpr(); 1966 continue; 1967 } 1968 return NULL; 1969 } 1970 1971 case Stmt::DeclRefExprClass: { 1972 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 1973 // at code that refers to a variable's name. We check if it has local 1974 // storage within the function, and if so, return the expression. 1975 DeclRefExpr *DR = cast<DeclRefExpr>(E); 1976 1977 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 1978 if (V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 1979 1980 return NULL; 1981 } 1982 1983 case Stmt::ParenExprClass: { 1984 // Ignore parentheses. 
1985 E = cast<ParenExpr>(E)->getSubExpr(); 1986 continue; 1987 } 1988 1989 case Stmt::UnaryOperatorClass: { 1990 // The only unary operator that make sense to handle here 1991 // is Deref. All others don't resolve to a "name." This includes 1992 // handling all sorts of rvalues passed to a unary operator. 1993 UnaryOperator *U = cast<UnaryOperator>(E); 1994 1995 if (U->getOpcode() == UnaryOperator::Deref) 1996 return EvalAddr(U->getSubExpr()); 1997 1998 return NULL; 1999 } 2000 2001 case Stmt::ArraySubscriptExprClass: { 2002 // Array subscripts are potential references to data on the stack. We 2003 // retrieve the DeclRefExpr* for the array variable if it indeed 2004 // has local storage. 2005 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 2006 } 2007 2008 case Stmt::ConditionalOperatorClass: { 2009 // For conditional operators we need to see if either the LHS or RHS are 2010 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 2011 ConditionalOperator *C = cast<ConditionalOperator>(E); 2012 2013 // Handle the GNU extension for missing LHS. 2014 if (Expr *lhsExpr = C->getLHS()) 2015 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 2016 return LHS; 2017 2018 return EvalVal(C->getRHS()); 2019 } 2020 2021 // Accesses to members are potential references to data on the stack. 2022 case Stmt::MemberExprClass: { 2023 MemberExpr *M = cast<MemberExpr>(E); 2024 2025 // Check for indirect access. We only want direct field accesses. 2026 if (!M->isArrow()) 2027 return EvalVal(M->getBase()); 2028 else 2029 return NULL; 2030 } 2031 2032 // Everything else: we simply don't reason about them. 2033 default: 2034 return NULL; 2035 } 2036} while (true); 2037} 2038 2039//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 2040 2041/// Check for comparisons of floating point operands using != and ==. 2042/// Issue a warning if these are no self-comparisons, as they are not likely 2043/// to do what the programmer intended. 
2044void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 2045 bool EmitWarning = true; 2046 2047 Expr* LeftExprSansParen = lex->IgnoreParens(); 2048 Expr* RightExprSansParen = rex->IgnoreParens(); 2049 2050 // Special case: check for x == x (which is OK). 2051 // Do not emit warnings for such cases. 2052 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 2053 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 2054 if (DRL->getDecl() == DRR->getDecl()) 2055 EmitWarning = false; 2056 2057 2058 // Special case: check for comparisons against literals that can be exactly 2059 // represented by APFloat. In such cases, do not emit a warning. This 2060 // is a heuristic: often comparison against such literals are used to 2061 // detect if a value in a variable has not changed. This clearly can 2062 // lead to false negatives. 2063 if (EmitWarning) { 2064 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 2065 if (FLL->isExact()) 2066 EmitWarning = false; 2067 } else 2068 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 2069 if (FLR->isExact()) 2070 EmitWarning = false; 2071 } 2072 } 2073 2074 // Check for comparisons with builtin types. 2075 if (EmitWarning) 2076 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 2077 if (CL->isBuiltinCall(Context)) 2078 EmitWarning = false; 2079 2080 if (EmitWarning) 2081 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 2082 if (CR->isBuiltinCall(Context)) 2083 EmitWarning = false; 2084 2085 // Emit the diagnostic. 2086 if (EmitWarning) 2087 Diag(loc, diag::warn_floatingpoint_eq) 2088 << lex->getSourceRange() << rex->getSourceRange(); 2089} 2090 2091//===--- CHECK: Integer mixed-sign comparisons (-Wsign-compare) --------===// 2092//===--- CHECK: Lossy implicit conversions (-Wconversion) --------------===// 2093 2094namespace { 2095 2096/// Structure recording the 'active' range of an integer-valued 2097/// expression. 
2098struct IntRange { 2099 /// The number of bits active in the int. 2100 unsigned Width; 2101 2102 /// True if the int is known not to have negative values. 2103 bool NonNegative; 2104 2105 IntRange() {} 2106 IntRange(unsigned Width, bool NonNegative) 2107 : Width(Width), NonNegative(NonNegative) 2108 {} 2109 2110 // Returns the range of the bool type. 2111 static IntRange forBoolType() { 2112 return IntRange(1, true); 2113 } 2114 2115 // Returns the range of an integral type. 2116 static IntRange forType(ASTContext &C, QualType T) { 2117 return forCanonicalType(C, T->getCanonicalTypeInternal().getTypePtr()); 2118 } 2119 2120 // Returns the range of an integeral type based on its canonical 2121 // representation. 2122 static IntRange forCanonicalType(ASTContext &C, const Type *T) { 2123 assert(T->isCanonicalUnqualified()); 2124 2125 if (const VectorType *VT = dyn_cast<VectorType>(T)) 2126 T = VT->getElementType().getTypePtr(); 2127 if (const ComplexType *CT = dyn_cast<ComplexType>(T)) 2128 T = CT->getElementType().getTypePtr(); 2129 2130 if (const EnumType *ET = dyn_cast<EnumType>(T)) { 2131 EnumDecl *Enum = ET->getDecl(); 2132 unsigned NumPositive = Enum->getNumPositiveBits(); 2133 unsigned NumNegative = Enum->getNumNegativeBits(); 2134 2135 return IntRange(std::max(NumPositive, NumNegative), NumNegative == 0); 2136 } 2137 2138 const BuiltinType *BT = cast<BuiltinType>(T); 2139 assert(BT->isInteger()); 2140 2141 return IntRange(C.getIntWidth(QualType(T, 0)), BT->isUnsignedInteger()); 2142 } 2143 2144 // Returns the supremum of two ranges: i.e. their conservative merge. 2145 static IntRange join(IntRange L, IntRange R) { 2146 return IntRange(std::max(L.Width, R.Width), 2147 L.NonNegative && R.NonNegative); 2148 } 2149 2150 // Returns the infinum of two ranges: i.e. their aggressive merge. 
2151 static IntRange meet(IntRange L, IntRange R) { 2152 return IntRange(std::min(L.Width, R.Width), 2153 L.NonNegative || R.NonNegative); 2154 } 2155}; 2156 2157IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) { 2158 if (value.isSigned() && value.isNegative()) 2159 return IntRange(value.getMinSignedBits(), false); 2160 2161 if (value.getBitWidth() > MaxWidth) 2162 value.trunc(MaxWidth); 2163 2164 // isNonNegative() just checks the sign bit without considering 2165 // signedness. 2166 return IntRange(value.getActiveBits(), true); 2167} 2168 2169IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty, 2170 unsigned MaxWidth) { 2171 if (result.isInt()) 2172 return GetValueRange(C, result.getInt(), MaxWidth); 2173 2174 if (result.isVector()) { 2175 IntRange R = GetValueRange(C, result.getVectorElt(0), Ty, MaxWidth); 2176 for (unsigned i = 1, e = result.getVectorLength(); i != e; ++i) { 2177 IntRange El = GetValueRange(C, result.getVectorElt(i), Ty, MaxWidth); 2178 R = IntRange::join(R, El); 2179 } 2180 return R; 2181 } 2182 2183 if (result.isComplexInt()) { 2184 IntRange R = GetValueRange(C, result.getComplexIntReal(), MaxWidth); 2185 IntRange I = GetValueRange(C, result.getComplexIntImag(), MaxWidth); 2186 return IntRange::join(R, I); 2187 } 2188 2189 // This can happen with lossless casts to intptr_t of "based" lvalues. 2190 // Assume it might use arbitrary bits. 2191 // FIXME: The only reason we need to pass the type in here is to get 2192 // the sign right on this one case. It would be nice if APValue 2193 // preserved this. 2194 assert(result.isLValue()); 2195 return IntRange(MaxWidth, Ty->isUnsignedIntegerType()); 2196} 2197 2198/// Pseudo-evaluate the given integer expression, estimating the 2199/// range of values it might take. 
2200/// 2201/// \param MaxWidth - the width to which the value will be truncated 2202IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) { 2203 E = E->IgnoreParens(); 2204 2205 // Try a full evaluation first. 2206 Expr::EvalResult result; 2207 if (E->Evaluate(result, C)) 2208 return GetValueRange(C, result.Val, E->getType(), MaxWidth); 2209 2210 // I think we only want to look through implicit casts here; if the 2211 // user has an explicit widening cast, we should treat the value as 2212 // being of the new, wider type. 2213 if (ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E)) { 2214 if (CE->getCastKind() == CastExpr::CK_NoOp) 2215 return GetExprRange(C, CE->getSubExpr(), MaxWidth); 2216 2217 IntRange OutputTypeRange = IntRange::forType(C, CE->getType()); 2218 2219 bool isIntegerCast = (CE->getCastKind() == CastExpr::CK_IntegralCast); 2220 if (!isIntegerCast && CE->getCastKind() == CastExpr::CK_Unknown) 2221 isIntegerCast = CE->getSubExpr()->getType()->isIntegerType(); 2222 2223 // Assume that non-integer casts can span the full range of the type. 2224 if (!isIntegerCast) 2225 return OutputTypeRange; 2226 2227 IntRange SubRange 2228 = GetExprRange(C, CE->getSubExpr(), 2229 std::min(MaxWidth, OutputTypeRange.Width)); 2230 2231 // Bail out if the subexpr's range is as wide as the cast type. 2232 if (SubRange.Width >= OutputTypeRange.Width) 2233 return OutputTypeRange; 2234 2235 // Otherwise, we take the smaller width, and we're non-negative if 2236 // either the output type or the subexpr is. 2237 return IntRange(SubRange.Width, 2238 SubRange.NonNegative || OutputTypeRange.NonNegative); 2239 } 2240 2241 if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) { 2242 // If we can fold the condition, just take that operand. 2243 bool CondResult; 2244 if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C)) 2245 return GetExprRange(C, CondResult ? 
CO->getTrueExpr() 2246 : CO->getFalseExpr(), 2247 MaxWidth); 2248 2249 // Otherwise, conservatively merge. 2250 IntRange L = GetExprRange(C, CO->getTrueExpr(), MaxWidth); 2251 IntRange R = GetExprRange(C, CO->getFalseExpr(), MaxWidth); 2252 return IntRange::join(L, R); 2253 } 2254 2255 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) { 2256 switch (BO->getOpcode()) { 2257 2258 // Boolean-valued operations are single-bit and positive. 2259 case BinaryOperator::LAnd: 2260 case BinaryOperator::LOr: 2261 case BinaryOperator::LT: 2262 case BinaryOperator::GT: 2263 case BinaryOperator::LE: 2264 case BinaryOperator::GE: 2265 case BinaryOperator::EQ: 2266 case BinaryOperator::NE: 2267 return IntRange::forBoolType(); 2268 2269 // The type of these compound assignments is the type of the LHS, 2270 // so the RHS is not necessarily an integer. 2271 case BinaryOperator::MulAssign: 2272 case BinaryOperator::DivAssign: 2273 case BinaryOperator::RemAssign: 2274 case BinaryOperator::AddAssign: 2275 case BinaryOperator::SubAssign: 2276 return IntRange::forType(C, E->getType()); 2277 2278 // Operations with opaque sources are black-listed. 2279 case BinaryOperator::PtrMemD: 2280 case BinaryOperator::PtrMemI: 2281 return IntRange::forType(C, E->getType()); 2282 2283 // Bitwise-and uses the *infinum* of the two source ranges. 2284 case BinaryOperator::And: 2285 case BinaryOperator::AndAssign: 2286 return IntRange::meet(GetExprRange(C, BO->getLHS(), MaxWidth), 2287 GetExprRange(C, BO->getRHS(), MaxWidth)); 2288 2289 // Left shift gets black-listed based on a judgement call. 2290 case BinaryOperator::Shl: 2291 // ...except that we want to treat '1 << (blah)' as logically 2292 // positive. It's an important idiom. 
2293 if (IntegerLiteral *I 2294 = dyn_cast<IntegerLiteral>(BO->getLHS()->IgnoreParenCasts())) { 2295 if (I->getValue() == 1) { 2296 IntRange R = IntRange::forType(C, E->getType()); 2297 return IntRange(R.Width, /*NonNegative*/ true); 2298 } 2299 } 2300 // fallthrough 2301 2302 case BinaryOperator::ShlAssign: 2303 return IntRange::forType(C, E->getType()); 2304 2305 // Right shift by a constant can narrow its left argument. 2306 case BinaryOperator::Shr: 2307 case BinaryOperator::ShrAssign: { 2308 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 2309 2310 // If the shift amount is a positive constant, drop the width by 2311 // that much. 2312 llvm::APSInt shift; 2313 if (BO->getRHS()->isIntegerConstantExpr(shift, C) && 2314 shift.isNonNegative()) { 2315 unsigned zext = shift.getZExtValue(); 2316 if (zext >= L.Width) 2317 L.Width = (L.NonNegative ? 0 : 1); 2318 else 2319 L.Width -= zext; 2320 } 2321 2322 return L; 2323 } 2324 2325 // Comma acts as its right operand. 2326 case BinaryOperator::Comma: 2327 return GetExprRange(C, BO->getRHS(), MaxWidth); 2328 2329 // Black-list pointer subtractions. 2330 case BinaryOperator::Sub: 2331 if (BO->getLHS()->getType()->isPointerType()) 2332 return IntRange::forType(C, E->getType()); 2333 // fallthrough 2334 2335 default: 2336 break; 2337 } 2338 2339 // Treat every other operator as if it were closed on the 2340 // narrowest type that encompasses both operands. 2341 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 2342 IntRange R = GetExprRange(C, BO->getRHS(), MaxWidth); 2343 return IntRange::join(L, R); 2344 } 2345 2346 if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { 2347 switch (UO->getOpcode()) { 2348 // Boolean-valued operations are white-listed. 2349 case UnaryOperator::LNot: 2350 return IntRange::forBoolType(); 2351 2352 // Operations with opaque sources are black-listed. 
2353 case UnaryOperator::Deref: 2354 case UnaryOperator::AddrOf: // should be impossible 2355 case UnaryOperator::OffsetOf: 2356 return IntRange::forType(C, E->getType()); 2357 2358 default: 2359 return GetExprRange(C, UO->getSubExpr(), MaxWidth); 2360 } 2361 } 2362 2363 if (dyn_cast<OffsetOfExpr>(E)) { 2364 IntRange::forType(C, E->getType()); 2365 } 2366 2367 FieldDecl *BitField = E->getBitField(); 2368 if (BitField) { 2369 llvm::APSInt BitWidthAP = BitField->getBitWidth()->EvaluateAsInt(C); 2370 unsigned BitWidth = BitWidthAP.getZExtValue(); 2371 2372 return IntRange(BitWidth, BitField->getType()->isUnsignedIntegerType()); 2373 } 2374 2375 return IntRange::forType(C, E->getType()); 2376} 2377 2378IntRange GetExprRange(ASTContext &C, Expr *E) { 2379 return GetExprRange(C, E, C.getIntWidth(E->getType())); 2380} 2381 2382/// Checks whether the given value, which currently has the given 2383/// source semantics, has the same value when coerced through the 2384/// target semantics. 2385bool IsSameFloatAfterCast(const llvm::APFloat &value, 2386 const llvm::fltSemantics &Src, 2387 const llvm::fltSemantics &Tgt) { 2388 llvm::APFloat truncated = value; 2389 2390 bool ignored; 2391 truncated.convert(Src, llvm::APFloat::rmNearestTiesToEven, &ignored); 2392 truncated.convert(Tgt, llvm::APFloat::rmNearestTiesToEven, &ignored); 2393 2394 return truncated.bitwiseIsEqual(value); 2395} 2396 2397/// Checks whether the given value, which currently has the given 2398/// source semantics, has the same value when coerced through the 2399/// target semantics. 2400/// 2401/// The value might be a vector of floats (or a complex number). 
2402bool IsSameFloatAfterCast(const APValue &value, 2403 const llvm::fltSemantics &Src, 2404 const llvm::fltSemantics &Tgt) { 2405 if (value.isFloat()) 2406 return IsSameFloatAfterCast(value.getFloat(), Src, Tgt); 2407 2408 if (value.isVector()) { 2409 for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i) 2410 if (!IsSameFloatAfterCast(value.getVectorElt(i), Src, Tgt)) 2411 return false; 2412 return true; 2413 } 2414 2415 assert(value.isComplexFloat()); 2416 return (IsSameFloatAfterCast(value.getComplexFloatReal(), Src, Tgt) && 2417 IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt)); 2418} 2419 2420void AnalyzeImplicitConversions(Sema &S, Expr *E); 2421 2422bool IsZero(Sema &S, Expr *E) { 2423 llvm::APSInt Value; 2424 return E->isIntegerConstantExpr(Value, S.Context) && Value == 0; 2425} 2426 2427void CheckTrivialUnsignedComparison(Sema &S, BinaryOperator *E) { 2428 BinaryOperator::Opcode op = E->getOpcode(); 2429 if (op == BinaryOperator::LT && IsZero(S, E->getRHS())) { 2430 S.Diag(E->getOperatorLoc(), diag::warn_lunsigned_always_true_comparison) 2431 << "< 0" << "false" 2432 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2433 } else if (op == BinaryOperator::GE && IsZero(S, E->getRHS())) { 2434 S.Diag(E->getOperatorLoc(), diag::warn_lunsigned_always_true_comparison) 2435 << ">= 0" << "true" 2436 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2437 } else if (op == BinaryOperator::GT && IsZero(S, E->getLHS())) { 2438 S.Diag(E->getOperatorLoc(), diag::warn_runsigned_always_true_comparison) 2439 << "0 >" << "false" 2440 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2441 } else if (op == BinaryOperator::LE && IsZero(S, E->getLHS())) { 2442 S.Diag(E->getOperatorLoc(), diag::warn_runsigned_always_true_comparison) 2443 << "0 <=" << "true" 2444 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2445 } 2446} 2447 2448/// Analyze the operands of the given comparison. 
Implements the 2449/// fallback case from AnalyzeComparison. 2450void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) { 2451 AnalyzeImplicitConversions(S, E->getLHS()); 2452 AnalyzeImplicitConversions(S, E->getRHS()); 2453} 2454 2455/// \brief Implements -Wsign-compare. 2456/// 2457/// \param lex the left-hand expression 2458/// \param rex the right-hand expression 2459/// \param OpLoc the location of the joining operator 2460/// \param BinOpc binary opcode or 0 2461void AnalyzeComparison(Sema &S, BinaryOperator *E) { 2462 // The type the comparison is being performed in. 2463 QualType T = E->getLHS()->getType(); 2464 assert(S.Context.hasSameUnqualifiedType(T, E->getRHS()->getType()) 2465 && "comparison with mismatched types"); 2466 2467 // We don't do anything special if this isn't an unsigned integral 2468 // comparison: we're only interested in integral comparisons, and 2469 // signed comparisons only happen in cases we don't care to warn about. 2470 if (!T->hasUnsignedIntegerRepresentation()) 2471 return AnalyzeImpConvsInComparison(S, E); 2472 2473 Expr *lex = E->getLHS()->IgnoreParenImpCasts(); 2474 Expr *rex = E->getRHS()->IgnoreParenImpCasts(); 2475 2476 // Check to see if one of the (unmodified) operands is of different 2477 // signedness. 2478 Expr *signedOperand, *unsignedOperand; 2479 if (lex->getType()->hasSignedIntegerRepresentation()) { 2480 assert(!rex->getType()->hasSignedIntegerRepresentation() && 2481 "unsigned comparison between two signed integer expressions?"); 2482 signedOperand = lex; 2483 unsignedOperand = rex; 2484 } else if (rex->getType()->hasSignedIntegerRepresentation()) { 2485 signedOperand = rex; 2486 unsignedOperand = lex; 2487 } else { 2488 CheckTrivialUnsignedComparison(S, E); 2489 return AnalyzeImpConvsInComparison(S, E); 2490 } 2491 2492 // Otherwise, calculate the effective range of the signed operand. 
2493 IntRange signedRange = GetExprRange(S.Context, signedOperand); 2494 2495 // Go ahead and analyze implicit conversions in the operands. Note 2496 // that we skip the implicit conversions on both sides. 2497 AnalyzeImplicitConversions(S, lex); 2498 AnalyzeImplicitConversions(S, rex); 2499 2500 // If the signed range is non-negative, -Wsign-compare won't fire, 2501 // but we should still check for comparisons which are always true 2502 // or false. 2503 if (signedRange.NonNegative) 2504 return CheckTrivialUnsignedComparison(S, E); 2505 2506 // For (in)equality comparisons, if the unsigned operand is a 2507 // constant which cannot collide with a overflowed signed operand, 2508 // then reinterpreting the signed operand as unsigned will not 2509 // change the result of the comparison. 2510 if (E->isEqualityOp()) { 2511 unsigned comparisonWidth = S.Context.getIntWidth(T); 2512 IntRange unsignedRange = GetExprRange(S.Context, unsignedOperand); 2513 2514 // We should never be unable to prove that the unsigned operand is 2515 // non-negative. 2516 assert(unsignedRange.NonNegative && "unsigned range includes negative?"); 2517 2518 if (unsignedRange.Width < comparisonWidth) 2519 return; 2520 } 2521 2522 S.Diag(E->getOperatorLoc(), diag::warn_mixed_sign_comparison) 2523 << lex->getType() << rex->getType() 2524 << lex->getSourceRange() << rex->getSourceRange(); 2525} 2526 2527/// Diagnose an implicit cast; purely a helper for CheckImplicitConversion. 
2528void DiagnoseImpCast(Sema &S, Expr *E, QualType T, unsigned diag) { 2529 S.Diag(E->getExprLoc(), diag) << E->getType() << T << E->getSourceRange(); 2530} 2531 2532void CheckImplicitConversion(Sema &S, Expr *E, QualType T, 2533 bool *ICContext = 0) { 2534 if (E->isTypeDependent() || E->isValueDependent()) return; 2535 2536 const Type *Source = S.Context.getCanonicalType(E->getType()).getTypePtr(); 2537 const Type *Target = S.Context.getCanonicalType(T).getTypePtr(); 2538 if (Source == Target) return; 2539 if (Target->isDependentType()) return; 2540 2541 // Never diagnose implicit casts to bool. 2542 if (Target->isSpecificBuiltinType(BuiltinType::Bool)) 2543 return; 2544 2545 // Strip vector types. 2546 if (isa<VectorType>(Source)) { 2547 if (!isa<VectorType>(Target)) 2548 return DiagnoseImpCast(S, E, T, diag::warn_impcast_vector_scalar); 2549 2550 Source = cast<VectorType>(Source)->getElementType().getTypePtr(); 2551 Target = cast<VectorType>(Target)->getElementType().getTypePtr(); 2552 } 2553 2554 // Strip complex types. 2555 if (isa<ComplexType>(Source)) { 2556 if (!isa<ComplexType>(Target)) 2557 return DiagnoseImpCast(S, E, T, diag::warn_impcast_complex_scalar); 2558 2559 Source = cast<ComplexType>(Source)->getElementType().getTypePtr(); 2560 Target = cast<ComplexType>(Target)->getElementType().getTypePtr(); 2561 } 2562 2563 const BuiltinType *SourceBT = dyn_cast<BuiltinType>(Source); 2564 const BuiltinType *TargetBT = dyn_cast<BuiltinType>(Target); 2565 2566 // If the source is floating point... 2567 if (SourceBT && SourceBT->isFloatingPoint()) { 2568 // ...and the target is floating point... 2569 if (TargetBT && TargetBT->isFloatingPoint()) { 2570 // ...then warn if we're dropping FP rank. 2571 2572 // Builtin FP kinds are ordered by increasing FP rank. 2573 if (SourceBT->getKind() > TargetBT->getKind()) { 2574 // Don't warn about float constants that are precisely 2575 // representable in the target type. 
2576 Expr::EvalResult result; 2577 if (E->Evaluate(result, S.Context)) { 2578 // Value might be a float, a float vector, or a float complex. 2579 if (IsSameFloatAfterCast(result.Val, 2580 S.Context.getFloatTypeSemantics(QualType(TargetBT, 0)), 2581 S.Context.getFloatTypeSemantics(QualType(SourceBT, 0)))) 2582 return; 2583 } 2584 2585 DiagnoseImpCast(S, E, T, diag::warn_impcast_float_precision); 2586 } 2587 return; 2588 } 2589 2590 // If the target is integral, always warn. 2591 if ((TargetBT && TargetBT->isInteger())) 2592 // TODO: don't warn for integer values? 2593 DiagnoseImpCast(S, E, T, diag::warn_impcast_float_integer); 2594 2595 return; 2596 } 2597 2598 if (!Source->isIntegerType() || !Target->isIntegerType()) 2599 return; 2600 2601 IntRange SourceRange = GetExprRange(S.Context, E); 2602 IntRange TargetRange = IntRange::forCanonicalType(S.Context, Target); 2603 2604 if (SourceRange.Width > TargetRange.Width) { 2605 // People want to build with -Wshorten-64-to-32 and not -Wconversion 2606 // and by god we'll let them. 2607 if (SourceRange.Width == 64 && TargetRange.Width == 32) 2608 return DiagnoseImpCast(S, E, T, diag::warn_impcast_integer_64_32); 2609 return DiagnoseImpCast(S, E, T, diag::warn_impcast_integer_precision); 2610 } 2611 2612 if ((TargetRange.NonNegative && !SourceRange.NonNegative) || 2613 (!TargetRange.NonNegative && SourceRange.NonNegative && 2614 SourceRange.Width == TargetRange.Width)) { 2615 unsigned DiagID = diag::warn_impcast_integer_sign; 2616 2617 // Traditionally, gcc has warned about this under -Wsign-compare. 2618 // We also want to warn about it in -Wconversion. 2619 // So if -Wconversion is off, use a completely identical diagnostic 2620 // in the sign-compare group. 
2621 // The conditional-checking code will 2622 if (ICContext) { 2623 DiagID = diag::warn_impcast_integer_sign_conditional; 2624 *ICContext = true; 2625 } 2626 2627 return DiagnoseImpCast(S, E, T, DiagID); 2628 } 2629 2630 return; 2631} 2632 2633void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T); 2634 2635void CheckConditionalOperand(Sema &S, Expr *E, QualType T, 2636 bool &ICContext) { 2637 E = E->IgnoreParenImpCasts(); 2638 2639 if (isa<ConditionalOperator>(E)) 2640 return CheckConditionalOperator(S, cast<ConditionalOperator>(E), T); 2641 2642 AnalyzeImplicitConversions(S, E); 2643 if (E->getType() != T) 2644 return CheckImplicitConversion(S, E, T, &ICContext); 2645 return; 2646} 2647 2648void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T) { 2649 AnalyzeImplicitConversions(S, E->getCond()); 2650 2651 bool Suspicious = false; 2652 CheckConditionalOperand(S, E->getTrueExpr(), T, Suspicious); 2653 CheckConditionalOperand(S, E->getFalseExpr(), T, Suspicious); 2654 2655 // If -Wconversion would have warned about either of the candidates 2656 // for a signedness conversion to the context type... 2657 if (!Suspicious) return; 2658 2659 // ...but it's currently ignored... 2660 if (S.Diags.getDiagnosticLevel(diag::warn_impcast_integer_sign_conditional)) 2661 return; 2662 2663 // ...and -Wsign-compare isn't... 2664 if (!S.Diags.getDiagnosticLevel(diag::warn_mixed_sign_conditional)) 2665 return; 2666 2667 // ...then check whether it would have warned about either of the 2668 // candidates for a signedness conversion to the condition type. 2669 if (E->getType() != T) { 2670 Suspicious = false; 2671 CheckImplicitConversion(S, E->getTrueExpr()->IgnoreParenImpCasts(), 2672 E->getType(), &Suspicious); 2673 if (!Suspicious) 2674 CheckImplicitConversion(S, E->getFalseExpr()->IgnoreParenImpCasts(), 2675 E->getType(), &Suspicious); 2676 if (!Suspicious) 2677 return; 2678 } 2679 2680 // If so, emit a diagnostic under -Wsign-compare. 
2681 Expr *lex = E->getTrueExpr()->IgnoreParenImpCasts(); 2682 Expr *rex = E->getFalseExpr()->IgnoreParenImpCasts(); 2683 S.Diag(E->getQuestionLoc(), diag::warn_mixed_sign_conditional) 2684 << lex->getType() << rex->getType() 2685 << lex->getSourceRange() << rex->getSourceRange(); 2686} 2687 2688/// AnalyzeImplicitConversions - Find and report any interesting 2689/// implicit conversions in the given expression. There are a couple 2690/// of competing diagnostics here, -Wconversion and -Wsign-compare. 2691void AnalyzeImplicitConversions(Sema &S, Expr *OrigE) { 2692 QualType T = OrigE->getType(); 2693 Expr *E = OrigE->IgnoreParenImpCasts(); 2694 2695 // For conditional operators, we analyze the arguments as if they 2696 // were being fed directly into the output. 2697 if (isa<ConditionalOperator>(E)) { 2698 ConditionalOperator *CO = cast<ConditionalOperator>(E); 2699 CheckConditionalOperator(S, CO, T); 2700 return; 2701 } 2702 2703 // Go ahead and check any implicit conversions we might have skipped. 2704 // The non-canonical typecheck is just an optimization; 2705 // CheckImplicitConversion will filter out dead implicit conversions. 2706 if (E->getType() != T) 2707 CheckImplicitConversion(S, E, T); 2708 2709 // Now continue drilling into this expression. 2710 2711 // Skip past explicit casts. 2712 if (isa<ExplicitCastExpr>(E)) { 2713 E = cast<ExplicitCastExpr>(E)->getSubExpr()->IgnoreParenImpCasts(); 2714 return AnalyzeImplicitConversions(S, E); 2715 } 2716 2717 // Do a somewhat different check with comparison operators. 2718 if (isa<BinaryOperator>(E) && cast<BinaryOperator>(E)->isComparisonOp()) 2719 return AnalyzeComparison(S, cast<BinaryOperator>(E)); 2720 2721 // These break the otherwise-useful invariant below. Fortunately, 2722 // we don't really need to recurse into them, because any internal 2723 // expressions should have been analyzed already when they were 2724 // built into statements. 
2725 if (isa<StmtExpr>(E)) return; 2726 2727 // Don't descend into unevaluated contexts. 2728 if (isa<SizeOfAlignOfExpr>(E)) return; 2729 2730 // Now just recurse over the expression's children. 2731 for (Stmt::child_iterator I = E->child_begin(), IE = E->child_end(); 2732 I != IE; ++I) 2733 AnalyzeImplicitConversions(S, cast<Expr>(*I)); 2734} 2735 2736} // end anonymous namespace 2737 2738/// Diagnoses "dangerous" implicit conversions within the given 2739/// expression (which is a full expression). Implements -Wconversion 2740/// and -Wsign-compare. 2741void Sema::CheckImplicitConversions(Expr *E) { 2742 // Don't diagnose in unevaluated contexts. 2743 if (ExprEvalContexts.back().Context == Sema::Unevaluated) 2744 return; 2745 2746 // Don't diagnose for value- or type-dependent expressions. 2747 if (E->isTypeDependent() || E->isValueDependent()) 2748 return; 2749 2750 AnalyzeImplicitConversions(*this, E); 2751} 2752 2753/// CheckParmsForFunctionDef - Check that the parameters of the given 2754/// function are appropriate for the definition of a function. This 2755/// takes care of any checks that cannot be performed on the 2756/// declaration itself, e.g., that the types of each of the function 2757/// parameters are complete. 2758bool Sema::CheckParmsForFunctionDef(FunctionDecl *FD) { 2759 bool HasInvalidParm = false; 2760 for (unsigned p = 0, NumParams = FD->getNumParams(); p < NumParams; ++p) { 2761 ParmVarDecl *Param = FD->getParamDecl(p); 2762 2763 // C99 6.7.5.3p4: the parameters in a parameter type list in a 2764 // function declarator that is part of a function definition of 2765 // that function shall not have incomplete type. 2766 // 2767 // This is also C++ [dcl.fct]p6. 
2768 if (!Param->isInvalidDecl() && 2769 RequireCompleteType(Param->getLocation(), Param->getType(), 2770 diag::err_typecheck_decl_incomplete_type)) { 2771 Param->setInvalidDecl(); 2772 HasInvalidParm = true; 2773 } 2774 2775 // C99 6.9.1p5: If the declarator includes a parameter type list, the 2776 // declaration of each parameter shall include an identifier. 2777 if (Param->getIdentifier() == 0 && 2778 !Param->isImplicit() && 2779 !getLangOptions().CPlusPlus) 2780 Diag(Param->getLocation(), diag::err_parameter_name_omitted); 2781 2782 // C99 6.7.5.3p12: 2783 // If the function declarator is not part of a definition of that 2784 // function, parameters may have incomplete type and may use the [*] 2785 // notation in their sequences of declarator specifiers to specify 2786 // variable length array types. 2787 QualType PType = Param->getOriginalType(); 2788 if (const ArrayType *AT = Context.getAsArrayType(PType)) { 2789 if (AT->getSizeModifier() == ArrayType::Star) { 2790 // FIXME: This diagnosic should point the the '[*]' if source-location 2791 // information is added for it. 2792 Diag(Param->getLocation(), diag::err_array_star_in_function_definition); 2793 } 2794 } 2795 } 2796 2797 return HasInvalidParm; 2798} 2799