SemaChecking.cpp revision ff331c15729f7d4439d253c97f4d60f2a7ffd0c6
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/Analysis/Analyses/FormatString.h" 17#include "clang/AST/ASTContext.h" 18#include "clang/AST/CharUnits.h" 19#include "clang/AST/DeclObjC.h" 20#include "clang/AST/ExprCXX.h" 21#include "clang/AST/ExprObjC.h" 22#include "clang/AST/DeclObjC.h" 23#include "clang/AST/StmtCXX.h" 24#include "clang/AST/StmtObjC.h" 25#include "clang/Lex/LiteralSupport.h" 26#include "clang/Lex/Preprocessor.h" 27#include "llvm/ADT/BitVector.h" 28#include "llvm/ADT/STLExtras.h" 29#include "llvm/ADT/StringExtras.h" 30#include "llvm/Support/raw_ostream.h" 31#include "clang/Basic/TargetBuiltins.h" 32#include "clang/Basic/TargetInfo.h" 33#include <limits> 34using namespace clang; 35 36/// getLocationOfStringLiteralByte - Return a source location that points to the 37/// specified byte of the specified string literal. 38/// 39/// Strings are amazingly complex. They can be formed from multiple tokens and 40/// can have escape sequences in them in addition to the usual trigraph and 41/// escaped newline business. This routine handles this complexity. 42/// 43SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 44 unsigned ByteNo) const { 45 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 46 47 // Loop over all of the tokens in this string until we find the one that 48 // contains the byte we're looking for. 49 unsigned TokNo = 0; 50 while (1) { 51 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 52 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 53 54 // Get the spelling of the string so that we can get the data that makes up 55 // the string literal, not the identifier for the macro it is potentially 56 // expanded through. 57 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 58 59 // Re-lex the token to get its length and original spelling. 60 std::pair<FileID, unsigned> LocInfo = 61 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 62 bool Invalid = false; 63 llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid); 64 if (Invalid) 65 return StrTokSpellingLoc; 66 67 const char *StrData = Buffer.data()+LocInfo.second; 68 69 // Create a langops struct and enable trigraphs. This is sufficient for 70 // relexing tokens. 71 LangOptions LangOpts; 72 LangOpts.Trigraphs = true; 73 74 // Create a lexer starting at the beginning of this token. 75 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData, 76 Buffer.end()); 77 Token TheTok; 78 TheLexer.LexFromRawLexer(TheTok); 79 80 // Use the StringLiteralParser to compute the length of the string in bytes. 81 StringLiteralParser SLP(&TheTok, 1, PP, /*Complain=*/false); 82 unsigned TokNumBytes = SLP.GetStringLength(); 83 84 // If the byte is in this token, return the location of the byte. 85 if (ByteNo < TokNumBytes || 86 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 87 unsigned Offset = 88 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP, 89 /*Complain=*/false); 90 91 // Now that we know the offset of the token in the spelling, use the 92 // preprocessor to get the offset in the original source. 93 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 94 } 95 96 // Move to the next string token. 97 ++TokNo; 98 ByteNo -= TokNumBytes; 99 } 100} 101 102/// CheckablePrintfAttr - does a function call have a "printf" attribute 103/// and arguments that merit checking? 104bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) { 105 if (Format->getType() == "printf") return true; 106 if (Format->getType() == "printf0") { 107 // printf0 allows null "format" string; if so don't check format/args 108 unsigned format_idx = Format->getFormatIdx() - 1; 109 // Does the index refer to the implicit object argument? 110 if (isa<CXXMemberCallExpr>(TheCall)) { 111 if (format_idx == 0) 112 return false; 113 --format_idx; 114 } 115 if (format_idx < TheCall->getNumArgs()) { 116 Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts(); 117 if (!Format->isNullPointerConstant(Context, 118 Expr::NPC_ValueDependentIsNull)) 119 return true; 120 } 121 } 122 return false; 123} 124 125Action::OwningExprResult 126Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 127 OwningExprResult TheCallResult(Owned(TheCall)); 128 129 switch (BuiltinID) { 130 case Builtin::BI__builtin___CFStringMakeConstantString: 131 assert(TheCall->getNumArgs() == 1 && 132 "Wrong # arguments to builtin CFStringMakeConstantString"); 133 if (CheckObjCString(TheCall->getArg(0))) 134 return ExprError(); 135 break; 136 case Builtin::BI__builtin_stdarg_start: 137 case Builtin::BI__builtin_va_start: 138 if (SemaBuiltinVAStart(TheCall)) 139 return ExprError(); 140 break; 141 case Builtin::BI__builtin_isgreater: 142 case Builtin::BI__builtin_isgreaterequal: 143 case Builtin::BI__builtin_isless: 144 case Builtin::BI__builtin_islessequal: 145 case Builtin::BI__builtin_islessgreater: 146 case Builtin::BI__builtin_isunordered: 147 if (SemaBuiltinUnorderedCompare(TheCall)) 148 return ExprError(); 149 break; 150 case Builtin::BI__builtin_fpclassify: 151 if (SemaBuiltinFPClassification(TheCall, 6)) 152 return ExprError(); 153 break; 154 case Builtin::BI__builtin_isfinite: 155 case Builtin::BI__builtin_isinf: 156 case Builtin::BI__builtin_isinf_sign: 157 case Builtin::BI__builtin_isnan: 158 case Builtin::BI__builtin_isnormal: 159 if (SemaBuiltinFPClassification(TheCall, 1)) 160 return ExprError(); 161 break; 162 case Builtin::BI__builtin_return_address: 163 case Builtin::BI__builtin_frame_address: { 164 llvm::APSInt Result; 165 if (SemaBuiltinConstantArg(TheCall, 0, Result)) 166 return ExprError(); 167 break; 168 } 169 case Builtin::BI__builtin_eh_return_data_regno: { 170 llvm::APSInt Result; 171 if (SemaBuiltinConstantArg(TheCall, 0, Result)) 172 return ExprError(); 173 break; 174 } 175 case Builtin::BI__builtin_shufflevector: 176 return SemaBuiltinShuffleVector(TheCall); 177 // TheCall will be freed by the smart pointer here, but that's fine, since 178 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 179 case Builtin::BI__builtin_prefetch: 180 if (SemaBuiltinPrefetch(TheCall)) 181 return ExprError(); 182 break; 183 case Builtin::BI__builtin_object_size: 184 if (SemaBuiltinObjectSize(TheCall)) 185 return ExprError(); 186 break; 187 case Builtin::BI__builtin_longjmp: 188 if (SemaBuiltinLongjmp(TheCall)) 189 return ExprError(); 190 break; 191 case Builtin::BI__sync_fetch_and_add: 192 case Builtin::BI__sync_fetch_and_sub: 193 case Builtin::BI__sync_fetch_and_or: 194 case Builtin::BI__sync_fetch_and_and: 195 case Builtin::BI__sync_fetch_and_xor: 196 case Builtin::BI__sync_add_and_fetch: 197 case Builtin::BI__sync_sub_and_fetch: 198 case Builtin::BI__sync_and_and_fetch: 199 case Builtin::BI__sync_or_and_fetch: 200 case Builtin::BI__sync_xor_and_fetch: 201 case Builtin::BI__sync_val_compare_and_swap: 202 case Builtin::BI__sync_bool_compare_and_swap: 203 case Builtin::BI__sync_lock_test_and_set: 204 case Builtin::BI__sync_lock_release: 205 return SemaBuiltinAtomicOverloaded(move(TheCallResult)); 206 } 207 208 // Since the target specific builtins for each arch overlap, only check those 209 // of the arch we are compiling for. 210 if (BuiltinID >= Builtin::FirstTSBuiltin) { 211 switch (Context.Target.getTriple().getArch()) { 212 case llvm::Triple::arm: 213 case llvm::Triple::thumb: 214 if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall)) 215 return ExprError(); 216 break; 217 case llvm::Triple::x86: 218 case llvm::Triple::x86_64: 219 if (CheckX86BuiltinFunctionCall(BuiltinID, TheCall)) 220 return ExprError(); 221 break; 222 default: 223 break; 224 } 225 } 226 227 return move(TheCallResult); 228} 229 230bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 231 switch (BuiltinID) { 232 case X86::BI__builtin_ia32_palignr128: 233 case X86::BI__builtin_ia32_palignr: { 234 llvm::APSInt Result; 235 if (SemaBuiltinConstantArg(TheCall, 2, Result)) 236 return true; 237 break; 238 } 239 } 240 return false; 241} 242 243// Get the valid immediate range for the specified NEON type code. 244static unsigned RFT(unsigned t, bool shift = false) { 245 bool quad = t & 0x10; 246 247 switch (t & 0x7) { 248 case 0: // i8 249 return shift ? 7 : (8 << (int)quad) - 1; 250 case 1: // i16 251 return shift ? 15 : (4 << (int)quad) - 1; 252 case 2: // i32 253 return shift ? 31 : (2 << (int)quad) - 1; 254 case 3: // i64 255 return shift ? 63 : (1 << (int)quad) - 1; 256 case 4: // f32 257 assert(!shift && "cannot shift float types!"); 258 return (2 << (int)quad) - 1; 259 case 5: // poly8 260 assert(!shift && "cannot shift polynomial types!"); 261 return (8 << (int)quad) - 1; 262 case 6: // poly16 263 assert(!shift && "cannot shift polynomial types!"); 264 return (4 << (int)quad) - 1; 265 case 7: // float16 266 assert(!shift && "cannot shift float types!"); 267 return (4 << (int)quad) - 1; 268 } 269 return 0; 270} 271 272bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 273 llvm::APSInt Result; 274 275 unsigned mask = 0; 276 unsigned TV = 0; 277 switch (BuiltinID) { 278#define GET_NEON_OVERLOAD_CHECK 279#include "clang/Basic/arm_neon.inc" 280#undef GET_NEON_OVERLOAD_CHECK 281 } 282 283 // For NEON intrinsics which are overloaded on vector element type, validate 284 // the immediate which specifies which variant to emit. 285 if (mask) { 286 unsigned ArgNo = TheCall->getNumArgs()-1; 287 if (SemaBuiltinConstantArg(TheCall, ArgNo, Result)) 288 return true; 289 290 TV = Result.getLimitedValue(32); 291 if ((TV > 31) || (mask & (1 << TV)) == 0) 292 return Diag(TheCall->getLocStart(), diag::err_invalid_neon_type_code) 293 << TheCall->getArg(ArgNo)->getSourceRange(); 294 } 295 296 // For NEON intrinsics which take an immediate value as part of the 297 // instruction, range check them here. 298 unsigned i = 0, l = 0, u = 0; 299 switch (BuiltinID) { 300 default: return false; 301#define GET_NEON_IMMEDIATE_CHECK 302#include "clang/Basic/arm_neon.inc" 303#undef GET_NEON_IMMEDIATE_CHECK 304 }; 305 306 // Check that the immediate argument is actually a constant. 307 if (SemaBuiltinConstantArg(TheCall, i, Result)) 308 return true; 309 310 // Range check against the upper/lower values for this isntruction. 311 unsigned Val = Result.getZExtValue(); 312 if (Val < l || Val > (u + l)) 313 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 314 << llvm::utostr(l) << llvm::utostr(u+l) 315 << TheCall->getArg(i)->getSourceRange(); 316 317 return false; 318} 319 320/// CheckFunctionCall - Check a direct function call for various correctness 321/// and safety properties not strictly enforced by the C type system. 322bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 323 // Get the IdentifierInfo* for the called function. 324 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 325 326 // None of the checks below are needed for functions that don't have 327 // simple names (e.g., C++ conversion functions). 328 if (!FnInfo) 329 return false; 330 331 // FIXME: This mechanism should be abstracted to be less fragile and 332 // more efficient. For example, just map function ids to custom 333 // handlers. 334 335 // Printf checking. 336 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 337 const bool b = Format->getType() == "scanf"; 338 if (b || CheckablePrintfAttr(Format, TheCall)) { 339 bool HasVAListArg = Format->getFirstArg() == 0; 340 CheckPrintfScanfArguments(TheCall, HasVAListArg, 341 Format->getFormatIdx() - 1, 342 HasVAListArg ? 0 : Format->getFirstArg() - 1, 343 !b); 344 } 345 } 346 347 for (const NonNullAttr *NonNull = FDecl->getAttr<NonNullAttr>(); NonNull; 348 NonNull = NonNull->getNext<NonNullAttr>()) 349 CheckNonNullArguments(NonNull, TheCall); 350 351 return false; 352} 353 354bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { 355 // Printf checking. 356 const FormatAttr *Format = NDecl->getAttr<FormatAttr>(); 357 if (!Format) 358 return false; 359 360 const VarDecl *V = dyn_cast<VarDecl>(NDecl); 361 if (!V) 362 return false; 363 364 QualType Ty = V->getType(); 365 if (!Ty->isBlockPointerType()) 366 return false; 367 368 const bool b = Format->getType() == "scanf"; 369 if (!b && !CheckablePrintfAttr(Format, TheCall)) 370 return false; 371 372 bool HasVAListArg = Format->getFirstArg() == 0; 373 CheckPrintfScanfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 374 HasVAListArg ? 0 : Format->getFirstArg() - 1, !b); 375 376 return false; 377} 378 379/// SemaBuiltinAtomicOverloaded - We have a call to a function like 380/// __sync_fetch_and_add, which is an overloaded function based on the pointer 381/// type of its first argument. The main ActOnCallExpr routines have already 382/// promoted the types of arguments because all of these calls are prototyped as 383/// void(...). 384/// 385/// This function goes through and does final semantic checking for these 386/// builtins, 387Sema::OwningExprResult 388Sema::SemaBuiltinAtomicOverloaded(OwningExprResult TheCallResult) { 389 CallExpr *TheCall = (CallExpr *)TheCallResult.get(); 390 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 391 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 392 393 // Ensure that we have at least one argument to do type inference from. 394 if (TheCall->getNumArgs() < 1) { 395 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) 396 << 0 << 1 << TheCall->getNumArgs() 397 << TheCall->getCallee()->getSourceRange(); 398 return ExprError(); 399 } 400 401 // Inspect the first argument of the atomic builtin. This should always be 402 // a pointer type, whose element is an integral scalar or pointer type. 403 // Because it is a pointer type, we don't have to worry about any implicit 404 // casts here. 405 // FIXME: We don't allow floating point scalars as input. 406 Expr *FirstArg = TheCall->getArg(0); 407 if (!FirstArg->getType()->isPointerType()) { 408 Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer) 409 << FirstArg->getType() << FirstArg->getSourceRange(); 410 return ExprError(); 411 } 412 413 QualType ValType = 414 FirstArg->getType()->getAs<PointerType>()->getPointeeType(); 415 if (!ValType->isIntegerType() && !ValType->isPointerType() && 416 !ValType->isBlockPointerType()) { 417 Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer_intptr) 418 << FirstArg->getType() << FirstArg->getSourceRange(); 419 return ExprError(); 420 } 421 422 // The majority of builtins return a value, but a few have special return 423 // types, so allow them to override appropriately below. 424 QualType ResultType = ValType; 425 426 // We need to figure out which concrete builtin this maps onto. For example, 427 // __sync_fetch_and_add with a 2 byte object turns into 428 // __sync_fetch_and_add_2. 429#define BUILTIN_ROW(x) \ 430 { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \ 431 Builtin::BI##x##_8, Builtin::BI##x##_16 } 432 433 static const unsigned BuiltinIndices[][5] = { 434 BUILTIN_ROW(__sync_fetch_and_add), 435 BUILTIN_ROW(__sync_fetch_and_sub), 436 BUILTIN_ROW(__sync_fetch_and_or), 437 BUILTIN_ROW(__sync_fetch_and_and), 438 BUILTIN_ROW(__sync_fetch_and_xor), 439 440 BUILTIN_ROW(__sync_add_and_fetch), 441 BUILTIN_ROW(__sync_sub_and_fetch), 442 BUILTIN_ROW(__sync_and_and_fetch), 443 BUILTIN_ROW(__sync_or_and_fetch), 444 BUILTIN_ROW(__sync_xor_and_fetch), 445 446 BUILTIN_ROW(__sync_val_compare_and_swap), 447 BUILTIN_ROW(__sync_bool_compare_and_swap), 448 BUILTIN_ROW(__sync_lock_test_and_set), 449 BUILTIN_ROW(__sync_lock_release) 450 }; 451#undef BUILTIN_ROW 452 453 // Determine the index of the size. 454 unsigned SizeIndex; 455 switch (Context.getTypeSizeInChars(ValType).getQuantity()) { 456 case 1: SizeIndex = 0; break; 457 case 2: SizeIndex = 1; break; 458 case 4: SizeIndex = 2; break; 459 case 8: SizeIndex = 3; break; 460 case 16: SizeIndex = 4; break; 461 default: 462 Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size) 463 << FirstArg->getType() << FirstArg->getSourceRange(); 464 return ExprError(); 465 } 466 467 // Each of these builtins has one pointer argument, followed by some number of 468 // values (0, 1 or 2) followed by a potentially empty varags list of stuff 469 // that we ignore. Find out which row of BuiltinIndices to read from as well 470 // as the number of fixed args. 471 unsigned BuiltinID = FDecl->getBuiltinID(); 472 unsigned BuiltinIndex, NumFixed = 1; 473 switch (BuiltinID) { 474 default: assert(0 && "Unknown overloaded atomic builtin!"); 475 case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break; 476 case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break; 477 case Builtin::BI__sync_fetch_and_or: BuiltinIndex = 2; break; 478 case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break; 479 case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break; 480 481 case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 5; break; 482 case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 6; break; 483 case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 7; break; 484 case Builtin::BI__sync_or_and_fetch: BuiltinIndex = 8; break; 485 case Builtin::BI__sync_xor_and_fetch: BuiltinIndex = 9; break; 486 487 case Builtin::BI__sync_val_compare_and_swap: 488 BuiltinIndex = 10; 489 NumFixed = 2; 490 break; 491 case Builtin::BI__sync_bool_compare_and_swap: 492 BuiltinIndex = 11; 493 NumFixed = 2; 494 ResultType = Context.BoolTy; 495 break; 496 case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 12; break; 497 case Builtin::BI__sync_lock_release: 498 BuiltinIndex = 13; 499 NumFixed = 0; 500 ResultType = Context.VoidTy; 501 break; 502 } 503 504 // Now that we know how many fixed arguments we expect, first check that we 505 // have at least that many. 506 if (TheCall->getNumArgs() < 1+NumFixed) { 507 Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args_at_least) 508 << 0 << 1+NumFixed << TheCall->getNumArgs() 509 << TheCall->getCallee()->getSourceRange(); 510 return ExprError(); 511 } 512 513 // Get the decl for the concrete builtin from this, we can tell what the 514 // concrete integer type we should convert to is. 515 unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex]; 516 const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID); 517 IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName); 518 FunctionDecl *NewBuiltinDecl = 519 cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID, 520 TUScope, false, DRE->getLocStart())); 521 522 // The first argument is by definition correct, we use it's type as the type 523 // of the entire operation. Walk the remaining arguments promoting them to 524 // the deduced value type. 525 for (unsigned i = 0; i != NumFixed; ++i) { 526 Expr *Arg = TheCall->getArg(i+1); 527 528 // If the argument is an implicit cast, then there was a promotion due to 529 // "...", just remove it now. 530 if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) { 531 Arg = ICE->getSubExpr(); 532 ICE->setSubExpr(0); 533 TheCall->setArg(i+1, Arg); 534 } 535 536 // GCC does an implicit conversion to the pointer or integer ValType. This 537 // can fail in some cases (1i -> int**), check for this error case now. 538 CastExpr::CastKind Kind = CastExpr::CK_Unknown; 539 CXXBaseSpecifierArray BasePath; 540 if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg, Kind, BasePath)) 541 return ExprError(); 542 543 // Okay, we have something that *can* be converted to the right type. Check 544 // to see if there is a potentially weird extension going on here. This can 545 // happen when you do an atomic operation on something like an char* and 546 // pass in 42. The 42 gets converted to char. This is even more strange 547 // for things like 45.123 -> char, etc. 548 // FIXME: Do this check. 549 ImpCastExprToType(Arg, ValType, Kind); 550 TheCall->setArg(i+1, Arg); 551 } 552 553 // Switch the DeclRefExpr to refer to the new decl. 554 DRE->setDecl(NewBuiltinDecl); 555 DRE->setType(NewBuiltinDecl->getType()); 556 557 // Set the callee in the CallExpr. 558 // FIXME: This leaks the original parens and implicit casts. 559 Expr *PromotedCall = DRE; 560 UsualUnaryConversions(PromotedCall); 561 TheCall->setCallee(PromotedCall); 562 563 // Change the result type of the call to match the original value type. This 564 // is arbitrary, but the codegen for these builtins ins design to handle it 565 // gracefully. 566 TheCall->setType(ResultType); 567 568 return move(TheCallResult); 569} 570 571 572/// CheckObjCString - Checks that the argument to the builtin 573/// CFString constructor is correct 574/// FIXME: GCC currently emits the following warning: 575/// "warning: input conversion stopped due to an input byte that does not 576/// belong to the input codeset UTF-8" 577/// Note: It might also make sense to do the UTF-16 conversion here (would 578/// simplify the backend). 579bool Sema::CheckObjCString(Expr *Arg) { 580 Arg = Arg->IgnoreParenCasts(); 581 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 582 583 if (!Literal || Literal->isWide()) { 584 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 585 << Arg->getSourceRange(); 586 return true; 587 } 588 589 const char *Data = Literal->getStrData(); 590 unsigned Length = Literal->getByteLength(); 591 592 for (unsigned i = 0; i < Length; ++i) { 593 if (!Data[i]) { 594 Diag(getLocationOfStringLiteralByte(Literal, i), 595 diag::warn_cfstring_literal_contains_nul_character) 596 << Arg->getSourceRange(); 597 break; 598 } 599 } 600 601 return false; 602} 603 604/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 605/// Emit an error and return true on failure, return false on success. 606bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 607 Expr *Fn = TheCall->getCallee(); 608 if (TheCall->getNumArgs() > 2) { 609 Diag(TheCall->getArg(2)->getLocStart(), 610 diag::err_typecheck_call_too_many_args) 611 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 612 << Fn->getSourceRange() 613 << SourceRange(TheCall->getArg(2)->getLocStart(), 614 (*(TheCall->arg_end()-1))->getLocEnd()); 615 return true; 616 } 617 618 if (TheCall->getNumArgs() < 2) { 619 return Diag(TheCall->getLocEnd(), 620 diag::err_typecheck_call_too_few_args_at_least) 621 << 0 /*function call*/ << 2 << TheCall->getNumArgs(); 622 } 623 624 // Determine whether the current function is variadic or not. 625 BlockScopeInfo *CurBlock = getCurBlock(); 626 bool isVariadic; 627 if (CurBlock) 628 isVariadic = CurBlock->TheDecl->isVariadic(); 629 else if (FunctionDecl *FD = getCurFunctionDecl()) 630 isVariadic = FD->isVariadic(); 631 else 632 isVariadic = getCurMethodDecl()->isVariadic(); 633 634 if (!isVariadic) { 635 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 636 return true; 637 } 638 639 // Verify that the second argument to the builtin is the last argument of the 640 // current function or method. 641 bool SecondArgIsLastNamedArgument = false; 642 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 643 644 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 645 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 646 // FIXME: This isn't correct for methods (results in bogus warning). 647 // Get the last formal in the current function. 648 const ParmVarDecl *LastArg; 649 if (CurBlock) 650 LastArg = *(CurBlock->TheDecl->param_end()-1); 651 else if (FunctionDecl *FD = getCurFunctionDecl()) 652 LastArg = *(FD->param_end()-1); 653 else 654 LastArg = *(getCurMethodDecl()->param_end()-1); 655 SecondArgIsLastNamedArgument = PV == LastArg; 656 } 657 } 658 659 if (!SecondArgIsLastNamedArgument) 660 Diag(TheCall->getArg(1)->getLocStart(), 661 diag::warn_second_parameter_of_va_start_not_last_named_argument); 662 return false; 663} 664 665/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 666/// friends. This is declared to take (...), so we have to check everything. 667bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 668 if (TheCall->getNumArgs() < 2) 669 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 670 << 0 << 2 << TheCall->getNumArgs()/*function call*/; 671 if (TheCall->getNumArgs() > 2) 672 return Diag(TheCall->getArg(2)->getLocStart(), 673 diag::err_typecheck_call_too_many_args) 674 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 675 << SourceRange(TheCall->getArg(2)->getLocStart(), 676 (*(TheCall->arg_end()-1))->getLocEnd()); 677 678 Expr *OrigArg0 = TheCall->getArg(0); 679 Expr *OrigArg1 = TheCall->getArg(1); 680 681 // Do standard promotions between the two arguments, returning their common 682 // type. 683 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 684 685 // Make sure any conversions are pushed back into the call; this is 686 // type safe since unordered compare builtins are declared as "_Bool 687 // foo(...)". 688 TheCall->setArg(0, OrigArg0); 689 TheCall->setArg(1, OrigArg1); 690 691 if (OrigArg0->isTypeDependent() || OrigArg1->isTypeDependent()) 692 return false; 693 694 // If the common type isn't a real floating type, then the arguments were 695 // invalid for this operation. 696 if (!Res->isRealFloatingType()) 697 return Diag(OrigArg0->getLocStart(), 698 diag::err_typecheck_call_invalid_ordered_compare) 699 << OrigArg0->getType() << OrigArg1->getType() 700 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 701 702 return false; 703} 704 705/// SemaBuiltinSemaBuiltinFPClassification - Handle functions like 706/// __builtin_isnan and friends. This is declared to take (...), so we have 707/// to check everything. We expect the last argument to be a floating point 708/// value. 709bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs) { 710 if (TheCall->getNumArgs() < NumArgs) 711 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 712 << 0 << NumArgs << TheCall->getNumArgs()/*function call*/; 713 if (TheCall->getNumArgs() > NumArgs) 714 return Diag(TheCall->getArg(NumArgs)->getLocStart(), 715 diag::err_typecheck_call_too_many_args) 716 << 0 /*function call*/ << NumArgs << TheCall->getNumArgs() 717 << SourceRange(TheCall->getArg(NumArgs)->getLocStart(), 718 (*(TheCall->arg_end()-1))->getLocEnd()); 719 720 Expr *OrigArg = TheCall->getArg(NumArgs-1); 721 722 if (OrigArg->isTypeDependent()) 723 return false; 724 725 // This operation requires a non-_Complex floating-point number. 726 if (!OrigArg->getType()->isRealFloatingType()) 727 return Diag(OrigArg->getLocStart(), 728 diag::err_typecheck_call_invalid_unary_fp) 729 << OrigArg->getType() << OrigArg->getSourceRange(); 730 731 // If this is an implicit conversion from float -> double, remove it. 732 if (ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(OrigArg)) { 733 Expr *CastArg = Cast->getSubExpr(); 734 if (CastArg->getType()->isSpecificBuiltinType(BuiltinType::Float)) { 735 assert(Cast->getType()->isSpecificBuiltinType(BuiltinType::Double) && 736 "promotion from float to double is the only expected cast here"); 737 Cast->setSubExpr(0); 738 TheCall->setArg(NumArgs-1, CastArg); 739 OrigArg = CastArg; 740 } 741 } 742 743 return false; 744} 745 746/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 747// This is declared to take (...), so we have to check everything. 748Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 749 if (TheCall->getNumArgs() < 2) 750 return ExprError(Diag(TheCall->getLocEnd(), 751 diag::err_typecheck_call_too_few_args_at_least) 752 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 753 << TheCall->getSourceRange()); 754 755 // Determine which of the following types of shufflevector we're checking: 756 // 1) unary, vector mask: (lhs, mask) 757 // 2) binary, vector mask: (lhs, rhs, mask) 758 // 3) binary, scalar mask: (lhs, rhs, index, ..., index) 759 QualType resType = TheCall->getArg(0)->getType(); 760 unsigned numElements = 0; 761 762 if (!TheCall->getArg(0)->isTypeDependent() && 763 !TheCall->getArg(1)->isTypeDependent()) { 764 QualType LHSType = TheCall->getArg(0)->getType(); 765 QualType RHSType = TheCall->getArg(1)->getType(); 766 767 if (!LHSType->isVectorType() || !RHSType->isVectorType()) { 768 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 769 << SourceRange(TheCall->getArg(0)->getLocStart(), 770 TheCall->getArg(1)->getLocEnd()); 771 return ExprError(); 772 } 773 774 numElements = LHSType->getAs<VectorType>()->getNumElements(); 775 unsigned numResElements = TheCall->getNumArgs() - 2; 776 777 // Check to see if we have a call with 2 vector arguments, the unary shuffle 778 // with mask. If so, verify that RHS is an integer vector type with the 779 // same number of elts as lhs. 780 if (TheCall->getNumArgs() == 2) { 781 if (!RHSType->hasIntegerRepresentation() || 782 RHSType->getAs<VectorType>()->getNumElements() != numElements) 783 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 784 << SourceRange(TheCall->getArg(1)->getLocStart(), 785 TheCall->getArg(1)->getLocEnd()); 786 numResElements = numElements; 787 } 788 else if (!Context.hasSameUnqualifiedType(LHSType, RHSType)) { 789 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 790 << SourceRange(TheCall->getArg(0)->getLocStart(), 791 TheCall->getArg(1)->getLocEnd()); 792 return ExprError(); 793 } else if (numElements != numResElements) { 794 QualType eltType = LHSType->getAs<VectorType>()->getElementType(); 795 resType = Context.getVectorType(eltType, numResElements, 796 VectorType::NotAltiVec); 797 } 798 } 799 800 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 801 if (TheCall->getArg(i)->isTypeDependent() || 802 TheCall->getArg(i)->isValueDependent()) 803 continue; 804 805 llvm::APSInt Result(32); 806 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 807 return ExprError(Diag(TheCall->getLocStart(), 808 diag::err_shufflevector_nonconstant_argument) 809 << TheCall->getArg(i)->getSourceRange()); 810 811 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 812 return ExprError(Diag(TheCall->getLocStart(), 813 diag::err_shufflevector_argument_too_large) 814 << TheCall->getArg(i)->getSourceRange()); 815 } 816 817 llvm::SmallVector<Expr*, 32> exprs; 818 819 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 820 exprs.push_back(TheCall->getArg(i)); 821 TheCall->setArg(i, 0); 822 } 823 824 return Owned(new (Context) ShuffleVectorExpr(Context, exprs.begin(), 825 exprs.size(), resType, 826 TheCall->getCallee()->getLocStart(), 827 TheCall->getRParenLoc())); 828} 829 830/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 831// This is declared to take (const void*, ...) and can take two 832// optional constant int args. 833bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 834 unsigned NumArgs = TheCall->getNumArgs(); 835 836 if (NumArgs > 3) 837 return Diag(TheCall->getLocEnd(), 838 diag::err_typecheck_call_too_many_args_at_most) 839 << 0 /*function call*/ << 3 << NumArgs 840 << TheCall->getSourceRange(); 841 842 // Argument 0 is checked for us and the remaining arguments must be 843 // constant integers. 844 for (unsigned i = 1; i != NumArgs; ++i) { 845 Expr *Arg = TheCall->getArg(i); 846 847 llvm::APSInt Result; 848 if (SemaBuiltinConstantArg(TheCall, i, Result)) 849 return true; 850 851 // FIXME: gcc issues a warning and rewrites these to 0. These 852 // seems especially odd for the third argument since the default 853 // is 3. 854 if (i == 1) { 855 if (Result.getLimitedValue() > 1) 856 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 857 << "0" << "1" << Arg->getSourceRange(); 858 } else { 859 if (Result.getLimitedValue() > 3) 860 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 861 << "0" << "3" << Arg->getSourceRange(); 862 } 863 } 864 865 return false; 866} 867 868/// SemaBuiltinConstantArg - Handle a check if argument ArgNum of CallExpr 869/// TheCall is a constant expression. 870bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, 871 llvm::APSInt &Result) { 872 Expr *Arg = TheCall->getArg(ArgNum); 873 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 874 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 875 876 if (Arg->isTypeDependent() || Arg->isValueDependent()) return false; 877 878 if (!Arg->isIntegerConstantExpr(Result, Context)) 879 return Diag(TheCall->getLocStart(), diag::err_constant_integer_arg_type) 880 << FDecl->getDeclName() << Arg->getSourceRange(); 881 882 return false; 883} 884 885/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 886/// int type). This simply type checks that type is one of the defined 887/// constants (0-3). 888// For compatability check 0-3, llvm only handles 0 and 2. 889bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 890 llvm::APSInt Result; 891 892 // Check constant-ness first. 893 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 894 return true; 895 896 Expr *Arg = TheCall->getArg(1); 897 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 898 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 899 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 900 } 901 902 return false; 903} 904 905/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). 906/// This checks that val is a constant 1. 907bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 908 Expr *Arg = TheCall->getArg(1); 909 llvm::APSInt Result; 910 911 // TODO: This is less than ideal. Overload this to take a value. 912 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 913 return true; 914 915 if (Result != 1) 916 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 917 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 918 919 return false; 920} 921 922// Handle i > 1 ? "x" : "y", recursivelly 923bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 924 bool HasVAListArg, 925 unsigned format_idx, unsigned firstDataArg, 926 bool isPrintf) { 927 928 if (E->isTypeDependent() || E->isValueDependent()) 929 return false; 930 931 switch (E->getStmtClass()) { 932 case Stmt::ConditionalOperatorClass: { 933 const ConditionalOperator *C = cast<ConditionalOperator>(E); 934 return SemaCheckStringLiteral(C->getTrueExpr(), TheCall, HasVAListArg, 935 format_idx, firstDataArg, isPrintf) 936 && SemaCheckStringLiteral(C->getRHS(), TheCall, HasVAListArg, 937 format_idx, firstDataArg, isPrintf); 938 } 939 940 case Stmt::ImplicitCastExprClass: { 941 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 942 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 943 format_idx, firstDataArg, isPrintf); 944 } 945 946 case Stmt::ParenExprClass: { 947 const ParenExpr *Expr = cast<ParenExpr>(E); 948 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 949 format_idx, firstDataArg, isPrintf); 950 } 951 952 case Stmt::DeclRefExprClass: { 953 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 954 955 // As an exception, do not flag errors for variables binding to 956 // const string literals. 957 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 958 bool isConstant = false; 959 QualType T = DR->getType(); 960 961 if (const ArrayType *AT = Context.getAsArrayType(T)) { 962 isConstant = AT->getElementType().isConstant(Context); 963 } else if (const PointerType *PT = T->getAs<PointerType>()) { 964 isConstant = T.isConstant(Context) && 965 PT->getPointeeType().isConstant(Context); 966 } 967 968 if (isConstant) { 969 if (const Expr *Init = VD->getAnyInitializer()) 970 return SemaCheckStringLiteral(Init, TheCall, 971 HasVAListArg, format_idx, firstDataArg, 972 isPrintf); 973 } 974 975 // For vprintf* functions (i.e., HasVAListArg==true), we add a 976 // special check to see if the format string is a function parameter 977 // of the function calling the printf function. If the function 978 // has an attribute indicating it is a printf-like function, then we 979 // should suppress warnings concerning non-literals being used in a call 980 // to a vprintf function. For example: 981 // 982 // void 983 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){ 984 // va_list ap; 985 // va_start(ap, fmt); 986 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 987 // ... 988 // 989 // 990 // FIXME: We don't have full attribute support yet, so just check to see 991 // if the argument is a DeclRefExpr that references a parameter. We'll 992 // add proper support for checking the attribute later. 993 if (HasVAListArg) 994 if (isa<ParmVarDecl>(VD)) 995 return true; 996 } 997 998 return false; 999 } 1000 1001 case Stmt::CallExprClass: { 1002 const CallExpr *CE = cast<CallExpr>(E); 1003 if (const ImplicitCastExpr *ICE 1004 = dyn_cast<ImplicitCastExpr>(CE->getCallee())) { 1005 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) { 1006 if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) { 1007 if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) { 1008 unsigned ArgIndex = FA->getFormatIdx(); 1009 const Expr *Arg = CE->getArg(ArgIndex - 1); 1010 1011 return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg, 1012 format_idx, firstDataArg, isPrintf); 1013 } 1014 } 1015 } 1016 } 1017 1018 return false; 1019 } 1020 case Stmt::ObjCStringLiteralClass: 1021 case Stmt::StringLiteralClass: { 1022 const StringLiteral *StrE = NULL; 1023 1024 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 1025 StrE = ObjCFExpr->getString(); 1026 else 1027 StrE = cast<StringLiteral>(E); 1028 1029 if (StrE) { 1030 CheckFormatString(StrE, E, TheCall, HasVAListArg, format_idx, 1031 firstDataArg, isPrintf); 1032 return true; 1033 } 1034 1035 return false; 1036 } 1037 1038 default: 1039 return false; 1040 } 1041} 1042 1043void 1044Sema::CheckNonNullArguments(const NonNullAttr *NonNull, 1045 const CallExpr *TheCall) { 1046 for (NonNullAttr::iterator i = NonNull->begin(), e = NonNull->end(); 1047 i != e; ++i) { 1048 const Expr *ArgExpr = TheCall->getArg(*i); 1049 if (ArgExpr->isNullPointerConstant(Context, 1050 Expr::NPC_ValueDependentIsNotNull)) 1051 Diag(TheCall->getCallee()->getLocStart(), diag::warn_null_arg) 1052 << ArgExpr->getSourceRange(); 1053 } 1054} 1055 1056/// CheckPrintfScanfArguments - Check calls to printf and scanf (and similar 1057/// functions) for correct use of format strings. 1058void 1059Sema::CheckPrintfScanfArguments(const CallExpr *TheCall, bool HasVAListArg, 1060 unsigned format_idx, unsigned firstDataArg, 1061 bool isPrintf) { 1062 1063 const Expr *Fn = TheCall->getCallee(); 1064 1065 // The way the format attribute works in GCC, the implicit this argument 1066 // of member functions is counted. However, it doesn't appear in our own 1067 // lists, so decrement format_idx in that case. 1068 if (isa<CXXMemberCallExpr>(TheCall)) { 1069 // Catch a format attribute mistakenly referring to the object argument. 1070 if (format_idx == 0) 1071 return; 1072 --format_idx; 1073 if(firstDataArg != 0) 1074 --firstDataArg; 1075 } 1076 1077 // CHECK: printf/scanf-like function is called with no format string. 1078 if (format_idx >= TheCall->getNumArgs()) { 1079 Diag(TheCall->getRParenLoc(), diag::warn_missing_format_string) 1080 << Fn->getSourceRange(); 1081 return; 1082 } 1083 1084 const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 1085 1086 // CHECK: format string is not a string literal. 1087 // 1088 // Dynamically generated format strings are difficult to 1089 // automatically vet at compile time. Requiring that format strings 1090 // are string literals: (1) permits the checking of format strings by 1091 // the compiler and thereby (2) can practically remove the source of 1092 // many format string exploits. 1093 1094 // Format string can be either ObjC string (e.g. @"%d") or 1095 // C string (e.g. "%d") 1096 // ObjC string uses the same format specifiers as C string, so we can use 1097 // the same format string checking logic for both ObjC and C strings. 1098 if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx, 1099 firstDataArg, isPrintf)) 1100 return; // Literal format string found, check done! 1101 1102 // If there are no arguments specified, warn with -Wformat-security, otherwise 1103 // warn only with -Wformat-nonliteral. 1104 if (TheCall->getNumArgs() == format_idx+1) 1105 Diag(TheCall->getArg(format_idx)->getLocStart(), 1106 diag::warn_format_nonliteral_noargs) 1107 << OrigFormatExpr->getSourceRange(); 1108 else 1109 Diag(TheCall->getArg(format_idx)->getLocStart(), 1110 diag::warn_format_nonliteral) 1111 << OrigFormatExpr->getSourceRange(); 1112} 1113 1114namespace { 1115class CheckFormatHandler : public analyze_format_string::FormatStringHandler { 1116protected: 1117 Sema &S; 1118 const StringLiteral *FExpr; 1119 const Expr *OrigFormatExpr; 1120 const unsigned FirstDataArg; 1121 const unsigned NumDataArgs; 1122 const bool IsObjCLiteral; 1123 const char *Beg; // Start of format string. 1124 const bool HasVAListArg; 1125 const CallExpr *TheCall; 1126 unsigned FormatIdx; 1127 llvm::BitVector CoveredArgs; 1128 bool usesPositionalArgs; 1129 bool atFirstArg; 1130public: 1131 CheckFormatHandler(Sema &s, const StringLiteral *fexpr, 1132 const Expr *origFormatExpr, unsigned firstDataArg, 1133 unsigned numDataArgs, bool isObjCLiteral, 1134 const char *beg, bool hasVAListArg, 1135 const CallExpr *theCall, unsigned formatIdx) 1136 : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), 1137 FirstDataArg(firstDataArg), 1138 NumDataArgs(numDataArgs), 1139 IsObjCLiteral(isObjCLiteral), Beg(beg), 1140 HasVAListArg(hasVAListArg), 1141 TheCall(theCall), FormatIdx(formatIdx), 1142 usesPositionalArgs(false), atFirstArg(true) { 1143 CoveredArgs.resize(numDataArgs); 1144 CoveredArgs.reset(); 1145 } 1146 1147 void DoneProcessing(); 1148 1149 void HandleIncompleteSpecifier(const char *startSpecifier, 1150 unsigned specifierLen); 1151 1152 virtual void HandleInvalidPosition(const char *startSpecifier, 1153 unsigned specifierLen, 1154 analyze_format_string::PositionContext p); 1155 1156 virtual void HandleZeroPosition(const char *startPos, unsigned posLen); 1157 1158 void HandleNullChar(const char *nullCharacter); 1159 1160protected: 1161 bool HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc, 1162 const char *startSpec, 1163 unsigned specifierLen, 1164 const char *csStart, unsigned csLen); 1165 1166 SourceRange getFormatStringRange(); 1167 CharSourceRange getSpecifierRange(const char *startSpecifier, 1168 unsigned specifierLen); 1169 SourceLocation getLocationOfByte(const char *x); 1170 1171 const Expr *getDataArg(unsigned i) const; 1172}; 1173} 1174 1175SourceRange CheckFormatHandler::getFormatStringRange() { 1176 return OrigFormatExpr->getSourceRange(); 1177} 1178 1179CharSourceRange CheckFormatHandler:: 1180getSpecifierRange(const char *startSpecifier, unsigned specifierLen) { 1181 SourceLocation Start = getLocationOfByte(startSpecifier); 1182 SourceLocation End = getLocationOfByte(startSpecifier + specifierLen - 1); 1183 1184 // Advance the end SourceLocation by one due to half-open ranges. 1185 End = End.getFileLocWithOffset(1); 1186 1187 return CharSourceRange::getCharRange(Start, End); 1188} 1189 1190SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) { 1191 return S.getLocationOfStringLiteralByte(FExpr, x - Beg); 1192} 1193 1194void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier, 1195 unsigned specifierLen){ 1196 SourceLocation Loc = getLocationOfByte(startSpecifier); 1197 S.Diag(Loc, diag::warn_printf_incomplete_specifier) 1198 << getSpecifierRange(startSpecifier, specifierLen); 1199} 1200 1201void 1202CheckFormatHandler::HandleInvalidPosition(const char *startPos, unsigned posLen, 1203 analyze_format_string::PositionContext p) { 1204 SourceLocation Loc = getLocationOfByte(startPos); 1205 S.Diag(Loc, diag::warn_format_invalid_positional_specifier) 1206 << (unsigned) p << getSpecifierRange(startPos, posLen); 1207} 1208 1209void CheckFormatHandler::HandleZeroPosition(const char *startPos, 1210 unsigned posLen) { 1211 SourceLocation Loc = getLocationOfByte(startPos); 1212 S.Diag(Loc, diag::warn_format_zero_positional_specifier) 1213 << getSpecifierRange(startPos, posLen); 1214} 1215 1216void CheckFormatHandler::HandleNullChar(const char *nullCharacter) { 1217 // The presence of a null character is likely an error. 1218 S.Diag(getLocationOfByte(nullCharacter), 1219 diag::warn_printf_format_string_contains_null_char) 1220 << getFormatStringRange(); 1221} 1222 1223const Expr *CheckFormatHandler::getDataArg(unsigned i) const { 1224 return TheCall->getArg(FirstDataArg + i); 1225} 1226 1227void CheckFormatHandler::DoneProcessing() { 1228 // Does the number of data arguments exceed the number of 1229 // format conversions in the format string? 1230 if (!HasVAListArg) { 1231 // Find any arguments that weren't covered. 1232 CoveredArgs.flip(); 1233 signed notCoveredArg = CoveredArgs.find_first(); 1234 if (notCoveredArg >= 0) { 1235 assert((unsigned)notCoveredArg < NumDataArgs); 1236 S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(), 1237 diag::warn_printf_data_arg_not_used) 1238 << getFormatStringRange(); 1239 } 1240 } 1241} 1242 1243bool 1244CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex, 1245 SourceLocation Loc, 1246 const char *startSpec, 1247 unsigned specifierLen, 1248 const char *csStart, 1249 unsigned csLen) { 1250 1251 bool keepGoing = true; 1252 if (argIndex < NumDataArgs) { 1253 // Consider the argument coverered, even though the specifier doesn't 1254 // make sense. 1255 CoveredArgs.set(argIndex); 1256 } 1257 else { 1258 // If argIndex exceeds the number of data arguments we 1259 // don't issue a warning because that is just a cascade of warnings (and 1260 // they may have intended '%%' anyway). We don't want to continue processing 1261 // the format string after this point, however, as we will like just get 1262 // gibberish when trying to match arguments. 1263 keepGoing = false; 1264 } 1265 1266 S.Diag(Loc, diag::warn_format_invalid_conversion) 1267 << llvm::StringRef(csStart, csLen) 1268 << getSpecifierRange(startSpec, specifierLen); 1269 1270 return keepGoing; 1271} 1272 1273//===--- CHECK: Printf format string checking ------------------------------===// 1274 1275namespace { 1276class CheckPrintfHandler : public CheckFormatHandler { 1277public: 1278 CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, 1279 const Expr *origFormatExpr, unsigned firstDataArg, 1280 unsigned numDataArgs, bool isObjCLiteral, 1281 const char *beg, bool hasVAListArg, 1282 const CallExpr *theCall, unsigned formatIdx) 1283 : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg, 1284 numDataArgs, isObjCLiteral, beg, hasVAListArg, 1285 theCall, formatIdx) {} 1286 1287 1288 bool HandleInvalidPrintfConversionSpecifier( 1289 const analyze_printf::PrintfSpecifier &FS, 1290 const char *startSpecifier, 1291 unsigned specifierLen); 1292 1293 bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 1294 const char *startSpecifier, 1295 unsigned specifierLen); 1296 1297 bool HandleAmount(const analyze_format_string::OptionalAmount &Amt, unsigned k, 1298 const char *startSpecifier, unsigned specifierLen); 1299 void HandleInvalidAmount(const analyze_printf::PrintfSpecifier &FS, 1300 const analyze_printf::OptionalAmount &Amt, 1301 unsigned type, 1302 const char *startSpecifier, unsigned specifierLen); 1303 void HandleFlag(const analyze_printf::PrintfSpecifier &FS, 1304 const analyze_printf::OptionalFlag &flag, 1305 const char *startSpecifier, unsigned specifierLen); 1306 void HandleIgnoredFlag(const analyze_printf::PrintfSpecifier &FS, 1307 const analyze_printf::OptionalFlag &ignoredFlag, 1308 const analyze_printf::OptionalFlag &flag, 1309 const char *startSpecifier, unsigned specifierLen); 1310}; 1311} 1312 1313bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier( 1314 const analyze_printf::PrintfSpecifier &FS, 1315 const char *startSpecifier, 1316 unsigned specifierLen) { 1317 const analyze_printf::PrintfConversionSpecifier &CS = 1318 FS.getConversionSpecifier(); 1319 1320 return HandleInvalidConversionSpecifier(FS.getArgIndex(), 1321 getLocationOfByte(CS.getStart()), 1322 startSpecifier, specifierLen, 1323 CS.getStart(), CS.getLength()); 1324} 1325 1326bool CheckPrintfHandler::HandleAmount( 1327 const analyze_format_string::OptionalAmount &Amt, 1328 unsigned k, const char *startSpecifier, 1329 unsigned specifierLen) { 1330 1331 if (Amt.hasDataArgument()) { 1332 if (!HasVAListArg) { 1333 unsigned argIndex = Amt.getArgIndex(); 1334 if (argIndex >= NumDataArgs) { 1335 S.Diag(getLocationOfByte(Amt.getStart()), 1336 diag::warn_printf_asterisk_missing_arg) 1337 << k << getSpecifierRange(startSpecifier, specifierLen); 1338 // Don't do any more checking. We will just emit 1339 // spurious errors. 1340 return false; 1341 } 1342 1343 // Type check the data argument. It should be an 'int'. 1344 // Although not in conformance with C99, we also allow the argument to be 1345 // an 'unsigned int' as that is a reasonably safe case. GCC also 1346 // doesn't emit a warning for that case. 1347 CoveredArgs.set(argIndex); 1348 const Expr *Arg = getDataArg(argIndex); 1349 QualType T = Arg->getType(); 1350 1351 const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context); 1352 assert(ATR.isValid()); 1353 1354 if (!ATR.matchesType(S.Context, T)) { 1355 S.Diag(getLocationOfByte(Amt.getStart()), 1356 diag::warn_printf_asterisk_wrong_type) 1357 << k 1358 << ATR.getRepresentativeType(S.Context) << T 1359 << getSpecifierRange(startSpecifier, specifierLen) 1360 << Arg->getSourceRange(); 1361 // Don't do any more checking. We will just emit 1362 // spurious errors. 1363 return false; 1364 } 1365 } 1366 } 1367 return true; 1368} 1369 1370void CheckPrintfHandler::HandleInvalidAmount( 1371 const analyze_printf::PrintfSpecifier &FS, 1372 const analyze_printf::OptionalAmount &Amt, 1373 unsigned type, 1374 const char *startSpecifier, 1375 unsigned specifierLen) { 1376 const analyze_printf::PrintfConversionSpecifier &CS = 1377 FS.getConversionSpecifier(); 1378 switch (Amt.getHowSpecified()) { 1379 case analyze_printf::OptionalAmount::Constant: 1380 S.Diag(getLocationOfByte(Amt.getStart()), 1381 diag::warn_printf_nonsensical_optional_amount) 1382 << type 1383 << CS.toString() 1384 << getSpecifierRange(startSpecifier, specifierLen) 1385 << FixItHint::CreateRemoval(getSpecifierRange(Amt.getStart(), 1386 Amt.getConstantLength())); 1387 break; 1388 1389 default: 1390 S.Diag(getLocationOfByte(Amt.getStart()), 1391 diag::warn_printf_nonsensical_optional_amount) 1392 << type 1393 << CS.toString() 1394 << getSpecifierRange(startSpecifier, specifierLen); 1395 break; 1396 } 1397} 1398 1399void CheckPrintfHandler::HandleFlag(const analyze_printf::PrintfSpecifier &FS, 1400 const analyze_printf::OptionalFlag &flag, 1401 const char *startSpecifier, 1402 unsigned specifierLen) { 1403 // Warn about pointless flag with a fixit removal. 1404 const analyze_printf::PrintfConversionSpecifier &CS = 1405 FS.getConversionSpecifier(); 1406 S.Diag(getLocationOfByte(flag.getPosition()), 1407 diag::warn_printf_nonsensical_flag) 1408 << flag.toString() << CS.toString() 1409 << getSpecifierRange(startSpecifier, specifierLen) 1410 << FixItHint::CreateRemoval(getSpecifierRange(flag.getPosition(), 1)); 1411} 1412 1413void CheckPrintfHandler::HandleIgnoredFlag( 1414 const analyze_printf::PrintfSpecifier &FS, 1415 const analyze_printf::OptionalFlag &ignoredFlag, 1416 const analyze_printf::OptionalFlag &flag, 1417 const char *startSpecifier, 1418 unsigned specifierLen) { 1419 // Warn about ignored flag with a fixit removal. 1420 S.Diag(getLocationOfByte(ignoredFlag.getPosition()), 1421 diag::warn_printf_ignored_flag) 1422 << ignoredFlag.toString() << flag.toString() 1423 << getSpecifierRange(startSpecifier, specifierLen) 1424 << FixItHint::CreateRemoval(getSpecifierRange( 1425 ignoredFlag.getPosition(), 1)); 1426} 1427 1428bool 1429CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier 1430 &FS, 1431 const char *startSpecifier, 1432 unsigned specifierLen) { 1433 1434 using namespace analyze_format_string; 1435 using namespace analyze_printf; 1436 const PrintfConversionSpecifier &CS = FS.getConversionSpecifier(); 1437 1438 if (FS.consumesDataArgument()) { 1439 if (atFirstArg) { 1440 atFirstArg = false; 1441 usesPositionalArgs = FS.usesPositionalArg(); 1442 } 1443 else if (usesPositionalArgs != FS.usesPositionalArg()) { 1444 // Cannot mix-and-match positional and non-positional arguments. 1445 S.Diag(getLocationOfByte(CS.getStart()), 1446 diag::warn_format_mix_positional_nonpositional_args) 1447 << getSpecifierRange(startSpecifier, specifierLen); 1448 return false; 1449 } 1450 } 1451 1452 // First check if the field width, precision, and conversion specifier 1453 // have matching data arguments. 1454 if (!HandleAmount(FS.getFieldWidth(), /* field width */ 0, 1455 startSpecifier, specifierLen)) { 1456 return false; 1457 } 1458 1459 if (!HandleAmount(FS.getPrecision(), /* precision */ 1, 1460 startSpecifier, specifierLen)) { 1461 return false; 1462 } 1463 1464 if (!CS.consumesDataArgument()) { 1465 // FIXME: Technically specifying a precision or field width here 1466 // makes no sense. Worth issuing a warning at some point. 1467 return true; 1468 } 1469 1470 // Consume the argument. 1471 unsigned argIndex = FS.getArgIndex(); 1472 if (argIndex < NumDataArgs) { 1473 // The check to see if the argIndex is valid will come later. 1474 // We set the bit here because we may exit early from this 1475 // function if we encounter some other error. 1476 CoveredArgs.set(argIndex); 1477 } 1478 1479 // Check for using an Objective-C specific conversion specifier 1480 // in a non-ObjC literal. 1481 if (!IsObjCLiteral && CS.isObjCArg()) { 1482 return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier, 1483 specifierLen); 1484 } 1485 1486 // Check for invalid use of field width 1487 if (!FS.hasValidFieldWidth()) { 1488 HandleInvalidAmount(FS, FS.getFieldWidth(), /* field width */ 0, 1489 startSpecifier, specifierLen); 1490 } 1491 1492 // Check for invalid use of precision 1493 if (!FS.hasValidPrecision()) { 1494 HandleInvalidAmount(FS, FS.getPrecision(), /* precision */ 1, 1495 startSpecifier, specifierLen); 1496 } 1497 1498 // Check each flag does not conflict with any other component. 1499 if (!FS.hasValidLeadingZeros()) 1500 HandleFlag(FS, FS.hasLeadingZeros(), startSpecifier, specifierLen); 1501 if (!FS.hasValidPlusPrefix()) 1502 HandleFlag(FS, FS.hasPlusPrefix(), startSpecifier, specifierLen); 1503 if (!FS.hasValidSpacePrefix()) 1504 HandleFlag(FS, FS.hasSpacePrefix(), startSpecifier, specifierLen); 1505 if (!FS.hasValidAlternativeForm()) 1506 HandleFlag(FS, FS.hasAlternativeForm(), startSpecifier, specifierLen); 1507 if (!FS.hasValidLeftJustified()) 1508 HandleFlag(FS, FS.isLeftJustified(), startSpecifier, specifierLen); 1509 1510 // Check that flags are not ignored by another flag 1511 if (FS.hasSpacePrefix() && FS.hasPlusPrefix()) // ' ' ignored by '+' 1512 HandleIgnoredFlag(FS, FS.hasSpacePrefix(), FS.hasPlusPrefix(), 1513 startSpecifier, specifierLen); 1514 if (FS.hasLeadingZeros() && FS.isLeftJustified()) // '0' ignored by '-' 1515 HandleIgnoredFlag(FS, FS.hasLeadingZeros(), FS.isLeftJustified(), 1516 startSpecifier, specifierLen); 1517 1518 // Check the length modifier is valid with the given conversion specifier. 1519 const LengthModifier &LM = FS.getLengthModifier(); 1520 if (!FS.hasValidLengthModifier()) 1521 S.Diag(getLocationOfByte(LM.getStart()), 1522 diag::warn_format_nonsensical_length) 1523 << LM.toString() << CS.toString() 1524 << getSpecifierRange(startSpecifier, specifierLen) 1525 << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(), 1526 LM.getLength())); 1527 1528 // Are we using '%n'? 1529 if (CS.getKind() == ConversionSpecifier::nArg) { 1530 // Issue a warning about this being a possible security issue. 1531 S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back) 1532 << getSpecifierRange(startSpecifier, specifierLen); 1533 // Continue checking the other format specifiers. 1534 return true; 1535 } 1536 1537 // The remaining checks depend on the data arguments. 1538 if (HasVAListArg) 1539 return true; 1540 1541 if (argIndex >= NumDataArgs) { 1542 if (FS.usesPositionalArg()) { 1543 S.Diag(getLocationOfByte(CS.getStart()), 1544 diag::warn_printf_positional_arg_exceeds_data_args) 1545 << (argIndex+1) << NumDataArgs 1546 << getSpecifierRange(startSpecifier, specifierLen); 1547 } 1548 else { 1549 S.Diag(getLocationOfByte(CS.getStart()), 1550 diag::warn_printf_insufficient_data_args) 1551 << getSpecifierRange(startSpecifier, specifierLen); 1552 } 1553 1554 // Don't do any more checking. 1555 return false; 1556 } 1557 1558 // Now type check the data expression that matches the 1559 // format specifier. 1560 const Expr *Ex = getDataArg(argIndex); 1561 const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context); 1562 if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) { 1563 // Check if we didn't match because of an implicit cast from a 'char' 1564 // or 'short' to an 'int'. This is done because printf is a varargs 1565 // function. 1566 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Ex)) 1567 if (ICE->getType() == S.Context.IntTy) 1568 if (ATR.matchesType(S.Context, ICE->getSubExpr()->getType())) 1569 return true; 1570 1571 // We may be able to offer a FixItHint if it is a supported type. 1572 PrintfSpecifier fixedFS = FS; 1573 bool success = fixedFS.fixType(Ex->getType()); 1574 1575 if (success) { 1576 // Get the fix string from the fixed format specifier 1577 llvm::SmallString<128> buf; 1578 llvm::raw_svector_ostream os(buf); 1579 fixedFS.toString(os); 1580 1581 S.Diag(getLocationOfByte(CS.getStart()), 1582 diag::warn_printf_conversion_argument_type_mismatch) 1583 << ATR.getRepresentativeType(S.Context) << Ex->getType() 1584 << getSpecifierRange(startSpecifier, specifierLen) 1585 << Ex->getSourceRange() 1586 << FixItHint::CreateReplacement( 1587 getSpecifierRange(startSpecifier, specifierLen), 1588 os.str()); 1589 } 1590 else { 1591 S.Diag(getLocationOfByte(CS.getStart()), 1592 diag::warn_printf_conversion_argument_type_mismatch) 1593 << ATR.getRepresentativeType(S.Context) << Ex->getType() 1594 << getSpecifierRange(startSpecifier, specifierLen) 1595 << Ex->getSourceRange(); 1596 } 1597 } 1598 1599 return true; 1600} 1601 1602//===--- CHECK: Scanf format string checking ------------------------------===// 1603 1604namespace { 1605class CheckScanfHandler : public CheckFormatHandler { 1606public: 1607 CheckScanfHandler(Sema &s, const StringLiteral *fexpr, 1608 const Expr *origFormatExpr, unsigned firstDataArg, 1609 unsigned numDataArgs, bool isObjCLiteral, 1610 const char *beg, bool hasVAListArg, 1611 const CallExpr *theCall, unsigned formatIdx) 1612 : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg, 1613 numDataArgs, isObjCLiteral, beg, hasVAListArg, 1614 theCall, formatIdx) {} 1615 1616 bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 1617 const char *startSpecifier, 1618 unsigned specifierLen); 1619 1620 bool HandleInvalidScanfConversionSpecifier( 1621 const analyze_scanf::ScanfSpecifier &FS, 1622 const char *startSpecifier, 1623 unsigned specifierLen); 1624 1625 void HandleIncompleteScanList(const char *start, const char *end); 1626}; 1627} 1628 1629void CheckScanfHandler::HandleIncompleteScanList(const char *start, 1630 const char *end) { 1631 S.Diag(getLocationOfByte(end), diag::warn_scanf_scanlist_incomplete) 1632 << getSpecifierRange(start, end - start); 1633} 1634 1635bool CheckScanfHandler::HandleInvalidScanfConversionSpecifier( 1636 const analyze_scanf::ScanfSpecifier &FS, 1637 const char *startSpecifier, 1638 unsigned specifierLen) { 1639 1640 const analyze_scanf::ScanfConversionSpecifier &CS = 1641 FS.getConversionSpecifier(); 1642 1643 return HandleInvalidConversionSpecifier(FS.getArgIndex(), 1644 getLocationOfByte(CS.getStart()), 1645 startSpecifier, specifierLen, 1646 CS.getStart(), CS.getLength()); 1647} 1648 1649bool CheckScanfHandler::HandleScanfSpecifier( 1650 const analyze_scanf::ScanfSpecifier &FS, 1651 const char *startSpecifier, 1652 unsigned specifierLen) { 1653 1654 using namespace analyze_scanf; 1655 using namespace analyze_format_string; 1656 1657 const ScanfConversionSpecifier &CS = FS.getConversionSpecifier(); 1658 1659 // Handle case where '%' and '*' don't consume an argument. These shouldn't 1660 // be used to decide if we are using positional arguments consistently. 1661 if (FS.consumesDataArgument()) { 1662 if (atFirstArg) { 1663 atFirstArg = false; 1664 usesPositionalArgs = FS.usesPositionalArg(); 1665 } 1666 else if (usesPositionalArgs != FS.usesPositionalArg()) { 1667 // Cannot mix-and-match positional and non-positional arguments. 1668 S.Diag(getLocationOfByte(CS.getStart()), 1669 diag::warn_format_mix_positional_nonpositional_args) 1670 << getSpecifierRange(startSpecifier, specifierLen); 1671 return false; 1672 } 1673 } 1674 1675 // Check if the field with is non-zero. 1676 const OptionalAmount &Amt = FS.getFieldWidth(); 1677 if (Amt.getHowSpecified() == OptionalAmount::Constant) { 1678 if (Amt.getConstantAmount() == 0) { 1679 const CharSourceRange &R = getSpecifierRange(Amt.getStart(), 1680 Amt.getConstantLength()); 1681 S.Diag(getLocationOfByte(Amt.getStart()), 1682 diag::warn_scanf_nonzero_width) 1683 << R << FixItHint::CreateRemoval(R); 1684 } 1685 } 1686 1687 if (!FS.consumesDataArgument()) { 1688 // FIXME: Technically specifying a precision or field width here 1689 // makes no sense. Worth issuing a warning at some point. 1690 return true; 1691 } 1692 1693 // Consume the argument. 1694 unsigned argIndex = FS.getArgIndex(); 1695 if (argIndex < NumDataArgs) { 1696 // The check to see if the argIndex is valid will come later. 1697 // We set the bit here because we may exit early from this 1698 // function if we encounter some other error. 1699 CoveredArgs.set(argIndex); 1700 } 1701 1702 // Check the length modifier is valid with the given conversion specifier. 1703 const LengthModifier &LM = FS.getLengthModifier(); 1704 if (!FS.hasValidLengthModifier()) { 1705 S.Diag(getLocationOfByte(LM.getStart()), 1706 diag::warn_format_nonsensical_length) 1707 << LM.toString() << CS.toString() 1708 << getSpecifierRange(startSpecifier, specifierLen) 1709 << FixItHint::CreateRemoval(getSpecifierRange(LM.getStart(), 1710 LM.getLength())); 1711 } 1712 1713 // The remaining checks depend on the data arguments. 1714 if (HasVAListArg) 1715 return true; 1716 1717 if (argIndex >= NumDataArgs) { 1718 if (FS.usesPositionalArg()) { 1719 S.Diag(getLocationOfByte(CS.getStart()), 1720 diag::warn_printf_positional_arg_exceeds_data_args) 1721 << (argIndex+1) << NumDataArgs 1722 << getSpecifierRange(startSpecifier, specifierLen); 1723 } 1724 else { 1725 S.Diag(getLocationOfByte(CS.getStart()), 1726 diag::warn_printf_insufficient_data_args) 1727 << getSpecifierRange(startSpecifier, specifierLen); 1728 } 1729 1730 // Don't do any more checking. 1731 return false; 1732 } 1733 1734 // FIXME: Check that the argument type matches the format specifier. 1735 1736 return true; 1737} 1738 1739void Sema::CheckFormatString(const StringLiteral *FExpr, 1740 const Expr *OrigFormatExpr, 1741 const CallExpr *TheCall, bool HasVAListArg, 1742 unsigned format_idx, unsigned firstDataArg, 1743 bool isPrintf) { 1744 1745 // CHECK: is the format string a wide literal? 1746 if (FExpr->isWide()) { 1747 Diag(FExpr->getLocStart(), 1748 diag::warn_format_string_is_wide_literal) 1749 << OrigFormatExpr->getSourceRange(); 1750 return; 1751 } 1752 1753 // Str - The format string. NOTE: this is NOT null-terminated! 1754 const char *Str = FExpr->getStrData(); 1755 1756 // CHECK: empty format string? 1757 unsigned StrLen = FExpr->getByteLength(); 1758 1759 if (StrLen == 0) { 1760 Diag(FExpr->getLocStart(), diag::warn_empty_format_string) 1761 << OrigFormatExpr->getSourceRange(); 1762 return; 1763 } 1764 1765 if (isPrintf) { 1766 CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, 1767 TheCall->getNumArgs() - firstDataArg, 1768 isa<ObjCStringLiteral>(OrigFormatExpr), Str, 1769 HasVAListArg, TheCall, format_idx); 1770 1771 if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen)) 1772 H.DoneProcessing(); 1773 } 1774 else { 1775 CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, 1776 TheCall->getNumArgs() - firstDataArg, 1777 isa<ObjCStringLiteral>(OrigFormatExpr), Str, 1778 HasVAListArg, TheCall, format_idx); 1779 1780 if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen)) 1781 H.DoneProcessing(); 1782 } 1783} 1784 1785//===--- CHECK: Return Address of Stack Variable --------------------------===// 1786 1787static DeclRefExpr* EvalVal(Expr *E); 1788static DeclRefExpr* EvalAddr(Expr* E); 1789 1790/// CheckReturnStackAddr - Check if a return statement returns the address 1791/// of a stack variable. 1792void 1793Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 1794 SourceLocation ReturnLoc) { 1795 1796 // Perform checking for returned stack addresses. 1797 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 1798 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 1799 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 1800 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1801 1802 // Skip over implicit cast expressions when checking for block expressions. 1803 RetValExp = RetValExp->IgnoreParenCasts(); 1804 1805 if (BlockExpr *C = dyn_cast<BlockExpr>(RetValExp)) 1806 if (C->hasBlockDeclRefExprs()) 1807 Diag(C->getLocStart(), diag::err_ret_local_block) 1808 << C->getSourceRange(); 1809 1810 if (AddrLabelExpr *ALE = dyn_cast<AddrLabelExpr>(RetValExp)) 1811 Diag(ALE->getLocStart(), diag::warn_ret_addr_label) 1812 << ALE->getSourceRange(); 1813 1814 } else if (lhsType->isReferenceType()) { 1815 // Perform checking for stack values returned by reference. 1816 // Check for a reference to the stack 1817 if (DeclRefExpr *DR = EvalVal(RetValExp)) 1818 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 1819 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1820 } 1821} 1822 1823/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 1824/// check if the expression in a return statement evaluates to an address 1825/// to a location on the stack. The recursion is used to traverse the 1826/// AST of the return expression, with recursion backtracking when we 1827/// encounter a subexpression that (1) clearly does not lead to the address 1828/// of a stack variable or (2) is something we cannot determine leads to 1829/// the address of a stack variable based on such local checking. 1830/// 1831/// EvalAddr processes expressions that are pointers that are used as 1832/// references (and not L-values). EvalVal handles all other values. 1833/// At the base case of the recursion is a check for a DeclRefExpr* in 1834/// the refers to a stack variable. 1835/// 1836/// This implementation handles: 1837/// 1838/// * pointer-to-pointer casts 1839/// * implicit conversions from array references to pointers 1840/// * taking the address of fields 1841/// * arbitrary interplay between "&" and "*" operators 1842/// * pointer arithmetic from an address of a stack variable 1843/// * taking the address of an array element where the array is on the stack 1844static DeclRefExpr* EvalAddr(Expr *E) { 1845 // We should only be called for evaluating pointer expressions. 1846 assert((E->getType()->isAnyPointerType() || 1847 E->getType()->isBlockPointerType() || 1848 E->getType()->isObjCQualifiedIdType()) && 1849 "EvalAddr only works on pointers"); 1850 1851 // Our "symbolic interpreter" is just a dispatch off the currently 1852 // viewed AST node. We then recursively traverse the AST by calling 1853 // EvalAddr and EvalVal appropriately. 1854 switch (E->getStmtClass()) { 1855 case Stmt::ParenExprClass: 1856 // Ignore parentheses. 1857 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 1858 1859 case Stmt::UnaryOperatorClass: { 1860 // The only unary operator that make sense to handle here 1861 // is AddrOf. All others don't make sense as pointers. 1862 UnaryOperator *U = cast<UnaryOperator>(E); 1863 1864 if (U->getOpcode() == UnaryOperator::AddrOf) 1865 return EvalVal(U->getSubExpr()); 1866 else 1867 return NULL; 1868 } 1869 1870 case Stmt::BinaryOperatorClass: { 1871 // Handle pointer arithmetic. All other binary operators are not valid 1872 // in this context. 1873 BinaryOperator *B = cast<BinaryOperator>(E); 1874 BinaryOperator::Opcode op = B->getOpcode(); 1875 1876 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 1877 return NULL; 1878 1879 Expr *Base = B->getLHS(); 1880 1881 // Determine which argument is the real pointer base. It could be 1882 // the RHS argument instead of the LHS. 1883 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 1884 1885 assert (Base->getType()->isPointerType()); 1886 return EvalAddr(Base); 1887 } 1888 1889 // For conditional operators we need to see if either the LHS or RHS are 1890 // valid DeclRefExpr*s. If one of them is valid, we return it. 1891 case Stmt::ConditionalOperatorClass: { 1892 ConditionalOperator *C = cast<ConditionalOperator>(E); 1893 1894 // Handle the GNU extension for missing LHS. 1895 if (Expr *lhsExpr = C->getLHS()) 1896 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 1897 return LHS; 1898 1899 return EvalAddr(C->getRHS()); 1900 } 1901 1902 // For casts, we need to handle conversions from arrays to 1903 // pointer values, and pointer-to-pointer conversions. 1904 case Stmt::ImplicitCastExprClass: 1905 case Stmt::CStyleCastExprClass: 1906 case Stmt::CXXFunctionalCastExprClass: { 1907 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 1908 QualType T = SubExpr->getType(); 1909 1910 if (SubExpr->getType()->isPointerType() || 1911 SubExpr->getType()->isBlockPointerType() || 1912 SubExpr->getType()->isObjCQualifiedIdType()) 1913 return EvalAddr(SubExpr); 1914 else if (T->isArrayType()) 1915 return EvalVal(SubExpr); 1916 else 1917 return 0; 1918 } 1919 1920 // C++ casts. For dynamic casts, static casts, and const casts, we 1921 // are always converting from a pointer-to-pointer, so we just blow 1922 // through the cast. In the case the dynamic cast doesn't fail (and 1923 // return NULL), we take the conservative route and report cases 1924 // where we return the address of a stack variable. For Reinterpre 1925 // FIXME: The comment about is wrong; we're not always converting 1926 // from pointer to pointer. I'm guessing that this code should also 1927 // handle references to objects. 1928 case Stmt::CXXStaticCastExprClass: 1929 case Stmt::CXXDynamicCastExprClass: 1930 case Stmt::CXXConstCastExprClass: 1931 case Stmt::CXXReinterpretCastExprClass: { 1932 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 1933 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 1934 return EvalAddr(S); 1935 else 1936 return NULL; 1937 } 1938 1939 // Everything else: we simply don't reason about them. 1940 default: 1941 return NULL; 1942 } 1943} 1944 1945 1946/// EvalVal - This function is complements EvalAddr in the mutual recursion. 1947/// See the comments for EvalAddr for more details. 1948static DeclRefExpr* EvalVal(Expr *E) { 1949 1950 // We should only be called for evaluating non-pointer expressions, or 1951 // expressions with a pointer type that are not used as references but instead 1952 // are l-values (e.g., DeclRefExpr with a pointer type). 1953 1954 // Our "symbolic interpreter" is just a dispatch off the currently 1955 // viewed AST node. We then recursively traverse the AST by calling 1956 // EvalAddr and EvalVal appropriately. 1957 switch (E->getStmtClass()) { 1958 case Stmt::DeclRefExprClass: { 1959 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 1960 // at code that refers to a variable's name. We check if it has local 1961 // storage within the function, and if so, return the expression. 1962 DeclRefExpr *DR = cast<DeclRefExpr>(E); 1963 1964 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 1965 if (V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 1966 1967 return NULL; 1968 } 1969 1970 case Stmt::ParenExprClass: 1971 // Ignore parentheses. 1972 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 1973 1974 case Stmt::UnaryOperatorClass: { 1975 // The only unary operator that make sense to handle here 1976 // is Deref. All others don't resolve to a "name." This includes 1977 // handling all sorts of rvalues passed to a unary operator. 1978 UnaryOperator *U = cast<UnaryOperator>(E); 1979 1980 if (U->getOpcode() == UnaryOperator::Deref) 1981 return EvalAddr(U->getSubExpr()); 1982 1983 return NULL; 1984 } 1985 1986 case Stmt::ArraySubscriptExprClass: { 1987 // Array subscripts are potential references to data on the stack. We 1988 // retrieve the DeclRefExpr* for the array variable if it indeed 1989 // has local storage. 1990 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 1991 } 1992 1993 case Stmt::ConditionalOperatorClass: { 1994 // For conditional operators we need to see if either the LHS or RHS are 1995 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 1996 ConditionalOperator *C = cast<ConditionalOperator>(E); 1997 1998 // Handle the GNU extension for missing LHS. 1999 if (Expr *lhsExpr = C->getLHS()) 2000 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 2001 return LHS; 2002 2003 return EvalVal(C->getRHS()); 2004 } 2005 2006 // Accesses to members are potential references to data on the stack. 2007 case Stmt::MemberExprClass: { 2008 MemberExpr *M = cast<MemberExpr>(E); 2009 2010 // Check for indirect access. We only want direct field accesses. 2011 if (!M->isArrow()) 2012 return EvalVal(M->getBase()); 2013 else 2014 return NULL; 2015 } 2016 2017 // Everything else: we simply don't reason about them. 2018 default: 2019 return NULL; 2020 } 2021} 2022 2023//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 2024 2025/// Check for comparisons of floating point operands using != and ==. 2026/// Issue a warning if these are no self-comparisons, as they are not likely 2027/// to do what the programmer intended. 2028void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 2029 bool EmitWarning = true; 2030 2031 Expr* LeftExprSansParen = lex->IgnoreParens(); 2032 Expr* RightExprSansParen = rex->IgnoreParens(); 2033 2034 // Special case: check for x == x (which is OK). 2035 // Do not emit warnings for such cases. 2036 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 2037 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 2038 if (DRL->getDecl() == DRR->getDecl()) 2039 EmitWarning = false; 2040 2041 2042 // Special case: check for comparisons against literals that can be exactly 2043 // represented by APFloat. In such cases, do not emit a warning. This 2044 // is a heuristic: often comparison against such literals are used to 2045 // detect if a value in a variable has not changed. This clearly can 2046 // lead to false negatives. 2047 if (EmitWarning) { 2048 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 2049 if (FLL->isExact()) 2050 EmitWarning = false; 2051 } else 2052 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 2053 if (FLR->isExact()) 2054 EmitWarning = false; 2055 } 2056 } 2057 2058 // Check for comparisons with builtin types. 2059 if (EmitWarning) 2060 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 2061 if (CL->isBuiltinCall(Context)) 2062 EmitWarning = false; 2063 2064 if (EmitWarning) 2065 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 2066 if (CR->isBuiltinCall(Context)) 2067 EmitWarning = false; 2068 2069 // Emit the diagnostic. 2070 if (EmitWarning) 2071 Diag(loc, diag::warn_floatingpoint_eq) 2072 << lex->getSourceRange() << rex->getSourceRange(); 2073} 2074 2075//===--- CHECK: Integer mixed-sign comparisons (-Wsign-compare) --------===// 2076//===--- CHECK: Lossy implicit conversions (-Wconversion) --------------===// 2077 2078namespace { 2079 2080/// Structure recording the 'active' range of an integer-valued 2081/// expression. 2082struct IntRange { 2083 /// The number of bits active in the int. 2084 unsigned Width; 2085 2086 /// True if the int is known not to have negative values. 2087 bool NonNegative; 2088 2089 IntRange() {} 2090 IntRange(unsigned Width, bool NonNegative) 2091 : Width(Width), NonNegative(NonNegative) 2092 {} 2093 2094 // Returns the range of the bool type. 2095 static IntRange forBoolType() { 2096 return IntRange(1, true); 2097 } 2098 2099 // Returns the range of an integral type. 2100 static IntRange forType(ASTContext &C, QualType T) { 2101 return forCanonicalType(C, T->getCanonicalTypeInternal().getTypePtr()); 2102 } 2103 2104 // Returns the range of an integeral type based on its canonical 2105 // representation. 2106 static IntRange forCanonicalType(ASTContext &C, const Type *T) { 2107 assert(T->isCanonicalUnqualified()); 2108 2109 if (const VectorType *VT = dyn_cast<VectorType>(T)) 2110 T = VT->getElementType().getTypePtr(); 2111 if (const ComplexType *CT = dyn_cast<ComplexType>(T)) 2112 T = CT->getElementType().getTypePtr(); 2113 2114 if (const EnumType *ET = dyn_cast<EnumType>(T)) { 2115 EnumDecl *Enum = ET->getDecl(); 2116 unsigned NumPositive = Enum->getNumPositiveBits(); 2117 unsigned NumNegative = Enum->getNumNegativeBits(); 2118 2119 return IntRange(std::max(NumPositive, NumNegative), NumNegative == 0); 2120 } 2121 2122 const BuiltinType *BT = cast<BuiltinType>(T); 2123 assert(BT->isInteger()); 2124 2125 return IntRange(C.getIntWidth(QualType(T, 0)), BT->isUnsignedInteger()); 2126 } 2127 2128 // Returns the supremum of two ranges: i.e. their conservative merge. 2129 static IntRange join(IntRange L, IntRange R) { 2130 return IntRange(std::max(L.Width, R.Width), 2131 L.NonNegative && R.NonNegative); 2132 } 2133 2134 // Returns the infinum of two ranges: i.e. their aggressive merge. 2135 static IntRange meet(IntRange L, IntRange R) { 2136 return IntRange(std::min(L.Width, R.Width), 2137 L.NonNegative || R.NonNegative); 2138 } 2139}; 2140 2141IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) { 2142 if (value.isSigned() && value.isNegative()) 2143 return IntRange(value.getMinSignedBits(), false); 2144 2145 if (value.getBitWidth() > MaxWidth) 2146 value.trunc(MaxWidth); 2147 2148 // isNonNegative() just checks the sign bit without considering 2149 // signedness. 2150 return IntRange(value.getActiveBits(), true); 2151} 2152 2153IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty, 2154 unsigned MaxWidth) { 2155 if (result.isInt()) 2156 return GetValueRange(C, result.getInt(), MaxWidth); 2157 2158 if (result.isVector()) { 2159 IntRange R = GetValueRange(C, result.getVectorElt(0), Ty, MaxWidth); 2160 for (unsigned i = 1, e = result.getVectorLength(); i != e; ++i) { 2161 IntRange El = GetValueRange(C, result.getVectorElt(i), Ty, MaxWidth); 2162 R = IntRange::join(R, El); 2163 } 2164 return R; 2165 } 2166 2167 if (result.isComplexInt()) { 2168 IntRange R = GetValueRange(C, result.getComplexIntReal(), MaxWidth); 2169 IntRange I = GetValueRange(C, result.getComplexIntImag(), MaxWidth); 2170 return IntRange::join(R, I); 2171 } 2172 2173 // This can happen with lossless casts to intptr_t of "based" lvalues. 2174 // Assume it might use arbitrary bits. 2175 // FIXME: The only reason we need to pass the type in here is to get 2176 // the sign right on this one case. It would be nice if APValue 2177 // preserved this. 2178 assert(result.isLValue()); 2179 return IntRange(MaxWidth, Ty->isUnsignedIntegerType()); 2180} 2181 2182/// Pseudo-evaluate the given integer expression, estimating the 2183/// range of values it might take. 2184/// 2185/// \param MaxWidth - the width to which the value will be truncated 2186IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) { 2187 E = E->IgnoreParens(); 2188 2189 // Try a full evaluation first. 2190 Expr::EvalResult result; 2191 if (E->Evaluate(result, C)) 2192 return GetValueRange(C, result.Val, E->getType(), MaxWidth); 2193 2194 // I think we only want to look through implicit casts here; if the 2195 // user has an explicit widening cast, we should treat the value as 2196 // being of the new, wider type. 2197 if (ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E)) { 2198 if (CE->getCastKind() == CastExpr::CK_NoOp) 2199 return GetExprRange(C, CE->getSubExpr(), MaxWidth); 2200 2201 IntRange OutputTypeRange = IntRange::forType(C, CE->getType()); 2202 2203 bool isIntegerCast = (CE->getCastKind() == CastExpr::CK_IntegralCast); 2204 if (!isIntegerCast && CE->getCastKind() == CastExpr::CK_Unknown) 2205 isIntegerCast = CE->getSubExpr()->getType()->isIntegerType(); 2206 2207 // Assume that non-integer casts can span the full range of the type. 2208 if (!isIntegerCast) 2209 return OutputTypeRange; 2210 2211 IntRange SubRange 2212 = GetExprRange(C, CE->getSubExpr(), 2213 std::min(MaxWidth, OutputTypeRange.Width)); 2214 2215 // Bail out if the subexpr's range is as wide as the cast type. 2216 if (SubRange.Width >= OutputTypeRange.Width) 2217 return OutputTypeRange; 2218 2219 // Otherwise, we take the smaller width, and we're non-negative if 2220 // either the output type or the subexpr is. 2221 return IntRange(SubRange.Width, 2222 SubRange.NonNegative || OutputTypeRange.NonNegative); 2223 } 2224 2225 if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) { 2226 // If we can fold the condition, just take that operand. 2227 bool CondResult; 2228 if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C)) 2229 return GetExprRange(C, CondResult ? CO->getTrueExpr() 2230 : CO->getFalseExpr(), 2231 MaxWidth); 2232 2233 // Otherwise, conservatively merge. 2234 IntRange L = GetExprRange(C, CO->getTrueExpr(), MaxWidth); 2235 IntRange R = GetExprRange(C, CO->getFalseExpr(), MaxWidth); 2236 return IntRange::join(L, R); 2237 } 2238 2239 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) { 2240 switch (BO->getOpcode()) { 2241 2242 // Boolean-valued operations are single-bit and positive. 2243 case BinaryOperator::LAnd: 2244 case BinaryOperator::LOr: 2245 case BinaryOperator::LT: 2246 case BinaryOperator::GT: 2247 case BinaryOperator::LE: 2248 case BinaryOperator::GE: 2249 case BinaryOperator::EQ: 2250 case BinaryOperator::NE: 2251 return IntRange::forBoolType(); 2252 2253 // The type of these compound assignments is the type of the LHS, 2254 // so the RHS is not necessarily an integer. 2255 case BinaryOperator::MulAssign: 2256 case BinaryOperator::DivAssign: 2257 case BinaryOperator::RemAssign: 2258 case BinaryOperator::AddAssign: 2259 case BinaryOperator::SubAssign: 2260 return IntRange::forType(C, E->getType()); 2261 2262 // Operations with opaque sources are black-listed. 2263 case BinaryOperator::PtrMemD: 2264 case BinaryOperator::PtrMemI: 2265 return IntRange::forType(C, E->getType()); 2266 2267 // Bitwise-and uses the *infinum* of the two source ranges. 2268 case BinaryOperator::And: 2269 case BinaryOperator::AndAssign: 2270 return IntRange::meet(GetExprRange(C, BO->getLHS(), MaxWidth), 2271 GetExprRange(C, BO->getRHS(), MaxWidth)); 2272 2273 // Left shift gets black-listed based on a judgement call. 2274 case BinaryOperator::Shl: 2275 // ...except that we want to treat '1 << (blah)' as logically 2276 // positive. It's an important idiom. 2277 if (IntegerLiteral *I 2278 = dyn_cast<IntegerLiteral>(BO->getLHS()->IgnoreParenCasts())) { 2279 if (I->getValue() == 1) { 2280 IntRange R = IntRange::forType(C, E->getType()); 2281 return IntRange(R.Width, /*NonNegative*/ true); 2282 } 2283 } 2284 // fallthrough 2285 2286 case BinaryOperator::ShlAssign: 2287 return IntRange::forType(C, E->getType()); 2288 2289 // Right shift by a constant can narrow its left argument. 2290 case BinaryOperator::Shr: 2291 case BinaryOperator::ShrAssign: { 2292 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 2293 2294 // If the shift amount is a positive constant, drop the width by 2295 // that much. 2296 llvm::APSInt shift; 2297 if (BO->getRHS()->isIntegerConstantExpr(shift, C) && 2298 shift.isNonNegative()) { 2299 unsigned zext = shift.getZExtValue(); 2300 if (zext >= L.Width) 2301 L.Width = (L.NonNegative ? 0 : 1); 2302 else 2303 L.Width -= zext; 2304 } 2305 2306 return L; 2307 } 2308 2309 // Comma acts as its right operand. 2310 case BinaryOperator::Comma: 2311 return GetExprRange(C, BO->getRHS(), MaxWidth); 2312 2313 // Black-list pointer subtractions. 2314 case BinaryOperator::Sub: 2315 if (BO->getLHS()->getType()->isPointerType()) 2316 return IntRange::forType(C, E->getType()); 2317 // fallthrough 2318 2319 default: 2320 break; 2321 } 2322 2323 // Treat every other operator as if it were closed on the 2324 // narrowest type that encompasses both operands. 2325 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 2326 IntRange R = GetExprRange(C, BO->getRHS(), MaxWidth); 2327 return IntRange::join(L, R); 2328 } 2329 2330 if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { 2331 switch (UO->getOpcode()) { 2332 // Boolean-valued operations are white-listed. 2333 case UnaryOperator::LNot: 2334 return IntRange::forBoolType(); 2335 2336 // Operations with opaque sources are black-listed. 2337 case UnaryOperator::Deref: 2338 case UnaryOperator::AddrOf: // should be impossible 2339 case UnaryOperator::OffsetOf: 2340 return IntRange::forType(C, E->getType()); 2341 2342 default: 2343 return GetExprRange(C, UO->getSubExpr(), MaxWidth); 2344 } 2345 } 2346 2347 if (dyn_cast<OffsetOfExpr>(E)) { 2348 IntRange::forType(C, E->getType()); 2349 } 2350 2351 FieldDecl *BitField = E->getBitField(); 2352 if (BitField) { 2353 llvm::APSInt BitWidthAP = BitField->getBitWidth()->EvaluateAsInt(C); 2354 unsigned BitWidth = BitWidthAP.getZExtValue(); 2355 2356 return IntRange(BitWidth, BitField->getType()->isUnsignedIntegerType()); 2357 } 2358 2359 return IntRange::forType(C, E->getType()); 2360} 2361 2362IntRange GetExprRange(ASTContext &C, Expr *E) { 2363 return GetExprRange(C, E, C.getIntWidth(E->getType())); 2364} 2365 2366/// Checks whether the given value, which currently has the given 2367/// source semantics, has the same value when coerced through the 2368/// target semantics. 2369bool IsSameFloatAfterCast(const llvm::APFloat &value, 2370 const llvm::fltSemantics &Src, 2371 const llvm::fltSemantics &Tgt) { 2372 llvm::APFloat truncated = value; 2373 2374 bool ignored; 2375 truncated.convert(Src, llvm::APFloat::rmNearestTiesToEven, &ignored); 2376 truncated.convert(Tgt, llvm::APFloat::rmNearestTiesToEven, &ignored); 2377 2378 return truncated.bitwiseIsEqual(value); 2379} 2380 2381/// Checks whether the given value, which currently has the given 2382/// source semantics, has the same value when coerced through the 2383/// target semantics. 2384/// 2385/// The value might be a vector of floats (or a complex number). 2386bool IsSameFloatAfterCast(const APValue &value, 2387 const llvm::fltSemantics &Src, 2388 const llvm::fltSemantics &Tgt) { 2389 if (value.isFloat()) 2390 return IsSameFloatAfterCast(value.getFloat(), Src, Tgt); 2391 2392 if (value.isVector()) { 2393 for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i) 2394 if (!IsSameFloatAfterCast(value.getVectorElt(i), Src, Tgt)) 2395 return false; 2396 return true; 2397 } 2398 2399 assert(value.isComplexFloat()); 2400 return (IsSameFloatAfterCast(value.getComplexFloatReal(), Src, Tgt) && 2401 IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt)); 2402} 2403 2404void AnalyzeImplicitConversions(Sema &S, Expr *E); 2405 2406bool IsZero(Sema &S, Expr *E) { 2407 llvm::APSInt Value; 2408 return E->isIntegerConstantExpr(Value, S.Context) && Value == 0; 2409} 2410 2411void CheckTrivialUnsignedComparison(Sema &S, BinaryOperator *E) { 2412 BinaryOperator::Opcode op = E->getOpcode(); 2413 if (op == BinaryOperator::LT && IsZero(S, E->getRHS())) { 2414 S.Diag(E->getOperatorLoc(), diag::warn_lunsigned_always_true_comparison) 2415 << "< 0" << "false" 2416 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2417 } else if (op == BinaryOperator::GE && IsZero(S, E->getRHS())) { 2418 S.Diag(E->getOperatorLoc(), diag::warn_lunsigned_always_true_comparison) 2419 << ">= 0" << "true" 2420 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2421 } else if (op == BinaryOperator::GT && IsZero(S, E->getLHS())) { 2422 S.Diag(E->getOperatorLoc(), diag::warn_runsigned_always_true_comparison) 2423 << "0 >" << "false" 2424 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2425 } else if (op == BinaryOperator::LE && IsZero(S, E->getLHS())) { 2426 S.Diag(E->getOperatorLoc(), diag::warn_runsigned_always_true_comparison) 2427 << "0 <=" << "true" 2428 << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); 2429 } 2430} 2431 2432/// Analyze the operands of the given comparison. Implements the 2433/// fallback case from AnalyzeComparison. 2434void AnalyzeImpConvsInComparison(Sema &S, BinaryOperator *E) { 2435 AnalyzeImplicitConversions(S, E->getLHS()); 2436 AnalyzeImplicitConversions(S, E->getRHS()); 2437} 2438 2439/// \brief Implements -Wsign-compare. 2440/// 2441/// \param lex the left-hand expression 2442/// \param rex the right-hand expression 2443/// \param OpLoc the location of the joining operator 2444/// \param BinOpc binary opcode or 0 2445void AnalyzeComparison(Sema &S, BinaryOperator *E) { 2446 // The type the comparison is being performed in. 2447 QualType T = E->getLHS()->getType(); 2448 assert(S.Context.hasSameUnqualifiedType(T, E->getRHS()->getType()) 2449 && "comparison with mismatched types"); 2450 2451 // We don't do anything special if this isn't an unsigned integral 2452 // comparison: we're only interested in integral comparisons, and 2453 // signed comparisons only happen in cases we don't care to warn about. 2454 if (!T->hasUnsignedIntegerRepresentation()) 2455 return AnalyzeImpConvsInComparison(S, E); 2456 2457 Expr *lex = E->getLHS()->IgnoreParenImpCasts(); 2458 Expr *rex = E->getRHS()->IgnoreParenImpCasts(); 2459 2460 // Check to see if one of the (unmodified) operands is of different 2461 // signedness. 2462 Expr *signedOperand, *unsignedOperand; 2463 if (lex->getType()->hasSignedIntegerRepresentation()) { 2464 assert(!rex->getType()->hasSignedIntegerRepresentation() && 2465 "unsigned comparison between two signed integer expressions?"); 2466 signedOperand = lex; 2467 unsignedOperand = rex; 2468 } else if (rex->getType()->hasSignedIntegerRepresentation()) { 2469 signedOperand = rex; 2470 unsignedOperand = lex; 2471 } else { 2472 CheckTrivialUnsignedComparison(S, E); 2473 return AnalyzeImpConvsInComparison(S, E); 2474 } 2475 2476 // Otherwise, calculate the effective range of the signed operand. 2477 IntRange signedRange = GetExprRange(S.Context, signedOperand); 2478 2479 // Go ahead and analyze implicit conversions in the operands. Note 2480 // that we skip the implicit conversions on both sides. 2481 AnalyzeImplicitConversions(S, lex); 2482 AnalyzeImplicitConversions(S, rex); 2483 2484 // If the signed range is non-negative, -Wsign-compare won't fire, 2485 // but we should still check for comparisons which are always true 2486 // or false. 2487 if (signedRange.NonNegative) 2488 return CheckTrivialUnsignedComparison(S, E); 2489 2490 // For (in)equality comparisons, if the unsigned operand is a 2491 // constant which cannot collide with a overflowed signed operand, 2492 // then reinterpreting the signed operand as unsigned will not 2493 // change the result of the comparison. 2494 if (E->isEqualityOp()) { 2495 unsigned comparisonWidth = S.Context.getIntWidth(T); 2496 IntRange unsignedRange = GetExprRange(S.Context, unsignedOperand); 2497 2498 // We should never be unable to prove that the unsigned operand is 2499 // non-negative. 2500 assert(unsignedRange.NonNegative && "unsigned range includes negative?"); 2501 2502 if (unsignedRange.Width < comparisonWidth) 2503 return; 2504 } 2505 2506 S.Diag(E->getOperatorLoc(), diag::warn_mixed_sign_comparison) 2507 << lex->getType() << rex->getType() 2508 << lex->getSourceRange() << rex->getSourceRange(); 2509} 2510 2511/// Diagnose an implicit cast; purely a helper for CheckImplicitConversion. 2512void DiagnoseImpCast(Sema &S, Expr *E, QualType T, unsigned diag) { 2513 S.Diag(E->getExprLoc(), diag) << E->getType() << T << E->getSourceRange(); 2514} 2515 2516void CheckImplicitConversion(Sema &S, Expr *E, QualType T, 2517 bool *ICContext = 0) { 2518 if (E->isTypeDependent() || E->isValueDependent()) return; 2519 2520 const Type *Source = S.Context.getCanonicalType(E->getType()).getTypePtr(); 2521 const Type *Target = S.Context.getCanonicalType(T).getTypePtr(); 2522 if (Source == Target) return; 2523 if (Target->isDependentType()) return; 2524 2525 // Never diagnose implicit casts to bool. 2526 if (Target->isSpecificBuiltinType(BuiltinType::Bool)) 2527 return; 2528 2529 // Strip vector types. 2530 if (isa<VectorType>(Source)) { 2531 if (!isa<VectorType>(Target)) 2532 return DiagnoseImpCast(S, E, T, diag::warn_impcast_vector_scalar); 2533 2534 Source = cast<VectorType>(Source)->getElementType().getTypePtr(); 2535 Target = cast<VectorType>(Target)->getElementType().getTypePtr(); 2536 } 2537 2538 // Strip complex types. 2539 if (isa<ComplexType>(Source)) { 2540 if (!isa<ComplexType>(Target)) 2541 return DiagnoseImpCast(S, E, T, diag::warn_impcast_complex_scalar); 2542 2543 Source = cast<ComplexType>(Source)->getElementType().getTypePtr(); 2544 Target = cast<ComplexType>(Target)->getElementType().getTypePtr(); 2545 } 2546 2547 const BuiltinType *SourceBT = dyn_cast<BuiltinType>(Source); 2548 const BuiltinType *TargetBT = dyn_cast<BuiltinType>(Target); 2549 2550 // If the source is floating point... 2551 if (SourceBT && SourceBT->isFloatingPoint()) { 2552 // ...and the target is floating point... 2553 if (TargetBT && TargetBT->isFloatingPoint()) { 2554 // ...then warn if we're dropping FP rank. 2555 2556 // Builtin FP kinds are ordered by increasing FP rank. 2557 if (SourceBT->getKind() > TargetBT->getKind()) { 2558 // Don't warn about float constants that are precisely 2559 // representable in the target type. 2560 Expr::EvalResult result; 2561 if (E->Evaluate(result, S.Context)) { 2562 // Value might be a float, a float vector, or a float complex. 2563 if (IsSameFloatAfterCast(result.Val, 2564 S.Context.getFloatTypeSemantics(QualType(TargetBT, 0)), 2565 S.Context.getFloatTypeSemantics(QualType(SourceBT, 0)))) 2566 return; 2567 } 2568 2569 DiagnoseImpCast(S, E, T, diag::warn_impcast_float_precision); 2570 } 2571 return; 2572 } 2573 2574 // If the target is integral, always warn. 2575 if ((TargetBT && TargetBT->isInteger())) 2576 // TODO: don't warn for integer values? 2577 DiagnoseImpCast(S, E, T, diag::warn_impcast_float_integer); 2578 2579 return; 2580 } 2581 2582 if (!Source->isIntegerType() || !Target->isIntegerType()) 2583 return; 2584 2585 IntRange SourceRange = GetExprRange(S.Context, E); 2586 IntRange TargetRange = IntRange::forCanonicalType(S.Context, Target); 2587 2588 if (SourceRange.Width > TargetRange.Width) { 2589 // People want to build with -Wshorten-64-to-32 and not -Wconversion 2590 // and by god we'll let them. 2591 if (SourceRange.Width == 64 && TargetRange.Width == 32) 2592 return DiagnoseImpCast(S, E, T, diag::warn_impcast_integer_64_32); 2593 return DiagnoseImpCast(S, E, T, diag::warn_impcast_integer_precision); 2594 } 2595 2596 if ((TargetRange.NonNegative && !SourceRange.NonNegative) || 2597 (!TargetRange.NonNegative && SourceRange.NonNegative && 2598 SourceRange.Width == TargetRange.Width)) { 2599 unsigned DiagID = diag::warn_impcast_integer_sign; 2600 2601 // Traditionally, gcc has warned about this under -Wsign-compare. 2602 // We also want to warn about it in -Wconversion. 2603 // So if -Wconversion is off, use a completely identical diagnostic 2604 // in the sign-compare group. 2605 // The conditional-checking code will 2606 if (ICContext) { 2607 DiagID = diag::warn_impcast_integer_sign_conditional; 2608 *ICContext = true; 2609 } 2610 2611 return DiagnoseImpCast(S, E, T, DiagID); 2612 } 2613 2614 return; 2615} 2616 2617void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T); 2618 2619void CheckConditionalOperand(Sema &S, Expr *E, QualType T, 2620 bool &ICContext) { 2621 E = E->IgnoreParenImpCasts(); 2622 2623 if (isa<ConditionalOperator>(E)) 2624 return CheckConditionalOperator(S, cast<ConditionalOperator>(E), T); 2625 2626 AnalyzeImplicitConversions(S, E); 2627 if (E->getType() != T) 2628 return CheckImplicitConversion(S, E, T, &ICContext); 2629 return; 2630} 2631 2632void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T) { 2633 AnalyzeImplicitConversions(S, E->getCond()); 2634 2635 bool Suspicious = false; 2636 CheckConditionalOperand(S, E->getTrueExpr(), T, Suspicious); 2637 CheckConditionalOperand(S, E->getFalseExpr(), T, Suspicious); 2638 2639 // If -Wconversion would have warned about either of the candidates 2640 // for a signedness conversion to the context type... 2641 if (!Suspicious) return; 2642 2643 // ...but it's currently ignored... 2644 if (S.Diags.getDiagnosticLevel(diag::warn_impcast_integer_sign_conditional)) 2645 return; 2646 2647 // ...and -Wsign-compare isn't... 2648 if (!S.Diags.getDiagnosticLevel(diag::warn_mixed_sign_conditional)) 2649 return; 2650 2651 // ...then check whether it would have warned about either of the 2652 // candidates for a signedness conversion to the condition type. 2653 if (E->getType() != T) { 2654 Suspicious = false; 2655 CheckImplicitConversion(S, E->getTrueExpr()->IgnoreParenImpCasts(), 2656 E->getType(), &Suspicious); 2657 if (!Suspicious) 2658 CheckImplicitConversion(S, E->getFalseExpr()->IgnoreParenImpCasts(), 2659 E->getType(), &Suspicious); 2660 if (!Suspicious) 2661 return; 2662 } 2663 2664 // If so, emit a diagnostic under -Wsign-compare. 2665 Expr *lex = E->getTrueExpr()->IgnoreParenImpCasts(); 2666 Expr *rex = E->getFalseExpr()->IgnoreParenImpCasts(); 2667 S.Diag(E->getQuestionLoc(), diag::warn_mixed_sign_conditional) 2668 << lex->getType() << rex->getType() 2669 << lex->getSourceRange() << rex->getSourceRange(); 2670} 2671 2672/// AnalyzeImplicitConversions - Find and report any interesting 2673/// implicit conversions in the given expression. There are a couple 2674/// of competing diagnostics here, -Wconversion and -Wsign-compare. 2675void AnalyzeImplicitConversions(Sema &S, Expr *OrigE) { 2676 QualType T = OrigE->getType(); 2677 Expr *E = OrigE->IgnoreParenImpCasts(); 2678 2679 // For conditional operators, we analyze the arguments as if they 2680 // were being fed directly into the output. 2681 if (isa<ConditionalOperator>(E)) { 2682 ConditionalOperator *CO = cast<ConditionalOperator>(E); 2683 CheckConditionalOperator(S, CO, T); 2684 return; 2685 } 2686 2687 // Go ahead and check any implicit conversions we might have skipped. 2688 // The non-canonical typecheck is just an optimization; 2689 // CheckImplicitConversion will filter out dead implicit conversions. 2690 if (E->getType() != T) 2691 CheckImplicitConversion(S, E, T); 2692 2693 // Now continue drilling into this expression. 2694 2695 // Skip past explicit casts. 2696 if (isa<ExplicitCastExpr>(E)) { 2697 E = cast<ExplicitCastExpr>(E)->getSubExpr()->IgnoreParenImpCasts(); 2698 return AnalyzeImplicitConversions(S, E); 2699 } 2700 2701 // Do a somewhat different check with comparison operators. 2702 if (isa<BinaryOperator>(E) && cast<BinaryOperator>(E)->isComparisonOp()) 2703 return AnalyzeComparison(S, cast<BinaryOperator>(E)); 2704 2705 // These break the otherwise-useful invariant below. Fortunately, 2706 // we don't really need to recurse into them, because any internal 2707 // expressions should have been analyzed already when they were 2708 // built into statements. 2709 if (isa<StmtExpr>(E)) return; 2710 2711 // Don't descend into unevaluated contexts. 2712 if (isa<SizeOfAlignOfExpr>(E)) return; 2713 2714 // Now just recurse over the expression's children. 2715 for (Stmt::child_iterator I = E->child_begin(), IE = E->child_end(); 2716 I != IE; ++I) 2717 AnalyzeImplicitConversions(S, cast<Expr>(*I)); 2718} 2719 2720} // end anonymous namespace 2721 2722/// Diagnoses "dangerous" implicit conversions within the given 2723/// expression (which is a full expression). Implements -Wconversion 2724/// and -Wsign-compare. 2725void Sema::CheckImplicitConversions(Expr *E) { 2726 // Don't diagnose in unevaluated contexts. 2727 if (ExprEvalContexts.back().Context == Sema::Unevaluated) 2728 return; 2729 2730 // Don't diagnose for value- or type-dependent expressions. 2731 if (E->isTypeDependent() || E->isValueDependent()) 2732 return; 2733 2734 AnalyzeImplicitConversions(*this, E); 2735} 2736 2737/// CheckParmsForFunctionDef - Check that the parameters of the given 2738/// function are appropriate for the definition of a function. This 2739/// takes care of any checks that cannot be performed on the 2740/// declaration itself, e.g., that the types of each of the function 2741/// parameters are complete. 2742bool Sema::CheckParmsForFunctionDef(FunctionDecl *FD) { 2743 bool HasInvalidParm = false; 2744 for (unsigned p = 0, NumParams = FD->getNumParams(); p < NumParams; ++p) { 2745 ParmVarDecl *Param = FD->getParamDecl(p); 2746 2747 // C99 6.7.5.3p4: the parameters in a parameter type list in a 2748 // function declarator that is part of a function definition of 2749 // that function shall not have incomplete type. 2750 // 2751 // This is also C++ [dcl.fct]p6. 2752 if (!Param->isInvalidDecl() && 2753 RequireCompleteType(Param->getLocation(), Param->getType(), 2754 diag::err_typecheck_decl_incomplete_type)) { 2755 Param->setInvalidDecl(); 2756 HasInvalidParm = true; 2757 } 2758 2759 // C99 6.9.1p5: If the declarator includes a parameter type list, the 2760 // declaration of each parameter shall include an identifier. 2761 if (Param->getIdentifier() == 0 && 2762 !Param->isImplicit() && 2763 !getLangOptions().CPlusPlus) 2764 Diag(Param->getLocation(), diag::err_parameter_name_omitted); 2765 2766 // C99 6.7.5.3p12: 2767 // If the function declarator is not part of a definition of that 2768 // function, parameters may have incomplete type and may use the [*] 2769 // notation in their sequences of declarator specifiers to specify 2770 // variable length array types. 2771 QualType PType = Param->getOriginalType(); 2772 if (const ArrayType *AT = Context.getAsArrayType(PType)) { 2773 if (AT->getSizeModifier() == ArrayType::Star) { 2774 // FIXME: This diagnosic should point the the '[*]' if source-location 2775 // information is added for it. 2776 Diag(Param->getLocation(), diag::err_array_star_in_function_definition); 2777 } 2778 } 2779 } 2780 2781 return HasInvalidParm; 2782} 2783