SemaChecking.cpp revision 691ebc3f3225542e5abd85e107ebdbec907cf510
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/Analysis/Analyses/PrintfFormatString.h" 17#include "clang/AST/ASTContext.h" 18#include "clang/AST/CharUnits.h" 19#include "clang/AST/DeclObjC.h" 20#include "clang/AST/ExprCXX.h" 21#include "clang/AST/ExprObjC.h" 22#include "clang/AST/DeclObjC.h" 23#include "clang/AST/StmtCXX.h" 24#include "clang/AST/StmtObjC.h" 25#include "clang/Lex/LiteralSupport.h" 26#include "clang/Lex/Preprocessor.h" 27#include "llvm/ADT/BitVector.h" 28#include "llvm/ADT/STLExtras.h" 29#include "clang/Basic/TargetBuiltins.h" 30#include <limits> 31using namespace clang; 32 33/// getLocationOfStringLiteralByte - Return a source location that points to the 34/// specified byte of the specified string literal. 35/// 36/// Strings are amazingly complex. They can be formed from multiple tokens and 37/// can have escape sequences in them in addition to the usual trigraph and 38/// escaped newline business. This routine handles this complexity. 39/// 40SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 41 unsigned ByteNo) const { 42 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 43 44 // Loop over all of the tokens in this string until we find the one that 45 // contains the byte we're looking for. 
46 unsigned TokNo = 0; 47 while (1) { 48 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 49 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 50 51 // Get the spelling of the string so that we can get the data that makes up 52 // the string literal, not the identifier for the macro it is potentially 53 // expanded through. 54 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 55 56 // Re-lex the token to get its length and original spelling. 57 std::pair<FileID, unsigned> LocInfo = 58 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 59 bool Invalid = false; 60 llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid); 61 if (Invalid) 62 return StrTokSpellingLoc; 63 64 const char *StrData = Buffer.data()+LocInfo.second; 65 66 // Create a langops struct and enable trigraphs. This is sufficient for 67 // relexing tokens. 68 LangOptions LangOpts; 69 LangOpts.Trigraphs = true; 70 71 // Create a lexer starting at the beginning of this token. 72 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData, 73 Buffer.end()); 74 Token TheTok; 75 TheLexer.LexFromRawLexer(TheTok); 76 77 // Use the StringLiteralParser to compute the length of the string in bytes. 78 StringLiteralParser SLP(&TheTok, 1, PP); 79 unsigned TokNumBytes = SLP.GetStringLength(); 80 81 // If the byte is in this token, return the location of the byte. 82 if (ByteNo < TokNumBytes || 83 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 84 unsigned Offset = 85 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP); 86 87 // Now that we know the offset of the token in the spelling, use the 88 // preprocessor to get the offset in the original source. 89 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 90 } 91 92 // Move to the next string token. 93 ++TokNo; 94 ByteNo -= TokNumBytes; 95 } 96} 97 98/// CheckablePrintfAttr - does a function call have a "printf" attribute 99/// and arguments that merit checking? 
100bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) { 101 if (Format->getType() == "printf") return true; 102 if (Format->getType() == "printf0") { 103 // printf0 allows null "format" string; if so don't check format/args 104 unsigned format_idx = Format->getFormatIdx() - 1; 105 // Does the index refer to the implicit object argument? 106 if (isa<CXXMemberCallExpr>(TheCall)) { 107 if (format_idx == 0) 108 return false; 109 --format_idx; 110 } 111 if (format_idx < TheCall->getNumArgs()) { 112 Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts(); 113 if (!Format->isNullPointerConstant(Context, 114 Expr::NPC_ValueDependentIsNull)) 115 return true; 116 } 117 } 118 return false; 119} 120 121Action::OwningExprResult 122Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 123 OwningExprResult TheCallResult(Owned(TheCall)); 124 125 switch (BuiltinID) { 126 case Builtin::BI__builtin___CFStringMakeConstantString: 127 assert(TheCall->getNumArgs() == 1 && 128 "Wrong # arguments to builtin CFStringMakeConstantString"); 129 if (CheckObjCString(TheCall->getArg(0))) 130 return ExprError(); 131 break; 132 case Builtin::BI__builtin_stdarg_start: 133 case Builtin::BI__builtin_va_start: 134 if (SemaBuiltinVAStart(TheCall)) 135 return ExprError(); 136 break; 137 case Builtin::BI__builtin_isgreater: 138 case Builtin::BI__builtin_isgreaterequal: 139 case Builtin::BI__builtin_isless: 140 case Builtin::BI__builtin_islessequal: 141 case Builtin::BI__builtin_islessgreater: 142 case Builtin::BI__builtin_isunordered: 143 if (SemaBuiltinUnorderedCompare(TheCall)) 144 return ExprError(); 145 break; 146 case Builtin::BI__builtin_fpclassify: 147 if (SemaBuiltinFPClassification(TheCall, 6)) 148 return ExprError(); 149 break; 150 case Builtin::BI__builtin_isfinite: 151 case Builtin::BI__builtin_isinf: 152 case Builtin::BI__builtin_isinf_sign: 153 case Builtin::BI__builtin_isnan: 154 case Builtin::BI__builtin_isnormal: 155 if 
(SemaBuiltinFPClassification(TheCall, 1)) 156 return ExprError(); 157 break; 158 case Builtin::BI__builtin_return_address: 159 case Builtin::BI__builtin_frame_address: { 160 llvm::APSInt Result; 161 if (SemaBuiltinConstantArg(TheCall, 0, Result)) 162 return ExprError(); 163 break; 164 } 165 case Builtin::BI__builtin_eh_return_data_regno: { 166 llvm::APSInt Result; 167 if (SemaBuiltinConstantArg(TheCall, 0, Result)) 168 return ExprError(); 169 break; 170 } 171 case Builtin::BI__builtin_shufflevector: 172 return SemaBuiltinShuffleVector(TheCall); 173 // TheCall will be freed by the smart pointer here, but that's fine, since 174 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 175 case Builtin::BI__builtin_prefetch: 176 if (SemaBuiltinPrefetch(TheCall)) 177 return ExprError(); 178 break; 179 case Builtin::BI__builtin_object_size: 180 if (SemaBuiltinObjectSize(TheCall)) 181 return ExprError(); 182 break; 183 case Builtin::BI__builtin_longjmp: 184 if (SemaBuiltinLongjmp(TheCall)) 185 return ExprError(); 186 break; 187 case Builtin::BI__sync_fetch_and_add: 188 case Builtin::BI__sync_fetch_and_sub: 189 case Builtin::BI__sync_fetch_and_or: 190 case Builtin::BI__sync_fetch_and_and: 191 case Builtin::BI__sync_fetch_and_xor: 192 case Builtin::BI__sync_add_and_fetch: 193 case Builtin::BI__sync_sub_and_fetch: 194 case Builtin::BI__sync_and_and_fetch: 195 case Builtin::BI__sync_or_and_fetch: 196 case Builtin::BI__sync_xor_and_fetch: 197 case Builtin::BI__sync_val_compare_and_swap: 198 case Builtin::BI__sync_bool_compare_and_swap: 199 case Builtin::BI__sync_lock_test_and_set: 200 case Builtin::BI__sync_lock_release: 201 if (SemaBuiltinAtomicOverloaded(TheCall)) 202 return ExprError(); 203 break; 204 205 // Target specific builtins start here. 
206 case X86::BI__builtin_ia32_palignr128: 207 case X86::BI__builtin_ia32_palignr: { 208 llvm::APSInt Result; 209 if (SemaBuiltinConstantArg(TheCall, 2, Result)) 210 return ExprError(); 211 break; 212 } 213 } 214 215 return move(TheCallResult); 216} 217 218/// CheckFunctionCall - Check a direct function call for various correctness 219/// and safety properties not strictly enforced by the C type system. 220bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 221 // Get the IdentifierInfo* for the called function. 222 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 223 224 // None of the checks below are needed for functions that don't have 225 // simple names (e.g., C++ conversion functions). 226 if (!FnInfo) 227 return false; 228 229 // FIXME: This mechanism should be abstracted to be less fragile and 230 // more efficient. For example, just map function ids to custom 231 // handlers. 232 233 // Printf checking. 234 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 235 if (CheckablePrintfAttr(Format, TheCall)) { 236 bool HasVAListArg = Format->getFirstArg() == 0; 237 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 238 HasVAListArg ? 0 : Format->getFirstArg() - 1); 239 } 240 } 241 242 for (const NonNullAttr *NonNull = FDecl->getAttr<NonNullAttr>(); NonNull; 243 NonNull = NonNull->getNext<NonNullAttr>()) 244 CheckNonNullArguments(NonNull, TheCall); 245 246 return false; 247} 248 249bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { 250 // Printf checking. 
251 const FormatAttr *Format = NDecl->getAttr<FormatAttr>(); 252 if (!Format) 253 return false; 254 255 const VarDecl *V = dyn_cast<VarDecl>(NDecl); 256 if (!V) 257 return false; 258 259 QualType Ty = V->getType(); 260 if (!Ty->isBlockPointerType()) 261 return false; 262 263 if (!CheckablePrintfAttr(Format, TheCall)) 264 return false; 265 266 bool HasVAListArg = Format->getFirstArg() == 0; 267 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 268 HasVAListArg ? 0 : Format->getFirstArg() - 1); 269 270 return false; 271} 272 273/// SemaBuiltinAtomicOverloaded - We have a call to a function like 274/// __sync_fetch_and_add, which is an overloaded function based on the pointer 275/// type of its first argument. The main ActOnCallExpr routines have already 276/// promoted the types of arguments because all of these calls are prototyped as 277/// void(...). 278/// 279/// This function goes through and does final semantic checking for these 280/// builtins, 281bool Sema::SemaBuiltinAtomicOverloaded(CallExpr *TheCall) { 282 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 283 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 284 285 // Ensure that we have at least one argument to do type inference from. 286 if (TheCall->getNumArgs() < 1) 287 return Diag(TheCall->getLocEnd(), 288 diag::err_typecheck_call_too_few_args_at_least) 289 << 0 << 1 << TheCall->getNumArgs() 290 << TheCall->getCallee()->getSourceRange(); 291 292 // Inspect the first argument of the atomic builtin. This should always be 293 // a pointer type, whose element is an integral scalar or pointer type. 294 // Because it is a pointer type, we don't have to worry about any implicit 295 // casts here. 
296 Expr *FirstArg = TheCall->getArg(0); 297 if (!FirstArg->getType()->isPointerType()) 298 return Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer) 299 << FirstArg->getType() << FirstArg->getSourceRange(); 300 301 QualType ValType = FirstArg->getType()->getAs<PointerType>()->getPointeeType(); 302 if (!ValType->isIntegerType() && !ValType->isPointerType() && 303 !ValType->isBlockPointerType()) 304 return Diag(DRE->getLocStart(), 305 diag::err_atomic_builtin_must_be_pointer_intptr) 306 << FirstArg->getType() << FirstArg->getSourceRange(); 307 308 // We need to figure out which concrete builtin this maps onto. For example, 309 // __sync_fetch_and_add with a 2 byte object turns into 310 // __sync_fetch_and_add_2. 311#define BUILTIN_ROW(x) \ 312 { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \ 313 Builtin::BI##x##_8, Builtin::BI##x##_16 } 314 315 static const unsigned BuiltinIndices[][5] = { 316 BUILTIN_ROW(__sync_fetch_and_add), 317 BUILTIN_ROW(__sync_fetch_and_sub), 318 BUILTIN_ROW(__sync_fetch_and_or), 319 BUILTIN_ROW(__sync_fetch_and_and), 320 BUILTIN_ROW(__sync_fetch_and_xor), 321 322 BUILTIN_ROW(__sync_add_and_fetch), 323 BUILTIN_ROW(__sync_sub_and_fetch), 324 BUILTIN_ROW(__sync_and_and_fetch), 325 BUILTIN_ROW(__sync_or_and_fetch), 326 BUILTIN_ROW(__sync_xor_and_fetch), 327 328 BUILTIN_ROW(__sync_val_compare_and_swap), 329 BUILTIN_ROW(__sync_bool_compare_and_swap), 330 BUILTIN_ROW(__sync_lock_test_and_set), 331 BUILTIN_ROW(__sync_lock_release) 332 }; 333#undef BUILTIN_ROW 334 335 // Determine the index of the size. 
336 unsigned SizeIndex; 337 switch (Context.getTypeSizeInChars(ValType).getQuantity()) { 338 case 1: SizeIndex = 0; break; 339 case 2: SizeIndex = 1; break; 340 case 4: SizeIndex = 2; break; 341 case 8: SizeIndex = 3; break; 342 case 16: SizeIndex = 4; break; 343 default: 344 return Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size) 345 << FirstArg->getType() << FirstArg->getSourceRange(); 346 } 347 348 // Each of these builtins has one pointer argument, followed by some number of 349 // values (0, 1 or 2) followed by a potentially empty varags list of stuff 350 // that we ignore. Find out which row of BuiltinIndices to read from as well 351 // as the number of fixed args. 352 unsigned BuiltinID = FDecl->getBuiltinID(); 353 unsigned BuiltinIndex, NumFixed = 1; 354 switch (BuiltinID) { 355 default: assert(0 && "Unknown overloaded atomic builtin!"); 356 case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break; 357 case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break; 358 case Builtin::BI__sync_fetch_and_or: BuiltinIndex = 2; break; 359 case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break; 360 case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break; 361 362 case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 5; break; 363 case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 6; break; 364 case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 7; break; 365 case Builtin::BI__sync_or_and_fetch: BuiltinIndex = 8; break; 366 case Builtin::BI__sync_xor_and_fetch: BuiltinIndex = 9; break; 367 368 case Builtin::BI__sync_val_compare_and_swap: 369 BuiltinIndex = 10; 370 NumFixed = 2; 371 break; 372 case Builtin::BI__sync_bool_compare_and_swap: 373 BuiltinIndex = 11; 374 NumFixed = 2; 375 break; 376 case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 12; break; 377 case Builtin::BI__sync_lock_release: 378 BuiltinIndex = 13; 379 NumFixed = 0; 380 break; 381 } 382 383 // Now that we know how many fixed arguments we expect, first check 
that we 384 // have at least that many. 385 if (TheCall->getNumArgs() < 1+NumFixed) 386 return Diag(TheCall->getLocEnd(), 387 diag::err_typecheck_call_too_few_args_at_least) 388 << 0 << 1+NumFixed << TheCall->getNumArgs() 389 << TheCall->getCallee()->getSourceRange(); 390 391 392 // Get the decl for the concrete builtin from this, we can tell what the 393 // concrete integer type we should convert to is. 394 unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex]; 395 const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID); 396 IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName); 397 FunctionDecl *NewBuiltinDecl = 398 cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID, 399 TUScope, false, DRE->getLocStart())); 400 const FunctionProtoType *BuiltinFT = 401 NewBuiltinDecl->getType()->getAs<FunctionProtoType>(); 402 ValType = BuiltinFT->getArgType(0)->getAs<PointerType>()->getPointeeType(); 403 404 // If the first type needs to be converted (e.g. void** -> int*), do it now. 405 if (BuiltinFT->getArgType(0) != FirstArg->getType()) { 406 ImpCastExprToType(FirstArg, BuiltinFT->getArgType(0), CastExpr::CK_BitCast); 407 TheCall->setArg(0, FirstArg); 408 } 409 410 // Next, walk the valid ones promoting to the right type. 411 for (unsigned i = 0; i != NumFixed; ++i) { 412 Expr *Arg = TheCall->getArg(i+1); 413 414 // If the argument is an implicit cast, then there was a promotion due to 415 // "...", just remove it now. 416 if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) { 417 Arg = ICE->getSubExpr(); 418 ICE->setSubExpr(0); 419 ICE->Destroy(Context); 420 TheCall->setArg(i+1, Arg); 421 } 422 423 // GCC does an implicit conversion to the pointer or integer ValType. This 424 // can fail in some cases (1i -> int**), check for this error case now. 
425 CastExpr::CastKind Kind = CastExpr::CK_Unknown; 426 if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg, Kind)) 427 return true; 428 429 // Okay, we have something that *can* be converted to the right type. Check 430 // to see if there is a potentially weird extension going on here. This can 431 // happen when you do an atomic operation on something like an char* and 432 // pass in 42. The 42 gets converted to char. This is even more strange 433 // for things like 45.123 -> char, etc. 434 // FIXME: Do this check. 435 ImpCastExprToType(Arg, ValType, Kind, /*isLvalue=*/false); 436 TheCall->setArg(i+1, Arg); 437 } 438 439 // Switch the DeclRefExpr to refer to the new decl. 440 DRE->setDecl(NewBuiltinDecl); 441 DRE->setType(NewBuiltinDecl->getType()); 442 443 // Set the callee in the CallExpr. 444 // FIXME: This leaks the original parens and implicit casts. 445 Expr *PromotedCall = DRE; 446 UsualUnaryConversions(PromotedCall); 447 TheCall->setCallee(PromotedCall); 448 449 450 // Change the result type of the call to match the result type of the decl. 451 TheCall->setType(NewBuiltinDecl->getResultType()); 452 return false; 453} 454 455 456/// CheckObjCString - Checks that the argument to the builtin 457/// CFString constructor is correct 458/// FIXME: GCC currently emits the following warning: 459/// "warning: input conversion stopped due to an input byte that does not 460/// belong to the input codeset UTF-8" 461/// Note: It might also make sense to do the UTF-16 conversion here (would 462/// simplify the backend). 
463bool Sema::CheckObjCString(Expr *Arg) { 464 Arg = Arg->IgnoreParenCasts(); 465 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 466 467 if (!Literal || Literal->isWide()) { 468 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 469 << Arg->getSourceRange(); 470 return true; 471 } 472 473 const char *Data = Literal->getStrData(); 474 unsigned Length = Literal->getByteLength(); 475 476 for (unsigned i = 0; i < Length; ++i) { 477 if (!Data[i]) { 478 Diag(getLocationOfStringLiteralByte(Literal, i), 479 diag::warn_cfstring_literal_contains_nul_character) 480 << Arg->getSourceRange(); 481 break; 482 } 483 } 484 485 return false; 486} 487 488/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 489/// Emit an error and return true on failure, return false on success. 490bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 491 Expr *Fn = TheCall->getCallee(); 492 if (TheCall->getNumArgs() > 2) { 493 Diag(TheCall->getArg(2)->getLocStart(), 494 diag::err_typecheck_call_too_many_args) 495 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 496 << Fn->getSourceRange() 497 << SourceRange(TheCall->getArg(2)->getLocStart(), 498 (*(TheCall->arg_end()-1))->getLocEnd()); 499 return true; 500 } 501 502 if (TheCall->getNumArgs() < 2) { 503 return Diag(TheCall->getLocEnd(), 504 diag::err_typecheck_call_too_few_args_at_least) 505 << 0 /*function call*/ << 2 << TheCall->getNumArgs(); 506 } 507 508 // Determine whether the current function is variadic or not. 
509 BlockScopeInfo *CurBlock = getCurBlock(); 510 bool isVariadic; 511 if (CurBlock) 512 isVariadic = CurBlock->isVariadic; 513 else if (getCurFunctionDecl()) { 514 if (FunctionProtoType* FTP = 515 dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType())) 516 isVariadic = FTP->isVariadic(); 517 else 518 isVariadic = false; 519 } else { 520 isVariadic = getCurMethodDecl()->isVariadic(); 521 } 522 523 if (!isVariadic) { 524 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 525 return true; 526 } 527 528 // Verify that the second argument to the builtin is the last argument of the 529 // current function or method. 530 bool SecondArgIsLastNamedArgument = false; 531 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 532 533 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 534 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 535 // FIXME: This isn't correct for methods (results in bogus warning). 536 // Get the last formal in the current function. 537 const ParmVarDecl *LastArg; 538 if (CurBlock) 539 LastArg = *(CurBlock->TheDecl->param_end()-1); 540 else if (FunctionDecl *FD = getCurFunctionDecl()) 541 LastArg = *(FD->param_end()-1); 542 else 543 LastArg = *(getCurMethodDecl()->param_end()-1); 544 SecondArgIsLastNamedArgument = PV == LastArg; 545 } 546 } 547 548 if (!SecondArgIsLastNamedArgument) 549 Diag(TheCall->getArg(1)->getLocStart(), 550 diag::warn_second_parameter_of_va_start_not_last_named_argument); 551 return false; 552} 553 554/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 555/// friends. This is declared to take (...), so we have to check everything. 
556bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 557 if (TheCall->getNumArgs() < 2) 558 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 559 << 0 << 2 << TheCall->getNumArgs()/*function call*/; 560 if (TheCall->getNumArgs() > 2) 561 return Diag(TheCall->getArg(2)->getLocStart(), 562 diag::err_typecheck_call_too_many_args) 563 << 0 /*function call*/ << 2 << TheCall->getNumArgs() 564 << SourceRange(TheCall->getArg(2)->getLocStart(), 565 (*(TheCall->arg_end()-1))->getLocEnd()); 566 567 Expr *OrigArg0 = TheCall->getArg(0); 568 Expr *OrigArg1 = TheCall->getArg(1); 569 570 // Do standard promotions between the two arguments, returning their common 571 // type. 572 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 573 574 // Make sure any conversions are pushed back into the call; this is 575 // type safe since unordered compare builtins are declared as "_Bool 576 // foo(...)". 577 TheCall->setArg(0, OrigArg0); 578 TheCall->setArg(1, OrigArg1); 579 580 if (OrigArg0->isTypeDependent() || OrigArg1->isTypeDependent()) 581 return false; 582 583 // If the common type isn't a real floating type, then the arguments were 584 // invalid for this operation. 585 if (!Res->isRealFloatingType()) 586 return Diag(OrigArg0->getLocStart(), 587 diag::err_typecheck_call_invalid_ordered_compare) 588 << OrigArg0->getType() << OrigArg1->getType() 589 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 590 591 return false; 592} 593 594/// SemaBuiltinSemaBuiltinFPClassification - Handle functions like 595/// __builtin_isnan and friends. This is declared to take (...), so we have 596/// to check everything. We expect the last argument to be a floating point 597/// value. 
598bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs) { 599 if (TheCall->getNumArgs() < NumArgs) 600 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 601 << 0 << NumArgs << TheCall->getNumArgs()/*function call*/; 602 if (TheCall->getNumArgs() > NumArgs) 603 return Diag(TheCall->getArg(NumArgs)->getLocStart(), 604 diag::err_typecheck_call_too_many_args) 605 << 0 /*function call*/ << NumArgs << TheCall->getNumArgs() 606 << SourceRange(TheCall->getArg(NumArgs)->getLocStart(), 607 (*(TheCall->arg_end()-1))->getLocEnd()); 608 609 Expr *OrigArg = TheCall->getArg(NumArgs-1); 610 611 if (OrigArg->isTypeDependent()) 612 return false; 613 614 // This operation requires a floating-point number 615 if (!OrigArg->getType()->isRealFloatingType()) 616 return Diag(OrigArg->getLocStart(), 617 diag::err_typecheck_call_invalid_unary_fp) 618 << OrigArg->getType() << OrigArg->getSourceRange(); 619 620 return false; 621} 622 623/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 624// This is declared to take (...), so we have to check everything. 
625Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 626 if (TheCall->getNumArgs() < 3) 627 return ExprError(Diag(TheCall->getLocEnd(), 628 diag::err_typecheck_call_too_few_args_at_least) 629 << 0 /*function call*/ << 3 << TheCall->getNumArgs() 630 << TheCall->getSourceRange()); 631 632 unsigned numElements = std::numeric_limits<unsigned>::max(); 633 if (!TheCall->getArg(0)->isTypeDependent() && 634 !TheCall->getArg(1)->isTypeDependent()) { 635 QualType FAType = TheCall->getArg(0)->getType(); 636 QualType SAType = TheCall->getArg(1)->getType(); 637 638 if (!FAType->isVectorType() || !SAType->isVectorType()) { 639 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 640 << SourceRange(TheCall->getArg(0)->getLocStart(), 641 TheCall->getArg(1)->getLocEnd()); 642 return ExprError(); 643 } 644 645 if (!Context.hasSameUnqualifiedType(FAType, SAType)) { 646 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 647 << SourceRange(TheCall->getArg(0)->getLocStart(), 648 TheCall->getArg(1)->getLocEnd()); 649 return ExprError(); 650 } 651 652 numElements = FAType->getAs<VectorType>()->getNumElements(); 653 if (TheCall->getNumArgs() != numElements+2) { 654 if (TheCall->getNumArgs() < numElements+2) 655 return ExprError(Diag(TheCall->getLocEnd(), 656 diag::err_typecheck_call_too_few_args) 657 << 0 /*function call*/ 658 << numElements+2 << TheCall->getNumArgs() 659 << TheCall->getSourceRange()); 660 return ExprError(Diag(TheCall->getLocEnd(), 661 diag::err_typecheck_call_too_many_args) 662 << 0 /*function call*/ 663 << numElements+2 << TheCall->getNumArgs() 664 << TheCall->getSourceRange()); 665 } 666 } 667 668 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 669 if (TheCall->getArg(i)->isTypeDependent() || 670 TheCall->getArg(i)->isValueDependent()) 671 continue; 672 673 llvm::APSInt Result; 674 if (SemaBuiltinConstantArg(TheCall, i, Result)) 675 return ExprError(); 676 677 if (Result.getActiveBits() > 64 || 
Result.getZExtValue() >= numElements*2) 678 return ExprError(Diag(TheCall->getLocStart(), 679 diag::err_shufflevector_argument_too_large) 680 << TheCall->getArg(i)->getSourceRange()); 681 } 682 683 llvm::SmallVector<Expr*, 32> exprs; 684 685 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 686 exprs.push_back(TheCall->getArg(i)); 687 TheCall->setArg(i, 0); 688 } 689 690 return Owned(new (Context) ShuffleVectorExpr(Context, exprs.begin(), 691 exprs.size(), exprs[0]->getType(), 692 TheCall->getCallee()->getLocStart(), 693 TheCall->getRParenLoc())); 694} 695 696/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 697// This is declared to take (const void*, ...) and can take two 698// optional constant int args. 699bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 700 unsigned NumArgs = TheCall->getNumArgs(); 701 702 if (NumArgs > 3) 703 return Diag(TheCall->getLocEnd(), 704 diag::err_typecheck_call_too_many_args_at_most) 705 << 0 /*function call*/ << 3 << NumArgs 706 << TheCall->getSourceRange(); 707 708 // Argument 0 is checked for us and the remaining arguments must be 709 // constant integers. 710 for (unsigned i = 1; i != NumArgs; ++i) { 711 Expr *Arg = TheCall->getArg(i); 712 713 llvm::APSInt Result; 714 if (SemaBuiltinConstantArg(TheCall, i, Result)) 715 return true; 716 717 // FIXME: gcc issues a warning and rewrites these to 0. These 718 // seems especially odd for the third argument since the default 719 // is 3. 720 if (i == 1) { 721 if (Result.getLimitedValue() > 1) 722 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 723 << "0" << "1" << Arg->getSourceRange(); 724 } else { 725 if (Result.getLimitedValue() > 3) 726 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 727 << "0" << "3" << Arg->getSourceRange(); 728 } 729 } 730 731 return false; 732} 733 734/// SemaBuiltinConstantArg - Handle a check if argument ArgNum of CallExpr 735/// TheCall is a constant expression. 
736bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, 737 llvm::APSInt &Result) { 738 Expr *Arg = TheCall->getArg(ArgNum); 739 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 740 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 741 742 if (Arg->isTypeDependent() || Arg->isValueDependent()) return false; 743 744 if (!Arg->isIntegerConstantExpr(Result, Context)) 745 return Diag(TheCall->getLocStart(), diag::err_constant_integer_arg_type) 746 << ArgNum << FDecl->getDeclName() << Arg->getSourceRange(); 747 748 return false; 749} 750 751/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 752/// int type). This simply type checks that type is one of the defined 753/// constants (0-3). 754// For compatability check 0-3, llvm only handles 0 and 2. 755bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 756 llvm::APSInt Result; 757 758 // Check constant-ness first. 759 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 760 return true; 761 762 Expr *Arg = TheCall->getArg(1); 763 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 764 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 765 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 766 } 767 768 return false; 769} 770 771/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). 772/// This checks that val is a constant 1. 773bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 774 Expr *Arg = TheCall->getArg(1); 775 llvm::APSInt Result; 776 777 // TODO: This is less than ideal. Overload this to take a value. 778 if (SemaBuiltinConstantArg(TheCall, 1, Result)) 779 return true; 780 781 if (Result != 1) 782 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 783 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 784 785 return false; 786} 787 788// Handle i > 1 ? 
"x" : "y", recursivelly 789bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 790 bool HasVAListArg, 791 unsigned format_idx, unsigned firstDataArg) { 792 if (E->isTypeDependent() || E->isValueDependent()) 793 return false; 794 795 switch (E->getStmtClass()) { 796 case Stmt::ConditionalOperatorClass: { 797 const ConditionalOperator *C = cast<ConditionalOperator>(E); 798 return SemaCheckStringLiteral(C->getTrueExpr(), TheCall, 799 HasVAListArg, format_idx, firstDataArg) 800 && SemaCheckStringLiteral(C->getRHS(), TheCall, 801 HasVAListArg, format_idx, firstDataArg); 802 } 803 804 case Stmt::ImplicitCastExprClass: { 805 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 806 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 807 format_idx, firstDataArg); 808 } 809 810 case Stmt::ParenExprClass: { 811 const ParenExpr *Expr = cast<ParenExpr>(E); 812 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 813 format_idx, firstDataArg); 814 } 815 816 case Stmt::DeclRefExprClass: { 817 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 818 819 // As an exception, do not flag errors for variables binding to 820 // const string literals. 821 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 822 bool isConstant = false; 823 QualType T = DR->getType(); 824 825 if (const ArrayType *AT = Context.getAsArrayType(T)) { 826 isConstant = AT->getElementType().isConstant(Context); 827 } else if (const PointerType *PT = T->getAs<PointerType>()) { 828 isConstant = T.isConstant(Context) && 829 PT->getPointeeType().isConstant(Context); 830 } 831 832 if (isConstant) { 833 if (const Expr *Init = VD->getAnyInitializer()) 834 return SemaCheckStringLiteral(Init, TheCall, 835 HasVAListArg, format_idx, firstDataArg); 836 } 837 838 // For vprintf* functions (i.e., HasVAListArg==true), we add a 839 // special check to see if the format string is a function parameter 840 // of the function calling the printf function. 
If the function 841 // has an attribute indicating it is a printf-like function, then we 842 // should suppress warnings concerning non-literals being used in a call 843 // to a vprintf function. For example: 844 // 845 // void 846 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){ 847 // va_list ap; 848 // va_start(ap, fmt); 849 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 850 // ... 851 // 852 // 853 // FIXME: We don't have full attribute support yet, so just check to see 854 // if the argument is a DeclRefExpr that references a parameter. We'll 855 // add proper support for checking the attribute later. 856 if (HasVAListArg) 857 if (isa<ParmVarDecl>(VD)) 858 return true; 859 } 860 861 return false; 862 } 863 864 case Stmt::CallExprClass: { 865 const CallExpr *CE = cast<CallExpr>(E); 866 if (const ImplicitCastExpr *ICE 867 = dyn_cast<ImplicitCastExpr>(CE->getCallee())) { 868 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) { 869 if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) { 870 if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) { 871 unsigned ArgIndex = FA->getFormatIdx(); 872 const Expr *Arg = CE->getArg(ArgIndex - 1); 873 874 return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg, 875 format_idx, firstDataArg); 876 } 877 } 878 } 879 } 880 881 return false; 882 } 883 case Stmt::ObjCStringLiteralClass: 884 case Stmt::StringLiteralClass: { 885 const StringLiteral *StrE = NULL; 886 887 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 888 StrE = ObjCFExpr->getString(); 889 else 890 StrE = cast<StringLiteral>(E); 891 892 if (StrE) { 893 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx, 894 firstDataArg); 895 return true; 896 } 897 898 return false; 899 } 900 901 default: 902 return false; 903 } 904} 905 906void 907Sema::CheckNonNullArguments(const NonNullAttr *NonNull, 908 const CallExpr *TheCall) { 909 for (NonNullAttr::iterator i = 
NonNull->begin(), e = NonNull->end(); 910 i != e; ++i) { 911 const Expr *ArgExpr = TheCall->getArg(*i); 912 if (ArgExpr->isNullPointerConstant(Context, 913 Expr::NPC_ValueDependentIsNotNull)) 914 Diag(TheCall->getCallee()->getLocStart(), diag::warn_null_arg) 915 << ArgExpr->getSourceRange(); 916 } 917} 918 919/// CheckPrintfArguments - Check calls to printf (and similar functions) for 920/// correct use of format strings. 921/// 922/// HasVAListArg - A predicate indicating whether the printf-like 923/// function is passed an explicit va_arg argument (e.g., vprintf) 924/// 925/// format_idx - The index into Args for the format string. 926/// 927/// Improper format strings to functions in the printf family can be 928/// the source of bizarre bugs and very serious security holes. A 929/// good source of information is available in the following paper 930/// (which includes additional references): 931/// 932/// FormatGuard: Automatic Protection From printf Format String 933/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 934/// 935/// TODO: 936/// Functionality implemented: 937/// 938/// We can statically check the following properties for string 939/// literal format strings for non v.*printf functions (where the 940/// arguments are passed directly): 941// 942/// (1) Are the number of format conversions equal to the number of 943/// data arguments? 944/// 945/// (2) Does each format conversion correctly match the type of the 946/// corresponding data argument? 947/// 948/// Moreover, for all printf functions we can: 949/// 950/// (3) Check for a missing format string (when not caught by type checking). 951/// 952/// (4) Check for no-operation flags; e.g. using "#" with format 953/// conversion 'c' (TODO) 954/// 955/// (5) Check the use of '%n', a major source of security holes. 956/// 957/// (6) Check for malformed format conversions that don't specify anything. 958/// 959/// (7) Check for empty format strings. 
///      e.g: printf("");
///
///  (8) Check whether the format string is a wide literal (diagnosed, and
///      not checked any further).
///
/// All of these checks can be done by parsing the format string.
///
void
Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
                           unsigned format_idx, unsigned firstDataArg) {
  const Expr *Fn = TheCall->getCallee();

  // The way the format attribute works in GCC, the implicit this argument
  // of member functions is counted. However, it doesn't appear in our own
  // lists, so decrement format_idx in that case.
  if (isa<CXXMemberCallExpr>(TheCall)) {
    // Catch a format attribute mistakenly referring to the object argument.
    if (format_idx == 0)
      return;
    --format_idx;
    if(firstDataArg != 0)
      --firstDataArg;
  }

  // CHECK: printf-like function is called with no format string.
  if (format_idx >= TheCall->getNumArgs()) {
    Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
      << Fn->getSourceRange();
    return;
  }

  const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();

  // CHECK: format string is not a string literal.
  //
  // Dynamically generated format strings are difficult to
  // automatically vet at compile time.  Requiring that format strings
  // are string literals: (1) permits the checking of format strings by
  // the compiler and thereby (2) can practically remove the source of
  // many format string exploits.

  // Format string can be either ObjC string (e.g. @"%d") or
  // C string (e.g. "%d")
  // ObjC string uses the same format specifiers as C string, so we can use
  // the same format string checking logic for both ObjC and C strings.
  if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx,
                             firstDataArg))
    return;  // Literal format string found, check done!

  // If there are no arguments specified, warn with -Wformat-security, otherwise
  // warn only with -Wformat-nonliteral.
  if (TheCall->getNumArgs() == format_idx+1)
    Diag(TheCall->getArg(format_idx)->getLocStart(),
         diag::warn_printf_nonliteral_noargs)
      << OrigFormatExpr->getSourceRange();
  else
    Diag(TheCall->getArg(format_idx)->getLocStart(),
         diag::warn_printf_nonliteral)
      << OrigFormatExpr->getSourceRange();
}

namespace {
/// CheckPrintfHandler - Handler passed to analyze_printf::ParseFormatString
/// (see Sema::CheckPrintfString).  Each Handle* callback reports a Sema
/// diagnostic for the part of the format string it is given; CoveredArgs
/// records which data arguments were referenced so that DoneProcessing can
/// report one that was not.
class CheckPrintfHandler : public analyze_printf::FormatStringHandler {
  Sema &S;
  const StringLiteral *FExpr;        // The format string literal itself.
  const Expr *OrigFormatExpr;        // Expression used for the format range.
  const unsigned FirstDataArg;       // Call-argument index of data arg #0.
  const unsigned NumDataArgs;        // Number of data arguments in the call.
  const bool IsObjCLiteral;
  const char *Beg; // Start of format string.
  const bool HasVAListArg;
  const CallExpr *TheCall;
  unsigned FormatIdx;
  llvm::BitVector CoveredArgs;       // One bit per data argument.
  bool usesPositionalArgs;           // Style used by the first specifier.
  bool atFirstArg;                   // True until the first specifier is seen.
public:
  CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
                     const Expr *origFormatExpr, unsigned firstDataArg,
                     unsigned numDataArgs, bool isObjCLiteral,
                     const char *beg, bool hasVAListArg,
                     const CallExpr *theCall, unsigned formatIdx)
    : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
      FirstDataArg(firstDataArg),
      NumDataArgs(numDataArgs),
      IsObjCLiteral(isObjCLiteral), Beg(beg),
      HasVAListArg(hasVAListArg),
      TheCall(theCall), FormatIdx(formatIdx),
      usesPositionalArgs(false), atFirstArg(true) {
    // Start with no data argument marked as covered.
    CoveredArgs.resize(numDataArgs);
    CoveredArgs.reset();
  }

  void DoneProcessing();

  void HandleIncompleteFormatSpecifier(const char *startSpecifier,
                                       unsigned specifierLen);

  bool
  HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
                                   const char *startSpecifier,
                                   unsigned specifierLen);

  virtual void HandleInvalidPosition(const char *startSpecifier,
                                     unsigned specifierLen,
                                     analyze_printf::PositionContext p);

  virtual void HandleZeroPosition(const char *startPos, unsigned posLen);

  void HandleNullChar(const char *nullCharacter);

  bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
                             const char *startSpecifier,
                             unsigned specifierLen);
private:
  SourceRange getFormatStringRange();
  SourceRange getFormatSpecifierRange(const char *startSpecifier,
                                      unsigned specifierLen);
  SourceLocation getLocationOfByte(const char *x);

  bool HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned k,
                    const char *startSpecifier, unsigned specifierLen);
  void HandleFlags(const analyze_printf::FormatSpecifier &FS,
                   llvm::StringRef flag, llvm::StringRef cspec,
                   const char *startSpecifier, unsigned specifierLen);

  const Expr *getDataArg(unsigned i) const;
};
}

/// Returns the source range of the original format expression.
SourceRange CheckPrintfHandler::getFormatStringRange() {
  return OrigFormatExpr->getSourceRange();
}

/// Returns the source range running from the first to the last byte of the
/// specifier that starts at startSpecifier and is specifierLen bytes long.
SourceRange CheckPrintfHandler::
getFormatSpecifierRange(const char *startSpecifier, unsigned specifierLen) {
  return SourceRange(getLocationOfByte(startSpecifier),
                     getLocationOfByte(startSpecifier+specifierLen-1));
}

/// Maps a pointer into the format string data to a source location, using its
/// byte offset from Beg.
SourceLocation CheckPrintfHandler::getLocationOfByte(const char *x) {
  return S.getLocationOfStringLiteralByte(FExpr, x - Beg);
}

/// Reports warn_printf_incomplete_specifier at the start of the specifier.
void CheckPrintfHandler::
HandleIncompleteFormatSpecifier(const char *startSpecifier,
                                unsigned specifierLen) {
  SourceLocation Loc = getLocationOfByte(startSpecifier);
  S.Diag(Loc, diag::warn_printf_incomplete_specifier)
    << getFormatSpecifierRange(startSpecifier, specifierLen);
}

/// Reports warn_printf_invalid_positional_specifier; the position context p
/// is passed to the diagnostic as an unsigned value.
void
CheckPrintfHandler::HandleInvalidPosition(const char *startPos, unsigned posLen,
                                          analyze_printf::PositionContext p) {
  SourceLocation Loc = getLocationOfByte(startPos);
  S.Diag(Loc, diag::warn_printf_invalid_positional_specifier)
    << (unsigned) p << getFormatSpecifierRange(startPos, posLen);
}

/// Reports warn_printf_zero_positional_specifier at startPos.
void CheckPrintfHandler::HandleZeroPosition(const char *startPos,
                                            unsigned posLen) {
  SourceLocation Loc = getLocationOfByte(startPos);
  S.Diag(Loc, diag::warn_printf_zero_positional_specifier)
    << getFormatSpecifierRange(startPos, posLen);
}

/// Reports warn_printf_invalid_conversion for the conversion specifier of FS.
/// Returns false (stop processing the format string) when the specifier's
/// argument index is not a valid data-argument index, true otherwise.
bool CheckPrintfHandler::
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
                                 const char *startSpecifier,
                                 unsigned specifierLen) {

  unsigned argIndex = FS.getArgIndex();
  bool keepGoing = true;
  if (argIndex < NumDataArgs) {
    // Consider the argument covered, even though the specifier doesn't
    // make sense.
    CoveredArgs.set(argIndex);
  }
  else {
    // If argIndex exceeds the number of data arguments we
    // don't issue a warning because that is just a cascade of warnings (and
    // they may have intended '%%' anyway). We don't want to continue processing
    // the format string after this point, however, as we will likely just get
    // gibberish when trying to match arguments.
    keepGoing = false;
  }

  const analyze_printf::ConversionSpecifier &CS =
    FS.getConversionSpecifier();
  SourceLocation Loc = getLocationOfByte(CS.getStart());
  S.Diag(Loc, diag::warn_printf_invalid_conversion)
    << llvm::StringRef(CS.getStart(), CS.getLength())
    << getFormatSpecifierRange(startSpecifier, specifierLen);

  return keepGoing;
}

/// Reports a null character found inside the format string.
void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
  // The presence of a null character is likely an error.
  S.Diag(getLocationOfByte(nullCharacter),
         diag::warn_printf_format_string_contains_null_char)
    << getFormatStringRange();
}

/// Returns the i-th data argument of the call (i is relative to FirstDataArg).
const Expr *CheckPrintfHandler::getDataArg(unsigned i) const {
  return TheCall->getArg(FirstDataArg + i);
}

/// Reports warn_printf_nonsensical_flag for the given flag / conversion
/// specifier spelling, located at the conversion specifier of FS.
void CheckPrintfHandler::HandleFlags(const analyze_printf::FormatSpecifier &FS,
                                     llvm::StringRef flag,
                                     llvm::StringRef cspec,
                                     const char *startSpecifier,
                                     unsigned specifierLen) {
  const analyze_printf::ConversionSpecifier &CS = FS.getConversionSpecifier();
  S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_nonsensical_flag)
    << flag << cspec << getFormatSpecifierRange(startSpecifier, specifierLen);
}

/// Checks a field width or precision that takes its value from a data
/// argument.  k is forwarded to the diagnostics to say which of the two is
/// meant (the callers in this file pass 0 for field width, 1 for precision).
/// Nothing is checked when the call has a va_list argument.  Returns false
/// when a diagnostic was issued and checking of this specifier should stop.
bool
CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
                                 unsigned k, const char *startSpecifier,
                                 unsigned specifierLen) {

  if (Amt.hasDataArgument()) {
    if (!HasVAListArg) {
      unsigned argIndex = Amt.getArgIndex();
      if (argIndex >= NumDataArgs) {
        S.Diag(getLocationOfByte(Amt.getStart()),
               diag::warn_printf_asterisk_missing_arg)
          << k << getFormatSpecifierRange(startSpecifier, specifierLen);
        // Don't do any more checking.  We will just emit
        // spurious errors.
        return false;
      }

      // Type check the data argument.  It should be an 'int'.
      // Although not in conformance with C99, we also allow the argument to be
      // an 'unsigned int' as that is a reasonably safe case.  GCC also
      // doesn't emit a warning for that case.
      CoveredArgs.set(argIndex);
      const Expr *Arg = getDataArg(argIndex);
      QualType T = Arg->getType();

      const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context);
      assert(ATR.isValid());

      if (!ATR.matchesType(S.Context, T)) {
        S.Diag(getLocationOfByte(Amt.getStart()),
               diag::warn_printf_asterisk_wrong_type)
          << k
          << ATR.getRepresentativeType(S.Context) << T
          << getFormatSpecifierRange(startSpecifier, specifierLen)
          << Arg->getSourceRange();
        // Don't do any more checking.  We will just emit
        // spurious errors.
        return false;
      }
    }
  }
  return true;
}

/// Checks one complete format specifier: consistent use of positional
/// arguments, field width and precision amounts, ObjC-only conversions in a
/// non-ObjC literal, '%n', nonsensical precision/flags for pointer and string
/// conversions, and (unless the call has a va_list argument) the presence and
/// type of the matching data argument.  Returns false where the code below
/// decides that no further checking should be done.
bool
CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
                                            &FS,
                                          const char *startSpecifier,
                                          unsigned specifierLen) {

  using namespace analyze_printf;
  const ConversionSpecifier &CS = FS.getConversionSpecifier();

  // The first specifier decides whether positional arguments are in use;
  // every later specifier has to agree with it.
  if (atFirstArg) {
    atFirstArg = false;
    usesPositionalArgs = FS.usesPositionalArg();
  }
  else if (usesPositionalArgs != FS.usesPositionalArg()) {
    // Cannot mix-and-match positional and non-positional arguments.
    S.Diag(getLocationOfByte(CS.getStart()),
           diag::warn_printf_mix_positional_nonpositional_args)
      << getFormatSpecifierRange(startSpecifier, specifierLen);
    return false;
  }

  // First check if the field width, precision, and conversion specifier
  // have matching data arguments.
  if (!HandleAmount(FS.getFieldWidth(), /* field width */ 0,
                    startSpecifier, specifierLen)) {
    return false;
  }

  if (!HandleAmount(FS.getPrecision(), /* precision */ 1,
                    startSpecifier, specifierLen)) {
    return false;
  }

  if (!CS.consumesDataArgument()) {
    // FIXME: Technically specifying a precision or field width here
    // makes no sense.  Worth issuing a warning at some point.
    return true;
  }

  // Consume the argument.
  unsigned argIndex = FS.getArgIndex();
  if (argIndex < NumDataArgs) {
    // The check to see if the argIndex is valid will come later.
    // We set the bit here because we may exit early from this
    // function if we encounter some other error.
    CoveredArgs.set(argIndex);
  }

  // Check for using an Objective-C specific conversion specifier
  // in a non-ObjC literal.
  if (!IsObjCLiteral && CS.isObjCArg()) {
    return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
  }

  // Are we using '%n'?  Issue a warning about this being
  // a possible security issue.
  if (CS.getKind() == ConversionSpecifier::OutIntPtrArg) {
    S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back)
      << getFormatSpecifierRange(startSpecifier, specifierLen);
    // Continue checking the other format specifiers.
    return true;
  }

  // A precision on a void-pointer conversion is reported as nonsensical.
  if (CS.getKind() == ConversionSpecifier::VoidPtrArg) {
    if (FS.getPrecision().getHowSpecified() != OptionalAmount::NotSpecified)
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_nonsensical_precision)
        << CS.getCharacters()
        << getFormatSpecifierRange(startSpecifier, specifierLen);
  }
  if (CS.getKind() == ConversionSpecifier::VoidPtrArg ||
      CS.getKind() == ConversionSpecifier::CStrArg) {
    // FIXME: Instead of using "0", "+", etc., eventually get them from
    // the FormatSpecifier.
    if (FS.hasLeadingZeros())
      HandleFlags(FS, "0", CS.getCharacters(), startSpecifier, specifierLen);
    if (FS.hasPlusPrefix())
      HandleFlags(FS, "+", CS.getCharacters(), startSpecifier, specifierLen);
    if (FS.hasSpacePrefix())
      HandleFlags(FS, " ", CS.getCharacters(), startSpecifier, specifierLen);
  }

  // The remaining checks depend on the data arguments.
  if (HasVAListArg)
    return true;

  if (argIndex >= NumDataArgs) {
    if (FS.usesPositionalArg())  {
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_positional_arg_exceeds_data_args)
        << (argIndex+1) << NumDataArgs
        << getFormatSpecifierRange(startSpecifier, specifierLen);
    }
    else {
      S.Diag(getLocationOfByte(CS.getStart()),
             diag::warn_printf_insufficient_data_args)
        << getFormatSpecifierRange(startSpecifier, specifierLen);
    }

    // Don't do any more checking.
    return false;
  }

  // Now type check the data expression that matches the
  // format specifier.
  const Expr *Ex = getDataArg(argIndex);
  const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context);
  if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) {
    // Check if we didn't match because of an implicit cast from a 'char'
    // or 'short' to an 'int'.  This is done because printf is a varargs
    // function.
    if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Ex))
      if (ICE->getType() == S.Context.IntTy)
        if (ATR.matchesType(S.Context, ICE->getSubExpr()->getType()))
          return true;

    S.Diag(getLocationOfByte(CS.getStart()),
           diag::warn_printf_conversion_argument_type_mismatch)
      << ATR.getRepresentativeType(S.Context) << Ex->getType()
      << getFormatSpecifierRange(startSpecifier, specifierLen)
      << Ex->getSourceRange();
  }

  return true;
}

/// Called by Sema::CheckPrintfString after parsing (only when
/// ParseFormatString returned false).  Unless the call has a va_list
/// argument, reports the first data argument that no specifier referred to.
/// Note that this inverts CoveredArgs in place.
void CheckPrintfHandler::DoneProcessing() {
  // Does the number of data arguments exceed the number of
  // format conversions in the format string?
  if (!HasVAListArg) {
    // Find any arguments that weren't covered.
    CoveredArgs.flip();
    signed notCoveredArg = CoveredArgs.find_first();
    if (notCoveredArg >= 0)  {
      assert((unsigned)notCoveredArg < NumDataArgs);
      S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
             diag::warn_printf_data_arg_not_used)
        << getFormatStringRange();
    }
  }
}

/// CheckPrintfString - Check the string literal FExpr used as the format
/// string of TheCall.  Wide and empty literals are diagnosed and not parsed;
/// otherwise the literal's bytes are run through
/// analyze_printf::ParseFormatString with a CheckPrintfHandler.
/// OrigFormatExpr supplies the source range used in diagnostics and decides
/// whether the literal counts as an ObjC literal.
void Sema::CheckPrintfString(const StringLiteral *FExpr,
                             const Expr *OrigFormatExpr,
                             const CallExpr *TheCall, bool HasVAListArg,
                             unsigned format_idx, unsigned firstDataArg) {

  // CHECK: is the format string a wide literal?
  if (FExpr->isWide()) {
    Diag(FExpr->getLocStart(),
         diag::warn_printf_format_string_is_wide_literal)
      << OrigFormatExpr->getSourceRange();
    return;
  }

  // Str - The format string.  NOTE: this is NOT null-terminated!
  const char *Str = FExpr->getStrData();

  // CHECK: empty format string?
  unsigned StrLen = FExpr->getByteLength();

  if (StrLen == 0) {
    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
      << OrigFormatExpr->getSourceRange();
    return;
  }

  // Every call argument from firstDataArg onwards counts as a data argument.
  CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
                       TheCall->getNumArgs() - firstDataArg,
                       isa<ObjCStringLiteral>(OrigFormatExpr), Str,
                       HasVAListArg, TheCall, format_idx);

  // NOTE(review): a true result from ParseFormatString presumably means that
  // parsing was cut short; confirm against PrintfFormatString.h.
  if (!analyze_printf::ParseFormatString(H, Str, Str + StrLen))
    H.DoneProcessing();
}

//===--- CHECK: Return Address of Stack Variable --------------------------===//

static DeclRefExpr* EvalVal(Expr *E);
static DeclRefExpr* EvalAddr(Expr* E);

/// CheckReturnStackAddr - Check if a return statement returns the address
///   of a stack variable.
///
/// For pointer and block-pointer return types this also diagnoses returning
/// a block expression for which hasBlockDeclRefExprs() is true, and returning
/// the address of a label.  For reference return types it diagnoses a
/// reference to a stack variable.
void
Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
                           SourceLocation ReturnLoc) {

  // Perform checking for returned stack addresses.
  if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
    if (DeclRefExpr *DR = EvalAddr(RetValExp))
      Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
       << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();

    // Skip over implicit cast expressions when checking for block expressions.
    RetValExp = RetValExp->IgnoreParenCasts();

    if (BlockExpr *C = dyn_cast<BlockExpr>(RetValExp))
      if (C->hasBlockDeclRefExprs())
        Diag(C->getLocStart(), diag::err_ret_local_block)
          << C->getSourceRange();

    if (AddrLabelExpr *ALE = dyn_cast<AddrLabelExpr>(RetValExp))
      Diag(ALE->getLocStart(), diag::warn_ret_addr_label)
        << ALE->getSourceRange();

  } else if (lhsType->isReferenceType()) {
    // Perform checking for stack values returned by reference.
    // Check for a reference to the stack
    if (DeclRefExpr *DR = EvalVal(RetValExp))
      Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
        << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
  }
}

/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
///  check if the expression in a return statement evaluates to an address
///  to a location on the stack.  The recursion is used to traverse the
///  AST of the return expression, with recursion backtracking when we
///  encounter a subexpression that (1) clearly does not lead to the address
///  of a stack variable or (2) is something we cannot determine leads to
///  the address of a stack variable based on such local checking.
///
///  EvalAddr processes expressions that are pointers that are used as
///  references (and not L-values).  EvalVal handles all other values.
///  At the base case of the recursion is a check for a DeclRefExpr* that
///  refers to a stack variable.
///
///  This implementation handles:
///
///   * pointer-to-pointer casts
///   * implicit conversions from array references to pointers
///   * taking the address of fields
///   * arbitrary interplay between "&" and "*" operators
///   * pointer arithmetic from an address of a stack variable
///   * taking the address of an array element where the array is on the stack
///
///  Returns the DeclRefExpr naming the stack variable, or NULL when none was
///  found.
static DeclRefExpr* EvalAddr(Expr *E) {
  // We should only be called for evaluating pointer expressions.
  assert((E->getType()->isAnyPointerType() ||
          E->getType()->isBlockPointerType() ||
          E->getType()->isObjCQualifiedIdType()) &&
         "EvalAddr only works on pointers");

  // Our "symbolic interpreter" is just a dispatch off the currently
  // viewed AST node.  We then recursively traverse the AST by calling
  // EvalAddr and EvalVal appropriately.
  switch (E->getStmtClass()) {
  case Stmt::ParenExprClass:
    // Ignore parentheses.
    return EvalAddr(cast<ParenExpr>(E)->getSubExpr());

  case Stmt::UnaryOperatorClass: {
    // The only unary operator that makes sense to handle here
    // is AddrOf.  All others don't make sense as pointers.
    UnaryOperator *U = cast<UnaryOperator>(E);

    if (U->getOpcode() == UnaryOperator::AddrOf)
      return EvalVal(U->getSubExpr());
    else
      return NULL;
  }

  case Stmt::BinaryOperatorClass: {
    // Handle pointer arithmetic.  All other binary operators are not valid
    // in this context.
    BinaryOperator *B = cast<BinaryOperator>(E);
    BinaryOperator::Opcode op = B->getOpcode();

    if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
      return NULL;

    Expr *Base = B->getLHS();

    // Determine which argument is the real pointer base.  It could be
    // the RHS argument instead of the LHS.
    if (!Base->getType()->isPointerType()) Base = B->getRHS();

    assert (Base->getType()->isPointerType());
    return EvalAddr(Base);
  }

  // For conditional operators we need to see if either the LHS or RHS are
  // valid DeclRefExpr*s.  If one of them is valid, we return it.
  case Stmt::ConditionalOperatorClass: {
    ConditionalOperator *C = cast<ConditionalOperator>(E);

    // Handle the GNU extension for missing LHS.
    if (Expr *lhsExpr = C->getLHS())
      if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
        return LHS;

     return EvalAddr(C->getRHS());
  }

  // For casts, we need to handle conversions from arrays to
  // pointer values, and pointer-to-pointer conversions.
  case Stmt::ImplicitCastExprClass:
  case Stmt::CStyleCastExprClass:
  case Stmt::CXXFunctionalCastExprClass: {
    Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
    QualType T = SubExpr->getType();

    if (SubExpr->getType()->isPointerType() ||
        SubExpr->getType()->isBlockPointerType() ||
        SubExpr->getType()->isObjCQualifiedIdType())
      return EvalAddr(SubExpr);
    else if (T->isArrayType())
      return EvalVal(SubExpr);
    else
      return 0;
  }

  // C++ casts.  For dynamic casts, static casts, and const casts, we
  // are always converting from a pointer-to-pointer, so we just blow
  // through the cast.  In the case the dynamic cast doesn't fail (and
  // return NULL), we take the conservative route and report cases
  // where we return the address of a stack variable.  (The original
  // comment breaks off here after "For Reinterpre".)
  // FIXME: The comment above is wrong; we're not always converting
  // from pointer to pointer. I'm guessing that this code should also
  // handle references to objects.
  case Stmt::CXXStaticCastExprClass:
  case Stmt::CXXDynamicCastExprClass:
  case Stmt::CXXConstCastExprClass:
  case Stmt::CXXReinterpretCastExprClass: {
      Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
      if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
        return EvalAddr(S);
      else
        return NULL;
  }

  // Everything else: we simply don't reason about them.
  default:
    return NULL;
  }
}


///  EvalVal - This function complements EvalAddr in the mutual recursion.
///   See the comments for EvalAddr for more details.
static DeclRefExpr* EvalVal(Expr *E) {

  // We should only be called for evaluating non-pointer expressions, or
  // expressions with a pointer type that are not used as references but instead
  // are l-values (e.g., DeclRefExpr with a pointer type).

  // Our "symbolic interpreter" is just a dispatch off the currently
  // viewed AST node.  We then recursively traverse the AST by calling
  // EvalAddr and EvalVal appropriately.
  switch (E->getStmtClass()) {
  case Stmt::DeclRefExprClass: {
    // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
    //  at code that refers to a variable's name.  We check if it has local
    //  storage within the function, and if so, return the expression.
    //  Variables of reference type are not reported.
    DeclRefExpr *DR = cast<DeclRefExpr>(E);

    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
      if (V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;

    return NULL;
  }

  case Stmt::ParenExprClass:
    // Ignore parentheses.
    return EvalVal(cast<ParenExpr>(E)->getSubExpr());

  case Stmt::UnaryOperatorClass: {
    // The only unary operator that makes sense to handle here
    // is Deref.  All others don't resolve to a "name."  This includes
    // handling all sorts of rvalues passed to a unary operator.
    UnaryOperator *U = cast<UnaryOperator>(E);

    if (U->getOpcode() == UnaryOperator::Deref)
      return EvalAddr(U->getSubExpr());

    return NULL;
  }

  case Stmt::ArraySubscriptExprClass: {
    // Array subscripts are potential references to data on the stack.  We
    // retrieve the DeclRefExpr* for the array variable if it indeed
    // has local storage.
    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
  }

  case Stmt::ConditionalOperatorClass: {
    // For conditional operators we need to see if either the LHS or RHS are
    // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
    ConditionalOperator *C = cast<ConditionalOperator>(E);

    // Handle the GNU extension for missing LHS.
    if (Expr *lhsExpr = C->getLHS())
      if (DeclRefExpr *LHS = EvalVal(lhsExpr))
        return LHS;

    return EvalVal(C->getRHS());
  }

  // Accesses to members are potential references to data on the stack.
  case Stmt::MemberExprClass: {
    MemberExpr *M = cast<MemberExpr>(E);

    // Check for indirect access.  We only want direct field accesses.
    if (!M->isArrow())
      return EvalVal(M->getBase());
    else
      return NULL;
  }

  // Everything else: we simply don't reason about them.
  default:
    return NULL;
  }
}

//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//

/// Check for comparisons of floating point operands using != and ==.
/// Issue a warning if these are not self-comparisons, as they are not likely
/// to do what the programmer intended.
///
/// The warning is also suppressed when an operand is an exactly
/// representable floating literal or a call to a builtin (see the special
/// cases below).
void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
  bool EmitWarning = true;

  Expr* LeftExprSansParen = lex->IgnoreParens();
  Expr* RightExprSansParen = rex->IgnoreParens();

  // Special case: check for x == x (which is OK).
  // Do not emit warnings for such cases.
  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
      if (DRL->getDecl() == DRR->getDecl())
        EmitWarning = false;


  // Special case: check for comparisons against literals that can be exactly
  //  represented by APFloat.  In such cases, do not emit a warning.  This
  //  is a heuristic: often comparison against such literals are used to
  //  detect if a value in a variable has not changed.  This clearly can
  //  lead to false negatives.
  //  Note that the right operand is only examined when the left operand is
  //  not a floating literal.
  if (EmitWarning) {
    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
      if (FLL->isExact())
        EmitWarning = false;
    } else
      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
        if (FLR->isExact())
          EmitWarning = false;
    }
  }

  // Check for comparisons against calls to builtins.
  if (EmitWarning)
    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
      if (CL->isBuiltinCall(Context))
        EmitWarning = false;

  if (EmitWarning)
    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
      if (CR->isBuiltinCall(Context))
        EmitWarning = false;

  // Emit the diagnostic.
  if (EmitWarning)
    Diag(loc, diag::warn_floatingpoint_eq)
      << lex->getSourceRange() << rex->getSourceRange();
}

//===--- CHECK: Integer mixed-sign comparisons (-Wsign-compare) --------===//
//===--- CHECK: Lossy implicit conversions (-Wconversion) --------------===//

namespace {

/// Structure recording the 'active' range of an integer-valued
/// expression.
struct IntRange {
  /// The number of bits active in the int.
  unsigned Width;

  /// True if the int is known not to have negative values.
  bool NonNegative;

  // Note: the default constructor leaves both fields uninitialized.
  IntRange() {}
  IntRange(unsigned Width, bool NonNegative)
    : Width(Width), NonNegative(NonNegative)
  {}

  // Returns the range of the bool type.
  static IntRange forBoolType() {
    return IntRange(1, true);
  }

  // Returns the range of an integral type.
  static IntRange forType(ASTContext &C, QualType T) {
    return forCanonicalType(C, T->getCanonicalTypeInternal().getTypePtr());
  }

  // Returns the range of an integral type based on its canonical
  // representation.  Vector and complex types are replaced by their element
  // type, and enum types by the enum's integer type, before the width and
  // signedness are read.
  static IntRange forCanonicalType(ASTContext &C, const Type *T) {
    assert(T->isCanonicalUnqualified());

    if (const VectorType *VT = dyn_cast<VectorType>(T))
      T = VT->getElementType().getTypePtr();
    if (const ComplexType *CT = dyn_cast<ComplexType>(T))
      T = CT->getElementType().getTypePtr();
    if (const EnumType *ET = dyn_cast<EnumType>(T))
      T = ET->getDecl()->getIntegerType().getTypePtr();

    const BuiltinType *BT = cast<BuiltinType>(T);
    assert(BT->isInteger());

    return IntRange(C.getIntWidth(QualType(T, 0)), BT->isUnsignedInteger());
  }

  // Returns the supremum of two ranges: i.e. their conservative merge.
  static IntRange join(IntRange L, IntRange R) {
    return IntRange(std::max(L.Width, R.Width),
                    L.NonNegative && R.NonNegative);
  }

  // Returns the infimum of two ranges: i.e. their aggressive merge.
  static IntRange meet(IntRange L, IntRange R) {
    return IntRange(std::min(L.Width, R.Width),
                    L.NonNegative || R.NonNegative);
  }
};

/// Returns the range of the integer constant 'value'.  Negative signed values
/// yield their minimum signed bit count; everything else yields the active
/// bit count and is marked non-negative.  The parameter C is not used.
/// NOTE(review): value.trunc() is called on the caller's APSInt and its
/// result is not used, so it presumably truncates in place in this LLVM
/// revision; confirm.
IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) {
  if (value.isSigned() && value.isNegative())
    return IntRange(value.getMinSignedBits(), false);

  if (value.getBitWidth() > MaxWidth)
    value.trunc(MaxWidth);

  // isNonNegative() just checks the sign bit without considering
  // signedness.
  return IntRange(value.getActiveBits(), true);
}

/// Returns the range of an evaluated constant: the range of an integer, the
/// join over all elements of a vector, or the join of the real and imaginary
/// parts of a complex integer.  An lvalue result is assumed to use all
/// MaxWidth bits, with signedness taken from Ty.
IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty,
                       unsigned MaxWidth) {
  if (result.isInt())
    return GetValueRange(C, result.getInt(), MaxWidth);

  if (result.isVector()) {
    IntRange R = GetValueRange(C, result.getVectorElt(0), Ty, MaxWidth);
    for (unsigned i = 1, e = result.getVectorLength(); i != e; ++i) {
      IntRange El = GetValueRange(C, result.getVectorElt(i), Ty, MaxWidth);
      R = IntRange::join(R, El);
    }
    return R;
  }

  if (result.isComplexInt()) {
    IntRange R = GetValueRange(C, result.getComplexIntReal(), MaxWidth);
    IntRange I = GetValueRange(C, result.getComplexIntImag(), MaxWidth);
    return IntRange::join(R, I);
  }

  // This can happen with lossless casts to intptr_t of "based" lvalues.
  // Assume it might use arbitrary bits.
  // FIXME: The only reason we need to pass the type in here is to get
  // the sign right on this one case.  It would be nice if APValue
  // preserved this.
  assert(result.isLValue());
  return IntRange(MaxWidth, Ty->isUnsignedIntegerType());
}

/// Pseudo-evaluate the given integer expression, estimating the
/// range of values it might take.
///
/// \param MaxWidth - the width to which the value will be truncated
IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) {
  E = E->IgnoreParens();

  // Try a full evaluation first.
  Expr::EvalResult result;
  if (E->Evaluate(result, C))
    return GetValueRange(C, result.Val, E->getType(), MaxWidth);

  // I think we only want to look through implicit casts here; if the
  // user has an explicit widening cast, we should treat the value as
  // being of the new, wider type.
  if (ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E)) {
    if (CE->getCastKind() == CastExpr::CK_NoOp)
      return GetExprRange(C, CE->getSubExpr(), MaxWidth);

    IntRange OutputTypeRange = IntRange::forType(C, CE->getType());

    bool isIntegerCast = (CE->getCastKind() == CastExpr::CK_IntegralCast);
    if (!isIntegerCast && CE->getCastKind() == CastExpr::CK_Unknown)
      isIntegerCast = CE->getSubExpr()->getType()->isIntegerType();

    // Assume that non-integer casts can span the full range of the type.
    if (!isIntegerCast)
      return OutputTypeRange;

    IntRange SubRange
      = GetExprRange(C, CE->getSubExpr(),
                     std::min(MaxWidth, OutputTypeRange.Width));

    // Bail out if the subexpr's range is as wide as the cast type.
    if (SubRange.Width >= OutputTypeRange.Width)
      return OutputTypeRange;

    // Otherwise, we take the smaller width, and we're non-negative if
    // either the output type or the subexpr is.
    return IntRange(SubRange.Width,
                    SubRange.NonNegative || OutputTypeRange.NonNegative);
  }

  if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) {
    // If we can fold the condition, just take that operand.
    bool CondResult;
    if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C))
      return GetExprRange(C, CondResult ? CO->getTrueExpr()
                                        : CO->getFalseExpr(),
                          MaxWidth);

    // Otherwise, conservatively merge.
    IntRange L = GetExprRange(C, CO->getTrueExpr(), MaxWidth);
    IntRange R = GetExprRange(C, CO->getFalseExpr(), MaxWidth);
    return IntRange::join(L, R);
  }

  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
    switch (BO->getOpcode()) {

    // Boolean-valued operations are single-bit and positive.
    case BinaryOperator::LAnd:
    case BinaryOperator::LOr:
    case BinaryOperator::LT:
    case BinaryOperator::GT:
    case BinaryOperator::LE:
    case BinaryOperator::GE:
    case BinaryOperator::EQ:
    case BinaryOperator::NE:
      return IntRange::forBoolType();

    // The type of these compound assignments is the type of the LHS,
    // so the RHS is not necessarily an integer.
    case BinaryOperator::MulAssign:
    case BinaryOperator::DivAssign:
    case BinaryOperator::RemAssign:
    case BinaryOperator::AddAssign:
    case BinaryOperator::SubAssign:
      return IntRange::forType(C, E->getType());

    // Operations with opaque sources are black-listed.
    case BinaryOperator::PtrMemD:
    case BinaryOperator::PtrMemI:
      return IntRange::forType(C, E->getType());

    // Bitwise-and uses the *infimum* of the two source ranges.
    case BinaryOperator::And:
    case BinaryOperator::AndAssign:
      return IntRange::meet(GetExprRange(C, BO->getLHS(), MaxWidth),
                            GetExprRange(C, BO->getRHS(), MaxWidth));

    // Left shift gets black-listed based on a judgement call.
    case BinaryOperator::Shl:
      // ...except that we want to treat '1 << (blah)' as logically
      // positive.  It's an important idiom.
      if (IntegerLiteral *I
            = dyn_cast<IntegerLiteral>(BO->getLHS()->IgnoreParenCasts())) {
        if (I->getValue() == 1) {
          IntRange R = IntRange::forType(C, E->getType());
          return IntRange(R.Width, /*NonNegative*/ true);
        }
      }
      // fallthrough

    case BinaryOperator::ShlAssign:
      return IntRange::forType(C, E->getType());

    // Right shift by a constant can narrow its left argument.
    case BinaryOperator::Shr:
    case BinaryOperator::ShrAssign: {
      IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth);

      // If the shift amount is a positive constant, drop the width by
      // that much.
1902 llvm::APSInt shift; 1903 if (BO->getRHS()->isIntegerConstantExpr(shift, C) && 1904 shift.isNonNegative()) { 1905 unsigned zext = shift.getZExtValue(); 1906 if (zext >= L.Width) 1907 L.Width = (L.NonNegative ? 0 : 1); 1908 else 1909 L.Width -= zext; 1910 } 1911 1912 return L; 1913 } 1914 1915 // Comma acts as its right operand. 1916 case BinaryOperator::Comma: 1917 return GetExprRange(C, BO->getRHS(), MaxWidth); 1918 1919 // Black-list pointer subtractions. 1920 case BinaryOperator::Sub: 1921 if (BO->getLHS()->getType()->isPointerType()) 1922 return IntRange::forType(C, E->getType()); 1923 // fallthrough 1924 1925 default: 1926 break; 1927 } 1928 1929 // Treat every other operator as if it were closed on the 1930 // narrowest type that encompasses both operands. 1931 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 1932 IntRange R = GetExprRange(C, BO->getRHS(), MaxWidth); 1933 return IntRange::join(L, R); 1934 } 1935 1936 if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { 1937 switch (UO->getOpcode()) { 1938 // Boolean-valued operations are white-listed. 1939 case UnaryOperator::LNot: 1940 return IntRange::forBoolType(); 1941 1942 // Operations with opaque sources are black-listed. 1943 case UnaryOperator::Deref: 1944 case UnaryOperator::AddrOf: // should be impossible 1945 case UnaryOperator::OffsetOf: 1946 return IntRange::forType(C, E->getType()); 1947 1948 default: 1949 return GetExprRange(C, UO->getSubExpr(), MaxWidth); 1950 } 1951 } 1952 1953 FieldDecl *BitField = E->getBitField(); 1954 if (BitField) { 1955 llvm::APSInt BitWidthAP = BitField->getBitWidth()->EvaluateAsInt(C); 1956 unsigned BitWidth = BitWidthAP.getZExtValue(); 1957 1958 return IntRange(BitWidth, BitField->getType()->isUnsignedIntegerType()); 1959 } 1960 1961 return IntRange::forType(C, E->getType()); 1962} 1963 1964/// Checks whether the given value, which currently has the given 1965/// source semantics, has the same value when coerced through the 1966/// target semantics. 
1967bool IsSameFloatAfterCast(const llvm::APFloat &value, 1968 const llvm::fltSemantics &Src, 1969 const llvm::fltSemantics &Tgt) { 1970 llvm::APFloat truncated = value; 1971 1972 bool ignored; 1973 truncated.convert(Src, llvm::APFloat::rmNearestTiesToEven, &ignored); 1974 truncated.convert(Tgt, llvm::APFloat::rmNearestTiesToEven, &ignored); 1975 1976 return truncated.bitwiseIsEqual(value); 1977} 1978 1979/// Checks whether the given value, which currently has the given 1980/// source semantics, has the same value when coerced through the 1981/// target semantics. 1982/// 1983/// The value might be a vector of floats (or a complex number). 1984bool IsSameFloatAfterCast(const APValue &value, 1985 const llvm::fltSemantics &Src, 1986 const llvm::fltSemantics &Tgt) { 1987 if (value.isFloat()) 1988 return IsSameFloatAfterCast(value.getFloat(), Src, Tgt); 1989 1990 if (value.isVector()) { 1991 for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i) 1992 if (!IsSameFloatAfterCast(value.getVectorElt(i), Src, Tgt)) 1993 return false; 1994 return true; 1995 } 1996 1997 assert(value.isComplexFloat()); 1998 return (IsSameFloatAfterCast(value.getComplexFloatReal(), Src, Tgt) && 1999 IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt)); 2000} 2001 2002} // end anonymous namespace 2003 2004/// \brief Implements -Wsign-compare. 2005/// 2006/// \param lex the left-hand expression 2007/// \param rex the right-hand expression 2008/// \param OpLoc the location of the joining operator 2009/// \param BinOpc binary opcode or 0 2010void Sema::CheckSignCompare(Expr *lex, Expr *rex, SourceLocation OpLoc, 2011 const BinaryOperator::Opcode* BinOpc) { 2012 // Don't warn if we're in an unevaluated context. 2013 if (ExprEvalContexts.back().Context == Unevaluated) 2014 return; 2015 2016 // If either expression is value-dependent, don't warn. We'll get another 2017 // chance at instantiation time. 
2018 if (lex->isValueDependent() || rex->isValueDependent()) 2019 return; 2020 2021 QualType lt = lex->getType(), rt = rex->getType(); 2022 2023 // Only warn if both operands are integral. 2024 if (!lt->isIntegerType() || !rt->isIntegerType()) 2025 return; 2026 2027 // In C, the width of a bitfield determines its type, and the 2028 // declared type only contributes the signedness. This duplicates 2029 // the work that will later be done by UsualUnaryConversions. 2030 // Eventually, this check will be reorganized in a way that avoids 2031 // this duplication. 2032 if (!getLangOptions().CPlusPlus) { 2033 QualType tmp; 2034 tmp = Context.isPromotableBitField(lex); 2035 if (!tmp.isNull()) lt = tmp; 2036 tmp = Context.isPromotableBitField(rex); 2037 if (!tmp.isNull()) rt = tmp; 2038 } 2039 2040 if (const EnumType *E = lt->getAs<EnumType>()) 2041 lt = E->getDecl()->getPromotionType(); 2042 if (const EnumType *E = rt->getAs<EnumType>()) 2043 rt = E->getDecl()->getPromotionType(); 2044 2045 // The rule is that the signed operand becomes unsigned, so isolate the 2046 // signed operand. 2047 Expr *signedOperand = lex, *unsignedOperand = rex; 2048 QualType signedType = lt, unsignedType = rt; 2049 if (lt->isSignedIntegerType()) { 2050 if (rt->isSignedIntegerType()) return; 2051 } else { 2052 if (!rt->isSignedIntegerType()) return; 2053 std::swap(signedOperand, unsignedOperand); 2054 std::swap(signedType, unsignedType); 2055 } 2056 2057 unsigned unsignedWidth = Context.getIntWidth(unsignedType); 2058 unsigned signedWidth = Context.getIntWidth(signedType); 2059 2060 // If the unsigned type is strictly smaller than the signed type, 2061 // then (1) the result type will be signed and (2) the unsigned 2062 // value will fit fully within the signed type, and thus the result 2063 // of the comparison will be exact. 2064 if (signedWidth > unsignedWidth) 2065 return; 2066 2067 // Otherwise, calculate the effective ranges. 
2068 IntRange signedRange = GetExprRange(Context, signedOperand, signedWidth); 2069 IntRange unsignedRange = GetExprRange(Context, unsignedOperand, unsignedWidth); 2070 2071 // We should never be unable to prove that the unsigned operand is 2072 // non-negative. 2073 assert(unsignedRange.NonNegative && "unsigned range includes negative?"); 2074 2075 // If the signed operand is non-negative, then the signed->unsigned 2076 // conversion won't change it. 2077 if (signedRange.NonNegative) { 2078 // Emit warnings for comparisons of unsigned to integer constant 0. 2079 // always false: x < 0 (or 0 > x) 2080 // always true: x >= 0 (or 0 <= x) 2081 llvm::APSInt X; 2082 if (BinOpc && signedOperand->isIntegerConstantExpr(X, Context) && X == 0) { 2083 if (signedOperand != lex) { 2084 if (*BinOpc == BinaryOperator::LT) { 2085 Diag(OpLoc, diag::warn_lunsigned_always_true_comparison) 2086 << "< 0" << "false" 2087 << lex->getSourceRange() << rex->getSourceRange(); 2088 } 2089 else if (*BinOpc == BinaryOperator::GE) { 2090 Diag(OpLoc, diag::warn_lunsigned_always_true_comparison) 2091 << ">= 0" << "true" 2092 << lex->getSourceRange() << rex->getSourceRange(); 2093 } 2094 } 2095 else { 2096 if (*BinOpc == BinaryOperator::GT) { 2097 Diag(OpLoc, diag::warn_runsigned_always_true_comparison) 2098 << "0 >" << "false" 2099 << lex->getSourceRange() << rex->getSourceRange(); 2100 } 2101 else if (*BinOpc == BinaryOperator::LE) { 2102 Diag(OpLoc, diag::warn_runsigned_always_true_comparison) 2103 << "0 <=" << "true" 2104 << lex->getSourceRange() << rex->getSourceRange(); 2105 } 2106 } 2107 } 2108 return; 2109 } 2110 2111 // For (in)equality comparisons, if the unsigned operand is a 2112 // constant which cannot collide with a overflowed signed operand, 2113 // then reinterpreting the signed operand as unsigned will not 2114 // change the result of the comparison. 
2115 if (BinOpc && 2116 (*BinOpc == BinaryOperator::EQ || *BinOpc == BinaryOperator::NE) && 2117 unsignedRange.Width < unsignedWidth) 2118 return; 2119 2120 Diag(OpLoc, BinOpc ? diag::warn_mixed_sign_comparison 2121 : diag::warn_mixed_sign_conditional) 2122 << lt << rt << lex->getSourceRange() << rex->getSourceRange(); 2123} 2124 2125/// Diagnose an implicit cast; purely a helper for CheckImplicitConversion. 2126static void DiagnoseImpCast(Sema &S, Expr *E, QualType T, unsigned diag) { 2127 S.Diag(E->getExprLoc(), diag) << E->getType() << T << E->getSourceRange(); 2128} 2129 2130/// Implements -Wconversion. 2131void Sema::CheckImplicitConversion(Expr *E, QualType T) { 2132 // Don't diagnose in unevaluated contexts. 2133 if (ExprEvalContexts.back().Context == Sema::Unevaluated) 2134 return; 2135 2136 // Don't diagnose for value-dependent expressions. 2137 if (E->isValueDependent()) 2138 return; 2139 2140 const Type *Source = Context.getCanonicalType(E->getType()).getTypePtr(); 2141 const Type *Target = Context.getCanonicalType(T).getTypePtr(); 2142 2143 // Never diagnose implicit casts to bool. 2144 if (Target->isSpecificBuiltinType(BuiltinType::Bool)) 2145 return; 2146 2147 // Strip vector types. 2148 if (isa<VectorType>(Source)) { 2149 if (!isa<VectorType>(Target)) 2150 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_vector_scalar); 2151 2152 Source = cast<VectorType>(Source)->getElementType().getTypePtr(); 2153 Target = cast<VectorType>(Target)->getElementType().getTypePtr(); 2154 } 2155 2156 // Strip complex types. 
2157 if (isa<ComplexType>(Source)) { 2158 if (!isa<ComplexType>(Target)) 2159 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_complex_scalar); 2160 2161 Source = cast<ComplexType>(Source)->getElementType().getTypePtr(); 2162 Target = cast<ComplexType>(Target)->getElementType().getTypePtr(); 2163 } 2164 2165 const BuiltinType *SourceBT = dyn_cast<BuiltinType>(Source); 2166 const BuiltinType *TargetBT = dyn_cast<BuiltinType>(Target); 2167 2168 // If the source is floating point... 2169 if (SourceBT && SourceBT->isFloatingPoint()) { 2170 // ...and the target is floating point... 2171 if (TargetBT && TargetBT->isFloatingPoint()) { 2172 // ...then warn if we're dropping FP rank. 2173 2174 // Builtin FP kinds are ordered by increasing FP rank. 2175 if (SourceBT->getKind() > TargetBT->getKind()) { 2176 // Don't warn about float constants that are precisely 2177 // representable in the target type. 2178 Expr::EvalResult result; 2179 if (E->Evaluate(result, Context)) { 2180 // Value might be a float, a float vector, or a float complex. 2181 if (IsSameFloatAfterCast(result.Val, 2182 Context.getFloatTypeSemantics(QualType(TargetBT, 0)), 2183 Context.getFloatTypeSemantics(QualType(SourceBT, 0)))) 2184 return; 2185 } 2186 2187 DiagnoseImpCast(*this, E, T, diag::warn_impcast_float_precision); 2188 } 2189 return; 2190 } 2191 2192 // If the target is integral, always warn. 2193 if ((TargetBT && TargetBT->isInteger())) 2194 // TODO: don't warn for integer values? 2195 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_float_integer); 2196 2197 return; 2198 } 2199 2200 if (!Source->isIntegerType() || !Target->isIntegerType()) 2201 return; 2202 2203 IntRange SourceRange = GetExprRange(Context, E, Context.getIntWidth(E->getType())); 2204 IntRange TargetRange = IntRange::forCanonicalType(Context, Target); 2205 2206 // FIXME: also signed<->unsigned? 
2207 2208 if (SourceRange.Width > TargetRange.Width) { 2209 // People want to build with -Wshorten-64-to-32 and not -Wconversion 2210 // and by god we'll let them. 2211 if (SourceRange.Width == 64 && TargetRange.Width == 32) 2212 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_integer_64_32); 2213 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_integer_precision); 2214 } 2215 2216 return; 2217} 2218 2219/// CheckParmsForFunctionDef - Check that the parameters of the given 2220/// function are appropriate for the definition of a function. This 2221/// takes care of any checks that cannot be performed on the 2222/// declaration itself, e.g., that the types of each of the function 2223/// parameters are complete. 2224bool Sema::CheckParmsForFunctionDef(FunctionDecl *FD) { 2225 bool HasInvalidParm = false; 2226 for (unsigned p = 0, NumParams = FD->getNumParams(); p < NumParams; ++p) { 2227 ParmVarDecl *Param = FD->getParamDecl(p); 2228 2229 // C99 6.7.5.3p4: the parameters in a parameter type list in a 2230 // function declarator that is part of a function definition of 2231 // that function shall not have incomplete type. 2232 // 2233 // This is also C++ [dcl.fct]p6. 2234 if (!Param->isInvalidDecl() && 2235 RequireCompleteType(Param->getLocation(), Param->getType(), 2236 diag::err_typecheck_decl_incomplete_type)) { 2237 Param->setInvalidDecl(); 2238 HasInvalidParm = true; 2239 } 2240 2241 // C99 6.9.1p5: If the declarator includes a parameter type list, the 2242 // declaration of each parameter shall include an identifier. 
2243 if (Param->getIdentifier() == 0 && 2244 !Param->isImplicit() && 2245 !getLangOptions().CPlusPlus) 2246 Diag(Param->getLocation(), diag::err_parameter_name_omitted); 2247 2248 // C99 6.7.5.3p12: 2249 // If the function declarator is not part of a definition of that 2250 // function, parameters may have incomplete type and may use the [*] 2251 // notation in their sequences of declarator specifiers to specify 2252 // variable length array types. 2253 QualType PType = Param->getOriginalType(); 2254 if (const ArrayType *AT = Context.getAsArrayType(PType)) { 2255 if (AT->getSizeModifier() == ArrayType::Star) { 2256 // FIXME: This diagnosic should point the the '[*]' if source-location 2257 // information is added for it. 2258 Diag(Param->getLocation(), diag::err_array_star_in_function_definition); 2259 } 2260 } 2261 } 2262 2263 return HasInvalidParm; 2264} 2265