SemaChecking.cpp revision 6ee765348b2855c702fa593fb030ef6abe0d01f6
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/Analysis/Analyses/PrintfFormatString.h" 17#include "clang/AST/ASTContext.h" 18#include "clang/AST/CharUnits.h" 19#include "clang/AST/DeclObjC.h" 20#include "clang/AST/ExprCXX.h" 21#include "clang/AST/ExprObjC.h" 22#include "clang/AST/DeclObjC.h" 23#include "clang/AST/StmtCXX.h" 24#include "clang/AST/StmtObjC.h" 25#include "clang/Lex/LiteralSupport.h" 26#include "clang/Lex/Preprocessor.h" 27#include "llvm/ADT/BitVector.h" 28#include "llvm/ADT/STLExtras.h" 29#include <limits> 30using namespace clang; 31 32/// getLocationOfStringLiteralByte - Return a source location that points to the 33/// specified byte of the specified string literal. 34/// 35/// Strings are amazingly complex. They can be formed from multiple tokens and 36/// can have escape sequences in them in addition to the usual trigraph and 37/// escaped newline business. This routine handles this complexity. 38/// 39SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 40 unsigned ByteNo) const { 41 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 42 43 // Loop over all of the tokens in this string until we find the one that 44 // contains the byte we're looking for. 45 unsigned TokNo = 0; 46 while (1) { 47 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 48 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 49 50 // Get the spelling of the string so that we can get the data that makes up 51 // the string literal, not the identifier for the macro it is potentially 52 // expanded through. 53 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 54 55 // Re-lex the token to get its length and original spelling. 56 std::pair<FileID, unsigned> LocInfo = 57 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 58 bool Invalid = false; 59 llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid); 60 if (Invalid) 61 return StrTokSpellingLoc; 62 63 const char *StrData = Buffer.data()+LocInfo.second; 64 65 // Create a langops struct and enable trigraphs. This is sufficient for 66 // relexing tokens. 67 LangOptions LangOpts; 68 LangOpts.Trigraphs = true; 69 70 // Create a lexer starting at the beginning of this token. 71 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData, 72 Buffer.end()); 73 Token TheTok; 74 TheLexer.LexFromRawLexer(TheTok); 75 76 // Use the StringLiteralParser to compute the length of the string in bytes. 77 StringLiteralParser SLP(&TheTok, 1, PP); 78 unsigned TokNumBytes = SLP.GetStringLength(); 79 80 // If the byte is in this token, return the location of the byte. 81 if (ByteNo < TokNumBytes || 82 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 83 unsigned Offset = 84 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP); 85 86 // Now that we know the offset of the token in the spelling, use the 87 // preprocessor to get the offset in the original source. 88 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 89 } 90 91 // Move to the next string token. 92 ++TokNo; 93 ByteNo -= TokNumBytes; 94 } 95} 96 97/// CheckablePrintfAttr - does a function call have a "printf" attribute 98/// and arguments that merit checking? 99bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) { 100 if (Format->getType() == "printf") return true; 101 if (Format->getType() == "printf0") { 102 // printf0 allows null "format" string; if so don't check format/args 103 unsigned format_idx = Format->getFormatIdx() - 1; 104 // Does the index refer to the implicit object argument? 105 if (isa<CXXMemberCallExpr>(TheCall)) { 106 if (format_idx == 0) 107 return false; 108 --format_idx; 109 } 110 if (format_idx < TheCall->getNumArgs()) { 111 Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts(); 112 if (!Format->isNullPointerConstant(Context, 113 Expr::NPC_ValueDependentIsNull)) 114 return true; 115 } 116 } 117 return false; 118} 119 120Action::OwningExprResult 121Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { 122 OwningExprResult TheCallResult(Owned(TheCall)); 123 124 switch (BuiltinID) { 125 case Builtin::BI__builtin___CFStringMakeConstantString: 126 assert(TheCall->getNumArgs() == 1 && 127 "Wrong # arguments to builtin CFStringMakeConstantString"); 128 if (CheckObjCString(TheCall->getArg(0))) 129 return ExprError(); 130 break; 131 case Builtin::BI__builtin_stdarg_start: 132 case Builtin::BI__builtin_va_start: 133 if (SemaBuiltinVAStart(TheCall)) 134 return ExprError(); 135 break; 136 case Builtin::BI__builtin_isgreater: 137 case Builtin::BI__builtin_isgreaterequal: 138 case Builtin::BI__builtin_isless: 139 case Builtin::BI__builtin_islessequal: 140 case Builtin::BI__builtin_islessgreater: 141 case Builtin::BI__builtin_isunordered: 142 if (SemaBuiltinUnorderedCompare(TheCall)) 143 return ExprError(); 144 break; 145 case Builtin::BI__builtin_fpclassify: 146 if (SemaBuiltinFPClassification(TheCall, 6)) 147 return ExprError(); 148 break; 149 case Builtin::BI__builtin_isfinite: 150 case Builtin::BI__builtin_isinf: 151 case Builtin::BI__builtin_isinf_sign: 152 case Builtin::BI__builtin_isnan: 153 case Builtin::BI__builtin_isnormal: 154 if (SemaBuiltinFPClassification(TheCall, 1)) 155 return ExprError(); 156 break; 157 case Builtin::BI__builtin_return_address: 158 case Builtin::BI__builtin_frame_address: 159 if (SemaBuiltinStackAddress(TheCall)) 160 return ExprError(); 161 break; 162 case Builtin::BI__builtin_eh_return_data_regno: 163 if (SemaBuiltinEHReturnDataRegNo(TheCall)) 164 return ExprError(); 165 break; 166 case Builtin::BI__builtin_shufflevector: 167 return SemaBuiltinShuffleVector(TheCall); 168 // TheCall will be freed by the smart pointer here, but that's fine, since 169 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 170 case Builtin::BI__builtin_prefetch: 171 if (SemaBuiltinPrefetch(TheCall)) 172 return ExprError(); 173 break; 174 case Builtin::BI__builtin_object_size: 175 if (SemaBuiltinObjectSize(TheCall)) 176 return ExprError(); 177 break; 178 case Builtin::BI__builtin_longjmp: 179 if (SemaBuiltinLongjmp(TheCall)) 180 return ExprError(); 181 break; 182 case Builtin::BI__sync_fetch_and_add: 183 case Builtin::BI__sync_fetch_and_sub: 184 case Builtin::BI__sync_fetch_and_or: 185 case Builtin::BI__sync_fetch_and_and: 186 case Builtin::BI__sync_fetch_and_xor: 187 case Builtin::BI__sync_fetch_and_nand: 188 case Builtin::BI__sync_add_and_fetch: 189 case Builtin::BI__sync_sub_and_fetch: 190 case Builtin::BI__sync_and_and_fetch: 191 case Builtin::BI__sync_or_and_fetch: 192 case Builtin::BI__sync_xor_and_fetch: 193 case Builtin::BI__sync_nand_and_fetch: 194 case Builtin::BI__sync_val_compare_and_swap: 195 case Builtin::BI__sync_bool_compare_and_swap: 196 case Builtin::BI__sync_lock_test_and_set: 197 case Builtin::BI__sync_lock_release: 198 if (SemaBuiltinAtomicOverloaded(TheCall)) 199 return ExprError(); 200 break; 201 } 202 203 return move(TheCallResult); 204} 205 206/// CheckFunctionCall - Check a direct function call for various correctness 207/// and safety properties not strictly enforced by the C type system. 208bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 209 // Get the IdentifierInfo* for the called function. 210 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 211 212 // None of the checks below are needed for functions that don't have 213 // simple names (e.g., C++ conversion functions). 214 if (!FnInfo) 215 return false; 216 217 // FIXME: This mechanism should be abstracted to be less fragile and 218 // more efficient. For example, just map function ids to custom 219 // handlers. 220 221 // Printf checking. 222 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 223 if (CheckablePrintfAttr(Format, TheCall)) { 224 bool HasVAListArg = Format->getFirstArg() == 0; 225 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 226 HasVAListArg ? 0 : Format->getFirstArg() - 1); 227 } 228 } 229 230 for (const NonNullAttr *NonNull = FDecl->getAttr<NonNullAttr>(); NonNull; 231 NonNull = NonNull->getNext<NonNullAttr>()) 232 CheckNonNullArguments(NonNull, TheCall); 233 234 return false; 235} 236 237bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { 238 // Printf checking. 239 const FormatAttr *Format = NDecl->getAttr<FormatAttr>(); 240 if (!Format) 241 return false; 242 243 const VarDecl *V = dyn_cast<VarDecl>(NDecl); 244 if (!V) 245 return false; 246 247 QualType Ty = V->getType(); 248 if (!Ty->isBlockPointerType()) 249 return false; 250 251 if (!CheckablePrintfAttr(Format, TheCall)) 252 return false; 253 254 bool HasVAListArg = Format->getFirstArg() == 0; 255 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 256 HasVAListArg ? 0 : Format->getFirstArg() - 1); 257 258 return false; 259} 260 261/// SemaBuiltinAtomicOverloaded - We have a call to a function like 262/// __sync_fetch_and_add, which is an overloaded function based on the pointer 263/// type of its first argument. The main ActOnCallExpr routines have already 264/// promoted the types of arguments because all of these calls are prototyped as 265/// void(...). 266/// 267/// This function goes through and does final semantic checking for these 268/// builtins, 269bool Sema::SemaBuiltinAtomicOverloaded(CallExpr *TheCall) { 270 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 271 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 272 273 // Ensure that we have at least one argument to do type inference from. 274 if (TheCall->getNumArgs() < 1) 275 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 276 << 0 << TheCall->getCallee()->getSourceRange(); 277 278 // Inspect the first argument of the atomic builtin. This should always be 279 // a pointer type, whose element is an integral scalar or pointer type. 280 // Because it is a pointer type, we don't have to worry about any implicit 281 // casts here. 282 Expr *FirstArg = TheCall->getArg(0); 283 if (!FirstArg->getType()->isPointerType()) 284 return Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer) 285 << FirstArg->getType() << FirstArg->getSourceRange(); 286 287 QualType ValType = FirstArg->getType()->getAs<PointerType>()->getPointeeType(); 288 if (!ValType->isIntegerType() && !ValType->isPointerType() && 289 !ValType->isBlockPointerType()) 290 return Diag(DRE->getLocStart(), 291 diag::err_atomic_builtin_must_be_pointer_intptr) 292 << FirstArg->getType() << FirstArg->getSourceRange(); 293 294 // We need to figure out which concrete builtin this maps onto. For example, 295 // __sync_fetch_and_add with a 2 byte object turns into 296 // __sync_fetch_and_add_2. 297#define BUILTIN_ROW(x) \ 298 { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \ 299 Builtin::BI##x##_8, Builtin::BI##x##_16 } 300 301 static const unsigned BuiltinIndices[][5] = { 302 BUILTIN_ROW(__sync_fetch_and_add), 303 BUILTIN_ROW(__sync_fetch_and_sub), 304 BUILTIN_ROW(__sync_fetch_and_or), 305 BUILTIN_ROW(__sync_fetch_and_and), 306 BUILTIN_ROW(__sync_fetch_and_xor), 307 BUILTIN_ROW(__sync_fetch_and_nand), 308 309 BUILTIN_ROW(__sync_add_and_fetch), 310 BUILTIN_ROW(__sync_sub_and_fetch), 311 BUILTIN_ROW(__sync_and_and_fetch), 312 BUILTIN_ROW(__sync_or_and_fetch), 313 BUILTIN_ROW(__sync_xor_and_fetch), 314 BUILTIN_ROW(__sync_nand_and_fetch), 315 316 BUILTIN_ROW(__sync_val_compare_and_swap), 317 BUILTIN_ROW(__sync_bool_compare_and_swap), 318 BUILTIN_ROW(__sync_lock_test_and_set), 319 BUILTIN_ROW(__sync_lock_release) 320 }; 321#undef BUILTIN_ROW 322 323 // Determine the index of the size. 324 unsigned SizeIndex; 325 switch (Context.getTypeSizeInChars(ValType).getQuantity()) { 326 case 1: SizeIndex = 0; break; 327 case 2: SizeIndex = 1; break; 328 case 4: SizeIndex = 2; break; 329 case 8: SizeIndex = 3; break; 330 case 16: SizeIndex = 4; break; 331 default: 332 return Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size) 333 << FirstArg->getType() << FirstArg->getSourceRange(); 334 } 335 336 // Each of these builtins has one pointer argument, followed by some number of 337 // values (0, 1 or 2) followed by a potentially empty varags list of stuff 338 // that we ignore. Find out which row of BuiltinIndices to read from as well 339 // as the number of fixed args. 340 unsigned BuiltinID = FDecl->getBuiltinID(); 341 unsigned BuiltinIndex, NumFixed = 1; 342 switch (BuiltinID) { 343 default: assert(0 && "Unknown overloaded atomic builtin!"); 344 case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break; 345 case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break; 346 case Builtin::BI__sync_fetch_and_or: BuiltinIndex = 2; break; 347 case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break; 348 case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break; 349 case Builtin::BI__sync_fetch_and_nand:BuiltinIndex = 5; break; 350 351 case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 6; break; 352 case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 7; break; 353 case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 8; break; 354 case Builtin::BI__sync_or_and_fetch: BuiltinIndex = 9; break; 355 case Builtin::BI__sync_xor_and_fetch: BuiltinIndex =10; break; 356 case Builtin::BI__sync_nand_and_fetch:BuiltinIndex =11; break; 357 358 case Builtin::BI__sync_val_compare_and_swap: 359 BuiltinIndex = 12; 360 NumFixed = 2; 361 break; 362 case Builtin::BI__sync_bool_compare_and_swap: 363 BuiltinIndex = 13; 364 NumFixed = 2; 365 break; 366 case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 14; break; 367 case Builtin::BI__sync_lock_release: 368 BuiltinIndex = 15; 369 NumFixed = 0; 370 break; 371 } 372 373 // Now that we know how many fixed arguments we expect, first check that we 374 // have at least that many. 375 if (TheCall->getNumArgs() < 1+NumFixed) 376 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 377 << 0 << TheCall->getCallee()->getSourceRange(); 378 379 380 // Get the decl for the concrete builtin from this, we can tell what the 381 // concrete integer type we should convert to is. 382 unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex]; 383 const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID); 384 IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName); 385 FunctionDecl *NewBuiltinDecl = 386 cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID, 387 TUScope, false, DRE->getLocStart())); 388 const FunctionProtoType *BuiltinFT = 389 NewBuiltinDecl->getType()->getAs<FunctionProtoType>(); 390 ValType = BuiltinFT->getArgType(0)->getAs<PointerType>()->getPointeeType(); 391 392 // If the first type needs to be converted (e.g. void** -> int*), do it now. 393 if (BuiltinFT->getArgType(0) != FirstArg->getType()) { 394 ImpCastExprToType(FirstArg, BuiltinFT->getArgType(0), CastExpr::CK_BitCast); 395 TheCall->setArg(0, FirstArg); 396 } 397 398 // Next, walk the valid ones promoting to the right type. 399 for (unsigned i = 0; i != NumFixed; ++i) { 400 Expr *Arg = TheCall->getArg(i+1); 401 402 // If the argument is an implicit cast, then there was a promotion due to 403 // "...", just remove it now. 404 if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) { 405 Arg = ICE->getSubExpr(); 406 ICE->setSubExpr(0); 407 ICE->Destroy(Context); 408 TheCall->setArg(i+1, Arg); 409 } 410 411 // GCC does an implicit conversion to the pointer or integer ValType. This 412 // can fail in some cases (1i -> int**), check for this error case now. 413 CastExpr::CastKind Kind = CastExpr::CK_Unknown; 414 CXXMethodDecl *ConversionDecl = 0; 415 if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg, Kind, 416 ConversionDecl)) 417 return true; 418 419 // Okay, we have something that *can* be converted to the right type. Check 420 // to see if there is a potentially weird extension going on here. This can 421 // happen when you do an atomic operation on something like an char* and 422 // pass in 42. The 42 gets converted to char. This is even more strange 423 // for things like 45.123 -> char, etc. 424 // FIXME: Do this check. 425 ImpCastExprToType(Arg, ValType, Kind, /*isLvalue=*/false); 426 TheCall->setArg(i+1, Arg); 427 } 428 429 // Switch the DeclRefExpr to refer to the new decl. 430 DRE->setDecl(NewBuiltinDecl); 431 DRE->setType(NewBuiltinDecl->getType()); 432 433 // Set the callee in the CallExpr. 434 // FIXME: This leaks the original parens and implicit casts. 435 Expr *PromotedCall = DRE; 436 UsualUnaryConversions(PromotedCall); 437 TheCall->setCallee(PromotedCall); 438 439 440 // Change the result type of the call to match the result type of the decl. 441 TheCall->setType(NewBuiltinDecl->getResultType()); 442 return false; 443} 444 445 446/// CheckObjCString - Checks that the argument to the builtin 447/// CFString constructor is correct 448/// FIXME: GCC currently emits the following warning: 449/// "warning: input conversion stopped due to an input byte that does not 450/// belong to the input codeset UTF-8" 451/// Note: It might also make sense to do the UTF-16 conversion here (would 452/// simplify the backend). 453bool Sema::CheckObjCString(Expr *Arg) { 454 Arg = Arg->IgnoreParenCasts(); 455 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 456 457 if (!Literal || Literal->isWide()) { 458 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 459 << Arg->getSourceRange(); 460 return true; 461 } 462 463 const char *Data = Literal->getStrData(); 464 unsigned Length = Literal->getByteLength(); 465 466 for (unsigned i = 0; i < Length; ++i) { 467 if (!Data[i]) { 468 Diag(getLocationOfStringLiteralByte(Literal, i), 469 diag::warn_cfstring_literal_contains_nul_character) 470 << Arg->getSourceRange(); 471 break; 472 } 473 } 474 475 return false; 476} 477 478/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 479/// Emit an error and return true on failure, return false on success. 480bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 481 Expr *Fn = TheCall->getCallee(); 482 if (TheCall->getNumArgs() > 2) { 483 Diag(TheCall->getArg(2)->getLocStart(), 484 diag::err_typecheck_call_too_many_args) 485 << 0 /*function call*/ << Fn->getSourceRange() 486 << SourceRange(TheCall->getArg(2)->getLocStart(), 487 (*(TheCall->arg_end()-1))->getLocEnd()); 488 return true; 489 } 490 491 if (TheCall->getNumArgs() < 2) { 492 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 493 << 0 /*function call*/; 494 } 495 496 // Determine whether the current function is variadic or not. 497 BlockScopeInfo *CurBlock = getCurBlock(); 498 bool isVariadic; 499 if (CurBlock) 500 isVariadic = CurBlock->isVariadic; 501 else if (getCurFunctionDecl()) { 502 if (FunctionProtoType* FTP = 503 dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType())) 504 isVariadic = FTP->isVariadic(); 505 else 506 isVariadic = false; 507 } else { 508 isVariadic = getCurMethodDecl()->isVariadic(); 509 } 510 511 if (!isVariadic) { 512 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 513 return true; 514 } 515 516 // Verify that the second argument to the builtin is the last argument of the 517 // current function or method. 518 bool SecondArgIsLastNamedArgument = false; 519 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 520 521 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 522 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 523 // FIXME: This isn't correct for methods (results in bogus warning). 524 // Get the last formal in the current function. 525 const ParmVarDecl *LastArg; 526 if (CurBlock) 527 LastArg = *(CurBlock->TheDecl->param_end()-1); 528 else if (FunctionDecl *FD = getCurFunctionDecl()) 529 LastArg = *(FD->param_end()-1); 530 else 531 LastArg = *(getCurMethodDecl()->param_end()-1); 532 SecondArgIsLastNamedArgument = PV == LastArg; 533 } 534 } 535 536 if (!SecondArgIsLastNamedArgument) 537 Diag(TheCall->getArg(1)->getLocStart(), 538 diag::warn_second_parameter_of_va_start_not_last_named_argument); 539 return false; 540} 541 542/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 543/// friends. This is declared to take (...), so we have to check everything. 544bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 545 if (TheCall->getNumArgs() < 2) 546 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 547 << 0 /*function call*/; 548 if (TheCall->getNumArgs() > 2) 549 return Diag(TheCall->getArg(2)->getLocStart(), 550 diag::err_typecheck_call_too_many_args) 551 << 0 /*function call*/ 552 << SourceRange(TheCall->getArg(2)->getLocStart(), 553 (*(TheCall->arg_end()-1))->getLocEnd()); 554 555 Expr *OrigArg0 = TheCall->getArg(0); 556 Expr *OrigArg1 = TheCall->getArg(1); 557 558 // Do standard promotions between the two arguments, returning their common 559 // type. 560 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 561 562 // Make sure any conversions are pushed back into the call; this is 563 // type safe since unordered compare builtins are declared as "_Bool 564 // foo(...)". 565 TheCall->setArg(0, OrigArg0); 566 TheCall->setArg(1, OrigArg1); 567 568 if (OrigArg0->isTypeDependent() || OrigArg1->isTypeDependent()) 569 return false; 570 571 // If the common type isn't a real floating type, then the arguments were 572 // invalid for this operation. 573 if (!Res->isRealFloatingType()) 574 return Diag(OrigArg0->getLocStart(), 575 diag::err_typecheck_call_invalid_ordered_compare) 576 << OrigArg0->getType() << OrigArg1->getType() 577 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 578 579 return false; 580} 581 582/// SemaBuiltinSemaBuiltinFPClassification - Handle functions like 583/// __builtin_isnan and friends. This is declared to take (...), so we have 584/// to check everything. We expect the last argument to be a floating point 585/// value. 586bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs) { 587 if (TheCall->getNumArgs() < NumArgs) 588 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 589 << 0 /*function call*/; 590 if (TheCall->getNumArgs() > NumArgs) 591 return Diag(TheCall->getArg(NumArgs)->getLocStart(), 592 diag::err_typecheck_call_too_many_args) 593 << 0 /*function call*/ 594 << SourceRange(TheCall->getArg(NumArgs)->getLocStart(), 595 (*(TheCall->arg_end()-1))->getLocEnd()); 596 597 Expr *OrigArg = TheCall->getArg(NumArgs-1); 598 599 if (OrigArg->isTypeDependent()) 600 return false; 601 602 // This operation requires a floating-point number 603 if (!OrigArg->getType()->isRealFloatingType()) 604 return Diag(OrigArg->getLocStart(), 605 diag::err_typecheck_call_invalid_unary_fp) 606 << OrigArg->getType() << OrigArg->getSourceRange(); 607 608 return false; 609} 610 611bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 612 // The signature for these builtins is exact; the only thing we need 613 // to check is that the argument is a constant. 614 SourceLocation Loc; 615 if (!TheCall->getArg(0)->isTypeDependent() && 616 !TheCall->getArg(0)->isValueDependent() && 617 !TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 618 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 619 620 return false; 621} 622 623/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 624// This is declared to take (...), so we have to check everything. 625Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 626 if (TheCall->getNumArgs() < 3) 627 return ExprError(Diag(TheCall->getLocEnd(), 628 diag::err_typecheck_call_too_few_args) 629 << 0 /*function call*/ << TheCall->getSourceRange()); 630 631 unsigned numElements = std::numeric_limits<unsigned>::max(); 632 if (!TheCall->getArg(0)->isTypeDependent() && 633 !TheCall->getArg(1)->isTypeDependent()) { 634 QualType FAType = TheCall->getArg(0)->getType(); 635 QualType SAType = TheCall->getArg(1)->getType(); 636 637 if (!FAType->isVectorType() || !SAType->isVectorType()) { 638 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 639 << SourceRange(TheCall->getArg(0)->getLocStart(), 640 TheCall->getArg(1)->getLocEnd()); 641 return ExprError(); 642 } 643 644 if (!Context.hasSameUnqualifiedType(FAType, SAType)) { 645 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 646 << SourceRange(TheCall->getArg(0)->getLocStart(), 647 TheCall->getArg(1)->getLocEnd()); 648 return ExprError(); 649 } 650 651 numElements = FAType->getAs<VectorType>()->getNumElements(); 652 if (TheCall->getNumArgs() != numElements+2) { 653 if (TheCall->getNumArgs() < numElements+2) 654 return ExprError(Diag(TheCall->getLocEnd(), 655 diag::err_typecheck_call_too_few_args) 656 << 0 /*function call*/ << TheCall->getSourceRange()); 657 return ExprError(Diag(TheCall->getLocEnd(), 658 diag::err_typecheck_call_too_many_args) 659 << 0 /*function call*/ << TheCall->getSourceRange()); 660 } 661 } 662 663 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 664 if (TheCall->getArg(i)->isTypeDependent() || 665 TheCall->getArg(i)->isValueDependent()) 666 continue; 667 668 llvm::APSInt Result(32); 669 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 670 return ExprError(Diag(TheCall->getLocStart(), 671 diag::err_shufflevector_nonconstant_argument) 672 << TheCall->getArg(i)->getSourceRange()); 673 674 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 675 return ExprError(Diag(TheCall->getLocStart(), 676 diag::err_shufflevector_argument_too_large) 677 << TheCall->getArg(i)->getSourceRange()); 678 } 679 680 llvm::SmallVector<Expr*, 32> exprs; 681 682 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 683 exprs.push_back(TheCall->getArg(i)); 684 TheCall->setArg(i, 0); 685 } 686 687 return Owned(new (Context) ShuffleVectorExpr(Context, exprs.begin(), 688 exprs.size(), exprs[0]->getType(), 689 TheCall->getCallee()->getLocStart(), 690 TheCall->getRParenLoc())); 691} 692 693/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 694// This is declared to take (const void*, ...) and can take two 695// optional constant int args. 696bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 697 unsigned NumArgs = TheCall->getNumArgs(); 698 699 if (NumArgs > 3) 700 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 701 << 0 /*function call*/ << TheCall->getSourceRange(); 702 703 // Argument 0 is checked for us and the remaining arguments must be 704 // constant integers. 705 for (unsigned i = 1; i != NumArgs; ++i) { 706 Expr *Arg = TheCall->getArg(i); 707 if (Arg->isTypeDependent()) 708 continue; 709 710 if (!Arg->getType()->isIntegralType()) 711 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_arg_type) 712 << Arg->getSourceRange(); 713 714 ImpCastExprToType(Arg, Context.IntTy, CastExpr::CK_IntegralCast); 715 TheCall->setArg(i, Arg); 716 717 if (Arg->isValueDependent()) 718 continue; 719 720 llvm::APSInt Result; 721 if (!Arg->isIntegerConstantExpr(Result, Context)) 722 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_arg_ice) 723 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 724 725 // FIXME: gcc issues a warning and rewrites these to 0. These 726 // seems especially odd for the third argument since the default 727 // is 3. 728 if (i == 1) { 729 if (Result.getLimitedValue() > 1) 730 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 731 << "0" << "1" << Arg->getSourceRange(); 732 } else { 733 if (Result.getLimitedValue() > 3) 734 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 735 << "0" << "3" << Arg->getSourceRange(); 736 } 737 } 738 739 return false; 740} 741 742/// SemaBuiltinEHReturnDataRegNo - Handle __builtin_eh_return_data_regno, the 743/// operand must be an integer constant. 744bool Sema::SemaBuiltinEHReturnDataRegNo(CallExpr *TheCall) { 745 llvm::APSInt Result; 746 if (!TheCall->getArg(0)->isIntegerConstantExpr(Result, Context)) 747 return Diag(TheCall->getLocStart(), diag::err_expr_not_ice) 748 << TheCall->getArg(0)->getSourceRange(); 749 750 return false; 751} 752 753 754/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 755/// int type). This simply type checks that type is one of the defined 756/// constants (0-3). 757// For compatability check 0-3, llvm only handles 0 and 2. 758bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 759 Expr *Arg = TheCall->getArg(1); 760 if (Arg->isTypeDependent()) 761 return false; 762 763 QualType ArgType = Arg->getType(); 764 const BuiltinType *BT = ArgType->getAs<BuiltinType>(); 765 llvm::APSInt Result(32); 766 if (!BT || BT->getKind() != BuiltinType::Int) 767 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 768 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 769 770 if (Arg->isValueDependent()) 771 return false; 772 773 if (!Arg->isIntegerConstantExpr(Result, Context)) { 774 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 775 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 776 } 777 778 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 779 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 780 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 781 } 782 783 return false; 784} 785 786/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). 787/// This checks that val is a constant 1. 788bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 789 Expr *Arg = TheCall->getArg(1); 790 if (Arg->isTypeDependent() || Arg->isValueDependent()) 791 return false; 792 793 llvm::APSInt Result(32); 794 if (!Arg->isIntegerConstantExpr(Result, Context) || Result != 1) 795 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 796 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 797 798 return false; 799} 800 801// Handle i > 1 ? "x" : "y", recursivelly 802bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 803 bool HasVAListArg, 804 unsigned format_idx, unsigned firstDataArg) { 805 if (E->isTypeDependent() || E->isValueDependent()) 806 return false; 807 808 switch (E->getStmtClass()) { 809 case Stmt::ConditionalOperatorClass: { 810 const ConditionalOperator *C = cast<ConditionalOperator>(E); 811 return SemaCheckStringLiteral(C->getTrueExpr(), TheCall, 812 HasVAListArg, format_idx, firstDataArg) 813 && SemaCheckStringLiteral(C->getRHS(), TheCall, 814 HasVAListArg, format_idx, firstDataArg); 815 } 816 817 case Stmt::ImplicitCastExprClass: { 818 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 819 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 820 format_idx, firstDataArg); 821 } 822 823 case Stmt::ParenExprClass: { 824 const ParenExpr *Expr = cast<ParenExpr>(E); 825 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 826 format_idx, firstDataArg); 827 } 828 829 case Stmt::DeclRefExprClass: { 830 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 831 832 // As an exception, do not flag errors for variables binding to 833 // const string literals. 834 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 835 bool isConstant = false; 836 QualType T = DR->getType(); 837 838 if (const ArrayType *AT = Context.getAsArrayType(T)) { 839 isConstant = AT->getElementType().isConstant(Context); 840 } else if (const PointerType *PT = T->getAs<PointerType>()) { 841 isConstant = T.isConstant(Context) && 842 PT->getPointeeType().isConstant(Context); 843 } 844 845 if (isConstant) { 846 if (const Expr *Init = VD->getAnyInitializer()) 847 return SemaCheckStringLiteral(Init, TheCall, 848 HasVAListArg, format_idx, firstDataArg); 849 } 850 851 // For vprintf* functions (i.e., HasVAListArg==true), we add a 852 // special check to see if the format string is a function parameter 853 // of the function calling the printf function. If the function 854 // has an attribute indicating it is a printf-like function, then we 855 // should suppress warnings concerning non-literals being used in a call 856 // to a vprintf function. For example: 857 // 858 // void 859 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){ 860 // va_list ap; 861 // va_start(ap, fmt); 862 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 863 // ... 864 // 865 // 866 // FIXME: We don't have full attribute support yet, so just check to see 867 // if the argument is a DeclRefExpr that references a parameter. We'll 868 // add proper support for checking the attribute later. 869 if (HasVAListArg) 870 if (isa<ParmVarDecl>(VD)) 871 return true; 872 } 873 874 return false; 875 } 876 877 case Stmt::CallExprClass: { 878 const CallExpr *CE = cast<CallExpr>(E); 879 if (const ImplicitCastExpr *ICE 880 = dyn_cast<ImplicitCastExpr>(CE->getCallee())) { 881 if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) { 882 if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) { 883 if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) { 884 unsigned ArgIndex = FA->getFormatIdx(); 885 const Expr *Arg = CE->getArg(ArgIndex - 1); 886 887 return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg, 888 format_idx, firstDataArg); 889 } 890 } 891 } 892 } 893 894 return false; 895 } 896 case Stmt::ObjCStringLiteralClass: 897 case Stmt::StringLiteralClass: { 898 const StringLiteral *StrE = NULL; 899 900 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 901 StrE = ObjCFExpr->getString(); 902 else 903 StrE = cast<StringLiteral>(E); 904 905 if (StrE) { 906 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx, 907 firstDataArg); 908 return true; 909 } 910 911 return false; 912 } 913 914 default: 915 return false; 916 } 917} 918 919void 920Sema::CheckNonNullArguments(const NonNullAttr *NonNull, 921 const CallExpr *TheCall) { 922 for (NonNullAttr::iterator i = NonNull->begin(), e = NonNull->end(); 923 i != e; ++i) { 924 const Expr *ArgExpr = TheCall->getArg(*i); 925 if (ArgExpr->isNullPointerConstant(Context, 926 Expr::NPC_ValueDependentIsNotNull)) 927 Diag(TheCall->getCallee()->getLocStart(), diag::warn_null_arg) 928 << ArgExpr->getSourceRange(); 929 } 930} 931 932/// CheckPrintfArguments - Check calls to printf (and similar functions) for 933/// correct use of format strings. 934/// 935/// HasVAListArg - A predicate indicating whether the printf-like 936/// function is passed an explicit va_arg argument (e.g., vprintf) 937/// 938/// format_idx - The index into Args for the format string. 939/// 940/// Improper format strings to functions in the printf family can be 941/// the source of bizarre bugs and very serious security holes. A 942/// good source of information is available in the following paper 943/// (which includes additional references): 944/// 945/// FormatGuard: Automatic Protection From printf Format String 946/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 947/// 948/// TODO: 949/// Functionality implemented: 950/// 951/// We can statically check the following properties for string 952/// literal format strings for non v.*printf functions (where the 953/// arguments are passed directly): 954// 955/// (1) Are the number of format conversions equal to the number of 956/// data arguments? 957/// 958/// (2) Does each format conversion correctly match the type of the 959/// corresponding data argument? 960/// 961/// Moreover, for all printf functions we can: 962/// 963/// (3) Check for a missing format string (when not caught by type checking). 964/// 965/// (4) Check for no-operation flags; e.g. using "#" with format 966/// conversion 'c' (TODO) 967/// 968/// (5) Check the use of '%n', a major source of security holes. 969/// 970/// (6) Check for malformed format conversions that don't specify anything. 971/// 972/// (7) Check for empty format strings. e.g: printf(""); 973/// 974/// (8) Check that the format string is a wide literal. 975/// 976/// All of these checks can be done by parsing the format string. 977/// 978void 979Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, 980 unsigned format_idx, unsigned firstDataArg) { 981 const Expr *Fn = TheCall->getCallee(); 982 983 // The way the format attribute works in GCC, the implicit this argument 984 // of member functions is counted. However, it doesn't appear in our own 985 // lists, so decrement format_idx in that case. 986 if (isa<CXXMemberCallExpr>(TheCall)) { 987 // Catch a format attribute mistakenly referring to the object argument. 988 if (format_idx == 0) 989 return; 990 --format_idx; 991 if(firstDataArg != 0) 992 --firstDataArg; 993 } 994 995 // CHECK: printf-like function is called with no format string. 996 if (format_idx >= TheCall->getNumArgs()) { 997 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 998 << Fn->getSourceRange(); 999 return; 1000 } 1001 1002 const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 1003 1004 // CHECK: format string is not a string literal. 1005 // 1006 // Dynamically generated format strings are difficult to 1007 // automatically vet at compile time. Requiring that format strings 1008 // are string literals: (1) permits the checking of format strings by 1009 // the compiler and thereby (2) can practically remove the source of 1010 // many format string exploits. 1011 1012 // Format string can be either ObjC string (e.g. @"%d") or 1013 // C string (e.g. "%d") 1014 // ObjC string uses the same format specifiers as C string, so we can use 1015 // the same format string checking logic for both ObjC and C strings. 1016 if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx, 1017 firstDataArg)) 1018 return; // Literal format string found, check done! 1019 1020 // If there are no arguments specified, warn with -Wformat-security, otherwise 1021 // warn only with -Wformat-nonliteral. 1022 if (TheCall->getNumArgs() == format_idx+1) 1023 Diag(TheCall->getArg(format_idx)->getLocStart(), 1024 diag::warn_printf_nonliteral_noargs) 1025 << OrigFormatExpr->getSourceRange(); 1026 else 1027 Diag(TheCall->getArg(format_idx)->getLocStart(), 1028 diag::warn_printf_nonliteral) 1029 << OrigFormatExpr->getSourceRange(); 1030} 1031 1032namespace { 1033class CheckPrintfHandler : public analyze_printf::FormatStringHandler { 1034 Sema &S; 1035 const StringLiteral *FExpr; 1036 const Expr *OrigFormatExpr; 1037 const unsigned FirstDataArg; 1038 const unsigned NumDataArgs; 1039 const bool IsObjCLiteral; 1040 const char *Beg; // Start of format string. 1041 const bool HasVAListArg; 1042 const CallExpr *TheCall; 1043 unsigned FormatIdx; 1044 llvm::BitVector CoveredArgs; 1045 bool usesPositionalArgs; 1046 bool atFirstArg; 1047public: 1048 CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, 1049 const Expr *origFormatExpr, unsigned firstDataArg, 1050 unsigned numDataArgs, bool isObjCLiteral, 1051 const char *beg, bool hasVAListArg, 1052 const CallExpr *theCall, unsigned formatIdx) 1053 : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), 1054 FirstDataArg(firstDataArg), 1055 NumDataArgs(numDataArgs), 1056 IsObjCLiteral(isObjCLiteral), Beg(beg), 1057 HasVAListArg(hasVAListArg), 1058 TheCall(theCall), FormatIdx(formatIdx), 1059 usesPositionalArgs(false), atFirstArg(true) { 1060 CoveredArgs.resize(numDataArgs); 1061 CoveredArgs.reset(); 1062 } 1063 1064 void DoneProcessing(); 1065 1066 void HandleIncompleteFormatSpecifier(const char *startSpecifier, 1067 unsigned specifierLen); 1068 1069 bool 1070 HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, 1071 const char *startSpecifier, 1072 unsigned specifierLen); 1073 1074 virtual void HandleInvalidPosition(const char *startSpecifier, 1075 unsigned specifierLen, 1076 analyze_printf::PositionContext p); 1077 1078 virtual void HandleZeroPosition(const char *startPos, unsigned posLen); 1079 1080 void HandleNullChar(const char *nullCharacter); 1081 1082 bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS, 1083 const char *startSpecifier, 1084 unsigned specifierLen); 1085private: 1086 SourceRange getFormatStringRange(); 1087 SourceRange getFormatSpecifierRange(const char *startSpecifier, 1088 unsigned specifierLen); 1089 SourceLocation getLocationOfByte(const char *x); 1090 1091 bool HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned k, 1092 const char *startSpecifier, unsigned specifierLen); 1093 void HandleFlags(const analyze_printf::FormatSpecifier &FS, 1094 llvm::StringRef flag, llvm::StringRef cspec, 1095 const char *startSpecifier, unsigned specifierLen); 1096 1097 const Expr *getDataArg(unsigned i) const; 1098}; 1099} 1100 1101SourceRange CheckPrintfHandler::getFormatStringRange() { 1102 return OrigFormatExpr->getSourceRange(); 1103} 1104 1105SourceRange CheckPrintfHandler:: 1106getFormatSpecifierRange(const char *startSpecifier, unsigned specifierLen) { 1107 return SourceRange(getLocationOfByte(startSpecifier), 1108 getLocationOfByte(startSpecifier+specifierLen-1)); 1109} 1110 1111SourceLocation CheckPrintfHandler::getLocationOfByte(const char *x) { 1112 return S.getLocationOfStringLiteralByte(FExpr, x - Beg); 1113} 1114 1115void CheckPrintfHandler:: 1116HandleIncompleteFormatSpecifier(const char *startSpecifier, 1117 unsigned specifierLen) { 1118 SourceLocation Loc = getLocationOfByte(startSpecifier); 1119 S.Diag(Loc, diag::warn_printf_incomplete_specifier) 1120 << getFormatSpecifierRange(startSpecifier, specifierLen); 1121} 1122 1123void 1124CheckPrintfHandler::HandleInvalidPosition(const char *startPos, unsigned posLen, 1125 analyze_printf::PositionContext p) { 1126 SourceLocation Loc = getLocationOfByte(startPos); 1127 S.Diag(Loc, diag::warn_printf_invalid_positional_specifier) 1128 << (unsigned) p << getFormatSpecifierRange(startPos, posLen); 1129} 1130 1131void CheckPrintfHandler::HandleZeroPosition(const char *startPos, 1132 unsigned posLen) { 1133 SourceLocation Loc = getLocationOfByte(startPos); 1134 S.Diag(Loc, diag::warn_printf_zero_positional_specifier) 1135 << getFormatSpecifierRange(startPos, posLen); 1136} 1137 1138bool CheckPrintfHandler:: 1139HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, 1140 const char *startSpecifier, 1141 unsigned specifierLen) { 1142 1143 unsigned argIndex = FS.getArgIndex(); 1144 bool keepGoing = true; 1145 if (argIndex < NumDataArgs) { 1146 // Consider the argument coverered, even though the specifier doesn't 1147 // make sense. 1148 CoveredArgs.set(argIndex); 1149 } 1150 else { 1151 // If argIndex exceeds the number of data arguments we 1152 // don't issue a warning because that is just a cascade of warnings (and 1153 // they may have intended '%%' anyway). We don't want to continue processing 1154 // the format string after this point, however, as we will like just get 1155 // gibberish when trying to match arguments. 1156 keepGoing = false; 1157 } 1158 1159 const analyze_printf::ConversionSpecifier &CS = 1160 FS.getConversionSpecifier(); 1161 SourceLocation Loc = getLocationOfByte(CS.getStart()); 1162 S.Diag(Loc, diag::warn_printf_invalid_conversion) 1163 << llvm::StringRef(CS.getStart(), CS.getLength()) 1164 << getFormatSpecifierRange(startSpecifier, specifierLen); 1165 1166 return keepGoing; 1167} 1168 1169void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) { 1170 // The presence of a null character is likely an error. 1171 S.Diag(getLocationOfByte(nullCharacter), 1172 diag::warn_printf_format_string_contains_null_char) 1173 << getFormatStringRange(); 1174} 1175 1176const Expr *CheckPrintfHandler::getDataArg(unsigned i) const { 1177 return TheCall->getArg(FirstDataArg + i); 1178} 1179 1180void CheckPrintfHandler::HandleFlags(const analyze_printf::FormatSpecifier &FS, 1181 llvm::StringRef flag, 1182 llvm::StringRef cspec, 1183 const char *startSpecifier, 1184 unsigned specifierLen) { 1185 const analyze_printf::ConversionSpecifier &CS = FS.getConversionSpecifier(); 1186 S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_nonsensical_flag) 1187 << flag << cspec << getFormatSpecifierRange(startSpecifier, specifierLen); 1188} 1189 1190bool 1191CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt, 1192 unsigned k, const char *startSpecifier, 1193 unsigned specifierLen) { 1194 1195 if (Amt.hasDataArgument()) { 1196 if (!HasVAListArg) { 1197 unsigned argIndex = Amt.getArgIndex(); 1198 if (argIndex >= NumDataArgs) { 1199 S.Diag(getLocationOfByte(Amt.getStart()), 1200 diag::warn_printf_asterisk_missing_arg) 1201 << k << getFormatSpecifierRange(startSpecifier, specifierLen); 1202 // Don't do any more checking. We will just emit 1203 // spurious errors. 1204 return false; 1205 } 1206 1207 // Type check the data argument. It should be an 'int'. 1208 // Although not in conformance with C99, we also allow the argument to be 1209 // an 'unsigned int' as that is a reasonably safe case. GCC also 1210 // doesn't emit a warning for that case. 1211 CoveredArgs.set(argIndex); 1212 const Expr *Arg = getDataArg(argIndex); 1213 QualType T = Arg->getType(); 1214 1215 const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context); 1216 assert(ATR.isValid()); 1217 1218 if (!ATR.matchesType(S.Context, T)) { 1219 S.Diag(getLocationOfByte(Amt.getStart()), 1220 diag::warn_printf_asterisk_wrong_type) 1221 << k 1222 << ATR.getRepresentativeType(S.Context) << T 1223 << getFormatSpecifierRange(startSpecifier, specifierLen) 1224 << Arg->getSourceRange(); 1225 // Don't do any more checking. We will just emit 1226 // spurious errors. 1227 return false; 1228 } 1229 } 1230 } 1231 return true; 1232} 1233 1234bool 1235CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier 1236 &FS, 1237 const char *startSpecifier, 1238 unsigned specifierLen) { 1239 1240 using namespace analyze_printf; 1241 const ConversionSpecifier &CS = FS.getConversionSpecifier(); 1242 1243 if (atFirstArg) { 1244 atFirstArg = false; 1245 usesPositionalArgs = FS.usesPositionalArg(); 1246 } 1247 else if (usesPositionalArgs != FS.usesPositionalArg()) { 1248 // Cannot mix-and-match positional and non-positional arguments. 1249 S.Diag(getLocationOfByte(CS.getStart()), 1250 diag::warn_printf_mix_positional_nonpositional_args) 1251 << getFormatSpecifierRange(startSpecifier, specifierLen); 1252 return false; 1253 } 1254 1255 // First check if the field width, precision, and conversion specifier 1256 // have matching data arguments. 1257 if (!HandleAmount(FS.getFieldWidth(), /* field width */ 0, 1258 startSpecifier, specifierLen)) { 1259 return false; 1260 } 1261 1262 if (!HandleAmount(FS.getPrecision(), /* precision */ 1, 1263 startSpecifier, specifierLen)) { 1264 return false; 1265 } 1266 1267 if (!CS.consumesDataArgument()) { 1268 // FIXME: Technically specifying a precision or field width here 1269 // makes no sense. Worth issuing a warning at some point. 1270 return true; 1271 } 1272 1273 // Consume the argument. 1274 unsigned argIndex = FS.getArgIndex(); 1275 if (argIndex < NumDataArgs) { 1276 // The check to see if the argIndex is valid will come later. 1277 // We set the bit here because we may exit early from this 1278 // function if we encounter some other error. 1279 CoveredArgs.set(argIndex); 1280 } 1281 1282 // Check for using an Objective-C specific conversion specifier 1283 // in a non-ObjC literal. 1284 if (!IsObjCLiteral && CS.isObjCArg()) { 1285 return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen); 1286 } 1287 1288 // Are we using '%n'? Issue a warning about this being 1289 // a possible security issue. 1290 if (CS.getKind() == ConversionSpecifier::OutIntPtrArg) { 1291 S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back) 1292 << getFormatSpecifierRange(startSpecifier, specifierLen); 1293 // Continue checking the other format specifiers. 1294 return true; 1295 } 1296 1297 if (CS.getKind() == ConversionSpecifier::VoidPtrArg) { 1298 if (FS.getPrecision().getHowSpecified() != OptionalAmount::NotSpecified) 1299 S.Diag(getLocationOfByte(CS.getStart()), 1300 diag::warn_printf_nonsensical_precision) 1301 << CS.getCharacters() 1302 << getFormatSpecifierRange(startSpecifier, specifierLen); 1303 } 1304 if (CS.getKind() == ConversionSpecifier::VoidPtrArg || 1305 CS.getKind() == ConversionSpecifier::CStrArg) { 1306 // FIXME: Instead of using "0", "+", etc., eventually get them from 1307 // the FormatSpecifier. 1308 if (FS.hasLeadingZeros()) 1309 HandleFlags(FS, "0", CS.getCharacters(), startSpecifier, specifierLen); 1310 if (FS.hasPlusPrefix()) 1311 HandleFlags(FS, "+", CS.getCharacters(), startSpecifier, specifierLen); 1312 if (FS.hasSpacePrefix()) 1313 HandleFlags(FS, " ", CS.getCharacters(), startSpecifier, specifierLen); 1314 } 1315 1316 // The remaining checks depend on the data arguments. 1317 if (HasVAListArg) 1318 return true; 1319 1320 if (argIndex >= NumDataArgs) { 1321 if (FS.usesPositionalArg()) { 1322 S.Diag(getLocationOfByte(CS.getStart()), 1323 diag::warn_printf_positional_arg_exceeds_data_args) 1324 << (argIndex+1) << NumDataArgs 1325 << getFormatSpecifierRange(startSpecifier, specifierLen); 1326 } 1327 else { 1328 S.Diag(getLocationOfByte(CS.getStart()), 1329 diag::warn_printf_insufficient_data_args) 1330 << getFormatSpecifierRange(startSpecifier, specifierLen); 1331 } 1332 1333 // Don't do any more checking. 1334 return false; 1335 } 1336 1337 // Now type check the data expression that matches the 1338 // format specifier. 1339 const Expr *Ex = getDataArg(argIndex); 1340 const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context); 1341 if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) { 1342 // Check if we didn't match because of an implicit cast from a 'char' 1343 // or 'short' to an 'int'. This is done because printf is a varargs 1344 // function. 1345 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Ex)) 1346 if (ICE->getType() == S.Context.IntTy) 1347 if (ATR.matchesType(S.Context, ICE->getSubExpr()->getType())) 1348 return true; 1349 1350 S.Diag(getLocationOfByte(CS.getStart()), 1351 diag::warn_printf_conversion_argument_type_mismatch) 1352 << ATR.getRepresentativeType(S.Context) << Ex->getType() 1353 << getFormatSpecifierRange(startSpecifier, specifierLen) 1354 << Ex->getSourceRange(); 1355 } 1356 1357 return true; 1358} 1359 1360void CheckPrintfHandler::DoneProcessing() { 1361 // Does the number of data arguments exceed the number of 1362 // format conversions in the format string? 1363 if (!HasVAListArg) { 1364 // Find any arguments that weren't covered. 1365 CoveredArgs.flip(); 1366 signed notCoveredArg = CoveredArgs.find_first(); 1367 if (notCoveredArg >= 0) { 1368 assert((unsigned)notCoveredArg < NumDataArgs); 1369 S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(), 1370 diag::warn_printf_data_arg_not_used) 1371 << getFormatStringRange(); 1372 } 1373 } 1374} 1375 1376void Sema::CheckPrintfString(const StringLiteral *FExpr, 1377 const Expr *OrigFormatExpr, 1378 const CallExpr *TheCall, bool HasVAListArg, 1379 unsigned format_idx, unsigned firstDataArg) { 1380 1381 // CHECK: is the format string a wide literal? 1382 if (FExpr->isWide()) { 1383 Diag(FExpr->getLocStart(), 1384 diag::warn_printf_format_string_is_wide_literal) 1385 << OrigFormatExpr->getSourceRange(); 1386 return; 1387 } 1388 1389 // Str - The format string. NOTE: this is NOT null-terminated! 1390 const char *Str = FExpr->getStrData(); 1391 1392 // CHECK: empty format string? 1393 unsigned StrLen = FExpr->getByteLength(); 1394 1395 if (StrLen == 0) { 1396 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 1397 << OrigFormatExpr->getSourceRange(); 1398 return; 1399 } 1400 1401 CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, 1402 TheCall->getNumArgs() - firstDataArg, 1403 isa<ObjCStringLiteral>(OrigFormatExpr), Str, 1404 HasVAListArg, TheCall, format_idx); 1405 1406 if (!analyze_printf::ParseFormatString(H, Str, Str + StrLen)) 1407 H.DoneProcessing(); 1408} 1409 1410//===--- CHECK: Return Address of Stack Variable --------------------------===// 1411 1412static DeclRefExpr* EvalVal(Expr *E); 1413static DeclRefExpr* EvalAddr(Expr* E); 1414 1415/// CheckReturnStackAddr - Check if a return statement returns the address 1416/// of a stack variable. 1417void 1418Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 1419 SourceLocation ReturnLoc) { 1420 1421 // Perform checking for returned stack addresses. 1422 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 1423 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 1424 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 1425 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1426 1427 // Skip over implicit cast expressions when checking for block expressions. 1428 RetValExp = RetValExp->IgnoreParenCasts(); 1429 1430 if (BlockExpr *C = dyn_cast<BlockExpr>(RetValExp)) 1431 if (C->hasBlockDeclRefExprs()) 1432 Diag(C->getLocStart(), diag::err_ret_local_block) 1433 << C->getSourceRange(); 1434 1435 if (AddrLabelExpr *ALE = dyn_cast<AddrLabelExpr>(RetValExp)) 1436 Diag(ALE->getLocStart(), diag::warn_ret_addr_label) 1437 << ALE->getSourceRange(); 1438 1439 } else if (lhsType->isReferenceType()) { 1440 // Perform checking for stack values returned by reference. 1441 // Check for a reference to the stack 1442 if (DeclRefExpr *DR = EvalVal(RetValExp)) 1443 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 1444 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1445 } 1446} 1447 1448/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 1449/// check if the expression in a return statement evaluates to an address 1450/// to a location on the stack. The recursion is used to traverse the 1451/// AST of the return expression, with recursion backtracking when we 1452/// encounter a subexpression that (1) clearly does not lead to the address 1453/// of a stack variable or (2) is something we cannot determine leads to 1454/// the address of a stack variable based on such local checking. 1455/// 1456/// EvalAddr processes expressions that are pointers that are used as 1457/// references (and not L-values). EvalVal handles all other values. 1458/// At the base case of the recursion is a check for a DeclRefExpr* in 1459/// the refers to a stack variable. 1460/// 1461/// This implementation handles: 1462/// 1463/// * pointer-to-pointer casts 1464/// * implicit conversions from array references to pointers 1465/// * taking the address of fields 1466/// * arbitrary interplay between "&" and "*" operators 1467/// * pointer arithmetic from an address of a stack variable 1468/// * taking the address of an array element where the array is on the stack 1469static DeclRefExpr* EvalAddr(Expr *E) { 1470 // We should only be called for evaluating pointer expressions. 1471 assert((E->getType()->isAnyPointerType() || 1472 E->getType()->isBlockPointerType() || 1473 E->getType()->isObjCQualifiedIdType()) && 1474 "EvalAddr only works on pointers"); 1475 1476 // Our "symbolic interpreter" is just a dispatch off the currently 1477 // viewed AST node. We then recursively traverse the AST by calling 1478 // EvalAddr and EvalVal appropriately. 1479 switch (E->getStmtClass()) { 1480 case Stmt::ParenExprClass: 1481 // Ignore parentheses. 1482 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 1483 1484 case Stmt::UnaryOperatorClass: { 1485 // The only unary operator that make sense to handle here 1486 // is AddrOf. All others don't make sense as pointers. 1487 UnaryOperator *U = cast<UnaryOperator>(E); 1488 1489 if (U->getOpcode() == UnaryOperator::AddrOf) 1490 return EvalVal(U->getSubExpr()); 1491 else 1492 return NULL; 1493 } 1494 1495 case Stmt::BinaryOperatorClass: { 1496 // Handle pointer arithmetic. All other binary operators are not valid 1497 // in this context. 1498 BinaryOperator *B = cast<BinaryOperator>(E); 1499 BinaryOperator::Opcode op = B->getOpcode(); 1500 1501 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 1502 return NULL; 1503 1504 Expr *Base = B->getLHS(); 1505 1506 // Determine which argument is the real pointer base. It could be 1507 // the RHS argument instead of the LHS. 1508 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 1509 1510 assert (Base->getType()->isPointerType()); 1511 return EvalAddr(Base); 1512 } 1513 1514 // For conditional operators we need to see if either the LHS or RHS are 1515 // valid DeclRefExpr*s. If one of them is valid, we return it. 1516 case Stmt::ConditionalOperatorClass: { 1517 ConditionalOperator *C = cast<ConditionalOperator>(E); 1518 1519 // Handle the GNU extension for missing LHS. 1520 if (Expr *lhsExpr = C->getLHS()) 1521 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 1522 return LHS; 1523 1524 return EvalAddr(C->getRHS()); 1525 } 1526 1527 // For casts, we need to handle conversions from arrays to 1528 // pointer values, and pointer-to-pointer conversions. 1529 case Stmt::ImplicitCastExprClass: 1530 case Stmt::CStyleCastExprClass: 1531 case Stmt::CXXFunctionalCastExprClass: { 1532 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 1533 QualType T = SubExpr->getType(); 1534 1535 if (SubExpr->getType()->isPointerType() || 1536 SubExpr->getType()->isBlockPointerType() || 1537 SubExpr->getType()->isObjCQualifiedIdType()) 1538 return EvalAddr(SubExpr); 1539 else if (T->isArrayType()) 1540 return EvalVal(SubExpr); 1541 else 1542 return 0; 1543 } 1544 1545 // C++ casts. For dynamic casts, static casts, and const casts, we 1546 // are always converting from a pointer-to-pointer, so we just blow 1547 // through the cast. In the case the dynamic cast doesn't fail (and 1548 // return NULL), we take the conservative route and report cases 1549 // where we return the address of a stack variable. For Reinterpre 1550 // FIXME: The comment about is wrong; we're not always converting 1551 // from pointer to pointer. I'm guessing that this code should also 1552 // handle references to objects. 1553 case Stmt::CXXStaticCastExprClass: 1554 case Stmt::CXXDynamicCastExprClass: 1555 case Stmt::CXXConstCastExprClass: 1556 case Stmt::CXXReinterpretCastExprClass: { 1557 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 1558 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 1559 return EvalAddr(S); 1560 else 1561 return NULL; 1562 } 1563 1564 // Everything else: we simply don't reason about them. 1565 default: 1566 return NULL; 1567 } 1568} 1569 1570 1571/// EvalVal - This function is complements EvalAddr in the mutual recursion. 1572/// See the comments for EvalAddr for more details. 1573static DeclRefExpr* EvalVal(Expr *E) { 1574 1575 // We should only be called for evaluating non-pointer expressions, or 1576 // expressions with a pointer type that are not used as references but instead 1577 // are l-values (e.g., DeclRefExpr with a pointer type). 1578 1579 // Our "symbolic interpreter" is just a dispatch off the currently 1580 // viewed AST node. We then recursively traverse the AST by calling 1581 // EvalAddr and EvalVal appropriately. 1582 switch (E->getStmtClass()) { 1583 case Stmt::DeclRefExprClass: { 1584 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 1585 // at code that refers to a variable's name. We check if it has local 1586 // storage within the function, and if so, return the expression. 1587 DeclRefExpr *DR = cast<DeclRefExpr>(E); 1588 1589 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 1590 if (V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 1591 1592 return NULL; 1593 } 1594 1595 case Stmt::ParenExprClass: 1596 // Ignore parentheses. 1597 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 1598 1599 case Stmt::UnaryOperatorClass: { 1600 // The only unary operator that make sense to handle here 1601 // is Deref. All others don't resolve to a "name." This includes 1602 // handling all sorts of rvalues passed to a unary operator. 1603 UnaryOperator *U = cast<UnaryOperator>(E); 1604 1605 if (U->getOpcode() == UnaryOperator::Deref) 1606 return EvalAddr(U->getSubExpr()); 1607 1608 return NULL; 1609 } 1610 1611 case Stmt::ArraySubscriptExprClass: { 1612 // Array subscripts are potential references to data on the stack. We 1613 // retrieve the DeclRefExpr* for the array variable if it indeed 1614 // has local storage. 1615 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 1616 } 1617 1618 case Stmt::ConditionalOperatorClass: { 1619 // For conditional operators we need to see if either the LHS or RHS are 1620 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 1621 ConditionalOperator *C = cast<ConditionalOperator>(E); 1622 1623 // Handle the GNU extension for missing LHS. 1624 if (Expr *lhsExpr = C->getLHS()) 1625 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 1626 return LHS; 1627 1628 return EvalVal(C->getRHS()); 1629 } 1630 1631 // Accesses to members are potential references to data on the stack. 1632 case Stmt::MemberExprClass: { 1633 MemberExpr *M = cast<MemberExpr>(E); 1634 1635 // Check for indirect access. We only want direct field accesses. 1636 if (!M->isArrow()) 1637 return EvalVal(M->getBase()); 1638 else 1639 return NULL; 1640 } 1641 1642 // Everything else: we simply don't reason about them. 1643 default: 1644 return NULL; 1645 } 1646} 1647 1648//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 1649 1650/// Check for comparisons of floating point operands using != and ==. 1651/// Issue a warning if these are no self-comparisons, as they are not likely 1652/// to do what the programmer intended. 1653void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1654 bool EmitWarning = true; 1655 1656 Expr* LeftExprSansParen = lex->IgnoreParens(); 1657 Expr* RightExprSansParen = rex->IgnoreParens(); 1658 1659 // Special case: check for x == x (which is OK). 1660 // Do not emit warnings for such cases. 1661 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1662 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1663 if (DRL->getDecl() == DRR->getDecl()) 1664 EmitWarning = false; 1665 1666 1667 // Special case: check for comparisons against literals that can be exactly 1668 // represented by APFloat. In such cases, do not emit a warning. This 1669 // is a heuristic: often comparison against such literals are used to 1670 // detect if a value in a variable has not changed. This clearly can 1671 // lead to false negatives. 1672 if (EmitWarning) { 1673 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1674 if (FLL->isExact()) 1675 EmitWarning = false; 1676 } else 1677 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1678 if (FLR->isExact()) 1679 EmitWarning = false; 1680 } 1681 } 1682 1683 // Check for comparisons with builtin types. 1684 if (EmitWarning) 1685 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1686 if (CL->isBuiltinCall(Context)) 1687 EmitWarning = false; 1688 1689 if (EmitWarning) 1690 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1691 if (CR->isBuiltinCall(Context)) 1692 EmitWarning = false; 1693 1694 // Emit the diagnostic. 1695 if (EmitWarning) 1696 Diag(loc, diag::warn_floatingpoint_eq) 1697 << lex->getSourceRange() << rex->getSourceRange(); 1698} 1699 1700//===--- CHECK: Integer mixed-sign comparisons (-Wsign-compare) --------===// 1701//===--- CHECK: Lossy implicit conversions (-Wconversion) --------------===// 1702 1703namespace { 1704 1705/// Structure recording the 'active' range of an integer-valued 1706/// expression. 1707struct IntRange { 1708 /// The number of bits active in the int. 1709 unsigned Width; 1710 1711 /// True if the int is known not to have negative values. 1712 bool NonNegative; 1713 1714 IntRange() {} 1715 IntRange(unsigned Width, bool NonNegative) 1716 : Width(Width), NonNegative(NonNegative) 1717 {} 1718 1719 // Returns the range of the bool type. 1720 static IntRange forBoolType() { 1721 return IntRange(1, true); 1722 } 1723 1724 // Returns the range of an integral type. 1725 static IntRange forType(ASTContext &C, QualType T) { 1726 return forCanonicalType(C, T->getCanonicalTypeInternal().getTypePtr()); 1727 } 1728 1729 // Returns the range of an integeral type based on its canonical 1730 // representation. 1731 static IntRange forCanonicalType(ASTContext &C, const Type *T) { 1732 assert(T->isCanonicalUnqualified()); 1733 1734 if (const VectorType *VT = dyn_cast<VectorType>(T)) 1735 T = VT->getElementType().getTypePtr(); 1736 if (const ComplexType *CT = dyn_cast<ComplexType>(T)) 1737 T = CT->getElementType().getTypePtr(); 1738 if (const EnumType *ET = dyn_cast<EnumType>(T)) 1739 T = ET->getDecl()->getIntegerType().getTypePtr(); 1740 1741 const BuiltinType *BT = cast<BuiltinType>(T); 1742 assert(BT->isInteger()); 1743 1744 return IntRange(C.getIntWidth(QualType(T, 0)), BT->isUnsignedInteger()); 1745 } 1746 1747 // Returns the supremum of two ranges: i.e. their conservative merge. 1748 static IntRange join(IntRange L, IntRange R) { 1749 return IntRange(std::max(L.Width, R.Width), 1750 L.NonNegative && R.NonNegative); 1751 } 1752 1753 // Returns the infinum of two ranges: i.e. their aggressive merge. 1754 static IntRange meet(IntRange L, IntRange R) { 1755 return IntRange(std::min(L.Width, R.Width), 1756 L.NonNegative || R.NonNegative); 1757 } 1758}; 1759 1760IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) { 1761 if (value.isSigned() && value.isNegative()) 1762 return IntRange(value.getMinSignedBits(), false); 1763 1764 if (value.getBitWidth() > MaxWidth) 1765 value.trunc(MaxWidth); 1766 1767 // isNonNegative() just checks the sign bit without considering 1768 // signedness. 1769 return IntRange(value.getActiveBits(), true); 1770} 1771 1772IntRange GetValueRange(ASTContext &C, APValue &result, QualType Ty, 1773 unsigned MaxWidth) { 1774 if (result.isInt()) 1775 return GetValueRange(C, result.getInt(), MaxWidth); 1776 1777 if (result.isVector()) { 1778 IntRange R = GetValueRange(C, result.getVectorElt(0), Ty, MaxWidth); 1779 for (unsigned i = 1, e = result.getVectorLength(); i != e; ++i) { 1780 IntRange El = GetValueRange(C, result.getVectorElt(i), Ty, MaxWidth); 1781 R = IntRange::join(R, El); 1782 } 1783 return R; 1784 } 1785 1786 if (result.isComplexInt()) { 1787 IntRange R = GetValueRange(C, result.getComplexIntReal(), MaxWidth); 1788 IntRange I = GetValueRange(C, result.getComplexIntImag(), MaxWidth); 1789 return IntRange::join(R, I); 1790 } 1791 1792 // This can happen with lossless casts to intptr_t of "based" lvalues. 1793 // Assume it might use arbitrary bits. 1794 // FIXME: The only reason we need to pass the type in here is to get 1795 // the sign right on this one case. It would be nice if APValue 1796 // preserved this. 1797 assert(result.isLValue()); 1798 return IntRange(MaxWidth, Ty->isUnsignedIntegerType()); 1799} 1800 1801/// Pseudo-evaluate the given integer expression, estimating the 1802/// range of values it might take. 1803/// 1804/// \param MaxWidth - the width to which the value will be truncated 1805IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) { 1806 E = E->IgnoreParens(); 1807 1808 // Try a full evaluation first. 1809 Expr::EvalResult result; 1810 if (E->Evaluate(result, C)) 1811 return GetValueRange(C, result.Val, E->getType(), MaxWidth); 1812 1813 // I think we only want to look through implicit casts here; if the 1814 // user has an explicit widening cast, we should treat the value as 1815 // being of the new, wider type. 1816 if (ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E)) { 1817 if (CE->getCastKind() == CastExpr::CK_NoOp) 1818 return GetExprRange(C, CE->getSubExpr(), MaxWidth); 1819 1820 IntRange OutputTypeRange = IntRange::forType(C, CE->getType()); 1821 1822 bool isIntegerCast = (CE->getCastKind() == CastExpr::CK_IntegralCast); 1823 if (!isIntegerCast && CE->getCastKind() == CastExpr::CK_Unknown) 1824 isIntegerCast = CE->getSubExpr()->getType()->isIntegerType(); 1825 1826 // Assume that non-integer casts can span the full range of the type. 1827 if (!isIntegerCast) 1828 return OutputTypeRange; 1829 1830 IntRange SubRange 1831 = GetExprRange(C, CE->getSubExpr(), 1832 std::min(MaxWidth, OutputTypeRange.Width)); 1833 1834 // Bail out if the subexpr's range is as wide as the cast type. 1835 if (SubRange.Width >= OutputTypeRange.Width) 1836 return OutputTypeRange; 1837 1838 // Otherwise, we take the smaller width, and we're non-negative if 1839 // either the output type or the subexpr is. 1840 return IntRange(SubRange.Width, 1841 SubRange.NonNegative || OutputTypeRange.NonNegative); 1842 } 1843 1844 if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) { 1845 // If we can fold the condition, just take that operand. 1846 bool CondResult; 1847 if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C)) 1848 return GetExprRange(C, CondResult ? CO->getTrueExpr() 1849 : CO->getFalseExpr(), 1850 MaxWidth); 1851 1852 // Otherwise, conservatively merge. 1853 IntRange L = GetExprRange(C, CO->getTrueExpr(), MaxWidth); 1854 IntRange R = GetExprRange(C, CO->getFalseExpr(), MaxWidth); 1855 return IntRange::join(L, R); 1856 } 1857 1858 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) { 1859 switch (BO->getOpcode()) { 1860 1861 // Boolean-valued operations are single-bit and positive. 1862 case BinaryOperator::LAnd: 1863 case BinaryOperator::LOr: 1864 case BinaryOperator::LT: 1865 case BinaryOperator::GT: 1866 case BinaryOperator::LE: 1867 case BinaryOperator::GE: 1868 case BinaryOperator::EQ: 1869 case BinaryOperator::NE: 1870 return IntRange::forBoolType(); 1871 1872 // The type of these compound assignments is the type of the LHS, 1873 // so the RHS is not necessarily an integer. 1874 case BinaryOperator::MulAssign: 1875 case BinaryOperator::DivAssign: 1876 case BinaryOperator::RemAssign: 1877 case BinaryOperator::AddAssign: 1878 case BinaryOperator::SubAssign: 1879 return IntRange::forType(C, E->getType()); 1880 1881 // Operations with opaque sources are black-listed. 1882 case BinaryOperator::PtrMemD: 1883 case BinaryOperator::PtrMemI: 1884 return IntRange::forType(C, E->getType()); 1885 1886 // Bitwise-and uses the *infinum* of the two source ranges. 1887 case BinaryOperator::And: 1888 case BinaryOperator::AndAssign: 1889 return IntRange::meet(GetExprRange(C, BO->getLHS(), MaxWidth), 1890 GetExprRange(C, BO->getRHS(), MaxWidth)); 1891 1892 // Left shift gets black-listed based on a judgement call. 1893 case BinaryOperator::Shl: 1894 case BinaryOperator::ShlAssign: 1895 return IntRange::forType(C, E->getType()); 1896 1897 // Right shift by a constant can narrow its left argument. 1898 case BinaryOperator::Shr: 1899 case BinaryOperator::ShrAssign: { 1900 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 1901 1902 // If the shift amount is a positive constant, drop the width by 1903 // that much. 1904 llvm::APSInt shift; 1905 if (BO->getRHS()->isIntegerConstantExpr(shift, C) && 1906 shift.isNonNegative()) { 1907 unsigned zext = shift.getZExtValue(); 1908 if (zext >= L.Width) 1909 L.Width = (L.NonNegative ? 0 : 1); 1910 else 1911 L.Width -= zext; 1912 } 1913 1914 return L; 1915 } 1916 1917 // Comma acts as its right operand. 1918 case BinaryOperator::Comma: 1919 return GetExprRange(C, BO->getRHS(), MaxWidth); 1920 1921 // Black-list pointer subtractions. 1922 case BinaryOperator::Sub: 1923 if (BO->getLHS()->getType()->isPointerType()) 1924 return IntRange::forType(C, E->getType()); 1925 // fallthrough 1926 1927 default: 1928 break; 1929 } 1930 1931 // Treat every other operator as if it were closed on the 1932 // narrowest type that encompasses both operands. 1933 IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth); 1934 IntRange R = GetExprRange(C, BO->getRHS(), MaxWidth); 1935 return IntRange::join(L, R); 1936 } 1937 1938 if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) { 1939 switch (UO->getOpcode()) { 1940 // Boolean-valued operations are white-listed. 1941 case UnaryOperator::LNot: 1942 return IntRange::forBoolType(); 1943 1944 // Operations with opaque sources are black-listed. 1945 case UnaryOperator::Deref: 1946 case UnaryOperator::AddrOf: // should be impossible 1947 case UnaryOperator::OffsetOf: 1948 return IntRange::forType(C, E->getType()); 1949 1950 default: 1951 return GetExprRange(C, UO->getSubExpr(), MaxWidth); 1952 } 1953 } 1954 1955 FieldDecl *BitField = E->getBitField(); 1956 if (BitField) { 1957 llvm::APSInt BitWidthAP = BitField->getBitWidth()->EvaluateAsInt(C); 1958 unsigned BitWidth = BitWidthAP.getZExtValue(); 1959 1960 return IntRange(BitWidth, BitField->getType()->isUnsignedIntegerType()); 1961 } 1962 1963 return IntRange::forType(C, E->getType()); 1964} 1965 1966/// Checks whether the given value, which currently has the given 1967/// source semantics, has the same value when coerced through the 1968/// target semantics. 1969bool IsSameFloatAfterCast(const llvm::APFloat &value, 1970 const llvm::fltSemantics &Src, 1971 const llvm::fltSemantics &Tgt) { 1972 llvm::APFloat truncated = value; 1973 1974 bool ignored; 1975 truncated.convert(Src, llvm::APFloat::rmNearestTiesToEven, &ignored); 1976 truncated.convert(Tgt, llvm::APFloat::rmNearestTiesToEven, &ignored); 1977 1978 return truncated.bitwiseIsEqual(value); 1979} 1980 1981/// Checks whether the given value, which currently has the given 1982/// source semantics, has the same value when coerced through the 1983/// target semantics. 1984/// 1985/// The value might be a vector of floats (or a complex number). 1986bool IsSameFloatAfterCast(const APValue &value, 1987 const llvm::fltSemantics &Src, 1988 const llvm::fltSemantics &Tgt) { 1989 if (value.isFloat()) 1990 return IsSameFloatAfterCast(value.getFloat(), Src, Tgt); 1991 1992 if (value.isVector()) { 1993 for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i) 1994 if (!IsSameFloatAfterCast(value.getVectorElt(i), Src, Tgt)) 1995 return false; 1996 return true; 1997 } 1998 1999 assert(value.isComplexFloat()); 2000 return (IsSameFloatAfterCast(value.getComplexFloatReal(), Src, Tgt) && 2001 IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt)); 2002} 2003 2004} // end anonymous namespace 2005 2006/// \brief Implements -Wsign-compare. 2007/// 2008/// \param lex the left-hand expression 2009/// \param rex the right-hand expression 2010/// \param OpLoc the location of the joining operator 2011/// \param BinOpc binary opcode or 0 2012void Sema::CheckSignCompare(Expr *lex, Expr *rex, SourceLocation OpLoc, 2013 const BinaryOperator::Opcode* BinOpc) { 2014 // Don't warn if we're in an unevaluated context. 2015 if (ExprEvalContexts.back().Context == Unevaluated) 2016 return; 2017 2018 // If either expression is value-dependent, don't warn. We'll get another 2019 // chance at instantiation time. 2020 if (lex->isValueDependent() || rex->isValueDependent()) 2021 return; 2022 2023 QualType lt = lex->getType(), rt = rex->getType(); 2024 2025 // Only warn if both operands are integral. 2026 if (!lt->isIntegerType() || !rt->isIntegerType()) 2027 return; 2028 2029 // In C, the width of a bitfield determines its type, and the 2030 // declared type only contributes the signedness. This duplicates 2031 // the work that will later be done by UsualUnaryConversions. 2032 // Eventually, this check will be reorganized in a way that avoids 2033 // this duplication. 2034 if (!getLangOptions().CPlusPlus) { 2035 QualType tmp; 2036 tmp = Context.isPromotableBitField(lex); 2037 if (!tmp.isNull()) lt = tmp; 2038 tmp = Context.isPromotableBitField(rex); 2039 if (!tmp.isNull()) rt = tmp; 2040 } 2041 2042 if (const EnumType *E = lt->getAs<EnumType>()) 2043 lt = E->getDecl()->getPromotionType(); 2044 if (const EnumType *E = rt->getAs<EnumType>()) 2045 rt = E->getDecl()->getPromotionType(); 2046 2047 // The rule is that the signed operand becomes unsigned, so isolate the 2048 // signed operand. 2049 Expr *signedOperand = lex, *unsignedOperand = rex; 2050 QualType signedType = lt, unsignedType = rt; 2051 if (lt->isSignedIntegerType()) { 2052 if (rt->isSignedIntegerType()) return; 2053 } else { 2054 if (!rt->isSignedIntegerType()) return; 2055 std::swap(signedOperand, unsignedOperand); 2056 std::swap(signedType, unsignedType); 2057 } 2058 2059 unsigned unsignedWidth = Context.getIntWidth(unsignedType); 2060 unsigned signedWidth = Context.getIntWidth(signedType); 2061 2062 // If the unsigned type is strictly smaller than the signed type, 2063 // then (1) the result type will be signed and (2) the unsigned 2064 // value will fit fully within the signed type, and thus the result 2065 // of the comparison will be exact. 2066 if (signedWidth > unsignedWidth) 2067 return; 2068 2069 // Otherwise, calculate the effective ranges. 2070 IntRange signedRange = GetExprRange(Context, signedOperand, signedWidth); 2071 IntRange unsignedRange = GetExprRange(Context, unsignedOperand, unsignedWidth); 2072 2073 // We should never be unable to prove that the unsigned operand is 2074 // non-negative. 2075 assert(unsignedRange.NonNegative && "unsigned range includes negative?"); 2076 2077 // If the signed operand is non-negative, then the signed->unsigned 2078 // conversion won't change it. 2079 if (signedRange.NonNegative) { 2080 // Emit warnings for comparisons of unsigned to integer constant 0. 2081 // always false: x < 0 (or 0 > x) 2082 // always true: x >= 0 (or 0 <= x) 2083 llvm::APSInt X; 2084 if (BinOpc && signedOperand->isIntegerConstantExpr(X, Context) && X == 0) { 2085 if (signedOperand != lex) { 2086 if (*BinOpc == BinaryOperator::LT) { 2087 Diag(OpLoc, diag::warn_lunsigned_always_true_comparison) 2088 << "< 0" << "false" 2089 << lex->getSourceRange() << rex->getSourceRange(); 2090 } 2091 else if (*BinOpc == BinaryOperator::GE) { 2092 Diag(OpLoc, diag::warn_lunsigned_always_true_comparison) 2093 << ">= 0" << "true" 2094 << lex->getSourceRange() << rex->getSourceRange(); 2095 } 2096 } 2097 else { 2098 if (*BinOpc == BinaryOperator::GT) { 2099 Diag(OpLoc, diag::warn_runsigned_always_true_comparison) 2100 << "0 >" << "false" 2101 << lex->getSourceRange() << rex->getSourceRange(); 2102 } 2103 else if (*BinOpc == BinaryOperator::LE) { 2104 Diag(OpLoc, diag::warn_runsigned_always_true_comparison) 2105 << "0 <=" << "true" 2106 << lex->getSourceRange() << rex->getSourceRange(); 2107 } 2108 } 2109 } 2110 return; 2111 } 2112 2113 // For (in)equality comparisons, if the unsigned operand is a 2114 // constant which cannot collide with a overflowed signed operand, 2115 // then reinterpreting the signed operand as unsigned will not 2116 // change the result of the comparison. 2117 if (BinOpc && 2118 (*BinOpc == BinaryOperator::EQ || *BinOpc == BinaryOperator::NE) && 2119 unsignedRange.Width < unsignedWidth) 2120 return; 2121 2122 Diag(OpLoc, BinOpc ? diag::warn_mixed_sign_comparison 2123 : diag::warn_mixed_sign_conditional) 2124 << lt << rt << lex->getSourceRange() << rex->getSourceRange(); 2125} 2126 2127/// Diagnose an implicit cast; purely a helper for CheckImplicitConversion. 2128static void DiagnoseImpCast(Sema &S, Expr *E, QualType T, unsigned diag) { 2129 S.Diag(E->getExprLoc(), diag) << E->getType() << T << E->getSourceRange(); 2130} 2131 2132/// Implements -Wconversion. 2133void Sema::CheckImplicitConversion(Expr *E, QualType T) { 2134 // Don't diagnose in unevaluated contexts. 2135 if (ExprEvalContexts.back().Context == Sema::Unevaluated) 2136 return; 2137 2138 // Don't diagnose for value-dependent expressions. 2139 if (E->isValueDependent()) 2140 return; 2141 2142 const Type *Source = Context.getCanonicalType(E->getType()).getTypePtr(); 2143 const Type *Target = Context.getCanonicalType(T).getTypePtr(); 2144 2145 // Never diagnose implicit casts to bool. 2146 if (Target->isSpecificBuiltinType(BuiltinType::Bool)) 2147 return; 2148 2149 // Strip vector types. 2150 if (isa<VectorType>(Source)) { 2151 if (!isa<VectorType>(Target)) 2152 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_vector_scalar); 2153 2154 Source = cast<VectorType>(Source)->getElementType().getTypePtr(); 2155 Target = cast<VectorType>(Target)->getElementType().getTypePtr(); 2156 } 2157 2158 // Strip complex types. 2159 if (isa<ComplexType>(Source)) { 2160 if (!isa<ComplexType>(Target)) 2161 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_complex_scalar); 2162 2163 Source = cast<ComplexType>(Source)->getElementType().getTypePtr(); 2164 Target = cast<ComplexType>(Target)->getElementType().getTypePtr(); 2165 } 2166 2167 const BuiltinType *SourceBT = dyn_cast<BuiltinType>(Source); 2168 const BuiltinType *TargetBT = dyn_cast<BuiltinType>(Target); 2169 2170 // If the source is floating point... 2171 if (SourceBT && SourceBT->isFloatingPoint()) { 2172 // ...and the target is floating point... 2173 if (TargetBT && TargetBT->isFloatingPoint()) { 2174 // ...then warn if we're dropping FP rank. 2175 2176 // Builtin FP kinds are ordered by increasing FP rank. 2177 if (SourceBT->getKind() > TargetBT->getKind()) { 2178 // Don't warn about float constants that are precisely 2179 // representable in the target type. 2180 Expr::EvalResult result; 2181 if (E->Evaluate(result, Context)) { 2182 // Value might be a float, a float vector, or a float complex. 2183 if (IsSameFloatAfterCast(result.Val, 2184 Context.getFloatTypeSemantics(QualType(TargetBT, 0)), 2185 Context.getFloatTypeSemantics(QualType(SourceBT, 0)))) 2186 return; 2187 } 2188 2189 DiagnoseImpCast(*this, E, T, diag::warn_impcast_float_precision); 2190 } 2191 return; 2192 } 2193 2194 // If the target is integral, always warn. 2195 if ((TargetBT && TargetBT->isInteger())) 2196 // TODO: don't warn for integer values? 2197 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_float_integer); 2198 2199 return; 2200 } 2201 2202 if (!Source->isIntegerType() || !Target->isIntegerType()) 2203 return; 2204 2205 IntRange SourceRange = GetExprRange(Context, E, Context.getIntWidth(E->getType())); 2206 IntRange TargetRange = IntRange::forCanonicalType(Context, Target); 2207 2208 // FIXME: also signed<->unsigned? 2209 2210 if (SourceRange.Width > TargetRange.Width) { 2211 // People want to build with -Wshorten-64-to-32 and not -Wconversion 2212 // and by god we'll let them. 2213 if (SourceRange.Width == 64 && TargetRange.Width == 32) 2214 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_integer_64_32); 2215 return DiagnoseImpCast(*this, E, T, diag::warn_impcast_integer_precision); 2216 } 2217 2218 return; 2219} 2220 2221/// CheckParmsForFunctionDef - Check that the parameters of the given 2222/// function are appropriate for the definition of a function. This 2223/// takes care of any checks that cannot be performed on the 2224/// declaration itself, e.g., that the types of each of the function 2225/// parameters are complete. 2226bool Sema::CheckParmsForFunctionDef(FunctionDecl *FD) { 2227 bool HasInvalidParm = false; 2228 for (unsigned p = 0, NumParams = FD->getNumParams(); p < NumParams; ++p) { 2229 ParmVarDecl *Param = FD->getParamDecl(p); 2230 2231 // C99 6.7.5.3p4: the parameters in a parameter type list in a 2232 // function declarator that is part of a function definition of 2233 // that function shall not have incomplete type. 2234 // 2235 // This is also C++ [dcl.fct]p6. 2236 if (!Param->isInvalidDecl() && 2237 RequireCompleteType(Param->getLocation(), Param->getType(), 2238 diag::err_typecheck_decl_incomplete_type)) { 2239 Param->setInvalidDecl(); 2240 HasInvalidParm = true; 2241 } 2242 2243 // C99 6.9.1p5: If the declarator includes a parameter type list, the 2244 // declaration of each parameter shall include an identifier. 2245 if (Param->getIdentifier() == 0 && 2246 !Param->isImplicit() && 2247 !getLangOptions().CPlusPlus) 2248 Diag(Param->getLocation(), diag::err_parameter_name_omitted); 2249 2250 // C99 6.7.5.3p12: 2251 // If the function declarator is not part of a definition of that 2252 // function, parameters may have incomplete type and may use the [*] 2253 // notation in their sequences of declarator specifiers to specify 2254 // variable length array types. 2255 QualType PType = Param->getOriginalType(); 2256 if (const ArrayType *AT = Context.getAsArrayType(PType)) { 2257 if (AT->getSizeModifier() == ArrayType::Star) { 2258 // FIXME: This diagnosic should point the the '[*]' if source-location 2259 // information is added for it. 2260 Diag(Param->getLocation(), diag::err_array_star_in_function_definition); 2261 } 2262 } 2263 2264 if (getLangOptions().CPlusPlus) 2265 if (const RecordType *RT = Param->getType()->getAs<RecordType>()) 2266 FinalizeVarWithDestructor(Param, RT); 2267 } 2268 2269 return HasInvalidParm; 2270} 2271