SemaChecking.cpp revision f83c85f32383f4bf9af679ebd1ff595c5164a20f
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements extra semantic analysis beyond what is enforced 11// by the C type system. 12// 13//===----------------------------------------------------------------------===// 14 15#include "Sema.h" 16#include "clang/AST/ASTContext.h" 17#include "clang/AST/DeclObjC.h" 18#include "clang/AST/ExprCXX.h" 19#include "clang/AST/ExprObjC.h" 20#include "clang/Lex/LiteralSupport.h" 21#include "clang/Lex/Preprocessor.h" 22using namespace clang; 23 24/// getLocationOfStringLiteralByte - Return a source location that points to the 25/// specified byte of the specified string literal. 26/// 27/// Strings are amazingly complex. They can be formed from multiple tokens and 28/// can have escape sequences in them in addition to the usual trigraph and 29/// escaped newline business. This routine handles this complexity. 30/// 31SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, 32 unsigned ByteNo) const { 33 assert(!SL->isWide() && "This doesn't work for wide strings yet"); 34 35 // Loop over all of the tokens in this string until we find the one that 36 // contains the byte we're looking for. 37 unsigned TokNo = 0; 38 while (1) { 39 assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); 40 SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); 41 42 // Get the spelling of the string so that we can get the data that makes up 43 // the string literal, not the identifier for the macro it is potentially 44 // expanded through. 45 SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); 46 47 // Re-lex the token to get its length and original spelling. 48 std::pair<FileID, unsigned> LocInfo = 49 SourceMgr.getDecomposedLoc(StrTokSpellingLoc); 50 std::pair<const char *,const char *> Buffer = 51 SourceMgr.getBufferData(LocInfo.first); 52 const char *StrData = Buffer.first+LocInfo.second; 53 54 // Create a langops struct and enable trigraphs. This is sufficient for 55 // relexing tokens. 56 LangOptions LangOpts; 57 LangOpts.Trigraphs = true; 58 59 // Create a lexer starting at the beginning of this token. 60 Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData, 61 Buffer.second); 62 Token TheTok; 63 TheLexer.LexFromRawLexer(TheTok); 64 65 // Use the StringLiteralParser to compute the length of the string in bytes. 66 StringLiteralParser SLP(&TheTok, 1, PP); 67 unsigned TokNumBytes = SLP.GetStringLength(); 68 69 // If the byte is in this token, return the location of the byte. 70 if (ByteNo < TokNumBytes || 71 (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { 72 unsigned Offset = 73 StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP); 74 75 // Now that we know the offset of the token in the spelling, use the 76 // preprocessor to get the offset in the original source. 77 return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); 78 } 79 80 // Move to the next string token. 81 ++TokNo; 82 ByteNo -= TokNumBytes; 83 } 84} 85 86 87/// CheckFunctionCall - Check a direct function call for various correctness 88/// and safety properties not strictly enforced by the C type system. 89Action::OwningExprResult 90Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { 91 OwningExprResult TheCallResult(Owned(TheCall)); 92 // Get the IdentifierInfo* for the called function. 93 IdentifierInfo *FnInfo = FDecl->getIdentifier(); 94 95 // None of the checks below are needed for functions that don't have 96 // simple names (e.g., C++ conversion functions). 97 if (!FnInfo) 98 return move(TheCallResult); 99 100 switch (FDecl->getBuiltinID(Context)) { 101 case Builtin::BI__builtin___CFStringMakeConstantString: 102 assert(TheCall->getNumArgs() == 1 && 103 "Wrong # arguments to builtin CFStringMakeConstantString"); 104 if (CheckObjCString(TheCall->getArg(0))) 105 return ExprError(); 106 return move(TheCallResult); 107 case Builtin::BI__builtin_stdarg_start: 108 case Builtin::BI__builtin_va_start: 109 if (SemaBuiltinVAStart(TheCall)) 110 return ExprError(); 111 return move(TheCallResult); 112 case Builtin::BI__builtin_isgreater: 113 case Builtin::BI__builtin_isgreaterequal: 114 case Builtin::BI__builtin_isless: 115 case Builtin::BI__builtin_islessequal: 116 case Builtin::BI__builtin_islessgreater: 117 case Builtin::BI__builtin_isunordered: 118 if (SemaBuiltinUnorderedCompare(TheCall)) 119 return ExprError(); 120 return move(TheCallResult); 121 case Builtin::BI__builtin_return_address: 122 case Builtin::BI__builtin_frame_address: 123 if (SemaBuiltinStackAddress(TheCall)) 124 return ExprError(); 125 return move(TheCallResult); 126 case Builtin::BI__builtin_shufflevector: 127 return SemaBuiltinShuffleVector(TheCall); 128 // TheCall will be freed by the smart pointer here, but that's fine, since 129 // SemaBuiltinShuffleVector guts it, but then doesn't release it. 130 case Builtin::BI__builtin_prefetch: 131 if (SemaBuiltinPrefetch(TheCall)) 132 return ExprError(); 133 return move(TheCallResult); 134 case Builtin::BI__builtin_object_size: 135 if (SemaBuiltinObjectSize(TheCall)) 136 return ExprError(); 137 return move(TheCallResult); 138 case Builtin::BI__builtin_longjmp: 139 if (SemaBuiltinLongjmp(TheCall)) 140 return ExprError(); 141 return move(TheCallResult); 142 case Builtin::BI__sync_fetch_and_add: 143 case Builtin::BI__sync_fetch_and_sub: 144 case Builtin::BI__sync_fetch_and_or: 145 case Builtin::BI__sync_fetch_and_and: 146 case Builtin::BI__sync_fetch_and_xor: 147 case Builtin::BI__sync_fetch_and_nand: 148 case Builtin::BI__sync_add_and_fetch: 149 case Builtin::BI__sync_sub_and_fetch: 150 case Builtin::BI__sync_and_and_fetch: 151 case Builtin::BI__sync_or_and_fetch: 152 case Builtin::BI__sync_xor_and_fetch: 153 case Builtin::BI__sync_nand_and_fetch: 154 case Builtin::BI__sync_val_compare_and_swap: 155 case Builtin::BI__sync_bool_compare_and_swap: 156 case Builtin::BI__sync_lock_test_and_set: 157 case Builtin::BI__sync_lock_release: 158 if (SemaBuiltinAtomicOverloaded(TheCall)) 159 return ExprError(); 160 return move(TheCallResult); 161 } 162 163 // FIXME: This mechanism should be abstracted to be less fragile and 164 // more efficient. For example, just map function ids to custom 165 // handlers. 166 167 // Printf checking. 168 if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) { 169 if (Format->getType() == "printf") { 170 bool HasVAListArg = Format->getFirstArg() == 0; 171 if (!HasVAListArg) { 172 if (const FunctionProtoType *Proto 173 = FDecl->getType()->getAsFunctionProtoType()) 174 HasVAListArg = !Proto->isVariadic(); 175 } 176 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 177 HasVAListArg ? 0 : Format->getFirstArg() - 1); 178 } 179 } 180 181 return move(TheCallResult); 182} 183 184Action::OwningExprResult 185Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { 186 187 OwningExprResult TheCallResult(Owned(TheCall)); 188 // Printf checking. 189 const FormatAttr *Format = NDecl->getAttr<FormatAttr>(); 190 if (!Format) 191 return move(TheCallResult); 192 const VarDecl *V = dyn_cast<VarDecl>(NDecl); 193 if (!V) 194 return move(TheCallResult); 195 QualType Ty = V->getType(); 196 if (!Ty->isBlockPointerType()) 197 return move(TheCallResult); 198 if (Format->getType() == "printf") { 199 bool HasVAListArg = Format->getFirstArg() == 0; 200 if (!HasVAListArg) { 201 const FunctionType *FT = 202 Ty->getAsBlockPointerType()->getPointeeType()->getAsFunctionType(); 203 if (const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FT)) 204 HasVAListArg = !Proto->isVariadic(); 205 } 206 CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1, 207 HasVAListArg ? 0 : Format->getFirstArg() - 1); 208 } 209 return move(TheCallResult); 210} 211 212/// SemaBuiltinAtomicOverloaded - We have a call to a function like 213/// __sync_fetch_and_add, which is an overloaded function based on the pointer 214/// type of its first argument. The main ActOnCallExpr routines have already 215/// promoted the types of arguments because all of these calls are prototyped as 216/// void(...). 217/// 218/// This function goes through and does final semantic checking for these 219/// builtins, 220bool Sema::SemaBuiltinAtomicOverloaded(CallExpr *TheCall) { 221 DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts()); 222 FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl()); 223 224 // Ensure that we have at least one argument to do type inference from. 225 if (TheCall->getNumArgs() < 1) 226 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 227 << 0 << TheCall->getCallee()->getSourceRange(); 228 229 // Inspect the first argument of the atomic builtin. This should always be 230 // a pointer type, whose element is an integral scalar or pointer type. 231 // Because it is a pointer type, we don't have to worry about any implicit 232 // casts here. 233 Expr *FirstArg = TheCall->getArg(0); 234 if (!FirstArg->getType()->isPointerType()) 235 return Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer) 236 << FirstArg->getType() << FirstArg->getSourceRange(); 237 238 QualType ValType = FirstArg->getType()->getAsPointerType()->getPointeeType(); 239 if (!ValType->isIntegerType() && !ValType->isPointerType() && 240 !ValType->isBlockPointerType()) 241 return Diag(DRE->getLocStart(), 242 diag::err_atomic_builtin_must_be_pointer_intptr) 243 << FirstArg->getType() << FirstArg->getSourceRange(); 244 245 // We need to figure out which concrete builtin this maps onto. For example, 246 // __sync_fetch_and_add with a 2 byte object turns into 247 // __sync_fetch_and_add_2. 248#define BUILTIN_ROW(x) \ 249 { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \ 250 Builtin::BI##x##_8, Builtin::BI##x##_16 } 251 252 static const unsigned BuiltinIndices[][5] = { 253 BUILTIN_ROW(__sync_fetch_and_add), 254 BUILTIN_ROW(__sync_fetch_and_sub), 255 BUILTIN_ROW(__sync_fetch_and_or), 256 BUILTIN_ROW(__sync_fetch_and_and), 257 BUILTIN_ROW(__sync_fetch_and_xor), 258 BUILTIN_ROW(__sync_fetch_and_nand), 259 260 BUILTIN_ROW(__sync_add_and_fetch), 261 BUILTIN_ROW(__sync_sub_and_fetch), 262 BUILTIN_ROW(__sync_and_and_fetch), 263 BUILTIN_ROW(__sync_or_and_fetch), 264 BUILTIN_ROW(__sync_xor_and_fetch), 265 BUILTIN_ROW(__sync_nand_and_fetch), 266 267 BUILTIN_ROW(__sync_val_compare_and_swap), 268 BUILTIN_ROW(__sync_bool_compare_and_swap), 269 BUILTIN_ROW(__sync_lock_test_and_set), 270 BUILTIN_ROW(__sync_lock_release) 271 }; 272#undef BUILTIN_ROW 273 274 // Determine the index of the size. 275 unsigned SizeIndex; 276 switch (Context.getTypeSize(ValType)/8) { 277 case 1: SizeIndex = 0; break; 278 case 2: SizeIndex = 1; break; 279 case 4: SizeIndex = 2; break; 280 case 8: SizeIndex = 3; break; 281 case 16: SizeIndex = 4; break; 282 default: 283 return Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size) 284 << FirstArg->getType() << FirstArg->getSourceRange(); 285 } 286 287 // Each of these builtins has one pointer argument, followed by some number of 288 // values (0, 1 or 2) followed by a potentially empty varags list of stuff 289 // that we ignore. Find out which row of BuiltinIndices to read from as well 290 // as the number of fixed args. 291 unsigned BuiltinID = FDecl->getBuiltinID(Context); 292 unsigned BuiltinIndex, NumFixed = 1; 293 switch (BuiltinID) { 294 default: assert(0 && "Unknown overloaded atomic builtin!"); 295 case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break; 296 case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break; 297 case Builtin::BI__sync_fetch_and_or: BuiltinIndex = 2; break; 298 case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break; 299 case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break; 300 case Builtin::BI__sync_fetch_and_nand:BuiltinIndex = 5; break; 301 302 case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 6; break; 303 case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 7; break; 304 case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 8; break; 305 case Builtin::BI__sync_or_and_fetch: BuiltinIndex = 9; break; 306 case Builtin::BI__sync_xor_and_fetch: BuiltinIndex =10; break; 307 case Builtin::BI__sync_nand_and_fetch:BuiltinIndex =11; break; 308 309 case Builtin::BI__sync_val_compare_and_swap: 310 BuiltinIndex = 12; 311 NumFixed = 2; 312 break; 313 case Builtin::BI__sync_bool_compare_and_swap: 314 BuiltinIndex = 13; 315 NumFixed = 2; 316 break; 317 case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 14; break; 318 case Builtin::BI__sync_lock_release: 319 BuiltinIndex = 15; 320 NumFixed = 0; 321 break; 322 } 323 324 // Now that we know how many fixed arguments we expect, first check that we 325 // have at least that many. 326 if (TheCall->getNumArgs() < 1+NumFixed) 327 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 328 << 0 << TheCall->getCallee()->getSourceRange(); 329 330 331 // Get the decl for the concrete builtin from this, we can tell what the 332 // concrete integer type we should convert to is. 333 unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex]; 334 const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID); 335 IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName); 336 FunctionDecl *NewBuiltinDecl = 337 cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID, 338 TUScope, false, DRE->getLocStart())); 339 const FunctionProtoType *BuiltinFT = 340 NewBuiltinDecl->getType()->getAsFunctionProtoType(); 341 ValType = BuiltinFT->getArgType(0)->getAsPointerType()->getPointeeType(); 342 343 // If the first type needs to be converted (e.g. void** -> int*), do it now. 344 if (BuiltinFT->getArgType(0) != FirstArg->getType()) { 345 ImpCastExprToType(FirstArg, BuiltinFT->getArgType(0), false); 346 TheCall->setArg(0, FirstArg); 347 } 348 349 // Next, walk the valid ones promoting to the right type. 350 for (unsigned i = 0; i != NumFixed; ++i) { 351 Expr *Arg = TheCall->getArg(i+1); 352 353 // If the argument is an implicit cast, then there was a promotion due to 354 // "...", just remove it now. 355 if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) { 356 Arg = ICE->getSubExpr(); 357 ICE->setSubExpr(0); 358 ICE->Destroy(Context); 359 TheCall->setArg(i+1, Arg); 360 } 361 362 // GCC does an implicit conversion to the pointer or integer ValType. This 363 // can fail in some cases (1i -> int**), check for this error case now. 364 if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg)) 365 return true; 366 367 // Okay, we have something that *can* be converted to the right type. Check 368 // to see if there is a potentially weird extension going on here. This can 369 // happen when you do an atomic operation on something like an char* and 370 // pass in 42. The 42 gets converted to char. This is even more strange 371 // for things like 45.123 -> char, etc. 372 // FIXME: Do this check. 373 ImpCastExprToType(Arg, ValType, false); 374 TheCall->setArg(i+1, Arg); 375 } 376 377 // Switch the DeclRefExpr to refer to the new decl. 378 DRE->setDecl(NewBuiltinDecl); 379 DRE->setType(NewBuiltinDecl->getType()); 380 381 // Set the callee in the CallExpr. 382 // FIXME: This leaks the original parens and implicit casts. 383 Expr *PromotedCall = DRE; 384 UsualUnaryConversions(PromotedCall); 385 TheCall->setCallee(PromotedCall); 386 387 388 // Change the result type of the call to match the result type of the decl. 389 TheCall->setType(NewBuiltinDecl->getResultType()); 390 return false; 391} 392 393 394/// CheckObjCString - Checks that the argument to the builtin 395/// CFString constructor is correct 396/// FIXME: GCC currently emits the following warning: 397/// "warning: input conversion stopped due to an input byte that does not 398/// belong to the input codeset UTF-8" 399/// Note: It might also make sense to do the UTF-16 conversion here (would 400/// simplify the backend). 401bool Sema::CheckObjCString(Expr *Arg) { 402 Arg = Arg->IgnoreParenCasts(); 403 StringLiteral *Literal = dyn_cast<StringLiteral>(Arg); 404 405 if (!Literal || Literal->isWide()) { 406 Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant) 407 << Arg->getSourceRange(); 408 return true; 409 } 410 411 const char *Data = Literal->getStrData(); 412 unsigned Length = Literal->getByteLength(); 413 414 for (unsigned i = 0; i < Length; ++i) { 415 if (!Data[i]) { 416 Diag(getLocationOfStringLiteralByte(Literal, i), 417 diag::warn_cfstring_literal_contains_nul_character) 418 << Arg->getSourceRange(); 419 break; 420 } 421 } 422 423 return false; 424} 425 426/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity. 427/// Emit an error and return true on failure, return false on success. 428bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) { 429 Expr *Fn = TheCall->getCallee(); 430 if (TheCall->getNumArgs() > 2) { 431 Diag(TheCall->getArg(2)->getLocStart(), 432 diag::err_typecheck_call_too_many_args) 433 << 0 /*function call*/ << Fn->getSourceRange() 434 << SourceRange(TheCall->getArg(2)->getLocStart(), 435 (*(TheCall->arg_end()-1))->getLocEnd()); 436 return true; 437 } 438 439 if (TheCall->getNumArgs() < 2) { 440 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 441 << 0 /*function call*/; 442 } 443 444 // Determine whether the current function is variadic or not. 445 bool isVariadic; 446 if (CurBlock) 447 isVariadic = CurBlock->isVariadic; 448 else if (getCurFunctionDecl()) { 449 if (FunctionProtoType* FTP = 450 dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType())) 451 isVariadic = FTP->isVariadic(); 452 else 453 isVariadic = false; 454 } else { 455 isVariadic = getCurMethodDecl()->isVariadic(); 456 } 457 458 if (!isVariadic) { 459 Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function); 460 return true; 461 } 462 463 // Verify that the second argument to the builtin is the last argument of the 464 // current function or method. 465 bool SecondArgIsLastNamedArgument = false; 466 const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts(); 467 468 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) { 469 if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) { 470 // FIXME: This isn't correct for methods (results in bogus warning). 471 // Get the last formal in the current function. 472 const ParmVarDecl *LastArg; 473 if (CurBlock) 474 LastArg = *(CurBlock->TheDecl->param_end()-1); 475 else if (FunctionDecl *FD = getCurFunctionDecl()) 476 LastArg = *(FD->param_end()-1); 477 else 478 LastArg = *(getCurMethodDecl()->param_end()-1); 479 SecondArgIsLastNamedArgument = PV == LastArg; 480 } 481 } 482 483 if (!SecondArgIsLastNamedArgument) 484 Diag(TheCall->getArg(1)->getLocStart(), 485 diag::warn_second_parameter_of_va_start_not_last_named_argument); 486 return false; 487} 488 489/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and 490/// friends. This is declared to take (...), so we have to check everything. 491bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { 492 if (TheCall->getNumArgs() < 2) 493 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args) 494 << 0 /*function call*/; 495 if (TheCall->getNumArgs() > 2) 496 return Diag(TheCall->getArg(2)->getLocStart(), 497 diag::err_typecheck_call_too_many_args) 498 << 0 /*function call*/ 499 << SourceRange(TheCall->getArg(2)->getLocStart(), 500 (*(TheCall->arg_end()-1))->getLocEnd()); 501 502 Expr *OrigArg0 = TheCall->getArg(0); 503 Expr *OrigArg1 = TheCall->getArg(1); 504 505 // Do standard promotions between the two arguments, returning their common 506 // type. 507 QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false); 508 509 // Make sure any conversions are pushed back into the call; this is 510 // type safe since unordered compare builtins are declared as "_Bool 511 // foo(...)". 512 TheCall->setArg(0, OrigArg0); 513 TheCall->setArg(1, OrigArg1); 514 515 // If the common type isn't a real floating type, then the arguments were 516 // invalid for this operation. 517 if (!Res->isRealFloatingType()) 518 return Diag(OrigArg0->getLocStart(), 519 diag::err_typecheck_call_invalid_ordered_compare) 520 << OrigArg0->getType() << OrigArg1->getType() 521 << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()); 522 523 return false; 524} 525 526bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) { 527 // The signature for these builtins is exact; the only thing we need 528 // to check is that the argument is a constant. 529 SourceLocation Loc; 530 if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) 531 return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange(); 532 533 return false; 534} 535 536/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. 537// This is declared to take (...), so we have to check everything. 538Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { 539 if (TheCall->getNumArgs() < 3) 540 return ExprError(Diag(TheCall->getLocEnd(), 541 diag::err_typecheck_call_too_few_args) 542 << 0 /*function call*/ << TheCall->getSourceRange()); 543 544 QualType FAType = TheCall->getArg(0)->getType(); 545 QualType SAType = TheCall->getArg(1)->getType(); 546 547 if (!FAType->isVectorType() || !SAType->isVectorType()) { 548 Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector) 549 << SourceRange(TheCall->getArg(0)->getLocStart(), 550 TheCall->getArg(1)->getLocEnd()); 551 return ExprError(); 552 } 553 554 if (Context.getCanonicalType(FAType).getUnqualifiedType() != 555 Context.getCanonicalType(SAType).getUnqualifiedType()) { 556 Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector) 557 << SourceRange(TheCall->getArg(0)->getLocStart(), 558 TheCall->getArg(1)->getLocEnd()); 559 return ExprError(); 560 } 561 562 unsigned numElements = FAType->getAsVectorType()->getNumElements(); 563 if (TheCall->getNumArgs() != numElements+2) { 564 if (TheCall->getNumArgs() < numElements+2) 565 return ExprError(Diag(TheCall->getLocEnd(), 566 diag::err_typecheck_call_too_few_args) 567 << 0 /*function call*/ << TheCall->getSourceRange()); 568 return ExprError(Diag(TheCall->getLocEnd(), 569 diag::err_typecheck_call_too_many_args) 570 << 0 /*function call*/ << TheCall->getSourceRange()); 571 } 572 573 for (unsigned i = 2; i < TheCall->getNumArgs(); i++) { 574 llvm::APSInt Result(32); 575 if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) 576 return ExprError(Diag(TheCall->getLocStart(), 577 diag::err_shufflevector_nonconstant_argument) 578 << TheCall->getArg(i)->getSourceRange()); 579 580 if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) 581 return ExprError(Diag(TheCall->getLocStart(), 582 diag::err_shufflevector_argument_too_large) 583 << TheCall->getArg(i)->getSourceRange()); 584 } 585 586 llvm::SmallVector<Expr*, 32> exprs; 587 588 for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) { 589 exprs.push_back(TheCall->getArg(i)); 590 TheCall->setArg(i, 0); 591 } 592 593 return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2, 594 FAType, 595 TheCall->getCallee()->getLocStart(), 596 TheCall->getRParenLoc())); 597} 598 599/// SemaBuiltinPrefetch - Handle __builtin_prefetch. 600// This is declared to take (const void*, ...) and can take two 601// optional constant int args. 602bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { 603 unsigned NumArgs = TheCall->getNumArgs(); 604 605 if (NumArgs > 3) 606 return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args) 607 << 0 /*function call*/ << TheCall->getSourceRange(); 608 609 // Argument 0 is checked for us and the remaining arguments must be 610 // constant integers. 611 for (unsigned i = 1; i != NumArgs; ++i) { 612 Expr *Arg = TheCall->getArg(i); 613 QualType RWType = Arg->getType(); 614 615 const BuiltinType *BT = RWType->getAsBuiltinType(); 616 llvm::APSInt Result; 617 if (!BT || BT->getKind() != BuiltinType::Int || 618 !Arg->isIntegerConstantExpr(Result, Context)) 619 return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument) 620 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 621 622 // FIXME: gcc issues a warning and rewrites these to 0. These 623 // seems especially odd for the third argument since the default 624 // is 3. 625 if (i == 1) { 626 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1) 627 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 628 << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 629 } else { 630 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) 631 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 632 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 633 } 634 } 635 636 return false; 637} 638 639/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr, 640/// int type). This simply type checks that type is one of the defined 641/// constants (0-3). 642bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { 643 Expr *Arg = TheCall->getArg(1); 644 QualType ArgType = Arg->getType(); 645 const BuiltinType *BT = ArgType->getAsBuiltinType(); 646 llvm::APSInt Result(32); 647 if (!BT || BT->getKind() != BuiltinType::Int || 648 !Arg->isIntegerConstantExpr(Result, Context)) { 649 return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument) 650 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 651 } 652 653 if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) { 654 return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) 655 << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 656 } 657 658 return false; 659} 660 661/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). 662/// This checks that val is a constant 1. 663bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { 664 Expr *Arg = TheCall->getArg(1); 665 llvm::APSInt Result(32); 666 if (!Arg->isIntegerConstantExpr(Result, Context) || Result != 1) 667 return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val) 668 << SourceRange(Arg->getLocStart(), Arg->getLocEnd()); 669 670 return false; 671} 672 673// Handle i > 1 ? "x" : "y", recursivelly 674bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, 675 bool HasVAListArg, 676 unsigned format_idx, unsigned firstDataArg) { 677 678 switch (E->getStmtClass()) { 679 case Stmt::ConditionalOperatorClass: { 680 const ConditionalOperator *C = cast<ConditionalOperator>(E); 681 return SemaCheckStringLiteral(C->getLHS(), TheCall, 682 HasVAListArg, format_idx, firstDataArg) 683 && SemaCheckStringLiteral(C->getRHS(), TheCall, 684 HasVAListArg, format_idx, firstDataArg); 685 } 686 687 case Stmt::ImplicitCastExprClass: { 688 const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E); 689 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 690 format_idx, firstDataArg); 691 } 692 693 case Stmt::ParenExprClass: { 694 const ParenExpr *Expr = cast<ParenExpr>(E); 695 return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg, 696 format_idx, firstDataArg); 697 } 698 699 case Stmt::DeclRefExprClass: { 700 const DeclRefExpr *DR = cast<DeclRefExpr>(E); 701 702 // As an exception, do not flag errors for variables binding to 703 // const string literals. 704 if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) { 705 bool isConstant = false; 706 QualType T = DR->getType(); 707 708 if (const ArrayType *AT = Context.getAsArrayType(T)) { 709 isConstant = AT->getElementType().isConstant(Context); 710 } 711 else if (const PointerType *PT = T->getAsPointerType()) { 712 isConstant = T.isConstant(Context) && 713 PT->getPointeeType().isConstant(Context); 714 } 715 716 if (isConstant) { 717 const VarDecl *Def = 0; 718 if (const Expr *Init = VD->getDefinition(Def)) 719 return SemaCheckStringLiteral(Init, TheCall, 720 HasVAListArg, format_idx, firstDataArg); 721 } 722 } 723 724 return false; 725 } 726 727 case Stmt::ObjCStringLiteralClass: 728 case Stmt::StringLiteralClass: { 729 const StringLiteral *StrE = NULL; 730 731 if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E)) 732 StrE = ObjCFExpr->getString(); 733 else 734 StrE = cast<StringLiteral>(E); 735 736 if (StrE) { 737 CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx, 738 firstDataArg); 739 return true; 740 } 741 742 return false; 743 } 744 745 default: 746 return false; 747 } 748} 749 750 751/// CheckPrintfArguments - Check calls to printf (and similar functions) for 752/// correct use of format strings. 753/// 754/// HasVAListArg - A predicate indicating whether the printf-like 755/// function is passed an explicit va_arg argument (e.g., vprintf) 756/// 757/// format_idx - The index into Args for the format string. 758/// 759/// Improper format strings to functions in the printf family can be 760/// the source of bizarre bugs and very serious security holes. A 761/// good source of information is available in the following paper 762/// (which includes additional references): 763/// 764/// FormatGuard: Automatic Protection From printf Format String 765/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. 766/// 767/// Functionality implemented: 768/// 769/// We can statically check the following properties for string 770/// literal format strings for non v.*printf functions (where the 771/// arguments are passed directly): 772// 773/// (1) Are the number of format conversions equal to the number of 774/// data arguments? 775/// 776/// (2) Does each format conversion correctly match the type of the 777/// corresponding data argument? (TODO) 778/// 779/// Moreover, for all printf functions we can: 780/// 781/// (3) Check for a missing format string (when not caught by type checking). 782/// 783/// (4) Check for no-operation flags; e.g. using "#" with format 784/// conversion 'c' (TODO) 785/// 786/// (5) Check the use of '%n', a major source of security holes. 787/// 788/// (6) Check for malformed format conversions that don't specify anything. 789/// 790/// (7) Check for empty format strings. e.g: printf(""); 791/// 792/// (8) Check that the format string is a wide literal. 793/// 794/// (9) Also check the arguments of functions with the __format__ attribute. 795/// (TODO). 796/// 797/// All of these checks can be done by parsing the format string. 798/// 799/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). 800void 801Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, 802 unsigned format_idx, unsigned firstDataArg) { 803 const Expr *Fn = TheCall->getCallee(); 804 805 // CHECK: printf-like function is called with no format string. 806 if (format_idx >= TheCall->getNumArgs()) { 807 Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string) 808 << Fn->getSourceRange(); 809 return; 810 } 811 812 const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts(); 813 814 // CHECK: format string is not a string literal. 815 // 816 // Dynamically generated format strings are difficult to 817 // automatically vet at compile time. Requiring that format strings 818 // are string literals: (1) permits the checking of format strings by 819 // the compiler and thereby (2) can practically remove the source of 820 // many format string exploits. 821 822 // Format string can be either ObjC string (e.g. @"%d") or 823 // C string (e.g. "%d") 824 // ObjC string uses the same format specifiers as C string, so we can use 825 // the same format string checking logic for both ObjC and C strings. 826 if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx, 827 firstDataArg)) 828 return; // Literal format string found, check done! 829 830 // For vprintf* functions (i.e., HasVAListArg==true), we add a 831 // special check to see if the format string is a function parameter 832 // of the function calling the printf function. If the function 833 // has an attribute indicating it is a printf-like function, then we 834 // should suppress warnings concerning non-literals being used in a call 835 // to a vprintf function. For example: 836 // 837 // void 838 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) { 839 // va_list ap; 840 // va_start(ap, fmt); 841 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt". 842 // ... 843 // 844 // 845 // FIXME: We don't have full attribute support yet, so just check to see 846 // if the argument is a DeclRefExpr that references a parameter. We'll 847 // add proper support for checking the attribute later. 848 if (HasVAListArg) 849 if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(OrigFormatExpr)) 850 if (isa<ParmVarDecl>(DR->getDecl())) 851 return; 852 853 // If there are no arguments specified, warn with -Wformat-security, otherwise 854 // warn only with -Wformat-nonliteral. 855 if (TheCall->getNumArgs() == format_idx+1) 856 Diag(TheCall->getArg(format_idx)->getLocStart(), 857 diag::warn_printf_nonliteral_noargs) 858 << OrigFormatExpr->getSourceRange(); 859 else 860 Diag(TheCall->getArg(format_idx)->getLocStart(), 861 diag::warn_printf_nonliteral) 862 << OrigFormatExpr->getSourceRange(); 863} 864 865void Sema::CheckPrintfString(const StringLiteral *FExpr, 866 const Expr *OrigFormatExpr, 867 const CallExpr *TheCall, bool HasVAListArg, 868 unsigned format_idx, unsigned firstDataArg) { 869 870 const ObjCStringLiteral *ObjCFExpr = 871 dyn_cast<ObjCStringLiteral>(OrigFormatExpr); 872 873 // CHECK: is the format string a wide literal? 874 if (FExpr->isWide()) { 875 Diag(FExpr->getLocStart(), 876 diag::warn_printf_format_string_is_wide_literal) 877 << OrigFormatExpr->getSourceRange(); 878 return; 879 } 880 881 // Str - The format string. NOTE: this is NOT null-terminated! 882 const char *Str = FExpr->getStrData(); 883 884 // CHECK: empty format string? 885 unsigned StrLen = FExpr->getByteLength(); 886 887 if (StrLen == 0) { 888 Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) 889 << OrigFormatExpr->getSourceRange(); 890 return; 891 } 892 893 // We process the format string using a binary state machine. The 894 // current state is stored in CurrentState. 895 enum { 896 state_OrdChr, 897 state_Conversion 898 } CurrentState = state_OrdChr; 899 900 // numConversions - The number of conversions seen so far. This is 901 // incremented as we traverse the format string. 902 unsigned numConversions = 0; 903 904 // numDataArgs - The number of data arguments after the format 905 // string. This can only be determined for non vprintf-like 906 // functions. For those functions, this value is 1 (the sole 907 // va_arg argument). 908 unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg; 909 910 // Inspect the format string. 911 unsigned StrIdx = 0; 912 913 // LastConversionIdx - Index within the format string where we last saw 914 // a '%' character that starts a new format conversion. 915 unsigned LastConversionIdx = 0; 916 917 for (; StrIdx < StrLen; ++StrIdx) { 918 919 // Is the number of detected conversion conversions greater than 920 // the number of matching data arguments? If so, stop. 921 if (!HasVAListArg && numConversions > numDataArgs) break; 922 923 // Handle "\0" 924 if (Str[StrIdx] == '\0') { 925 // The string returned by getStrData() is not null-terminated, 926 // so the presence of a null character is likely an error. 927 Diag(getLocationOfStringLiteralByte(FExpr, StrIdx), 928 diag::warn_printf_format_string_contains_null_char) 929 << OrigFormatExpr->getSourceRange(); 930 return; 931 } 932 933 // Ordinary characters (not processing a format conversion). 934 if (CurrentState == state_OrdChr) { 935 if (Str[StrIdx] == '%') { 936 CurrentState = state_Conversion; 937 LastConversionIdx = StrIdx; 938 } 939 continue; 940 } 941 942 // Seen '%'. Now processing a format conversion. 943 switch (Str[StrIdx]) { 944 // Handle dynamic precision or width specifier. 945 case '*': { 946 ++numConversions; 947 948 if (!HasVAListArg) { 949 if (numConversions > numDataArgs) { 950 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 951 952 if (Str[StrIdx-1] == '.') 953 Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) 954 << OrigFormatExpr->getSourceRange(); 955 else 956 Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) 957 << OrigFormatExpr->getSourceRange(); 958 959 // Don't do any more checking. We'll just emit spurious errors. 960 return; 961 } 962 963 // Perform type checking on width/precision specifier. 964 const Expr *E = TheCall->getArg(format_idx+numConversions); 965 if (const BuiltinType *BT = E->getType()->getAsBuiltinType()) 966 if (BT->getKind() == BuiltinType::Int) 967 break; 968 969 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); 970 971 if (Str[StrIdx-1] == '.') 972 Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) 973 << E->getType() << E->getSourceRange(); 974 else 975 Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) 976 << E->getType() << E->getSourceRange(); 977 978 break; 979 } 980 } 981 982 // Characters which can terminate a format conversion 983 // (e.g. "%d"). Characters that specify length modifiers or 984 // other flags are handled by the default case below. 985 // 986 // FIXME: additional checks will go into the following cases. 987 case 'i': 988 case 'd': 989 case 'o': 990 case 'u': 991 case 'x': 992 case 'X': 993 case 'D': 994 case 'O': 995 case 'U': 996 case 'e': 997 case 'E': 998 case 'f': 999 case 'F': 1000 case 'g': 1001 case 'G': 1002 case 'a': 1003 case 'A': 1004 case 'c': 1005 case 'C': 1006 case 'S': 1007 case 's': 1008 case 'p': 1009 ++numConversions; 1010 CurrentState = state_OrdChr; 1011 break; 1012 1013 // CHECK: Are we using "%n"? Issue a warning. 1014 case 'n': { 1015 ++numConversions; 1016 CurrentState = state_OrdChr; 1017 SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, 1018 LastConversionIdx); 1019 1020 Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); 1021 break; 1022 } 1023 1024 // Handle "%@" 1025 case '@': 1026 // %@ is allowed in ObjC format strings only. 1027 if(ObjCFExpr != NULL) 1028 CurrentState = state_OrdChr; 1029 else { 1030 // Issue a warning: invalid format conversion. 1031 SourceLocation Loc = 1032 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 1033 1034 Diag(Loc, diag::warn_printf_invalid_conversion) 1035 << std::string(Str+LastConversionIdx, 1036 Str+std::min(LastConversionIdx+2, StrLen)) 1037 << OrigFormatExpr->getSourceRange(); 1038 } 1039 ++numConversions; 1040 break; 1041 1042 // Handle "%%" 1043 case '%': 1044 // Sanity check: Was the first "%" character the previous one? 1045 // If not, we will assume that we have a malformed format 1046 // conversion, and that the current "%" character is the start 1047 // of a new conversion. 1048 if (StrIdx - LastConversionIdx == 1) 1049 CurrentState = state_OrdChr; 1050 else { 1051 // Issue a warning: invalid format conversion. 1052 SourceLocation Loc = 1053 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 1054 1055 Diag(Loc, diag::warn_printf_invalid_conversion) 1056 << std::string(Str+LastConversionIdx, Str+StrIdx) 1057 << OrigFormatExpr->getSourceRange(); 1058 1059 // This conversion is broken. Advance to the next format 1060 // conversion. 1061 LastConversionIdx = StrIdx; 1062 ++numConversions; 1063 } 1064 break; 1065 1066 default: 1067 // This case catches all other characters: flags, widths, etc. 1068 // We should eventually process those as well. 1069 break; 1070 } 1071 } 1072 1073 if (CurrentState == state_Conversion) { 1074 // Issue a warning: invalid format conversion. 1075 SourceLocation Loc = 1076 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 1077 1078 Diag(Loc, diag::warn_printf_invalid_conversion) 1079 << std::string(Str+LastConversionIdx, 1080 Str+std::min(LastConversionIdx+2, StrLen)) 1081 << OrigFormatExpr->getSourceRange(); 1082 return; 1083 } 1084 1085 if (!HasVAListArg) { 1086 // CHECK: Does the number of format conversions exceed the number 1087 // of data arguments? 1088 if (numConversions > numDataArgs) { 1089 SourceLocation Loc = 1090 getLocationOfStringLiteralByte(FExpr, LastConversionIdx); 1091 1092 Diag(Loc, diag::warn_printf_insufficient_data_args) 1093 << OrigFormatExpr->getSourceRange(); 1094 } 1095 // CHECK: Does the number of data arguments exceed the number of 1096 // format conversions in the format string? 1097 else if (numConversions < numDataArgs) 1098 Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), 1099 diag::warn_printf_too_many_data_args) 1100 << OrigFormatExpr->getSourceRange(); 1101 } 1102} 1103 1104//===--- CHECK: Return Address of Stack Variable --------------------------===// 1105 1106static DeclRefExpr* EvalVal(Expr *E); 1107static DeclRefExpr* EvalAddr(Expr* E); 1108 1109/// CheckReturnStackAddr - Check if a return statement returns the address 1110/// of a stack variable. 1111void 1112Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, 1113 SourceLocation ReturnLoc) { 1114 1115 // Perform checking for returned stack addresses. 1116 if (lhsType->isPointerType() || lhsType->isBlockPointerType()) { 1117 if (DeclRefExpr *DR = EvalAddr(RetValExp)) 1118 Diag(DR->getLocStart(), diag::warn_ret_stack_addr) 1119 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1120 1121 // Skip over implicit cast expressions when checking for block expressions. 1122 if (ImplicitCastExpr *IcExpr = 1123 dyn_cast_or_null<ImplicitCastExpr>(RetValExp)) 1124 RetValExp = IcExpr->getSubExpr(); 1125 1126 if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp)) 1127 if (C->hasBlockDeclRefExprs()) 1128 Diag(C->getLocStart(), diag::err_ret_local_block) 1129 << C->getSourceRange(); 1130 } 1131 // Perform checking for stack values returned by reference. 1132 else if (lhsType->isReferenceType()) { 1133 // Check for a reference to the stack 1134 if (DeclRefExpr *DR = EvalVal(RetValExp)) 1135 Diag(DR->getLocStart(), diag::warn_ret_stack_ref) 1136 << DR->getDecl()->getDeclName() << RetValExp->getSourceRange(); 1137 } 1138} 1139 1140/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that 1141/// check if the expression in a return statement evaluates to an address 1142/// to a location on the stack. The recursion is used to traverse the 1143/// AST of the return expression, with recursion backtracking when we 1144/// encounter a subexpression that (1) clearly does not lead to the address 1145/// of a stack variable or (2) is something we cannot determine leads to 1146/// the address of a stack variable based on such local checking. 1147/// 1148/// EvalAddr processes expressions that are pointers that are used as 1149/// references (and not L-values). EvalVal handles all other values. 1150/// At the base case of the recursion is a check for a DeclRefExpr* in 1151/// the refers to a stack variable. 1152/// 1153/// This implementation handles: 1154/// 1155/// * pointer-to-pointer casts 1156/// * implicit conversions from array references to pointers 1157/// * taking the address of fields 1158/// * arbitrary interplay between "&" and "*" operators 1159/// * pointer arithmetic from an address of a stack variable 1160/// * taking the address of an array element where the array is on the stack 1161static DeclRefExpr* EvalAddr(Expr *E) { 1162 // We should only be called for evaluating pointer expressions. 1163 assert((E->getType()->isPointerType() || 1164 E->getType()->isBlockPointerType() || 1165 E->getType()->isObjCQualifiedIdType()) && 1166 "EvalAddr only works on pointers"); 1167 1168 // Our "symbolic interpreter" is just a dispatch off the currently 1169 // viewed AST node. We then recursively traverse the AST by calling 1170 // EvalAddr and EvalVal appropriately. 1171 switch (E->getStmtClass()) { 1172 case Stmt::ParenExprClass: 1173 // Ignore parentheses. 1174 return EvalAddr(cast<ParenExpr>(E)->getSubExpr()); 1175 1176 case Stmt::UnaryOperatorClass: { 1177 // The only unary operator that make sense to handle here 1178 // is AddrOf. All others don't make sense as pointers. 1179 UnaryOperator *U = cast<UnaryOperator>(E); 1180 1181 if (U->getOpcode() == UnaryOperator::AddrOf) 1182 return EvalVal(U->getSubExpr()); 1183 else 1184 return NULL; 1185 } 1186 1187 case Stmt::BinaryOperatorClass: { 1188 // Handle pointer arithmetic. All other binary operators are not valid 1189 // in this context. 1190 BinaryOperator *B = cast<BinaryOperator>(E); 1191 BinaryOperator::Opcode op = B->getOpcode(); 1192 1193 if (op != BinaryOperator::Add && op != BinaryOperator::Sub) 1194 return NULL; 1195 1196 Expr *Base = B->getLHS(); 1197 1198 // Determine which argument is the real pointer base. It could be 1199 // the RHS argument instead of the LHS. 1200 if (!Base->getType()->isPointerType()) Base = B->getRHS(); 1201 1202 assert (Base->getType()->isPointerType()); 1203 return EvalAddr(Base); 1204 } 1205 1206 // For conditional operators we need to see if either the LHS or RHS are 1207 // valid DeclRefExpr*s. If one of them is valid, we return it. 1208 case Stmt::ConditionalOperatorClass: { 1209 ConditionalOperator *C = cast<ConditionalOperator>(E); 1210 1211 // Handle the GNU extension for missing LHS. 1212 if (Expr *lhsExpr = C->getLHS()) 1213 if (DeclRefExpr* LHS = EvalAddr(lhsExpr)) 1214 return LHS; 1215 1216 return EvalAddr(C->getRHS()); 1217 } 1218 1219 // For casts, we need to handle conversions from arrays to 1220 // pointer values, and pointer-to-pointer conversions. 1221 case Stmt::ImplicitCastExprClass: 1222 case Stmt::CStyleCastExprClass: 1223 case Stmt::CXXFunctionalCastExprClass: { 1224 Expr* SubExpr = cast<CastExpr>(E)->getSubExpr(); 1225 QualType T = SubExpr->getType(); 1226 1227 if (SubExpr->getType()->isPointerType() || 1228 SubExpr->getType()->isBlockPointerType() || 1229 SubExpr->getType()->isObjCQualifiedIdType()) 1230 return EvalAddr(SubExpr); 1231 else if (T->isArrayType()) 1232 return EvalVal(SubExpr); 1233 else 1234 return 0; 1235 } 1236 1237 // C++ casts. For dynamic casts, static casts, and const casts, we 1238 // are always converting from a pointer-to-pointer, so we just blow 1239 // through the cast. In the case the dynamic cast doesn't fail (and 1240 // return NULL), we take the conservative route and report cases 1241 // where we return the address of a stack variable. For Reinterpre 1242 // FIXME: The comment about is wrong; we're not always converting 1243 // from pointer to pointer. I'm guessing that this code should also 1244 // handle references to objects. 1245 case Stmt::CXXStaticCastExprClass: 1246 case Stmt::CXXDynamicCastExprClass: 1247 case Stmt::CXXConstCastExprClass: 1248 case Stmt::CXXReinterpretCastExprClass: { 1249 Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr(); 1250 if (S->getType()->isPointerType() || S->getType()->isBlockPointerType()) 1251 return EvalAddr(S); 1252 else 1253 return NULL; 1254 } 1255 1256 // Everything else: we simply don't reason about them. 1257 default: 1258 return NULL; 1259 } 1260} 1261 1262 1263/// EvalVal - This function is complements EvalAddr in the mutual recursion. 1264/// See the comments for EvalAddr for more details. 1265static DeclRefExpr* EvalVal(Expr *E) { 1266 1267 // We should only be called for evaluating non-pointer expressions, or 1268 // expressions with a pointer type that are not used as references but instead 1269 // are l-values (e.g., DeclRefExpr with a pointer type). 1270 1271 // Our "symbolic interpreter" is just a dispatch off the currently 1272 // viewed AST node. We then recursively traverse the AST by calling 1273 // EvalAddr and EvalVal appropriately. 1274 switch (E->getStmtClass()) { 1275 case Stmt::DeclRefExprClass: 1276 case Stmt::QualifiedDeclRefExprClass: { 1277 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking 1278 // at code that refers to a variable's name. We check if it has local 1279 // storage within the function, and if so, return the expression. 1280 DeclRefExpr *DR = cast<DeclRefExpr>(E); 1281 1282 if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl())) 1283 if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR; 1284 1285 return NULL; 1286 } 1287 1288 case Stmt::ParenExprClass: 1289 // Ignore parentheses. 1290 return EvalVal(cast<ParenExpr>(E)->getSubExpr()); 1291 1292 case Stmt::UnaryOperatorClass: { 1293 // The only unary operator that make sense to handle here 1294 // is Deref. All others don't resolve to a "name." This includes 1295 // handling all sorts of rvalues passed to a unary operator. 1296 UnaryOperator *U = cast<UnaryOperator>(E); 1297 1298 if (U->getOpcode() == UnaryOperator::Deref) 1299 return EvalAddr(U->getSubExpr()); 1300 1301 return NULL; 1302 } 1303 1304 case Stmt::ArraySubscriptExprClass: { 1305 // Array subscripts are potential references to data on the stack. We 1306 // retrieve the DeclRefExpr* for the array variable if it indeed 1307 // has local storage. 1308 return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase()); 1309 } 1310 1311 case Stmt::ConditionalOperatorClass: { 1312 // For conditional operators we need to see if either the LHS or RHS are 1313 // non-NULL DeclRefExpr's. If one is non-NULL, we return it. 1314 ConditionalOperator *C = cast<ConditionalOperator>(E); 1315 1316 // Handle the GNU extension for missing LHS. 1317 if (Expr *lhsExpr = C->getLHS()) 1318 if (DeclRefExpr *LHS = EvalVal(lhsExpr)) 1319 return LHS; 1320 1321 return EvalVal(C->getRHS()); 1322 } 1323 1324 // Accesses to members are potential references to data on the stack. 1325 case Stmt::MemberExprClass: { 1326 MemberExpr *M = cast<MemberExpr>(E); 1327 1328 // Check for indirect access. We only want direct field accesses. 1329 if (!M->isArrow()) 1330 return EvalVal(M->getBase()); 1331 else 1332 return NULL; 1333 } 1334 1335 // Everything else: we simply don't reason about them. 1336 default: 1337 return NULL; 1338 } 1339} 1340 1341//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===// 1342 1343/// Check for comparisons of floating point operands using != and ==. 1344/// Issue a warning if these are no self-comparisons, as they are not likely 1345/// to do what the programmer intended. 1346void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) { 1347 bool EmitWarning = true; 1348 1349 Expr* LeftExprSansParen = lex->IgnoreParens(); 1350 Expr* RightExprSansParen = rex->IgnoreParens(); 1351 1352 // Special case: check for x == x (which is OK). 1353 // Do not emit warnings for such cases. 1354 if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen)) 1355 if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen)) 1356 if (DRL->getDecl() == DRR->getDecl()) 1357 EmitWarning = false; 1358 1359 1360 // Special case: check for comparisons against literals that can be exactly 1361 // represented by APFloat. In such cases, do not emit a warning. This 1362 // is a heuristic: often comparison against such literals are used to 1363 // detect if a value in a variable has not changed. This clearly can 1364 // lead to false negatives. 1365 if (EmitWarning) { 1366 if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) { 1367 if (FLL->isExact()) 1368 EmitWarning = false; 1369 } 1370 else 1371 if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){ 1372 if (FLR->isExact()) 1373 EmitWarning = false; 1374 } 1375 } 1376 1377 // Check for comparisons with builtin types. 1378 if (EmitWarning) 1379 if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen)) 1380 if (CL->isBuiltinCall(Context)) 1381 EmitWarning = false; 1382 1383 if (EmitWarning) 1384 if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen)) 1385 if (CR->isBuiltinCall(Context)) 1386 EmitWarning = false; 1387 1388 // Emit the diagnostic. 1389 if (EmitWarning) 1390 Diag(loc, diag::warn_floatingpoint_eq) 1391 << lex->getSourceRange() << rex->getSourceRange(); 1392} 1393