SemaChecking.cpp revision bd4c4aebe6035e7a7125470cc9f0f92511230ee3
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements extra semantic analysis beyond what is enforced
11//  by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/DeclObjC.h"
18#include "clang/AST/ExprCXX.h"
19#include "clang/AST/ExprObjC.h"
20#include "clang/Lex/LiteralSupport.h"
21#include "clang/Lex/Preprocessor.h"
22#include <limits>
23using namespace clang;
24
25/// getLocationOfStringLiteralByte - Return a source location that points to the
26/// specified byte of the specified string literal.
27///
28/// Strings are amazingly complex.  They can be formed from multiple tokens and
29/// can have escape sequences in them in addition to the usual trigraph and
30/// escaped newline business.  This routine handles this complexity.
31///
32SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
33                                                    unsigned ByteNo) const {
34  assert(!SL->isWide() && "This doesn't work for wide strings yet");
35
36  // Loop over all of the tokens in this string until we find the one that
37  // contains the byte we're looking for.
38  unsigned TokNo = 0;
39  while (1) {
40    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
41    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
42
43    // Get the spelling of the string so that we can get the data that makes up
44    // the string literal, not the identifier for the macro it is potentially
45    // expanded through.
46    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
47
48    // Re-lex the token to get its length and original spelling.
49    std::pair<FileID, unsigned> LocInfo =
50      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
51    std::pair<const char *,const char *> Buffer =
52      SourceMgr.getBufferData(LocInfo.first);
53    const char *StrData = Buffer.first+LocInfo.second;
54
55    // Create a langops struct and enable trigraphs.  This is sufficient for
56    // relexing tokens.
57    LangOptions LangOpts;
58    LangOpts.Trigraphs = true;
59
60    // Create a lexer starting at the beginning of this token.
61    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
62                   Buffer.second);
63    Token TheTok;
64    TheLexer.LexFromRawLexer(TheTok);
65
66    // Use the StringLiteralParser to compute the length of the string in bytes.
67    StringLiteralParser SLP(&TheTok, 1, PP);
68    unsigned TokNumBytes = SLP.GetStringLength();
69
70    // If the byte is in this token, return the location of the byte.
71    if (ByteNo < TokNumBytes ||
72        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
73      unsigned Offset =
74        StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP);
75
76      // Now that we know the offset of the token in the spelling, use the
77      // preprocessor to get the offset in the original source.
78      return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
79    }
80
81    // Move to the next string token.
82    ++TokNo;
83    ByteNo -= TokNumBytes;
84  }
85}
86
87/// CheckablePrintfAttr - does a function call have a "printf" attribute
88/// and arguments that merit checking?
89bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) {
90  if (Format->getType() == "printf") return true;
91  if (Format->getType() == "printf0") {
92    // printf0 allows null "format" string; if so don't check format/args
93    unsigned format_idx = Format->getFormatIdx() - 1;
94    if (format_idx < TheCall->getNumArgs()) {
95      Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts();
96      if (!Format->isNullPointerConstant(Context))
97        return true;
98    }
99  }
100  return false;
101}
102
103Action::OwningExprResult
104Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
105  OwningExprResult TheCallResult(Owned(TheCall));
106
107  switch (BuiltinID) {
108  case Builtin::BI__builtin___CFStringMakeConstantString:
109    assert(TheCall->getNumArgs() == 1 &&
110           "Wrong # arguments to builtin CFStringMakeConstantString");
111    if (CheckObjCString(TheCall->getArg(0)))
112      return ExprError();
113    break;
114  case Builtin::BI__builtin_stdarg_start:
115  case Builtin::BI__builtin_va_start:
116    if (SemaBuiltinVAStart(TheCall))
117      return ExprError();
118    break;
119  case Builtin::BI__builtin_isgreater:
120  case Builtin::BI__builtin_isgreaterequal:
121  case Builtin::BI__builtin_isless:
122  case Builtin::BI__builtin_islessequal:
123  case Builtin::BI__builtin_islessgreater:
124  case Builtin::BI__builtin_isunordered:
125    if (SemaBuiltinUnorderedCompare(TheCall))
126      return ExprError();
127    break;
128  case Builtin::BI__builtin_return_address:
129  case Builtin::BI__builtin_frame_address:
130    if (SemaBuiltinStackAddress(TheCall))
131      return ExprError();
132    break;
133  case Builtin::BI__builtin_shufflevector:
134    return SemaBuiltinShuffleVector(TheCall);
135    // TheCall will be freed by the smart pointer here, but that's fine, since
136    // SemaBuiltinShuffleVector guts it, but then doesn't release it.
137  case Builtin::BI__builtin_prefetch:
138    if (SemaBuiltinPrefetch(TheCall))
139      return ExprError();
140    break;
141  case Builtin::BI__builtin_object_size:
142    if (SemaBuiltinObjectSize(TheCall))
143      return ExprError();
144    break;
145  case Builtin::BI__builtin_longjmp:
146    if (SemaBuiltinLongjmp(TheCall))
147      return ExprError();
148    break;
149  case Builtin::BI__sync_fetch_and_add:
150  case Builtin::BI__sync_fetch_and_sub:
151  case Builtin::BI__sync_fetch_and_or:
152  case Builtin::BI__sync_fetch_and_and:
153  case Builtin::BI__sync_fetch_and_xor:
154  case Builtin::BI__sync_fetch_and_nand:
155  case Builtin::BI__sync_add_and_fetch:
156  case Builtin::BI__sync_sub_and_fetch:
157  case Builtin::BI__sync_and_and_fetch:
158  case Builtin::BI__sync_or_and_fetch:
159  case Builtin::BI__sync_xor_and_fetch:
160  case Builtin::BI__sync_nand_and_fetch:
161  case Builtin::BI__sync_val_compare_and_swap:
162  case Builtin::BI__sync_bool_compare_and_swap:
163  case Builtin::BI__sync_lock_test_and_set:
164  case Builtin::BI__sync_lock_release:
165    if (SemaBuiltinAtomicOverloaded(TheCall))
166      return ExprError();
167    break;
168  }
169
170  return move(TheCallResult);
171}
172
173/// CheckFunctionCall - Check a direct function call for various correctness
174/// and safety properties not strictly enforced by the C type system.
175bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
176  // Get the IdentifierInfo* for the called function.
177  IdentifierInfo *FnInfo = FDecl->getIdentifier();
178
179  // None of the checks below are needed for functions that don't have
180  // simple names (e.g., C++ conversion functions).
181  if (!FnInfo)
182    return false;
183
184  // FIXME: This mechanism should be abstracted to be less fragile and
185  // more efficient. For example, just map function ids to custom
186  // handlers.
187
188  // Printf checking.
189  if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
190    if (CheckablePrintfAttr(Format, TheCall)) {
191      bool HasVAListArg = Format->getFirstArg() == 0;
192      if (!HasVAListArg) {
193        if (const FunctionProtoType *Proto
194            = FDecl->getType()->getAsFunctionProtoType())
195        HasVAListArg = !Proto->isVariadic();
196      }
197      CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
198                           HasVAListArg ? 0 : Format->getFirstArg() - 1);
199    }
200  }
201
202  for (const NonNullAttr *NonNull = FDecl->getAttr<NonNullAttr>(); NonNull;
203       NonNull = NonNull->getNext<NonNullAttr>())
204    CheckNonNullArguments(NonNull, TheCall);
205
206  return false;
207}
208
209bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) {
210  // Printf checking.
211  const FormatAttr *Format = NDecl->getAttr<FormatAttr>();
212  if (!Format)
213    return false;
214
215  const VarDecl *V = dyn_cast<VarDecl>(NDecl);
216  if (!V)
217    return false;
218
219  QualType Ty = V->getType();
220  if (!Ty->isBlockPointerType())
221    return false;
222
223  if (!CheckablePrintfAttr(Format, TheCall))
224    return false;
225
226  bool HasVAListArg = Format->getFirstArg() == 0;
227  if (!HasVAListArg) {
228    const FunctionType *FT =
229      Ty->getAs<BlockPointerType>()->getPointeeType()->getAsFunctionType();
230    if (const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FT))
231      HasVAListArg = !Proto->isVariadic();
232  }
233  CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
234                       HasVAListArg ? 0 : Format->getFirstArg() - 1);
235
236  return false;
237}
238
239/// SemaBuiltinAtomicOverloaded - We have a call to a function like
240/// __sync_fetch_and_add, which is an overloaded function based on the pointer
241/// type of its first argument.  The main ActOnCallExpr routines have already
242/// promoted the types of arguments because all of these calls are prototyped as
243/// void(...).
244///
245/// This function goes through and does final semantic checking for these
246/// builtins,
247bool Sema::SemaBuiltinAtomicOverloaded(CallExpr *TheCall) {
248  DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
249  FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl());
250
251  // Ensure that we have at least one argument to do type inference from.
252  if (TheCall->getNumArgs() < 1)
253    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
254              << 0 << TheCall->getCallee()->getSourceRange();
255
256  // Inspect the first argument of the atomic builtin.  This should always be
257  // a pointer type, whose element is an integral scalar or pointer type.
258  // Because it is a pointer type, we don't have to worry about any implicit
259  // casts here.
260  Expr *FirstArg = TheCall->getArg(0);
261  if (!FirstArg->getType()->isPointerType())
262    return Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer)
263             << FirstArg->getType() << FirstArg->getSourceRange();
264
265  QualType ValType = FirstArg->getType()->getAs<PointerType>()->getPointeeType();
266  if (!ValType->isIntegerType() && !ValType->isPointerType() &&
267      !ValType->isBlockPointerType())
268    return Diag(DRE->getLocStart(),
269                diag::err_atomic_builtin_must_be_pointer_intptr)
270             << FirstArg->getType() << FirstArg->getSourceRange();
271
272  // We need to figure out which concrete builtin this maps onto.  For example,
273  // __sync_fetch_and_add with a 2 byte object turns into
274  // __sync_fetch_and_add_2.
275#define BUILTIN_ROW(x) \
276  { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \
277    Builtin::BI##x##_8, Builtin::BI##x##_16 }
278
279  static const unsigned BuiltinIndices[][5] = {
280    BUILTIN_ROW(__sync_fetch_and_add),
281    BUILTIN_ROW(__sync_fetch_and_sub),
282    BUILTIN_ROW(__sync_fetch_and_or),
283    BUILTIN_ROW(__sync_fetch_and_and),
284    BUILTIN_ROW(__sync_fetch_and_xor),
285    BUILTIN_ROW(__sync_fetch_and_nand),
286
287    BUILTIN_ROW(__sync_add_and_fetch),
288    BUILTIN_ROW(__sync_sub_and_fetch),
289    BUILTIN_ROW(__sync_and_and_fetch),
290    BUILTIN_ROW(__sync_or_and_fetch),
291    BUILTIN_ROW(__sync_xor_and_fetch),
292    BUILTIN_ROW(__sync_nand_and_fetch),
293
294    BUILTIN_ROW(__sync_val_compare_and_swap),
295    BUILTIN_ROW(__sync_bool_compare_and_swap),
296    BUILTIN_ROW(__sync_lock_test_and_set),
297    BUILTIN_ROW(__sync_lock_release)
298  };
299#undef BUILTIN_ROW
300
301  // Determine the index of the size.
302  unsigned SizeIndex;
303  switch (Context.getTypeSize(ValType)/8) {
304  case 1: SizeIndex = 0; break;
305  case 2: SizeIndex = 1; break;
306  case 4: SizeIndex = 2; break;
307  case 8: SizeIndex = 3; break;
308  case 16: SizeIndex = 4; break;
309  default:
310    return Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size)
311             << FirstArg->getType() << FirstArg->getSourceRange();
312  }
313
314  // Each of these builtins has one pointer argument, followed by some number of
315  // values (0, 1 or 2) followed by a potentially empty varags list of stuff
316  // that we ignore.  Find out which row of BuiltinIndices to read from as well
317  // as the number of fixed args.
318  unsigned BuiltinID = FDecl->getBuiltinID(Context);
319  unsigned BuiltinIndex, NumFixed = 1;
320  switch (BuiltinID) {
321  default: assert(0 && "Unknown overloaded atomic builtin!");
322  case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break;
323  case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break;
324  case Builtin::BI__sync_fetch_and_or:  BuiltinIndex = 2; break;
325  case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break;
326  case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break;
327  case Builtin::BI__sync_fetch_and_nand:BuiltinIndex = 5; break;
328
329  case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 6; break;
330  case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 7; break;
331  case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 8; break;
332  case Builtin::BI__sync_or_and_fetch:  BuiltinIndex = 9; break;
333  case Builtin::BI__sync_xor_and_fetch: BuiltinIndex =10; break;
334  case Builtin::BI__sync_nand_and_fetch:BuiltinIndex =11; break;
335
336  case Builtin::BI__sync_val_compare_and_swap:
337    BuiltinIndex = 12;
338    NumFixed = 2;
339    break;
340  case Builtin::BI__sync_bool_compare_and_swap:
341    BuiltinIndex = 13;
342    NumFixed = 2;
343    break;
344  case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 14; break;
345  case Builtin::BI__sync_lock_release:
346    BuiltinIndex = 15;
347    NumFixed = 0;
348    break;
349  }
350
351  // Now that we know how many fixed arguments we expect, first check that we
352  // have at least that many.
353  if (TheCall->getNumArgs() < 1+NumFixed)
354    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
355            << 0 << TheCall->getCallee()->getSourceRange();
356
357
358  // Get the decl for the concrete builtin from this, we can tell what the
359  // concrete integer type we should convert to is.
360  unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex];
361  const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID);
362  IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName);
363  FunctionDecl *NewBuiltinDecl =
364    cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID,
365                                           TUScope, false, DRE->getLocStart()));
366  const FunctionProtoType *BuiltinFT =
367    NewBuiltinDecl->getType()->getAsFunctionProtoType();
368  ValType = BuiltinFT->getArgType(0)->getAs<PointerType>()->getPointeeType();
369
370  // If the first type needs to be converted (e.g. void** -> int*), do it now.
371  if (BuiltinFT->getArgType(0) != FirstArg->getType()) {
372    ImpCastExprToType(FirstArg, BuiltinFT->getArgType(0), CastExpr::CK_Unknown,
373                      /*isLvalue=*/false);
374    TheCall->setArg(0, FirstArg);
375  }
376
377  // Next, walk the valid ones promoting to the right type.
378  for (unsigned i = 0; i != NumFixed; ++i) {
379    Expr *Arg = TheCall->getArg(i+1);
380
381    // If the argument is an implicit cast, then there was a promotion due to
382    // "...", just remove it now.
383    if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) {
384      Arg = ICE->getSubExpr();
385      ICE->setSubExpr(0);
386      ICE->Destroy(Context);
387      TheCall->setArg(i+1, Arg);
388    }
389
390    // GCC does an implicit conversion to the pointer or integer ValType.  This
391    // can fail in some cases (1i -> int**), check for this error case now.
392    CastExpr::CastKind Kind = CastExpr::CK_Unknown;
393    CXXMethodDecl *ConversionDecl = 0;
394    if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg, Kind,
395                       ConversionDecl))
396      return true;
397
398    // Okay, we have something that *can* be converted to the right type.  Check
399    // to see if there is a potentially weird extension going on here.  This can
400    // happen when you do an atomic operation on something like an char* and
401    // pass in 42.  The 42 gets converted to char.  This is even more strange
402    // for things like 45.123 -> char, etc.
403    // FIXME: Do this check.
404    ImpCastExprToType(Arg, ValType, Kind, /*isLvalue=*/false);
405    TheCall->setArg(i+1, Arg);
406  }
407
408  // Switch the DeclRefExpr to refer to the new decl.
409  DRE->setDecl(NewBuiltinDecl);
410  DRE->setType(NewBuiltinDecl->getType());
411
412  // Set the callee in the CallExpr.
413  // FIXME: This leaks the original parens and implicit casts.
414  Expr *PromotedCall = DRE;
415  UsualUnaryConversions(PromotedCall);
416  TheCall->setCallee(PromotedCall);
417
418
419  // Change the result type of the call to match the result type of the decl.
420  TheCall->setType(NewBuiltinDecl->getResultType());
421  return false;
422}
423
424
425/// CheckObjCString - Checks that the argument to the builtin
426/// CFString constructor is correct
427/// FIXME: GCC currently emits the following warning:
428/// "warning: input conversion stopped due to an input byte that does not
429///           belong to the input codeset UTF-8"
430/// Note: It might also make sense to do the UTF-16 conversion here (would
431/// simplify the backend).
432bool Sema::CheckObjCString(Expr *Arg) {
433  Arg = Arg->IgnoreParenCasts();
434  StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
435
436  if (!Literal || Literal->isWide()) {
437    Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
438      << Arg->getSourceRange();
439    return true;
440  }
441
442  const char *Data = Literal->getStrData();
443  unsigned Length = Literal->getByteLength();
444
445  for (unsigned i = 0; i < Length; ++i) {
446    if (!Data[i]) {
447      Diag(getLocationOfStringLiteralByte(Literal, i),
448           diag::warn_cfstring_literal_contains_nul_character)
449        << Arg->getSourceRange();
450      break;
451    }
452  }
453
454  return false;
455}
456
457/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
458/// Emit an error and return true on failure, return false on success.
459bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
460  Expr *Fn = TheCall->getCallee();
461  if (TheCall->getNumArgs() > 2) {
462    Diag(TheCall->getArg(2)->getLocStart(),
463         diag::err_typecheck_call_too_many_args)
464      << 0 /*function call*/ << Fn->getSourceRange()
465      << SourceRange(TheCall->getArg(2)->getLocStart(),
466                     (*(TheCall->arg_end()-1))->getLocEnd());
467    return true;
468  }
469
470  if (TheCall->getNumArgs() < 2) {
471    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
472      << 0 /*function call*/;
473  }
474
475  // Determine whether the current function is variadic or not.
476  bool isVariadic;
477  if (CurBlock)
478    isVariadic = CurBlock->isVariadic;
479  else if (getCurFunctionDecl()) {
480    if (FunctionProtoType* FTP =
481            dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType()))
482      isVariadic = FTP->isVariadic();
483    else
484      isVariadic = false;
485  } else {
486    isVariadic = getCurMethodDecl()->isVariadic();
487  }
488
489  if (!isVariadic) {
490    Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
491    return true;
492  }
493
494  // Verify that the second argument to the builtin is the last argument of the
495  // current function or method.
496  bool SecondArgIsLastNamedArgument = false;
497  const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
498
499  if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
500    if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
501      // FIXME: This isn't correct for methods (results in bogus warning).
502      // Get the last formal in the current function.
503      const ParmVarDecl *LastArg;
504      if (CurBlock)
505        LastArg = *(CurBlock->TheDecl->param_end()-1);
506      else if (FunctionDecl *FD = getCurFunctionDecl())
507        LastArg = *(FD->param_end()-1);
508      else
509        LastArg = *(getCurMethodDecl()->param_end()-1);
510      SecondArgIsLastNamedArgument = PV == LastArg;
511    }
512  }
513
514  if (!SecondArgIsLastNamedArgument)
515    Diag(TheCall->getArg(1)->getLocStart(),
516         diag::warn_second_parameter_of_va_start_not_last_named_argument);
517  return false;
518}
519
520/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
521/// friends.  This is declared to take (...), so we have to check everything.
522bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
523  if (TheCall->getNumArgs() < 2)
524    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
525      << 0 /*function call*/;
526  if (TheCall->getNumArgs() > 2)
527    return Diag(TheCall->getArg(2)->getLocStart(),
528                diag::err_typecheck_call_too_many_args)
529      << 0 /*function call*/
530      << SourceRange(TheCall->getArg(2)->getLocStart(),
531                     (*(TheCall->arg_end()-1))->getLocEnd());
532
533  Expr *OrigArg0 = TheCall->getArg(0);
534  Expr *OrigArg1 = TheCall->getArg(1);
535
536  // Do standard promotions between the two arguments, returning their common
537  // type.
538  QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
539
540  // Make sure any conversions are pushed back into the call; this is
541  // type safe since unordered compare builtins are declared as "_Bool
542  // foo(...)".
543  TheCall->setArg(0, OrigArg0);
544  TheCall->setArg(1, OrigArg1);
545
546  if (OrigArg0->isTypeDependent() || OrigArg1->isTypeDependent())
547    return false;
548
549  // If the common type isn't a real floating type, then the arguments were
550  // invalid for this operation.
551  if (!Res->isRealFloatingType())
552    return Diag(OrigArg0->getLocStart(),
553                diag::err_typecheck_call_invalid_ordered_compare)
554      << OrigArg0->getType() << OrigArg1->getType()
555      << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
556
557  return false;
558}
559
560bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
561  // The signature for these builtins is exact; the only thing we need
562  // to check is that the argument is a constant.
563  SourceLocation Loc;
564  if (!TheCall->getArg(0)->isTypeDependent() &&
565      !TheCall->getArg(0)->isValueDependent() &&
566      !TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
567    return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
568
569  return false;
570}
571
572/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
573// This is declared to take (...), so we have to check everything.
574Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
575  if (TheCall->getNumArgs() < 3)
576    return ExprError(Diag(TheCall->getLocEnd(),
577                          diag::err_typecheck_call_too_few_args)
578      << 0 /*function call*/ << TheCall->getSourceRange());
579
580  unsigned numElements = std::numeric_limits<unsigned>::max();
581  if (!TheCall->getArg(0)->isTypeDependent() &&
582      !TheCall->getArg(1)->isTypeDependent()) {
583    QualType FAType = TheCall->getArg(0)->getType();
584    QualType SAType = TheCall->getArg(1)->getType();
585
586    if (!FAType->isVectorType() || !SAType->isVectorType()) {
587      Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
588        << SourceRange(TheCall->getArg(0)->getLocStart(),
589                       TheCall->getArg(1)->getLocEnd());
590      return ExprError();
591    }
592
593    if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
594        Context.getCanonicalType(SAType).getUnqualifiedType()) {
595      Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
596        << SourceRange(TheCall->getArg(0)->getLocStart(),
597                       TheCall->getArg(1)->getLocEnd());
598      return ExprError();
599    }
600
601    numElements = FAType->getAsVectorType()->getNumElements();
602    if (TheCall->getNumArgs() != numElements+2) {
603      if (TheCall->getNumArgs() < numElements+2)
604        return ExprError(Diag(TheCall->getLocEnd(),
605                              diag::err_typecheck_call_too_few_args)
606                 << 0 /*function call*/ << TheCall->getSourceRange());
607      return ExprError(Diag(TheCall->getLocEnd(),
608                            diag::err_typecheck_call_too_many_args)
609                 << 0 /*function call*/ << TheCall->getSourceRange());
610    }
611  }
612
613  for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
614    if (TheCall->getArg(i)->isTypeDependent() ||
615        TheCall->getArg(i)->isValueDependent())
616      continue;
617
618    llvm::APSInt Result(32);
619    if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
620      return ExprError(Diag(TheCall->getLocStart(),
621                  diag::err_shufflevector_nonconstant_argument)
622                << TheCall->getArg(i)->getSourceRange());
623
624    if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
625      return ExprError(Diag(TheCall->getLocStart(),
626                  diag::err_shufflevector_argument_too_large)
627               << TheCall->getArg(i)->getSourceRange());
628  }
629
630  llvm::SmallVector<Expr*, 32> exprs;
631
632  for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
633    exprs.push_back(TheCall->getArg(i));
634    TheCall->setArg(i, 0);
635  }
636
637  return Owned(new (Context) ShuffleVectorExpr(Context, exprs.begin(),
638                                            exprs.size(), exprs[0]->getType(),
639                                            TheCall->getCallee()->getLocStart(),
640                                            TheCall->getRParenLoc()));
641}
642
643/// SemaBuiltinPrefetch - Handle __builtin_prefetch.
644// This is declared to take (const void*, ...) and can take two
645// optional constant int args.
646bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
647  unsigned NumArgs = TheCall->getNumArgs();
648
649  if (NumArgs > 3)
650    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
651             << 0 /*function call*/ << TheCall->getSourceRange();
652
653  // Argument 0 is checked for us and the remaining arguments must be
654  // constant integers.
655  for (unsigned i = 1; i != NumArgs; ++i) {
656    Expr *Arg = TheCall->getArg(i);
657    if (Arg->isTypeDependent())
658      continue;
659
660    QualType RWType = Arg->getType();
661
662    const BuiltinType *BT = RWType->getAsBuiltinType();
663    llvm::APSInt Result;
664    if (!BT || BT->getKind() != BuiltinType::Int)
665      return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
666              << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
667
668    if (Arg->isValueDependent())
669      continue;
670
671    if (!Arg->isIntegerConstantExpr(Result, Context))
672      return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
673        << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
674
675    // FIXME: gcc issues a warning and rewrites these to 0. These
676    // seems especially odd for the third argument since the default
677    // is 3.
678    if (i == 1) {
679      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
680        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
681             << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
682    } else {
683      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
684        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
685            << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
686    }
687  }
688
689  return false;
690}
691
692/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
693/// int type). This simply type checks that type is one of the defined
694/// constants (0-3).
695bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
696  Expr *Arg = TheCall->getArg(1);
697  if (Arg->isTypeDependent())
698    return false;
699
700  QualType ArgType = Arg->getType();
701  const BuiltinType *BT = ArgType->getAsBuiltinType();
702  llvm::APSInt Result(32);
703  if (!BT || BT->getKind() != BuiltinType::Int)
704    return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
705             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
706
707  if (Arg->isValueDependent())
708    return false;
709
710  if (!Arg->isIntegerConstantExpr(Result, Context)) {
711    return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
712             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
713  }
714
715  if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
716    return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
717             << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
718  }
719
720  return false;
721}
722
723/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val).
724/// This checks that val is a constant 1.
725bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) {
726  Expr *Arg = TheCall->getArg(1);
727  if (Arg->isTypeDependent() || Arg->isValueDependent())
728    return false;
729
730  llvm::APSInt Result(32);
731  if (!Arg->isIntegerConstantExpr(Result, Context) || Result != 1)
732    return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val)
733             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
734
735  return false;
736}
737
// Handle i > 1 ? "x" : "y", recursively
739bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
740                                  bool HasVAListArg,
741                                  unsigned format_idx, unsigned firstDataArg) {
742  if (E->isTypeDependent() || E->isValueDependent())
743    return false;
744
745  switch (E->getStmtClass()) {
746  case Stmt::ConditionalOperatorClass: {
747    const ConditionalOperator *C = cast<ConditionalOperator>(E);
748    return SemaCheckStringLiteral(C->getLHS(), TheCall,
749                                  HasVAListArg, format_idx, firstDataArg)
750        && SemaCheckStringLiteral(C->getRHS(), TheCall,
751                                  HasVAListArg, format_idx, firstDataArg);
752  }
753
754  case Stmt::ImplicitCastExprClass: {
755    const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E);
756    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
757                                  format_idx, firstDataArg);
758  }
759
760  case Stmt::ParenExprClass: {
761    const ParenExpr *Expr = cast<ParenExpr>(E);
762    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
763                                  format_idx, firstDataArg);
764  }
765
766  case Stmt::DeclRefExprClass: {
767    const DeclRefExpr *DR = cast<DeclRefExpr>(E);
768
769    // As an exception, do not flag errors for variables binding to
770    // const string literals.
771    if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
772      bool isConstant = false;
773      QualType T = DR->getType();
774
775      if (const ArrayType *AT = Context.getAsArrayType(T)) {
776        isConstant = AT->getElementType().isConstant(Context);
777      } else if (const PointerType *PT = T->getAs<PointerType>()) {
778        isConstant = T.isConstant(Context) &&
779                     PT->getPointeeType().isConstant(Context);
780      }
781
782      if (isConstant) {
783        const VarDecl *Def = 0;
784        if (const Expr *Init = VD->getDefinition(Def))
785          return SemaCheckStringLiteral(Init, TheCall,
786                                        HasVAListArg, format_idx, firstDataArg);
787      }
788
789      // For vprintf* functions (i.e., HasVAListArg==true), we add a
790      // special check to see if the format string is a function parameter
791      // of the function calling the printf function.  If the function
792      // has an attribute indicating it is a printf-like function, then we
793      // should suppress warnings concerning non-literals being used in a call
794      // to a vprintf function.  For example:
795      //
796      // void
797      // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){
798      //      va_list ap;
799      //      va_start(ap, fmt);
800      //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt".
801      //      ...
802      //
803      //
804      //  FIXME: We don't have full attribute support yet, so just check to see
805      //    if the argument is a DeclRefExpr that references a parameter.  We'll
806      //    add proper support for checking the attribute later.
807      if (HasVAListArg)
808        if (isa<ParmVarDecl>(VD))
809          return true;
810    }
811
812    return false;
813  }
814
815  case Stmt::CallExprClass: {
816    const CallExpr *CE = cast<CallExpr>(E);
817    if (const ImplicitCastExpr *ICE
818          = dyn_cast<ImplicitCastExpr>(CE->getCallee())) {
819      if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) {
820        if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
821          if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) {
822            unsigned ArgIndex = FA->getFormatIdx();
823            const Expr *Arg = CE->getArg(ArgIndex - 1);
824
825            return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg,
826                                          format_idx, firstDataArg);
827          }
828        }
829      }
830    }
831
832    return false;
833  }
834  case Stmt::ObjCStringLiteralClass:
835  case Stmt::StringLiteralClass: {
836    const StringLiteral *StrE = NULL;
837
838    if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E))
839      StrE = ObjCFExpr->getString();
840    else
841      StrE = cast<StringLiteral>(E);
842
843    if (StrE) {
844      CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
845                        firstDataArg);
846      return true;
847    }
848
849    return false;
850  }
851
852  default:
853    return false;
854  }
855}
856
857void
858Sema::CheckNonNullArguments(const NonNullAttr *NonNull, const CallExpr *TheCall)
859{
860  for (NonNullAttr::iterator i = NonNull->begin(), e = NonNull->end();
861       i != e; ++i) {
862    const Expr *ArgExpr = TheCall->getArg(*i);
863    if (ArgExpr->isNullPointerConstant(Context))
864      Diag(TheCall->getCallee()->getLocStart(), diag::warn_null_arg)
865        << ArgExpr->getSourceRange();
866  }
867}
868
869/// CheckPrintfArguments - Check calls to printf (and similar functions) for
870/// correct use of format strings.
871///
872///  HasVAListArg - A predicate indicating whether the printf-like
873///    function is passed an explicit va_arg argument (e.g., vprintf)
874///
875///  format_idx - The index into Args for the format string.
876///
877/// Improper format strings to functions in the printf family can be
878/// the source of bizarre bugs and very serious security holes.  A
879/// good source of information is available in the following paper
880/// (which includes additional references):
881///
882///  FormatGuard: Automatic Protection From printf Format String
883///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
884///
885/// Functionality implemented:
886///
887///  We can statically check the following properties for string
888///  literal format strings for non v.*printf functions (where the
889///  arguments are passed directly):
890//
891///  (1) Are the number of format conversions equal to the number of
892///      data arguments?
893///
894///  (2) Does each format conversion correctly match the type of the
895///      corresponding data argument?  (TODO)
896///
897/// Moreover, for all printf functions we can:
898///
899///  (3) Check for a missing format string (when not caught by type checking).
900///
901///  (4) Check for no-operation flags; e.g. using "#" with format
902///      conversion 'c'  (TODO)
903///
904///  (5) Check the use of '%n', a major source of security holes.
905///
906///  (6) Check for malformed format conversions that don't specify anything.
907///
908///  (7) Check for empty format strings.  e.g: printf("");
909///
910///  (8) Check that the format string is a wide literal.
911///
912///  (9) Also check the arguments of functions with the __format__ attribute.
913///      (TODO).
914///
915/// All of these checks can be done by parsing the format string.
916///
917/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
918void
919Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
920                           unsigned format_idx, unsigned firstDataArg) {
921  const Expr *Fn = TheCall->getCallee();
922
923  // CHECK: printf-like function is called with no format string.
924  if (format_idx >= TheCall->getNumArgs()) {
925    Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
926      << Fn->getSourceRange();
927    return;
928  }
929
930  const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
931
932  // CHECK: format string is not a string literal.
933  //
934  // Dynamically generated format strings are difficult to
935  // automatically vet at compile time.  Requiring that format strings
936  // are string literals: (1) permits the checking of format strings by
937  // the compiler and thereby (2) can practically remove the source of
938  // many format string exploits.
939
940  // Format string can be either ObjC string (e.g. @"%d") or
941  // C string (e.g. "%d")
942  // ObjC string uses the same format specifiers as C string, so we can use
943  // the same format string checking logic for both ObjC and C strings.
944  if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx,
945                             firstDataArg))
946    return;  // Literal format string found, check done!
947
948  // If there are no arguments specified, warn with -Wformat-security, otherwise
949  // warn only with -Wformat-nonliteral.
950  if (TheCall->getNumArgs() == format_idx+1)
951    Diag(TheCall->getArg(format_idx)->getLocStart(),
952         diag::warn_printf_nonliteral_noargs)
953      << OrigFormatExpr->getSourceRange();
954  else
955    Diag(TheCall->getArg(format_idx)->getLocStart(),
956         diag::warn_printf_nonliteral)
957           << OrigFormatExpr->getSourceRange();
958}
959
960void Sema::CheckPrintfString(const StringLiteral *FExpr,
961                             const Expr *OrigFormatExpr,
962                             const CallExpr *TheCall, bool HasVAListArg,
963                             unsigned format_idx, unsigned firstDataArg) {
964
965  const ObjCStringLiteral *ObjCFExpr =
966    dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
967
968  // CHECK: is the format string a wide literal?
969  if (FExpr->isWide()) {
970    Diag(FExpr->getLocStart(),
971         diag::warn_printf_format_string_is_wide_literal)
972      << OrigFormatExpr->getSourceRange();
973    return;
974  }
975
976  // Str - The format string.  NOTE: this is NOT null-terminated!
977  const char *Str = FExpr->getStrData();
978
979  // CHECK: empty format string?
980  unsigned StrLen = FExpr->getByteLength();
981
982  if (StrLen == 0) {
983    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
984      << OrigFormatExpr->getSourceRange();
985    return;
986  }
987
988  // We process the format string using a binary state machine.  The
989  // current state is stored in CurrentState.
990  enum {
991    state_OrdChr,
992    state_Conversion
993  } CurrentState = state_OrdChr;
994
995  // numConversions - The number of conversions seen so far.  This is
996  //  incremented as we traverse the format string.
997  unsigned numConversions = 0;
998
999  // numDataArgs - The number of data arguments after the format
1000  //  string.  This can only be determined for non vprintf-like
1001  //  functions.  For those functions, this value is 1 (the sole
1002  //  va_arg argument).
1003  unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg;
1004
1005  // Inspect the format string.
1006  unsigned StrIdx = 0;
1007
1008  // LastConversionIdx - Index within the format string where we last saw
1009  //  a '%' character that starts a new format conversion.
1010  unsigned LastConversionIdx = 0;
1011
1012  for (; StrIdx < StrLen; ++StrIdx) {
1013
1014    // Is the number of detected conversion conversions greater than
1015    // the number of matching data arguments?  If so, stop.
1016    if (!HasVAListArg && numConversions > numDataArgs) break;
1017
1018    // Handle "\0"
1019    if (Str[StrIdx] == '\0') {
1020      // The string returned by getStrData() is not null-terminated,
1021      // so the presence of a null character is likely an error.
1022      Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
1023           diag::warn_printf_format_string_contains_null_char)
1024        <<  OrigFormatExpr->getSourceRange();
1025      return;
1026    }
1027
1028    // Ordinary characters (not processing a format conversion).
1029    if (CurrentState == state_OrdChr) {
1030      if (Str[StrIdx] == '%') {
1031        CurrentState = state_Conversion;
1032        LastConversionIdx = StrIdx;
1033      }
1034      continue;
1035    }
1036
1037    // Seen '%'.  Now processing a format conversion.
1038    switch (Str[StrIdx]) {
1039    // Handle dynamic precision or width specifier.
1040    case '*': {
1041      ++numConversions;
1042
1043      if (!HasVAListArg) {
1044        if (numConversions > numDataArgs) {
1045          SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
1046
1047          if (Str[StrIdx-1] == '.')
1048            Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
1049              << OrigFormatExpr->getSourceRange();
1050          else
1051            Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
1052              << OrigFormatExpr->getSourceRange();
1053
1054          // Don't do any more checking.  We'll just emit spurious errors.
1055          return;
1056        }
1057
1058        // Perform type checking on width/precision specifier.
1059        const Expr *E = TheCall->getArg(format_idx+numConversions);
1060        if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
1061          if (BT->getKind() == BuiltinType::Int)
1062            break;
1063
1064        SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
1065
1066        if (Str[StrIdx-1] == '.')
1067          Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
1068          << E->getType() << E->getSourceRange();
1069        else
1070          Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
1071          << E->getType() << E->getSourceRange();
1072
1073        break;
1074      }
1075    }
1076
1077    // Characters which can terminate a format conversion
1078    // (e.g. "%d").  Characters that specify length modifiers or
1079    // other flags are handled by the default case below.
1080    //
1081    // FIXME: additional checks will go into the following cases.
1082    case 'i':
1083    case 'd':
1084    case 'o':
1085    case 'u':
1086    case 'x':
1087    case 'X':
1088    case 'D':
1089    case 'O':
1090    case 'U':
1091    case 'e':
1092    case 'E':
1093    case 'f':
1094    case 'F':
1095    case 'g':
1096    case 'G':
1097    case 'a':
1098    case 'A':
1099    case 'c':
1100    case 'C':
1101    case 'S':
1102    case 's':
1103    case 'p':
1104      ++numConversions;
1105      CurrentState = state_OrdChr;
1106      break;
1107
1108    case 'm':
1109      // FIXME: Warn in situations where this isn't supported!
1110      CurrentState = state_OrdChr;
1111      break;
1112
1113    // CHECK: Are we using "%n"?  Issue a warning.
1114    case 'n': {
1115      ++numConversions;
1116      CurrentState = state_OrdChr;
1117      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
1118                                                          LastConversionIdx);
1119
1120      Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
1121      break;
1122    }
1123
1124    // Handle "%@"
1125    case '@':
1126      // %@ is allowed in ObjC format strings only.
1127      if(ObjCFExpr != NULL)
1128        CurrentState = state_OrdChr;
1129      else {
1130        // Issue a warning: invalid format conversion.
1131        SourceLocation Loc =
1132          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1133
1134        Diag(Loc, diag::warn_printf_invalid_conversion)
1135          <<  std::string(Str+LastConversionIdx,
1136                          Str+std::min(LastConversionIdx+2, StrLen))
1137          << OrigFormatExpr->getSourceRange();
1138      }
1139      ++numConversions;
1140      break;
1141
1142    // Handle "%%"
1143    case '%':
1144      // Sanity check: Was the first "%" character the previous one?
1145      // If not, we will assume that we have a malformed format
1146      // conversion, and that the current "%" character is the start
1147      // of a new conversion.
1148      if (StrIdx - LastConversionIdx == 1)
1149        CurrentState = state_OrdChr;
1150      else {
1151        // Issue a warning: invalid format conversion.
1152        SourceLocation Loc =
1153          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1154
1155        Diag(Loc, diag::warn_printf_invalid_conversion)
1156          << std::string(Str+LastConversionIdx, Str+StrIdx)
1157          << OrigFormatExpr->getSourceRange();
1158
1159        // This conversion is broken.  Advance to the next format
1160        // conversion.
1161        LastConversionIdx = StrIdx;
1162        ++numConversions;
1163      }
1164      break;
1165
1166    default:
1167      // This case catches all other characters: flags, widths, etc.
1168      // We should eventually process those as well.
1169      break;
1170    }
1171  }
1172
1173  if (CurrentState == state_Conversion) {
1174    // Issue a warning: invalid format conversion.
1175    SourceLocation Loc =
1176      getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1177
1178    Diag(Loc, diag::warn_printf_invalid_conversion)
1179      << std::string(Str+LastConversionIdx,
1180                     Str+std::min(LastConversionIdx+2, StrLen))
1181      << OrigFormatExpr->getSourceRange();
1182    return;
1183  }
1184
1185  if (!HasVAListArg) {
1186    // CHECK: Does the number of format conversions exceed the number
1187    //        of data arguments?
1188    if (numConversions > numDataArgs) {
1189      SourceLocation Loc =
1190        getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1191
1192      Diag(Loc, diag::warn_printf_insufficient_data_args)
1193        << OrigFormatExpr->getSourceRange();
1194    }
1195    // CHECK: Does the number of data arguments exceed the number of
1196    //        format conversions in the format string?
1197    else if (numConversions < numDataArgs)
1198      Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
1199           diag::warn_printf_too_many_data_args)
1200        << OrigFormatExpr->getSourceRange();
1201  }
1202}
1203
1204//===--- CHECK: Return Address of Stack Variable --------------------------===//
1205
1206static DeclRefExpr* EvalVal(Expr *E);
1207static DeclRefExpr* EvalAddr(Expr* E);
1208
1209/// CheckReturnStackAddr - Check if a return statement returns the address
1210///   of a stack variable.
1211void
1212Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
1213                           SourceLocation ReturnLoc) {
1214
1215  // Perform checking for returned stack addresses.
1216  if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
1217    if (DeclRefExpr *DR = EvalAddr(RetValExp))
1218      Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
1219       << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
1220
1221    // Skip over implicit cast expressions when checking for block expressions.
1222    if (ImplicitCastExpr *IcExpr =
1223          dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
1224      RetValExp = IcExpr->getSubExpr();
1225
1226    if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
1227      if (C->hasBlockDeclRefExprs())
1228        Diag(C->getLocStart(), diag::err_ret_local_block)
1229          << C->getSourceRange();
1230  } else if (lhsType->isReferenceType()) {
1231    // Perform checking for stack values returned by reference.
1232    // Check for a reference to the stack
1233    if (DeclRefExpr *DR = EvalVal(RetValExp))
1234      Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
1235        << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
1236  }
1237}
1238
1239/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
1240///  check if the expression in a return statement evaluates to an address
1241///  to a location on the stack.  The recursion is used to traverse the
1242///  AST of the return expression, with recursion backtracking when we
1243///  encounter a subexpression that (1) clearly does not lead to the address
1244///  of a stack variable or (2) is something we cannot determine leads to
1245///  the address of a stack variable based on such local checking.
1246///
1247///  EvalAddr processes expressions that are pointers that are used as
1248///  references (and not L-values).  EvalVal handles all other values.
1249///  At the base case of the recursion is a check for a DeclRefExpr* in
1250///  the refers to a stack variable.
1251///
1252///  This implementation handles:
1253///
1254///   * pointer-to-pointer casts
1255///   * implicit conversions from array references to pointers
1256///   * taking the address of fields
1257///   * arbitrary interplay between "&" and "*" operators
1258///   * pointer arithmetic from an address of a stack variable
1259///   * taking the address of an array element where the array is on the stack
1260static DeclRefExpr* EvalAddr(Expr *E) {
1261  // We should only be called for evaluating pointer expressions.
1262  assert((E->getType()->isAnyPointerType() ||
1263          E->getType()->isBlockPointerType() ||
1264          E->getType()->isObjCQualifiedIdType()) &&
1265         "EvalAddr only works on pointers");
1266
1267  // Our "symbolic interpreter" is just a dispatch off the currently
1268  // viewed AST node.  We then recursively traverse the AST by calling
1269  // EvalAddr and EvalVal appropriately.
1270  switch (E->getStmtClass()) {
1271  case Stmt::ParenExprClass:
1272    // Ignore parentheses.
1273    return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
1274
1275  case Stmt::UnaryOperatorClass: {
1276    // The only unary operator that make sense to handle here
1277    // is AddrOf.  All others don't make sense as pointers.
1278    UnaryOperator *U = cast<UnaryOperator>(E);
1279
1280    if (U->getOpcode() == UnaryOperator::AddrOf)
1281      return EvalVal(U->getSubExpr());
1282    else
1283      return NULL;
1284  }
1285
1286  case Stmt::BinaryOperatorClass: {
1287    // Handle pointer arithmetic.  All other binary operators are not valid
1288    // in this context.
1289    BinaryOperator *B = cast<BinaryOperator>(E);
1290    BinaryOperator::Opcode op = B->getOpcode();
1291
1292    if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
1293      return NULL;
1294
1295    Expr *Base = B->getLHS();
1296
1297    // Determine which argument is the real pointer base.  It could be
1298    // the RHS argument instead of the LHS.
1299    if (!Base->getType()->isPointerType()) Base = B->getRHS();
1300
1301    assert (Base->getType()->isPointerType());
1302    return EvalAddr(Base);
1303  }
1304
1305  // For conditional operators we need to see if either the LHS or RHS are
1306  // valid DeclRefExpr*s.  If one of them is valid, we return it.
1307  case Stmt::ConditionalOperatorClass: {
1308    ConditionalOperator *C = cast<ConditionalOperator>(E);
1309
1310    // Handle the GNU extension for missing LHS.
1311    if (Expr *lhsExpr = C->getLHS())
1312      if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
1313        return LHS;
1314
1315     return EvalAddr(C->getRHS());
1316  }
1317
1318  // For casts, we need to handle conversions from arrays to
1319  // pointer values, and pointer-to-pointer conversions.
1320  case Stmt::ImplicitCastExprClass:
1321  case Stmt::CStyleCastExprClass:
1322  case Stmt::CXXFunctionalCastExprClass: {
1323    Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
1324    QualType T = SubExpr->getType();
1325
1326    if (SubExpr->getType()->isPointerType() ||
1327        SubExpr->getType()->isBlockPointerType() ||
1328        SubExpr->getType()->isObjCQualifiedIdType())
1329      return EvalAddr(SubExpr);
1330    else if (T->isArrayType())
1331      return EvalVal(SubExpr);
1332    else
1333      return 0;
1334  }
1335
1336  // C++ casts.  For dynamic casts, static casts, and const casts, we
1337  // are always converting from a pointer-to-pointer, so we just blow
1338  // through the cast.  In the case the dynamic cast doesn't fail (and
1339  // return NULL), we take the conservative route and report cases
1340  // where we return the address of a stack variable.  For Reinterpre
1341  // FIXME: The comment about is wrong; we're not always converting
1342  // from pointer to pointer. I'm guessing that this code should also
1343  // handle references to objects.
1344  case Stmt::CXXStaticCastExprClass:
1345  case Stmt::CXXDynamicCastExprClass:
1346  case Stmt::CXXConstCastExprClass:
1347  case Stmt::CXXReinterpretCastExprClass: {
1348      Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
1349      if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
1350        return EvalAddr(S);
1351      else
1352        return NULL;
1353  }
1354
1355  // Everything else: we simply don't reason about them.
1356  default:
1357    return NULL;
1358  }
1359}
1360
1361
1362///  EvalVal - This function is complements EvalAddr in the mutual recursion.
1363///   See the comments for EvalAddr for more details.
1364static DeclRefExpr* EvalVal(Expr *E) {
1365
1366  // We should only be called for evaluating non-pointer expressions, or
1367  // expressions with a pointer type that are not used as references but instead
1368  // are l-values (e.g., DeclRefExpr with a pointer type).
1369
1370  // Our "symbolic interpreter" is just a dispatch off the currently
1371  // viewed AST node.  We then recursively traverse the AST by calling
1372  // EvalAddr and EvalVal appropriately.
1373  switch (E->getStmtClass()) {
1374  case Stmt::DeclRefExprClass:
1375  case Stmt::QualifiedDeclRefExprClass: {
1376    // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
1377    //  at code that refers to a variable's name.  We check if it has local
1378    //  storage within the function, and if so, return the expression.
1379    DeclRefExpr *DR = cast<DeclRefExpr>(E);
1380
1381    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
1382      if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;
1383
1384    return NULL;
1385  }
1386
1387  case Stmt::ParenExprClass:
1388    // Ignore parentheses.
1389    return EvalVal(cast<ParenExpr>(E)->getSubExpr());
1390
1391  case Stmt::UnaryOperatorClass: {
1392    // The only unary operator that make sense to handle here
1393    // is Deref.  All others don't resolve to a "name."  This includes
1394    // handling all sorts of rvalues passed to a unary operator.
1395    UnaryOperator *U = cast<UnaryOperator>(E);
1396
1397    if (U->getOpcode() == UnaryOperator::Deref)
1398      return EvalAddr(U->getSubExpr());
1399
1400    return NULL;
1401  }
1402
1403  case Stmt::ArraySubscriptExprClass: {
1404    // Array subscripts are potential references to data on the stack.  We
1405    // retrieve the DeclRefExpr* for the array variable if it indeed
1406    // has local storage.
1407    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
1408  }
1409
1410  case Stmt::ConditionalOperatorClass: {
1411    // For conditional operators we need to see if either the LHS or RHS are
1412    // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
1413    ConditionalOperator *C = cast<ConditionalOperator>(E);
1414
1415    // Handle the GNU extension for missing LHS.
1416    if (Expr *lhsExpr = C->getLHS())
1417      if (DeclRefExpr *LHS = EvalVal(lhsExpr))
1418        return LHS;
1419
1420    return EvalVal(C->getRHS());
1421  }
1422
1423  // Accesses to members are potential references to data on the stack.
1424  case Stmt::MemberExprClass:
1425  case Stmt::CXXQualifiedMemberExprClass: {
1426    MemberExpr *M = cast<MemberExpr>(E);
1427
1428    // Check for indirect access.  We only want direct field accesses.
1429    if (!M->isArrow())
1430      return EvalVal(M->getBase());
1431    else
1432      return NULL;
1433  }
1434
1435  // Everything else: we simply don't reason about them.
1436  default:
1437    return NULL;
1438  }
1439}
1440
1441//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
1442
1443/// Check for comparisons of floating point operands using != and ==.
1444/// Issue a warning if these are no self-comparisons, as they are not likely
1445/// to do what the programmer intended.
1446void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
1447  bool EmitWarning = true;
1448
1449  Expr* LeftExprSansParen = lex->IgnoreParens();
1450  Expr* RightExprSansParen = rex->IgnoreParens();
1451
1452  // Special case: check for x == x (which is OK).
1453  // Do not emit warnings for such cases.
1454  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
1455    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
1456      if (DRL->getDecl() == DRR->getDecl())
1457        EmitWarning = false;
1458
1459
1460  // Special case: check for comparisons against literals that can be exactly
1461  //  represented by APFloat.  In such cases, do not emit a warning.  This
1462  //  is a heuristic: often comparison against such literals are used to
1463  //  detect if a value in a variable has not changed.  This clearly can
1464  //  lead to false negatives.
1465  if (EmitWarning) {
1466    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
1467      if (FLL->isExact())
1468        EmitWarning = false;
1469    } else
1470      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
1471        if (FLR->isExact())
1472          EmitWarning = false;
1473    }
1474  }
1475
1476  // Check for comparisons with builtin types.
1477  if (EmitWarning)
1478    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
1479      if (CL->isBuiltinCall(Context))
1480        EmitWarning = false;
1481
1482  if (EmitWarning)
1483    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
1484      if (CR->isBuiltinCall(Context))
1485        EmitWarning = false;
1486
1487  // Emit the diagnostic.
1488  if (EmitWarning)
1489    Diag(loc, diag::warn_floatingpoint_eq)
1490      << lex->getSourceRange() << rex->getSourceRange();
1491}
1492