SemaChecking.cpp revision 0f436560640a1cff5b6d96f80f540770f139453f
//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements extra semantic analysis beyond what is enforced
//  by the C type system.
//
//===----------------------------------------------------------------------===//
146931900f43cea558c6974075256c07728dbfecc6Douglas Gregor
156931900f43cea558c6974075256c07728dbfecc6Douglas Gregor#include "Sema.h"
160a35bceb7768fc0be62cb644a4e31d8bfd9fb44aDouglas Gregor#include "clang/AST/ASTContext.h"
178fb280cbbc8348d4d38f6ed7fa68bb04326814fbDouglas Gregor#include "clang/AST/DeclObjC.h"
187e24256c95afb64b4d5abf201a0f9f0527cb4cf3Douglas Gregor#include "clang/AST/ExprCXX.h"
197e24256c95afb64b4d5abf201a0f9f0527cb4cf3Douglas Gregor#include "clang/AST/ExprObjC.h"
207e24256c95afb64b4d5abf201a0f9f0527cb4cf3Douglas Gregor#include "clang/Lex/LiteralSupport.h"
217e24256c95afb64b4d5abf201a0f9f0527cb4cf3Douglas Gregor#include "clang/Lex/Preprocessor.h"
227e24256c95afb64b4d5abf201a0f9f0527cb4cf3Douglas Gregor#include <limits>
237e24256c95afb64b4d5abf201a0f9f0527cb4cf3Douglas Gregorusing namespace clang;
24c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor
25c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor/// getLocationOfStringLiteralByte - Return a source location that points to the
26c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor/// specified byte of the specified string literal.
27c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor///
28c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor/// Strings are amazingly complex.  They can be formed from multiple tokens and
298fb280cbbc8348d4d38f6ed7fa68bb04326814fbDouglas Gregor/// can have escape sequences in them in addition to the usual trigraph and
30acba90f30876b4140b738f0d3dd0e50724053a96Abramo Bagnara/// escaped newline business.  This routine handles this complexity.
31acba90f30876b4140b738f0d3dd0e50724053a96Abramo Bagnara///
32ff676cb48fe8bf7be2feaa251dc7c5fb15af4730Abramo BagnaraSourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
33acba90f30876b4140b738f0d3dd0e50724053a96Abramo Bagnara                                                    unsigned ByteNo) const {
34ff676cb48fe8bf7be2feaa251dc7c5fb15af4730Abramo Bagnara  assert(!SL->isWide() && "This doesn't work for wide strings yet");
358ea5b9d832455247a15925398fb663d299d33238Douglas Gregor
366931900f43cea558c6974075256c07728dbfecc6Douglas Gregor  // Loop over all of the tokens in this string until we find the one that
378ea5b9d832455247a15925398fb663d299d33238Douglas Gregor  // contains the byte we're looking for.
388ea5b9d832455247a15925398fb663d299d33238Douglas Gregor  unsigned TokNo = 0;
390a35bceb7768fc0be62cb644a4e31d8bfd9fb44aDouglas Gregor  while (1) {
400a35bceb7768fc0be62cb644a4e31d8bfd9fb44aDouglas Gregor    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
41acba90f30876b4140b738f0d3dd0e50724053a96Abramo Bagnara    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
42ff676cb48fe8bf7be2feaa251dc7c5fb15af4730Abramo Bagnara
43651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines    // Get the spelling of the string so that we can get the data that makes up
44826faa22bae112e01293a58534a40711043cce65Argyrios Kyrtzidis    // the string literal, not the identifier for the macro it is potentially
45c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor    // expanded through.
46ff676cb48fe8bf7be2feaa251dc7c5fb15af4730Abramo Bagnara    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
47c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor
48c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor    // Re-lex the token to get its length and original spelling.
49c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor    std::pair<FileID, unsigned> LocInfo =
50c5ade2e3644a5822df63e442788d68c591ccdc97Douglas Gregor      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
51    std::pair<const char *,const char *> Buffer =
52      SourceMgr.getBufferData(LocInfo.first);
53    const char *StrData = Buffer.first+LocInfo.second;
54
55    // Create a langops struct and enable trigraphs.  This is sufficient for
56    // relexing tokens.
57    LangOptions LangOpts;
58    LangOpts.Trigraphs = true;
59
60    // Create a lexer starting at the beginning of this token.
61    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
62                   Buffer.second);
63    Token TheTok;
64    TheLexer.LexFromRawLexer(TheTok);
65
66    // Use the StringLiteralParser to compute the length of the string in bytes.
67    StringLiteralParser SLP(&TheTok, 1, PP);
68    unsigned TokNumBytes = SLP.GetStringLength();
69
70    // If the byte is in this token, return the location of the byte.
71    if (ByteNo < TokNumBytes ||
72        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
73      unsigned Offset =
74        StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP);
75
76      // Now that we know the offset of the token in the spelling, use the
77      // preprocessor to get the offset in the original source.
78      return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
79    }
80
81    // Move to the next string token.
82    ++TokNo;
83    ByteNo -= TokNumBytes;
84  }
85}
86
87/// CheckablePrintfAttr - does a function call have a "printf" attribute
88/// and arguments that merit checking?
89bool Sema::CheckablePrintfAttr(const FormatAttr *Format, CallExpr *TheCall) {
90  if (Format->getType() == "printf") return true;
91  if (Format->getType() == "printf0") {
92    // printf0 allows null "format" string; if so don't check format/args
93    unsigned format_idx = Format->getFormatIdx() - 1;
94    if (format_idx < TheCall->getNumArgs()) {
95      Expr *Format = TheCall->getArg(format_idx)->IgnoreParenCasts();
96      if (!Format->isNullPointerConstant(Context))
97        return true;
98    }
99  }
100  return false;
101}
102
103Action::OwningExprResult
104Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
105  OwningExprResult TheCallResult(Owned(TheCall));
106
107  switch (BuiltinID) {
108  case Builtin::BI__builtin___CFStringMakeConstantString:
109    assert(TheCall->getNumArgs() == 1 &&
110           "Wrong # arguments to builtin CFStringMakeConstantString");
111    if (CheckObjCString(TheCall->getArg(0)))
112      return ExprError();
113    break;
114  case Builtin::BI__builtin_stdarg_start:
115  case Builtin::BI__builtin_va_start:
116    if (SemaBuiltinVAStart(TheCall))
117      return ExprError();
118    break;
119  case Builtin::BI__builtin_isgreater:
120  case Builtin::BI__builtin_isgreaterequal:
121  case Builtin::BI__builtin_isless:
122  case Builtin::BI__builtin_islessequal:
123  case Builtin::BI__builtin_islessgreater:
124  case Builtin::BI__builtin_isunordered:
125    if (SemaBuiltinUnorderedCompare(TheCall))
126      return ExprError();
127    break;
128  case Builtin::BI__builtin_return_address:
129  case Builtin::BI__builtin_frame_address:
130    if (SemaBuiltinStackAddress(TheCall))
131      return ExprError();
132    break;
133  case Builtin::BI__builtin_shufflevector:
134    return SemaBuiltinShuffleVector(TheCall);
135    // TheCall will be freed by the smart pointer here, but that's fine, since
136    // SemaBuiltinShuffleVector guts it, but then doesn't release it.
137  case Builtin::BI__builtin_prefetch:
138    if (SemaBuiltinPrefetch(TheCall))
139      return ExprError();
140    break;
141  case Builtin::BI__builtin_object_size:
142    if (SemaBuiltinObjectSize(TheCall))
143      return ExprError();
144    break;
145  case Builtin::BI__builtin_longjmp:
146    if (SemaBuiltinLongjmp(TheCall))
147      return ExprError();
148    break;
149  case Builtin::BI__sync_fetch_and_add:
150  case Builtin::BI__sync_fetch_and_sub:
151  case Builtin::BI__sync_fetch_and_or:
152  case Builtin::BI__sync_fetch_and_and:
153  case Builtin::BI__sync_fetch_and_xor:
154  case Builtin::BI__sync_fetch_and_nand:
155  case Builtin::BI__sync_add_and_fetch:
156  case Builtin::BI__sync_sub_and_fetch:
157  case Builtin::BI__sync_and_and_fetch:
158  case Builtin::BI__sync_or_and_fetch:
159  case Builtin::BI__sync_xor_and_fetch:
160  case Builtin::BI__sync_nand_and_fetch:
161  case Builtin::BI__sync_val_compare_and_swap:
162  case Builtin::BI__sync_bool_compare_and_swap:
163  case Builtin::BI__sync_lock_test_and_set:
164  case Builtin::BI__sync_lock_release:
165    if (SemaBuiltinAtomicOverloaded(TheCall))
166      return ExprError();
167    break;
168  }
169
170  return move(TheCallResult);
171}
172
173/// CheckFunctionCall - Check a direct function call for various correctness
174/// and safety properties not strictly enforced by the C type system.
175bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
176  // Get the IdentifierInfo* for the called function.
177  IdentifierInfo *FnInfo = FDecl->getIdentifier();
178
179  // None of the checks below are needed for functions that don't have
180  // simple names (e.g., C++ conversion functions).
181  if (!FnInfo)
182    return false;
183
184  // FIXME: This mechanism should be abstracted to be less fragile and
185  // more efficient. For example, just map function ids to custom
186  // handlers.
187
188  // Printf checking.
189  if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
190    if (CheckablePrintfAttr(Format, TheCall)) {
191      bool HasVAListArg = Format->getFirstArg() == 0;
192      if (!HasVAListArg) {
193        if (const FunctionProtoType *Proto
194            = FDecl->getType()->getAsFunctionProtoType())
195        HasVAListArg = !Proto->isVariadic();
196      }
197      CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
198                           HasVAListArg ? 0 : Format->getFirstArg() - 1);
199    }
200  }
201
202  for (const NonNullAttr *NonNull = FDecl->getAttr<NonNullAttr>(); NonNull;
203       NonNull = NonNull->getNext<NonNullAttr>())
204    CheckNonNullArguments(NonNull, TheCall);
205
206  return false;
207}
208
209bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) {
210  // Printf checking.
211  const FormatAttr *Format = NDecl->getAttr<FormatAttr>();
212  if (!Format)
213    return false;
214
215  const VarDecl *V = dyn_cast<VarDecl>(NDecl);
216  if (!V)
217    return false;
218
219  QualType Ty = V->getType();
220  if (!Ty->isBlockPointerType())
221    return false;
222
223  if (!CheckablePrintfAttr(Format, TheCall))
224    return false;
225
226  bool HasVAListArg = Format->getFirstArg() == 0;
227  if (!HasVAListArg) {
228    const FunctionType *FT =
229      Ty->getAs<BlockPointerType>()->getPointeeType()->getAsFunctionType();
230    if (const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FT))
231      HasVAListArg = !Proto->isVariadic();
232  }
233  CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
234                       HasVAListArg ? 0 : Format->getFirstArg() - 1);
235
236  return false;
237}
238
239/// SemaBuiltinAtomicOverloaded - We have a call to a function like
240/// __sync_fetch_and_add, which is an overloaded function based on the pointer
241/// type of its first argument.  The main ActOnCallExpr routines have already
242/// promoted the types of arguments because all of these calls are prototyped as
243/// void(...).
244///
245/// This function goes through and does final semantic checking for these
246/// builtins,
247bool Sema::SemaBuiltinAtomicOverloaded(CallExpr *TheCall) {
248  DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
249  FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl());
250
251  // Ensure that we have at least one argument to do type inference from.
252  if (TheCall->getNumArgs() < 1)
253    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
254              << 0 << TheCall->getCallee()->getSourceRange();
255
256  // Inspect the first argument of the atomic builtin.  This should always be
257  // a pointer type, whose element is an integral scalar or pointer type.
258  // Because it is a pointer type, we don't have to worry about any implicit
259  // casts here.
260  Expr *FirstArg = TheCall->getArg(0);
261  if (!FirstArg->getType()->isPointerType())
262    return Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer)
263             << FirstArg->getType() << FirstArg->getSourceRange();
264
265  QualType ValType = FirstArg->getType()->getAs<PointerType>()->getPointeeType();
266  if (!ValType->isIntegerType() && !ValType->isPointerType() &&
267      !ValType->isBlockPointerType())
268    return Diag(DRE->getLocStart(),
269                diag::err_atomic_builtin_must_be_pointer_intptr)
270             << FirstArg->getType() << FirstArg->getSourceRange();
271
272  // We need to figure out which concrete builtin this maps onto.  For example,
273  // __sync_fetch_and_add with a 2 byte object turns into
274  // __sync_fetch_and_add_2.
275#define BUILTIN_ROW(x) \
276  { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \
277    Builtin::BI##x##_8, Builtin::BI##x##_16 }
278
279  static const unsigned BuiltinIndices[][5] = {
280    BUILTIN_ROW(__sync_fetch_and_add),
281    BUILTIN_ROW(__sync_fetch_and_sub),
282    BUILTIN_ROW(__sync_fetch_and_or),
283    BUILTIN_ROW(__sync_fetch_and_and),
284    BUILTIN_ROW(__sync_fetch_and_xor),
285    BUILTIN_ROW(__sync_fetch_and_nand),
286
287    BUILTIN_ROW(__sync_add_and_fetch),
288    BUILTIN_ROW(__sync_sub_and_fetch),
289    BUILTIN_ROW(__sync_and_and_fetch),
290    BUILTIN_ROW(__sync_or_and_fetch),
291    BUILTIN_ROW(__sync_xor_and_fetch),
292    BUILTIN_ROW(__sync_nand_and_fetch),
293
294    BUILTIN_ROW(__sync_val_compare_and_swap),
295    BUILTIN_ROW(__sync_bool_compare_and_swap),
296    BUILTIN_ROW(__sync_lock_test_and_set),
297    BUILTIN_ROW(__sync_lock_release)
298  };
299#undef BUILTIN_ROW
300
301  // Determine the index of the size.
302  unsigned SizeIndex;
303  switch (Context.getTypeSize(ValType)/8) {
304  case 1: SizeIndex = 0; break;
305  case 2: SizeIndex = 1; break;
306  case 4: SizeIndex = 2; break;
307  case 8: SizeIndex = 3; break;
308  case 16: SizeIndex = 4; break;
309  default:
310    return Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size)
311             << FirstArg->getType() << FirstArg->getSourceRange();
312  }
313
314  // Each of these builtins has one pointer argument, followed by some number of
315  // values (0, 1 or 2) followed by a potentially empty varags list of stuff
316  // that we ignore.  Find out which row of BuiltinIndices to read from as well
317  // as the number of fixed args.
318  unsigned BuiltinID = FDecl->getBuiltinID(Context);
319  unsigned BuiltinIndex, NumFixed = 1;
320  switch (BuiltinID) {
321  default: assert(0 && "Unknown overloaded atomic builtin!");
322  case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break;
323  case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break;
324  case Builtin::BI__sync_fetch_and_or:  BuiltinIndex = 2; break;
325  case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break;
326  case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break;
327  case Builtin::BI__sync_fetch_and_nand:BuiltinIndex = 5; break;
328
329  case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 6; break;
330  case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 7; break;
331  case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 8; break;
332  case Builtin::BI__sync_or_and_fetch:  BuiltinIndex = 9; break;
333  case Builtin::BI__sync_xor_and_fetch: BuiltinIndex =10; break;
334  case Builtin::BI__sync_nand_and_fetch:BuiltinIndex =11; break;
335
336  case Builtin::BI__sync_val_compare_and_swap:
337    BuiltinIndex = 12;
338    NumFixed = 2;
339    break;
340  case Builtin::BI__sync_bool_compare_and_swap:
341    BuiltinIndex = 13;
342    NumFixed = 2;
343    break;
344  case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 14; break;
345  case Builtin::BI__sync_lock_release:
346    BuiltinIndex = 15;
347    NumFixed = 0;
348    break;
349  }
350
351  // Now that we know how many fixed arguments we expect, first check that we
352  // have at least that many.
353  if (TheCall->getNumArgs() < 1+NumFixed)
354    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
355            << 0 << TheCall->getCallee()->getSourceRange();
356
357
358  // Get the decl for the concrete builtin from this, we can tell what the
359  // concrete integer type we should convert to is.
360  unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex];
361  const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID);
362  IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName);
363  FunctionDecl *NewBuiltinDecl =
364    cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID,
365                                           TUScope, false, DRE->getLocStart()));
366  const FunctionProtoType *BuiltinFT =
367    NewBuiltinDecl->getType()->getAsFunctionProtoType();
368  ValType = BuiltinFT->getArgType(0)->getAs<PointerType>()->getPointeeType();
369
370  // If the first type needs to be converted (e.g. void** -> int*), do it now.
371  if (BuiltinFT->getArgType(0) != FirstArg->getType()) {
372    ImpCastExprToType(FirstArg, BuiltinFT->getArgType(0), CastExpr::CK_Unknown,
373                      /*isLvalue=*/false);
374    TheCall->setArg(0, FirstArg);
375  }
376
377  // Next, walk the valid ones promoting to the right type.
378  for (unsigned i = 0; i != NumFixed; ++i) {
379    Expr *Arg = TheCall->getArg(i+1);
380
381    // If the argument is an implicit cast, then there was a promotion due to
382    // "...", just remove it now.
383    if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) {
384      Arg = ICE->getSubExpr();
385      ICE->setSubExpr(0);
386      ICE->Destroy(Context);
387      TheCall->setArg(i+1, Arg);
388    }
389
390    // GCC does an implicit conversion to the pointer or integer ValType.  This
391    // can fail in some cases (1i -> int**), check for this error case now.
392    CastExpr::CastKind Kind = CastExpr::CK_Unknown;
393    if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg, Kind))
394      return true;
395
396    // Okay, we have something that *can* be converted to the right type.  Check
397    // to see if there is a potentially weird extension going on here.  This can
398    // happen when you do an atomic operation on something like an char* and
399    // pass in 42.  The 42 gets converted to char.  This is even more strange
400    // for things like 45.123 -> char, etc.
401    // FIXME: Do this check.
402    ImpCastExprToType(Arg, ValType, Kind, /*isLvalue=*/false);
403    TheCall->setArg(i+1, Arg);
404  }
405
406  // Switch the DeclRefExpr to refer to the new decl.
407  DRE->setDecl(NewBuiltinDecl);
408  DRE->setType(NewBuiltinDecl->getType());
409
410  // Set the callee in the CallExpr.
411  // FIXME: This leaks the original parens and implicit casts.
412  Expr *PromotedCall = DRE;
413  UsualUnaryConversions(PromotedCall);
414  TheCall->setCallee(PromotedCall);
415
416
417  // Change the result type of the call to match the result type of the decl.
418  TheCall->setType(NewBuiltinDecl->getResultType());
419  return false;
420}
421
422
423/// CheckObjCString - Checks that the argument to the builtin
424/// CFString constructor is correct
425/// FIXME: GCC currently emits the following warning:
426/// "warning: input conversion stopped due to an input byte that does not
427///           belong to the input codeset UTF-8"
428/// Note: It might also make sense to do the UTF-16 conversion here (would
429/// simplify the backend).
430bool Sema::CheckObjCString(Expr *Arg) {
431  Arg = Arg->IgnoreParenCasts();
432  StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
433
434  if (!Literal || Literal->isWide()) {
435    Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
436      << Arg->getSourceRange();
437    return true;
438  }
439
440  const char *Data = Literal->getStrData();
441  unsigned Length = Literal->getByteLength();
442
443  for (unsigned i = 0; i < Length; ++i) {
444    if (!Data[i]) {
445      Diag(getLocationOfStringLiteralByte(Literal, i),
446           diag::warn_cfstring_literal_contains_nul_character)
447        << Arg->getSourceRange();
448      break;
449    }
450  }
451
452  return false;
453}
454
455/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
456/// Emit an error and return true on failure, return false on success.
457bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
458  Expr *Fn = TheCall->getCallee();
459  if (TheCall->getNumArgs() > 2) {
460    Diag(TheCall->getArg(2)->getLocStart(),
461         diag::err_typecheck_call_too_many_args)
462      << 0 /*function call*/ << Fn->getSourceRange()
463      << SourceRange(TheCall->getArg(2)->getLocStart(),
464                     (*(TheCall->arg_end()-1))->getLocEnd());
465    return true;
466  }
467
468  if (TheCall->getNumArgs() < 2) {
469    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
470      << 0 /*function call*/;
471  }
472
473  // Determine whether the current function is variadic or not.
474  bool isVariadic;
475  if (CurBlock)
476    isVariadic = CurBlock->isVariadic;
477  else if (getCurFunctionDecl()) {
478    if (FunctionProtoType* FTP =
479            dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType()))
480      isVariadic = FTP->isVariadic();
481    else
482      isVariadic = false;
483  } else {
484    isVariadic = getCurMethodDecl()->isVariadic();
485  }
486
487  if (!isVariadic) {
488    Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
489    return true;
490  }
491
492  // Verify that the second argument to the builtin is the last argument of the
493  // current function or method.
494  bool SecondArgIsLastNamedArgument = false;
495  const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
496
497  if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
498    if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
499      // FIXME: This isn't correct for methods (results in bogus warning).
500      // Get the last formal in the current function.
501      const ParmVarDecl *LastArg;
502      if (CurBlock)
503        LastArg = *(CurBlock->TheDecl->param_end()-1);
504      else if (FunctionDecl *FD = getCurFunctionDecl())
505        LastArg = *(FD->param_end()-1);
506      else
507        LastArg = *(getCurMethodDecl()->param_end()-1);
508      SecondArgIsLastNamedArgument = PV == LastArg;
509    }
510  }
511
512  if (!SecondArgIsLastNamedArgument)
513    Diag(TheCall->getArg(1)->getLocStart(),
514         diag::warn_second_parameter_of_va_start_not_last_named_argument);
515  return false;
516}
517
518/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
519/// friends.  This is declared to take (...), so we have to check everything.
520bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
521  if (TheCall->getNumArgs() < 2)
522    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
523      << 0 /*function call*/;
524  if (TheCall->getNumArgs() > 2)
525    return Diag(TheCall->getArg(2)->getLocStart(),
526                diag::err_typecheck_call_too_many_args)
527      << 0 /*function call*/
528      << SourceRange(TheCall->getArg(2)->getLocStart(),
529                     (*(TheCall->arg_end()-1))->getLocEnd());
530
531  Expr *OrigArg0 = TheCall->getArg(0);
532  Expr *OrigArg1 = TheCall->getArg(1);
533
534  // Do standard promotions between the two arguments, returning their common
535  // type.
536  QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
537
538  // Make sure any conversions are pushed back into the call; this is
539  // type safe since unordered compare builtins are declared as "_Bool
540  // foo(...)".
541  TheCall->setArg(0, OrigArg0);
542  TheCall->setArg(1, OrigArg1);
543
544  if (OrigArg0->isTypeDependent() || OrigArg1->isTypeDependent())
545    return false;
546
547  // If the common type isn't a real floating type, then the arguments were
548  // invalid for this operation.
549  if (!Res->isRealFloatingType())
550    return Diag(OrigArg0->getLocStart(),
551                diag::err_typecheck_call_invalid_ordered_compare)
552      << OrigArg0->getType() << OrigArg1->getType()
553      << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
554
555  return false;
556}
557
558bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
559  // The signature for these builtins is exact; the only thing we need
560  // to check is that the argument is a constant.
561  SourceLocation Loc;
562  if (!TheCall->getArg(0)->isTypeDependent() &&
563      !TheCall->getArg(0)->isValueDependent() &&
564      !TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
565    return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
566
567  return false;
568}
569
570/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
571// This is declared to take (...), so we have to check everything.
572Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
573  if (TheCall->getNumArgs() < 3)
574    return ExprError(Diag(TheCall->getLocEnd(),
575                          diag::err_typecheck_call_too_few_args)
576      << 0 /*function call*/ << TheCall->getSourceRange());
577
578  unsigned numElements = std::numeric_limits<unsigned>::max();
579  if (!TheCall->getArg(0)->isTypeDependent() &&
580      !TheCall->getArg(1)->isTypeDependent()) {
581    QualType FAType = TheCall->getArg(0)->getType();
582    QualType SAType = TheCall->getArg(1)->getType();
583
584    if (!FAType->isVectorType() || !SAType->isVectorType()) {
585      Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
586        << SourceRange(TheCall->getArg(0)->getLocStart(),
587                       TheCall->getArg(1)->getLocEnd());
588      return ExprError();
589    }
590
591    if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
592        Context.getCanonicalType(SAType).getUnqualifiedType()) {
593      Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
594        << SourceRange(TheCall->getArg(0)->getLocStart(),
595                       TheCall->getArg(1)->getLocEnd());
596      return ExprError();
597    }
598
599    numElements = FAType->getAsVectorType()->getNumElements();
600    if (TheCall->getNumArgs() != numElements+2) {
601      if (TheCall->getNumArgs() < numElements+2)
602        return ExprError(Diag(TheCall->getLocEnd(),
603                              diag::err_typecheck_call_too_few_args)
604                 << 0 /*function call*/ << TheCall->getSourceRange());
605      return ExprError(Diag(TheCall->getLocEnd(),
606                            diag::err_typecheck_call_too_many_args)
607                 << 0 /*function call*/ << TheCall->getSourceRange());
608    }
609  }
610
611  for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
612    if (TheCall->getArg(i)->isTypeDependent() ||
613        TheCall->getArg(i)->isValueDependent())
614      continue;
615
616    llvm::APSInt Result(32);
617    if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
618      return ExprError(Diag(TheCall->getLocStart(),
619                  diag::err_shufflevector_nonconstant_argument)
620                << TheCall->getArg(i)->getSourceRange());
621
622    if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
623      return ExprError(Diag(TheCall->getLocStart(),
624                  diag::err_shufflevector_argument_too_large)
625               << TheCall->getArg(i)->getSourceRange());
626  }
627
628  llvm::SmallVector<Expr*, 32> exprs;
629
630  for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
631    exprs.push_back(TheCall->getArg(i));
632    TheCall->setArg(i, 0);
633  }
634
635  return Owned(new (Context) ShuffleVectorExpr(Context, exprs.begin(),
636                                            exprs.size(), exprs[0]->getType(),
637                                            TheCall->getCallee()->getLocStart(),
638                                            TheCall->getRParenLoc()));
639}
640
641/// SemaBuiltinPrefetch - Handle __builtin_prefetch.
642// This is declared to take (const void*, ...) and can take two
643// optional constant int args.
644bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
645  unsigned NumArgs = TheCall->getNumArgs();
646
647  if (NumArgs > 3)
648    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
649             << 0 /*function call*/ << TheCall->getSourceRange();
650
651  // Argument 0 is checked for us and the remaining arguments must be
652  // constant integers.
653  for (unsigned i = 1; i != NumArgs; ++i) {
654    Expr *Arg = TheCall->getArg(i);
655    if (Arg->isTypeDependent())
656      continue;
657
658    QualType RWType = Arg->getType();
659
660    const BuiltinType *BT = RWType->getAsBuiltinType();
661    llvm::APSInt Result;
662    if (!BT || BT->getKind() != BuiltinType::Int)
663      return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
664              << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
665
666    if (Arg->isValueDependent())
667      continue;
668
669    if (!Arg->isIntegerConstantExpr(Result, Context))
670      return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
671        << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
672
673    // FIXME: gcc issues a warning and rewrites these to 0. These
674    // seems especially odd for the third argument since the default
675    // is 3.
676    if (i == 1) {
677      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
678        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
679             << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
680    } else {
681      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
682        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
683            << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
684    }
685  }
686
687  return false;
688}
689
690/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
691/// int type). This simply type checks that type is one of the defined
692/// constants (0-3).
693bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
694  Expr *Arg = TheCall->getArg(1);
695  if (Arg->isTypeDependent())
696    return false;
697
698  QualType ArgType = Arg->getType();
699  const BuiltinType *BT = ArgType->getAsBuiltinType();
700  llvm::APSInt Result(32);
701  if (!BT || BT->getKind() != BuiltinType::Int)
702    return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
703             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
704
705  if (Arg->isValueDependent())
706    return false;
707
708  if (!Arg->isIntegerConstantExpr(Result, Context)) {
709    return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
710             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
711  }
712
713  if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
714    return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
715             << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
716  }
717
718  return false;
719}
720
721/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val).
722/// This checks that val is a constant 1.
723bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) {
724  Expr *Arg = TheCall->getArg(1);
725  if (Arg->isTypeDependent() || Arg->isValueDependent())
726    return false;
727
728  llvm::APSInt Result(32);
729  if (!Arg->isIntegerConstantExpr(Result, Context) || Result != 1)
730    return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val)
731             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
732
733  return false;
734}
735
736// Handle i > 1 ? "x" : "y", recursivelly
737bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
738                                  bool HasVAListArg,
739                                  unsigned format_idx, unsigned firstDataArg) {
740  if (E->isTypeDependent() || E->isValueDependent())
741    return false;
742
743  switch (E->getStmtClass()) {
744  case Stmt::ConditionalOperatorClass: {
745    const ConditionalOperator *C = cast<ConditionalOperator>(E);
746    return SemaCheckStringLiteral(C->getLHS(), TheCall,
747                                  HasVAListArg, format_idx, firstDataArg)
748        && SemaCheckStringLiteral(C->getRHS(), TheCall,
749                                  HasVAListArg, format_idx, firstDataArg);
750  }
751
752  case Stmt::ImplicitCastExprClass: {
753    const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E);
754    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
755                                  format_idx, firstDataArg);
756  }
757
758  case Stmt::ParenExprClass: {
759    const ParenExpr *Expr = cast<ParenExpr>(E);
760    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
761                                  format_idx, firstDataArg);
762  }
763
764  case Stmt::DeclRefExprClass: {
765    const DeclRefExpr *DR = cast<DeclRefExpr>(E);
766
767    // As an exception, do not flag errors for variables binding to
768    // const string literals.
769    if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
770      bool isConstant = false;
771      QualType T = DR->getType();
772
773      if (const ArrayType *AT = Context.getAsArrayType(T)) {
774        isConstant = AT->getElementType().isConstant(Context);
775      } else if (const PointerType *PT = T->getAs<PointerType>()) {
776        isConstant = T.isConstant(Context) &&
777                     PT->getPointeeType().isConstant(Context);
778      }
779
780      if (isConstant) {
781        const VarDecl *Def = 0;
782        if (const Expr *Init = VD->getDefinition(Def))
783          return SemaCheckStringLiteral(Init, TheCall,
784                                        HasVAListArg, format_idx, firstDataArg);
785      }
786
787      // For vprintf* functions (i.e., HasVAListArg==true), we add a
788      // special check to see if the format string is a function parameter
789      // of the function calling the printf function.  If the function
790      // has an attribute indicating it is a printf-like function, then we
791      // should suppress warnings concerning non-literals being used in a call
792      // to a vprintf function.  For example:
793      //
794      // void
795      // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){
796      //      va_list ap;
797      //      va_start(ap, fmt);
798      //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt".
799      //      ...
800      //
801      //
802      //  FIXME: We don't have full attribute support yet, so just check to see
803      //    if the argument is a DeclRefExpr that references a parameter.  We'll
804      //    add proper support for checking the attribute later.
805      if (HasVAListArg)
806        if (isa<ParmVarDecl>(VD))
807          return true;
808    }
809
810    return false;
811  }
812
813  case Stmt::CallExprClass: {
814    const CallExpr *CE = cast<CallExpr>(E);
815    if (const ImplicitCastExpr *ICE
816          = dyn_cast<ImplicitCastExpr>(CE->getCallee())) {
817      if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) {
818        if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
819          if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) {
820            unsigned ArgIndex = FA->getFormatIdx();
821            const Expr *Arg = CE->getArg(ArgIndex - 1);
822
823            return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg,
824                                          format_idx, firstDataArg);
825          }
826        }
827      }
828    }
829
830    return false;
831  }
832  case Stmt::ObjCStringLiteralClass:
833  case Stmt::StringLiteralClass: {
834    const StringLiteral *StrE = NULL;
835
836    if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E))
837      StrE = ObjCFExpr->getString();
838    else
839      StrE = cast<StringLiteral>(E);
840
841    if (StrE) {
842      CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
843                        firstDataArg);
844      return true;
845    }
846
847    return false;
848  }
849
850  default:
851    return false;
852  }
853}
854
855void
856Sema::CheckNonNullArguments(const NonNullAttr *NonNull, const CallExpr *TheCall)
857{
858  for (NonNullAttr::iterator i = NonNull->begin(), e = NonNull->end();
859       i != e; ++i) {
860    const Expr *ArgExpr = TheCall->getArg(*i);
861    if (ArgExpr->isNullPointerConstant(Context))
862      Diag(TheCall->getCallee()->getLocStart(), diag::warn_null_arg)
863        << ArgExpr->getSourceRange();
864  }
865}
866
867/// CheckPrintfArguments - Check calls to printf (and similar functions) for
868/// correct use of format strings.
869///
870///  HasVAListArg - A predicate indicating whether the printf-like
871///    function is passed an explicit va_arg argument (e.g., vprintf)
872///
873///  format_idx - The index into Args for the format string.
874///
875/// Improper format strings to functions in the printf family can be
876/// the source of bizarre bugs and very serious security holes.  A
877/// good source of information is available in the following paper
878/// (which includes additional references):
879///
880///  FormatGuard: Automatic Protection From printf Format String
881///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
882///
883/// Functionality implemented:
884///
885///  We can statically check the following properties for string
886///  literal format strings for non v.*printf functions (where the
887///  arguments are passed directly):
888//
889///  (1) Are the number of format conversions equal to the number of
890///      data arguments?
891///
892///  (2) Does each format conversion correctly match the type of the
893///      corresponding data argument?  (TODO)
894///
895/// Moreover, for all printf functions we can:
896///
897///  (3) Check for a missing format string (when not caught by type checking).
898///
899///  (4) Check for no-operation flags; e.g. using "#" with format
900///      conversion 'c'  (TODO)
901///
902///  (5) Check the use of '%n', a major source of security holes.
903///
904///  (6) Check for malformed format conversions that don't specify anything.
905///
906///  (7) Check for empty format strings.  e.g: printf("");
907///
908///  (8) Check that the format string is a wide literal.
909///
910///  (9) Also check the arguments of functions with the __format__ attribute.
911///      (TODO).
912///
913/// All of these checks can be done by parsing the format string.
914///
915/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
916void
917Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
918                           unsigned format_idx, unsigned firstDataArg) {
919  const Expr *Fn = TheCall->getCallee();
920
921  // CHECK: printf-like function is called with no format string.
922  if (format_idx >= TheCall->getNumArgs()) {
923    Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
924      << Fn->getSourceRange();
925    return;
926  }
927
928  const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
929
930  // CHECK: format string is not a string literal.
931  //
932  // Dynamically generated format strings are difficult to
933  // automatically vet at compile time.  Requiring that format strings
934  // are string literals: (1) permits the checking of format strings by
935  // the compiler and thereby (2) can practically remove the source of
936  // many format string exploits.
937
938  // Format string can be either ObjC string (e.g. @"%d") or
939  // C string (e.g. "%d")
940  // ObjC string uses the same format specifiers as C string, so we can use
941  // the same format string checking logic for both ObjC and C strings.
942  if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx,
943                             firstDataArg))
944    return;  // Literal format string found, check done!
945
946  // If there are no arguments specified, warn with -Wformat-security, otherwise
947  // warn only with -Wformat-nonliteral.
948  if (TheCall->getNumArgs() == format_idx+1)
949    Diag(TheCall->getArg(format_idx)->getLocStart(),
950         diag::warn_printf_nonliteral_noargs)
951      << OrigFormatExpr->getSourceRange();
952  else
953    Diag(TheCall->getArg(format_idx)->getLocStart(),
954         diag::warn_printf_nonliteral)
955           << OrigFormatExpr->getSourceRange();
956}
957
958void Sema::CheckPrintfString(const StringLiteral *FExpr,
959                             const Expr *OrigFormatExpr,
960                             const CallExpr *TheCall, bool HasVAListArg,
961                             unsigned format_idx, unsigned firstDataArg) {
962
963  const ObjCStringLiteral *ObjCFExpr =
964    dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
965
966  // CHECK: is the format string a wide literal?
967  if (FExpr->isWide()) {
968    Diag(FExpr->getLocStart(),
969         diag::warn_printf_format_string_is_wide_literal)
970      << OrigFormatExpr->getSourceRange();
971    return;
972  }
973
974  // Str - The format string.  NOTE: this is NOT null-terminated!
975  const char *Str = FExpr->getStrData();
976
977  // CHECK: empty format string?
978  unsigned StrLen = FExpr->getByteLength();
979
980  if (StrLen == 0) {
981    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
982      << OrigFormatExpr->getSourceRange();
983    return;
984  }
985
986  // We process the format string using a binary state machine.  The
987  // current state is stored in CurrentState.
988  enum {
989    state_OrdChr,
990    state_Conversion
991  } CurrentState = state_OrdChr;
992
993  // numConversions - The number of conversions seen so far.  This is
994  //  incremented as we traverse the format string.
995  unsigned numConversions = 0;
996
997  // numDataArgs - The number of data arguments after the format
998  //  string.  This can only be determined for non vprintf-like
999  //  functions.  For those functions, this value is 1 (the sole
1000  //  va_arg argument).
1001  unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg;
1002
1003  // Inspect the format string.
1004  unsigned StrIdx = 0;
1005
1006  // LastConversionIdx - Index within the format string where we last saw
1007  //  a '%' character that starts a new format conversion.
1008  unsigned LastConversionIdx = 0;
1009
1010  for (; StrIdx < StrLen; ++StrIdx) {
1011
1012    // Is the number of detected conversion conversions greater than
1013    // the number of matching data arguments?  If so, stop.
1014    if (!HasVAListArg && numConversions > numDataArgs) break;
1015
1016    // Handle "\0"
1017    if (Str[StrIdx] == '\0') {
1018      // The string returned by getStrData() is not null-terminated,
1019      // so the presence of a null character is likely an error.
1020      Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
1021           diag::warn_printf_format_string_contains_null_char)
1022        <<  OrigFormatExpr->getSourceRange();
1023      return;
1024    }
1025
1026    // Ordinary characters (not processing a format conversion).
1027    if (CurrentState == state_OrdChr) {
1028      if (Str[StrIdx] == '%') {
1029        CurrentState = state_Conversion;
1030        LastConversionIdx = StrIdx;
1031      }
1032      continue;
1033    }
1034
1035    // Seen '%'.  Now processing a format conversion.
1036    switch (Str[StrIdx]) {
1037    // Handle dynamic precision or width specifier.
1038    case '*': {
1039      ++numConversions;
1040
1041      if (!HasVAListArg) {
1042        if (numConversions > numDataArgs) {
1043          SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
1044
1045          if (Str[StrIdx-1] == '.')
1046            Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
1047              << OrigFormatExpr->getSourceRange();
1048          else
1049            Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
1050              << OrigFormatExpr->getSourceRange();
1051
1052          // Don't do any more checking.  We'll just emit spurious errors.
1053          return;
1054        }
1055
1056        // Perform type checking on width/precision specifier.
1057        const Expr *E = TheCall->getArg(format_idx+numConversions);
1058        if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
1059          if (BT->getKind() == BuiltinType::Int)
1060            break;
1061
1062        SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
1063
1064        if (Str[StrIdx-1] == '.')
1065          Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
1066          << E->getType() << E->getSourceRange();
1067        else
1068          Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
1069          << E->getType() << E->getSourceRange();
1070
1071        break;
1072      }
1073    }
1074
1075    // Characters which can terminate a format conversion
1076    // (e.g. "%d").  Characters that specify length modifiers or
1077    // other flags are handled by the default case below.
1078    //
1079    // FIXME: additional checks will go into the following cases.
1080    case 'i':
1081    case 'd':
1082    case 'o':
1083    case 'u':
1084    case 'x':
1085    case 'X':
1086    case 'D':
1087    case 'O':
1088    case 'U':
1089    case 'e':
1090    case 'E':
1091    case 'f':
1092    case 'F':
1093    case 'g':
1094    case 'G':
1095    case 'a':
1096    case 'A':
1097    case 'c':
1098    case 'C':
1099    case 'S':
1100    case 's':
1101    case 'p':
1102      ++numConversions;
1103      CurrentState = state_OrdChr;
1104      break;
1105
1106    case 'm':
1107      // FIXME: Warn in situations where this isn't supported!
1108      CurrentState = state_OrdChr;
1109      break;
1110
1111    // CHECK: Are we using "%n"?  Issue a warning.
1112    case 'n': {
1113      ++numConversions;
1114      CurrentState = state_OrdChr;
1115      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
1116                                                          LastConversionIdx);
1117
1118      Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
1119      break;
1120    }
1121
1122    // Handle "%@"
1123    case '@':
1124      // %@ is allowed in ObjC format strings only.
1125      if(ObjCFExpr != NULL)
1126        CurrentState = state_OrdChr;
1127      else {
1128        // Issue a warning: invalid format conversion.
1129        SourceLocation Loc =
1130          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1131
1132        Diag(Loc, diag::warn_printf_invalid_conversion)
1133          <<  std::string(Str+LastConversionIdx,
1134                          Str+std::min(LastConversionIdx+2, StrLen))
1135          << OrigFormatExpr->getSourceRange();
1136      }
1137      ++numConversions;
1138      break;
1139
1140    // Handle "%%"
1141    case '%':
1142      // Sanity check: Was the first "%" character the previous one?
1143      // If not, we will assume that we have a malformed format
1144      // conversion, and that the current "%" character is the start
1145      // of a new conversion.
1146      if (StrIdx - LastConversionIdx == 1)
1147        CurrentState = state_OrdChr;
1148      else {
1149        // Issue a warning: invalid format conversion.
1150        SourceLocation Loc =
1151          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1152
1153        Diag(Loc, diag::warn_printf_invalid_conversion)
1154          << std::string(Str+LastConversionIdx, Str+StrIdx)
1155          << OrigFormatExpr->getSourceRange();
1156
1157        // This conversion is broken.  Advance to the next format
1158        // conversion.
1159        LastConversionIdx = StrIdx;
1160        ++numConversions;
1161      }
1162      break;
1163
1164    default:
1165      // This case catches all other characters: flags, widths, etc.
1166      // We should eventually process those as well.
1167      break;
1168    }
1169  }
1170
1171  if (CurrentState == state_Conversion) {
1172    // Issue a warning: invalid format conversion.
1173    SourceLocation Loc =
1174      getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1175
1176    Diag(Loc, diag::warn_printf_invalid_conversion)
1177      << std::string(Str+LastConversionIdx,
1178                     Str+std::min(LastConversionIdx+2, StrLen))
1179      << OrigFormatExpr->getSourceRange();
1180    return;
1181  }
1182
1183  if (!HasVAListArg) {
1184    // CHECK: Does the number of format conversions exceed the number
1185    //        of data arguments?
1186    if (numConversions > numDataArgs) {
1187      SourceLocation Loc =
1188        getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1189
1190      Diag(Loc, diag::warn_printf_insufficient_data_args)
1191        << OrigFormatExpr->getSourceRange();
1192    }
1193    // CHECK: Does the number of data arguments exceed the number of
1194    //        format conversions in the format string?
1195    else if (numConversions < numDataArgs)
1196      Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
1197           diag::warn_printf_too_many_data_args)
1198        << OrigFormatExpr->getSourceRange();
1199  }
1200}
1201
1202//===--- CHECK: Return Address of Stack Variable --------------------------===//
1203
1204static DeclRefExpr* EvalVal(Expr *E);
1205static DeclRefExpr* EvalAddr(Expr* E);
1206
1207/// CheckReturnStackAddr - Check if a return statement returns the address
1208///   of a stack variable.
1209void
1210Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
1211                           SourceLocation ReturnLoc) {
1212
1213  // Perform checking for returned stack addresses.
1214  if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
1215    if (DeclRefExpr *DR = EvalAddr(RetValExp))
1216      Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
1217       << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
1218
1219    // Skip over implicit cast expressions when checking for block expressions.
1220    if (ImplicitCastExpr *IcExpr =
1221          dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
1222      RetValExp = IcExpr->getSubExpr();
1223
1224    if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
1225      if (C->hasBlockDeclRefExprs())
1226        Diag(C->getLocStart(), diag::err_ret_local_block)
1227          << C->getSourceRange();
1228  } else if (lhsType->isReferenceType()) {
1229    // Perform checking for stack values returned by reference.
1230    // Check for a reference to the stack
1231    if (DeclRefExpr *DR = EvalVal(RetValExp))
1232      Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
1233        << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
1234  }
1235}
1236
1237/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
1238///  check if the expression in a return statement evaluates to an address
1239///  to a location on the stack.  The recursion is used to traverse the
1240///  AST of the return expression, with recursion backtracking when we
1241///  encounter a subexpression that (1) clearly does not lead to the address
1242///  of a stack variable or (2) is something we cannot determine leads to
1243///  the address of a stack variable based on such local checking.
1244///
1245///  EvalAddr processes expressions that are pointers that are used as
1246///  references (and not L-values).  EvalVal handles all other values.
1247///  At the base case of the recursion is a check for a DeclRefExpr* in
1248///  the refers to a stack variable.
1249///
1250///  This implementation handles:
1251///
1252///   * pointer-to-pointer casts
1253///   * implicit conversions from array references to pointers
1254///   * taking the address of fields
1255///   * arbitrary interplay between "&" and "*" operators
1256///   * pointer arithmetic from an address of a stack variable
1257///   * taking the address of an array element where the array is on the stack
1258static DeclRefExpr* EvalAddr(Expr *E) {
1259  // We should only be called for evaluating pointer expressions.
1260  assert((E->getType()->isAnyPointerType() ||
1261          E->getType()->isBlockPointerType() ||
1262          E->getType()->isObjCQualifiedIdType()) &&
1263         "EvalAddr only works on pointers");
1264
1265  // Our "symbolic interpreter" is just a dispatch off the currently
1266  // viewed AST node.  We then recursively traverse the AST by calling
1267  // EvalAddr and EvalVal appropriately.
1268  switch (E->getStmtClass()) {
1269  case Stmt::ParenExprClass:
1270    // Ignore parentheses.
1271    return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
1272
1273  case Stmt::UnaryOperatorClass: {
1274    // The only unary operator that make sense to handle here
1275    // is AddrOf.  All others don't make sense as pointers.
1276    UnaryOperator *U = cast<UnaryOperator>(E);
1277
1278    if (U->getOpcode() == UnaryOperator::AddrOf)
1279      return EvalVal(U->getSubExpr());
1280    else
1281      return NULL;
1282  }
1283
1284  case Stmt::BinaryOperatorClass: {
1285    // Handle pointer arithmetic.  All other binary operators are not valid
1286    // in this context.
1287    BinaryOperator *B = cast<BinaryOperator>(E);
1288    BinaryOperator::Opcode op = B->getOpcode();
1289
1290    if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
1291      return NULL;
1292
1293    Expr *Base = B->getLHS();
1294
1295    // Determine which argument is the real pointer base.  It could be
1296    // the RHS argument instead of the LHS.
1297    if (!Base->getType()->isPointerType()) Base = B->getRHS();
1298
1299    assert (Base->getType()->isPointerType());
1300    return EvalAddr(Base);
1301  }
1302
1303  // For conditional operators we need to see if either the LHS or RHS are
1304  // valid DeclRefExpr*s.  If one of them is valid, we return it.
1305  case Stmt::ConditionalOperatorClass: {
1306    ConditionalOperator *C = cast<ConditionalOperator>(E);
1307
1308    // Handle the GNU extension for missing LHS.
1309    if (Expr *lhsExpr = C->getLHS())
1310      if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
1311        return LHS;
1312
1313     return EvalAddr(C->getRHS());
1314  }
1315
1316  // For casts, we need to handle conversions from arrays to
1317  // pointer values, and pointer-to-pointer conversions.
1318  case Stmt::ImplicitCastExprClass:
1319  case Stmt::CStyleCastExprClass:
1320  case Stmt::CXXFunctionalCastExprClass: {
1321    Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
1322    QualType T = SubExpr->getType();
1323
1324    if (SubExpr->getType()->isPointerType() ||
1325        SubExpr->getType()->isBlockPointerType() ||
1326        SubExpr->getType()->isObjCQualifiedIdType())
1327      return EvalAddr(SubExpr);
1328    else if (T->isArrayType())
1329      return EvalVal(SubExpr);
1330    else
1331      return 0;
1332  }
1333
1334  // C++ casts.  For dynamic casts, static casts, and const casts, we
1335  // are always converting from a pointer-to-pointer, so we just blow
1336  // through the cast.  In the case the dynamic cast doesn't fail (and
1337  // return NULL), we take the conservative route and report cases
1338  // where we return the address of a stack variable.  For Reinterpre
1339  // FIXME: The comment about is wrong; we're not always converting
1340  // from pointer to pointer. I'm guessing that this code should also
1341  // handle references to objects.
1342  case Stmt::CXXStaticCastExprClass:
1343  case Stmt::CXXDynamicCastExprClass:
1344  case Stmt::CXXConstCastExprClass:
1345  case Stmt::CXXReinterpretCastExprClass: {
1346      Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
1347      if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
1348        return EvalAddr(S);
1349      else
1350        return NULL;
1351  }
1352
1353  // Everything else: we simply don't reason about them.
1354  default:
1355    return NULL;
1356  }
1357}
1358
1359
1360///  EvalVal - This function is complements EvalAddr in the mutual recursion.
1361///   See the comments for EvalAddr for more details.
1362static DeclRefExpr* EvalVal(Expr *E) {
1363
1364  // We should only be called for evaluating non-pointer expressions, or
1365  // expressions with a pointer type that are not used as references but instead
1366  // are l-values (e.g., DeclRefExpr with a pointer type).
1367
1368  // Our "symbolic interpreter" is just a dispatch off the currently
1369  // viewed AST node.  We then recursively traverse the AST by calling
1370  // EvalAddr and EvalVal appropriately.
1371  switch (E->getStmtClass()) {
1372  case Stmt::DeclRefExprClass:
1373  case Stmt::QualifiedDeclRefExprClass: {
1374    // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
1375    //  at code that refers to a variable's name.  We check if it has local
1376    //  storage within the function, and if so, return the expression.
1377    DeclRefExpr *DR = cast<DeclRefExpr>(E);
1378
1379    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
1380      if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;
1381
1382    return NULL;
1383  }
1384
1385  case Stmt::ParenExprClass:
1386    // Ignore parentheses.
1387    return EvalVal(cast<ParenExpr>(E)->getSubExpr());
1388
1389  case Stmt::UnaryOperatorClass: {
1390    // The only unary operator that make sense to handle here
1391    // is Deref.  All others don't resolve to a "name."  This includes
1392    // handling all sorts of rvalues passed to a unary operator.
1393    UnaryOperator *U = cast<UnaryOperator>(E);
1394
1395    if (U->getOpcode() == UnaryOperator::Deref)
1396      return EvalAddr(U->getSubExpr());
1397
1398    return NULL;
1399  }
1400
1401  case Stmt::ArraySubscriptExprClass: {
1402    // Array subscripts are potential references to data on the stack.  We
1403    // retrieve the DeclRefExpr* for the array variable if it indeed
1404    // has local storage.
1405    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
1406  }
1407
1408  case Stmt::ConditionalOperatorClass: {
1409    // For conditional operators we need to see if either the LHS or RHS are
1410    // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
1411    ConditionalOperator *C = cast<ConditionalOperator>(E);
1412
1413    // Handle the GNU extension for missing LHS.
1414    if (Expr *lhsExpr = C->getLHS())
1415      if (DeclRefExpr *LHS = EvalVal(lhsExpr))
1416        return LHS;
1417
1418    return EvalVal(C->getRHS());
1419  }
1420
1421  // Accesses to members are potential references to data on the stack.
1422  case Stmt::MemberExprClass: {
1423    MemberExpr *M = cast<MemberExpr>(E);
1424
1425    // Check for indirect access.  We only want direct field accesses.
1426    if (!M->isArrow())
1427      return EvalVal(M->getBase());
1428    else
1429      return NULL;
1430  }
1431
1432  // Everything else: we simply don't reason about them.
1433  default:
1434    return NULL;
1435  }
1436}
1437
1438//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
1439
1440/// Check for comparisons of floating point operands using != and ==.
1441/// Issue a warning if these are no self-comparisons, as they are not likely
1442/// to do what the programmer intended.
1443void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
1444  bool EmitWarning = true;
1445
1446  Expr* LeftExprSansParen = lex->IgnoreParens();
1447  Expr* RightExprSansParen = rex->IgnoreParens();
1448
1449  // Special case: check for x == x (which is OK).
1450  // Do not emit warnings for such cases.
1451  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
1452    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
1453      if (DRL->getDecl() == DRR->getDecl())
1454        EmitWarning = false;
1455
1456
1457  // Special case: check for comparisons against literals that can be exactly
1458  //  represented by APFloat.  In such cases, do not emit a warning.  This
1459  //  is a heuristic: often comparison against such literals are used to
1460  //  detect if a value in a variable has not changed.  This clearly can
1461  //  lead to false negatives.
1462  if (EmitWarning) {
1463    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
1464      if (FLL->isExact())
1465        EmitWarning = false;
1466    } else
1467      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
1468        if (FLR->isExact())
1469          EmitWarning = false;
1470    }
1471  }
1472
1473  // Check for comparisons with builtin types.
1474  if (EmitWarning)
1475    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
1476      if (CL->isBuiltinCall(Context))
1477        EmitWarning = false;
1478
1479  if (EmitWarning)
1480    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
1481      if (CR->isBuiltinCall(Context))
1482        EmitWarning = false;
1483
1484  // Emit the diagnostic.
1485  if (EmitWarning)
1486    Diag(loc, diag::warn_floatingpoint_eq)
1487      << lex->getSourceRange() << rex->getSourceRange();
1488}
1489