SemaChecking.cpp revision 370ab3f1373841d70582feac9e35c3c6b3489f63
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements extra semantic analysis beyond what is enforced
11//  by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/DeclObjC.h"
18#include "clang/AST/ExprCXX.h"
19#include "clang/AST/ExprObjC.h"
20#include "clang/Lex/Preprocessor.h"
21#include "SemaUtil.h"
22using namespace clang;
23
24/// CheckFunctionCall - Check a direct function call for various correctness
25/// and safety properties not strictly enforced by the C type system.
26Action::OwningExprResult
27Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
28  OwningExprResult TheCallResult(Owned(TheCall));
29  // Get the IdentifierInfo* for the called function.
30  IdentifierInfo *FnInfo = FDecl->getIdentifier();
31
32  // None of the checks below are needed for functions that don't have
33  // simple names (e.g., C++ conversion functions).
34  if (!FnInfo)
35    return move(TheCallResult);
36
37  switch (FDecl->getBuiltinID()) {
38  case Builtin::BI__builtin___CFStringMakeConstantString:
39    assert(TheCall->getNumArgs() == 1 &&
40           "Wrong # arguments to builtin CFStringMakeConstantString");
41    if (CheckBuiltinCFStringArgument(TheCall->getArg(0)))
42      return ExprError();
43    return move(TheCallResult);
44  case Builtin::BI__builtin_stdarg_start:
45  case Builtin::BI__builtin_va_start:
46    if (SemaBuiltinVAStart(TheCall))
47      return ExprError();
48    return move(TheCallResult);
49  case Builtin::BI__builtin_isgreater:
50  case Builtin::BI__builtin_isgreaterequal:
51  case Builtin::BI__builtin_isless:
52  case Builtin::BI__builtin_islessequal:
53  case Builtin::BI__builtin_islessgreater:
54  case Builtin::BI__builtin_isunordered:
55    if (SemaBuiltinUnorderedCompare(TheCall))
56      return ExprError();
57    return move(TheCallResult);
58  case Builtin::BI__builtin_return_address:
59  case Builtin::BI__builtin_frame_address:
60    if (SemaBuiltinStackAddress(TheCall))
61      return ExprError();
62    return move(TheCallResult);
63  case Builtin::BI__builtin_shufflevector:
64    return SemaBuiltinShuffleVector(TheCall);
65    // TheCall will be freed by the smart pointer here, but that's fine, since
66    // SemaBuiltinShuffleVector guts it, but then doesn't release it.
67  case Builtin::BI__builtin_prefetch:
68    if (SemaBuiltinPrefetch(TheCall))
69      return ExprError();
70    return move(TheCallResult);
71  case Builtin::BI__builtin_object_size:
72    if (SemaBuiltinObjectSize(TheCall))
73      return ExprError();
74  }
75
76  // FIXME: This mechanism should be abstracted to be less fragile and
77  // more efficient. For example, just map function ids to custom
78  // handlers.
79
80  // Printf checking.
81  unsigned format_idx = 0;
82  bool HasVAListArg = false;
83  if (FDecl->getBuiltinID() &&
84      Context.BuiltinInfo.isPrintfLike(FDecl->getBuiltinID(), format_idx,
85                                       HasVAListArg)) {
86    // Found a printf builtin.
87  } else if (FnInfo == KnownFunctionIDs[id_NSLog]) {
88    format_idx = 0;
89    HasVAListArg = false;
90  } else if (FnInfo == KnownFunctionIDs[id_asprintf]) {
91    format_idx = 1;
92    HasVAListArg = false;
93  } else if (FnInfo == KnownFunctionIDs[id_vasprintf]) {
94    format_idx = 1;
95    HasVAListArg = true;
96  } else {
97    return move(TheCallResult);
98  }
99
100  CheckPrintfArguments(TheCall, HasVAListArg, format_idx);
101
102  return move(TheCallResult);
103}
104
105/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
106/// CFString constructor is correct
107bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) {
108  Arg = Arg->IgnoreParenCasts();
109
110  StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
111
112  if (!Literal || Literal->isWide()) {
113    Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
114      << Arg->getSourceRange();
115    return true;
116  }
117
118  const char *Data = Literal->getStrData();
119  unsigned Length = Literal->getByteLength();
120
121  for (unsigned i = 0; i < Length; ++i) {
122    if (!isascii(Data[i])) {
123      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
124           diag::warn_cfstring_literal_contains_non_ascii_character)
125        << Arg->getSourceRange();
126      break;
127    }
128
129    if (!Data[i]) {
130      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
131           diag::warn_cfstring_literal_contains_nul_character)
132        << Arg->getSourceRange();
133      break;
134    }
135  }
136
137  return false;
138}
139
140/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
141/// Emit an error and return true on failure, return false on success.
142bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
143  Expr *Fn = TheCall->getCallee();
144  if (TheCall->getNumArgs() > 2) {
145    Diag(TheCall->getArg(2)->getLocStart(),
146         diag::err_typecheck_call_too_many_args)
147      << 0 /*function call*/ << Fn->getSourceRange()
148      << SourceRange(TheCall->getArg(2)->getLocStart(),
149                     (*(TheCall->arg_end()-1))->getLocEnd());
150    return true;
151  }
152
153  if (TheCall->getNumArgs() < 2) {
154    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
155      << 0 /*function call*/;
156  }
157
158  // Determine whether the current function is variadic or not.
159  bool isVariadic;
160  if (getCurFunctionDecl()) {
161    if (FunctionTypeProto* FTP =
162            dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType()))
163      isVariadic = FTP->isVariadic();
164    else
165      isVariadic = false;
166  } else {
167    isVariadic = getCurMethodDecl()->isVariadic();
168  }
169
170  if (!isVariadic) {
171    Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
172    return true;
173  }
174
175  // Verify that the second argument to the builtin is the last argument of the
176  // current function or method.
177  bool SecondArgIsLastNamedArgument = false;
178  const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
179
180  if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
181    if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
182      // FIXME: This isn't correct for methods (results in bogus warning).
183      // Get the last formal in the current function.
184      const ParmVarDecl *LastArg;
185      if (FunctionDecl *FD = getCurFunctionDecl())
186        LastArg = *(FD->param_end()-1);
187      else
188        LastArg = *(getCurMethodDecl()->param_end()-1);
189      SecondArgIsLastNamedArgument = PV == LastArg;
190    }
191  }
192
193  if (!SecondArgIsLastNamedArgument)
194    Diag(TheCall->getArg(1)->getLocStart(),
195         diag::warn_second_parameter_of_va_start_not_last_named_argument);
196  return false;
197}
198
199/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
200/// friends.  This is declared to take (...), so we have to check everything.
201bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
202  if (TheCall->getNumArgs() < 2)
203    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
204      << 0 /*function call*/;
205  if (TheCall->getNumArgs() > 2)
206    return Diag(TheCall->getArg(2)->getLocStart(),
207                diag::err_typecheck_call_too_many_args)
208      << 0 /*function call*/
209      << SourceRange(TheCall->getArg(2)->getLocStart(),
210                     (*(TheCall->arg_end()-1))->getLocEnd());
211
212  Expr *OrigArg0 = TheCall->getArg(0);
213  Expr *OrigArg1 = TheCall->getArg(1);
214
215  // Do standard promotions between the two arguments, returning their common
216  // type.
217  QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
218
219  // If the common type isn't a real floating type, then the arguments were
220  // invalid for this operation.
221  if (!Res->isRealFloatingType())
222    return Diag(OrigArg0->getLocStart(),
223                diag::err_typecheck_call_invalid_ordered_compare)
224      << OrigArg0->getType() << OrigArg1->getType()
225      << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
226
227  return false;
228}
229
230bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
231  // The signature for these builtins is exact; the only thing we need
232  // to check is that the argument is a constant.
233  SourceLocation Loc;
234  if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
235    return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
236
237  return false;
238}
239
240/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
241// This is declared to take (...), so we have to check everything.
242Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
243  if (TheCall->getNumArgs() < 3)
244    return ExprError(Diag(TheCall->getLocEnd(),
245                          diag::err_typecheck_call_too_few_args)
246      << 0 /*function call*/ << TheCall->getSourceRange());
247
248  QualType FAType = TheCall->getArg(0)->getType();
249  QualType SAType = TheCall->getArg(1)->getType();
250
251  if (!FAType->isVectorType() || !SAType->isVectorType()) {
252    Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
253      << SourceRange(TheCall->getArg(0)->getLocStart(),
254                     TheCall->getArg(1)->getLocEnd());
255    return ExprError();
256  }
257
258  if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
259      Context.getCanonicalType(SAType).getUnqualifiedType()) {
260    Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
261      << SourceRange(TheCall->getArg(0)->getLocStart(),
262                     TheCall->getArg(1)->getLocEnd());
263    return ExprError();
264  }
265
266  unsigned numElements = FAType->getAsVectorType()->getNumElements();
267  if (TheCall->getNumArgs() != numElements+2) {
268    if (TheCall->getNumArgs() < numElements+2)
269      return ExprError(Diag(TheCall->getLocEnd(),
270                            diag::err_typecheck_call_too_few_args)
271               << 0 /*function call*/ << TheCall->getSourceRange());
272    return ExprError(Diag(TheCall->getLocEnd(),
273                          diag::err_typecheck_call_too_many_args)
274             << 0 /*function call*/ << TheCall->getSourceRange());
275  }
276
277  for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
278    llvm::APSInt Result(32);
279    if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
280      return ExprError(Diag(TheCall->getLocStart(),
281                  diag::err_shufflevector_nonconstant_argument)
282                << TheCall->getArg(i)->getSourceRange());
283
284    if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
285      return ExprError(Diag(TheCall->getLocStart(),
286                  diag::err_shufflevector_argument_too_large)
287               << TheCall->getArg(i)->getSourceRange());
288  }
289
290  llvm::SmallVector<Expr*, 32> exprs;
291
292  for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
293    exprs.push_back(TheCall->getArg(i));
294    TheCall->setArg(i, 0);
295  }
296
297  return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2,
298                                            FAType,
299                                            TheCall->getCallee()->getLocStart(),
300                                            TheCall->getRParenLoc()));
301}
302
303/// SemaBuiltinPrefetch - Handle __builtin_prefetch.
304// This is declared to take (const void*, ...) and can take two
305// optional constant int args.
306bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
307  unsigned NumArgs = TheCall->getNumArgs();
308
309  if (NumArgs > 3)
310    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
311             << 0 /*function call*/ << TheCall->getSourceRange();
312
313  // Argument 0 is checked for us and the remaining arguments must be
314  // constant integers.
315  for (unsigned i = 1; i != NumArgs; ++i) {
316    Expr *Arg = TheCall->getArg(i);
317    QualType RWType = Arg->getType();
318
319    const BuiltinType *BT = RWType->getAsBuiltinType();
320    llvm::APSInt Result;
321    if (!BT || BT->getKind() != BuiltinType::Int ||
322        !Arg->isIntegerConstantExpr(Result, Context))
323      return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
324              << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
325
326    // FIXME: gcc issues a warning and rewrites these to 0. These
327    // seems especially odd for the third argument since the default
328    // is 3.
329    if (i == 1) {
330      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
331        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
332             << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
333    } else {
334      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
335        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
336            << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
337    }
338  }
339
340  return false;
341}
342
343/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
344/// int type). This simply type checks that type is one of the defined
345/// constants (0-3).
346bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
347  Expr *Arg = TheCall->getArg(1);
348  QualType ArgType = Arg->getType();
349  const BuiltinType *BT = ArgType->getAsBuiltinType();
350  llvm::APSInt Result(32);
351  if (!BT || BT->getKind() != BuiltinType::Int ||
352      !Arg->isIntegerConstantExpr(Result, Context)) {
353    return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
354             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
355  }
356
357  if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
358    return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
359             << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
360  }
361
362  return false;
363}
364
365// Handle i > 1 ? "x" : "y", recursivelly
366bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg,
367                                  unsigned format_idx) {
368
369  switch (E->getStmtClass()) {
370  case Stmt::ConditionalOperatorClass: {
371    ConditionalOperator *C = cast<ConditionalOperator>(E);
372    return SemaCheckStringLiteral(C->getLHS(), TheCall,
373                                  HasVAListArg, format_idx)
374        && SemaCheckStringLiteral(C->getRHS(), TheCall,
375                                  HasVAListArg, format_idx);
376  }
377
378  case Stmt::ImplicitCastExprClass: {
379    ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E);
380    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
381                                  format_idx);
382  }
383
384  case Stmt::ParenExprClass: {
385    ParenExpr *Expr = dyn_cast<ParenExpr>(E);
386    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
387                                  format_idx);
388  }
389
390  default: {
391    ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E);
392    StringLiteral *StrE = NULL;
393
394    if (ObjCFExpr)
395      StrE = ObjCFExpr->getString();
396    else
397      StrE = dyn_cast<StringLiteral>(E);
398
399    if (StrE) {
400      CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx);
401      return true;
402    }
403
404    return false;
405  }
406  }
407}
408
409
410/// CheckPrintfArguments - Check calls to printf (and similar functions) for
411/// correct use of format strings.
412///
413///  HasVAListArg - A predicate indicating whether the printf-like
414///    function is passed an explicit va_arg argument (e.g., vprintf)
415///
416///  format_idx - The index into Args for the format string.
417///
418/// Improper format strings to functions in the printf family can be
419/// the source of bizarre bugs and very serious security holes.  A
420/// good source of information is available in the following paper
421/// (which includes additional references):
422///
423///  FormatGuard: Automatic Protection From printf Format String
424///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
425///
426/// Functionality implemented:
427///
428///  We can statically check the following properties for string
429///  literal format strings for non v.*printf functions (where the
430///  arguments are passed directly):
431//
432///  (1) Are the number of format conversions equal to the number of
433///      data arguments?
434///
435///  (2) Does each format conversion correctly match the type of the
436///      corresponding data argument?  (TODO)
437///
438/// Moreover, for all printf functions we can:
439///
440///  (3) Check for a missing format string (when not caught by type checking).
441///
442///  (4) Check for no-operation flags; e.g. using "#" with format
443///      conversion 'c'  (TODO)
444///
445///  (5) Check the use of '%n', a major source of security holes.
446///
447///  (6) Check for malformed format conversions that don't specify anything.
448///
449///  (7) Check for empty format strings.  e.g: printf("");
450///
451///  (8) Check that the format string is a wide literal.
452///
453///  (9) Also check the arguments of functions with the __format__ attribute.
454///      (TODO).
455///
456/// All of these checks can be done by parsing the format string.
457///
458/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
459void
460Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
461                           unsigned format_idx) {
462  Expr *Fn = TheCall->getCallee();
463
464  // CHECK: printf-like function is called with no format string.
465  if (format_idx >= TheCall->getNumArgs()) {
466    Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
467      << Fn->getSourceRange();
468    return;
469  }
470
471  Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
472
473  // CHECK: format string is not a string literal.
474  //
475  // Dynamically generated format strings are difficult to
476  // automatically vet at compile time.  Requiring that format strings
477  // are string literals: (1) permits the checking of format strings by
478  // the compiler and thereby (2) can practically remove the source of
479  // many format string exploits.
480
481  // Format string can be either ObjC string (e.g. @"%d") or
482  // C string (e.g. "%d")
483  // ObjC string uses the same format specifiers as C string, so we can use
484  // the same format string checking logic for both ObjC and C strings.
485  bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx);
486
487  if (!isFExpr) {
488    // For vprintf* functions (i.e., HasVAListArg==true), we add a
489    // special check to see if the format string is a function parameter
490    // of the function calling the printf function.  If the function
491    // has an attribute indicating it is a printf-like function, then we
492    // should suppress warnings concerning non-literals being used in a call
493    // to a vprintf function.  For example:
494    //
495    // void
496    // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
497    //      va_list ap;
498    //      va_start(ap, fmt);
499    //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt".
500    //      ...
501    //
502    //
503    //  FIXME: We don't have full attribute support yet, so just check to see
504    //    if the argument is a DeclRefExpr that references a parameter.  We'll
505    //    add proper support for checking the attribute later.
506    if (HasVAListArg)
507      if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
508        if (isa<ParmVarDecl>(DR->getDecl()))
509          return;
510
511    Diag(TheCall->getArg(format_idx)->getLocStart(),
512         diag::warn_printf_not_string_constant)
513      << OrigFormatExpr->getSourceRange();
514    return;
515  }
516}
517
518void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
519      CallExpr *TheCall, bool HasVAListArg, unsigned format_idx) {
520
521  ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
522  // CHECK: is the format string a wide literal?
523  if (FExpr->isWide()) {
524    Diag(FExpr->getLocStart(),
525         diag::warn_printf_format_string_is_wide_literal)
526      << OrigFormatExpr->getSourceRange();
527    return;
528  }
529
530  // Str - The format string.  NOTE: this is NOT null-terminated!
531  const char * const Str = FExpr->getStrData();
532
533  // CHECK: empty format string?
534  const unsigned StrLen = FExpr->getByteLength();
535
536  if (StrLen == 0) {
537    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
538      << OrigFormatExpr->getSourceRange();
539    return;
540  }
541
542  // We process the format string using a binary state machine.  The
543  // current state is stored in CurrentState.
544  enum {
545    state_OrdChr,
546    state_Conversion
547  } CurrentState = state_OrdChr;
548
549  // numConversions - The number of conversions seen so far.  This is
550  //  incremented as we traverse the format string.
551  unsigned numConversions = 0;
552
553  // numDataArgs - The number of data arguments after the format
554  //  string.  This can only be determined for non vprintf-like
555  //  functions.  For those functions, this value is 1 (the sole
556  //  va_arg argument).
557  unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1);
558
559  // Inspect the format string.
560  unsigned StrIdx = 0;
561
562  // LastConversionIdx - Index within the format string where we last saw
563  //  a '%' character that starts a new format conversion.
564  unsigned LastConversionIdx = 0;
565
566  for (; StrIdx < StrLen; ++StrIdx) {
567
568    // Is the number of detected conversion conversions greater than
569    // the number of matching data arguments?  If so, stop.
570    if (!HasVAListArg && numConversions > numDataArgs) break;
571
572    // Handle "\0"
573    if (Str[StrIdx] == '\0') {
574      // The string returned by getStrData() is not null-terminated,
575      // so the presence of a null character is likely an error.
576      Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
577           diag::warn_printf_format_string_contains_null_char)
578        <<  OrigFormatExpr->getSourceRange();
579      return;
580    }
581
582    // Ordinary characters (not processing a format conversion).
583    if (CurrentState == state_OrdChr) {
584      if (Str[StrIdx] == '%') {
585        CurrentState = state_Conversion;
586        LastConversionIdx = StrIdx;
587      }
588      continue;
589    }
590
591    // Seen '%'.  Now processing a format conversion.
592    switch (Str[StrIdx]) {
593    // Handle dynamic precision or width specifier.
594    case '*': {
595      ++numConversions;
596
597      if (!HasVAListArg && numConversions > numDataArgs) {
598        SourceLocation Loc = FExpr->getLocStart();
599        Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
600
601        if (Str[StrIdx-1] == '.')
602          Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
603            << OrigFormatExpr->getSourceRange();
604        else
605          Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
606            << OrigFormatExpr->getSourceRange();
607
608        // Don't do any more checking.  We'll just emit spurious errors.
609        return;
610      }
611
612      // Perform type checking on width/precision specifier.
613      Expr *E = TheCall->getArg(format_idx+numConversions);
614      if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
615        if (BT->getKind() == BuiltinType::Int)
616          break;
617
618      SourceLocation Loc =
619        PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
620
621      if (Str[StrIdx-1] == '.')
622        Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
623          << E->getType() << E->getSourceRange();
624      else
625        Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
626          << E->getType() << E->getSourceRange();
627
628      break;
629    }
630
631    // Characters which can terminate a format conversion
632    // (e.g. "%d").  Characters that specify length modifiers or
633    // other flags are handled by the default case below.
634    //
635    // FIXME: additional checks will go into the following cases.
636    case 'i':
637    case 'd':
638    case 'o':
639    case 'u':
640    case 'x':
641    case 'X':
642    case 'D':
643    case 'O':
644    case 'U':
645    case 'e':
646    case 'E':
647    case 'f':
648    case 'F':
649    case 'g':
650    case 'G':
651    case 'a':
652    case 'A':
653    case 'c':
654    case 'C':
655    case 'S':
656    case 's':
657    case 'p':
658      ++numConversions;
659      CurrentState = state_OrdChr;
660      break;
661
662    // CHECK: Are we using "%n"?  Issue a warning.
663    case 'n': {
664      ++numConversions;
665      CurrentState = state_OrdChr;
666      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
667                                                      LastConversionIdx+1);
668
669      Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
670      break;
671    }
672
673    // Handle "%@"
674    case '@':
675      // %@ is allowed in ObjC format strings only.
676      if(ObjCFExpr != NULL)
677        CurrentState = state_OrdChr;
678      else {
679        // Issue a warning: invalid format conversion.
680        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
681                                                    LastConversionIdx+1);
682
683        Diag(Loc, diag::warn_printf_invalid_conversion)
684          <<  std::string(Str+LastConversionIdx,
685                          Str+std::min(LastConversionIdx+2, StrLen))
686          << OrigFormatExpr->getSourceRange();
687      }
688      ++numConversions;
689      break;
690
691    // Handle "%%"
692    case '%':
693      // Sanity check: Was the first "%" character the previous one?
694      // If not, we will assume that we have a malformed format
695      // conversion, and that the current "%" character is the start
696      // of a new conversion.
697      if (StrIdx - LastConversionIdx == 1)
698        CurrentState = state_OrdChr;
699      else {
700        // Issue a warning: invalid format conversion.
701        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
702                                                        LastConversionIdx+1);
703
704        Diag(Loc, diag::warn_printf_invalid_conversion)
705          << std::string(Str+LastConversionIdx, Str+StrIdx)
706          << OrigFormatExpr->getSourceRange();
707
708        // This conversion is broken.  Advance to the next format
709        // conversion.
710        LastConversionIdx = StrIdx;
711        ++numConversions;
712      }
713      break;
714
715    default:
716      // This case catches all other characters: flags, widths, etc.
717      // We should eventually process those as well.
718      break;
719    }
720  }
721
722  if (CurrentState == state_Conversion) {
723    // Issue a warning: invalid format conversion.
724    SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
725                                                    LastConversionIdx+1);
726
727    Diag(Loc, diag::warn_printf_invalid_conversion)
728      << std::string(Str+LastConversionIdx,
729                     Str+std::min(LastConversionIdx+2, StrLen))
730      << OrigFormatExpr->getSourceRange();
731    return;
732  }
733
734  if (!HasVAListArg) {
735    // CHECK: Does the number of format conversions exceed the number
736    //        of data arguments?
737    if (numConversions > numDataArgs) {
738      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
739                                                      LastConversionIdx);
740
741      Diag(Loc, diag::warn_printf_insufficient_data_args)
742        << OrigFormatExpr->getSourceRange();
743    }
744    // CHECK: Does the number of data arguments exceed the number of
745    //        format conversions in the format string?
746    else if (numConversions < numDataArgs)
747      Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
748           diag::warn_printf_too_many_data_args)
749        << OrigFormatExpr->getSourceRange();
750  }
751}
752
753//===--- CHECK: Return Address of Stack Variable --------------------------===//
754
755static DeclRefExpr* EvalVal(Expr *E);
756static DeclRefExpr* EvalAddr(Expr* E);
757
758/// CheckReturnStackAddr - Check if a return statement returns the address
759///   of a stack variable.
760void
761Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
762                           SourceLocation ReturnLoc) {
763
764  // Perform checking for returned stack addresses.
765  if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
766    if (DeclRefExpr *DR = EvalAddr(RetValExp))
767      Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
768       << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
769
770    // Skip over implicit cast expressions when checking for block expressions.
771    if (ImplicitCastExpr *IcExpr =
772          dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
773      RetValExp = IcExpr->getSubExpr();
774
775    if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
776      Diag(C->getLocStart(), diag::err_ret_local_block)
777        << C->getSourceRange();
778  }
779  // Perform checking for stack values returned by reference.
780  else if (lhsType->isReferenceType()) {
781    // Check for a reference to the stack
782    if (DeclRefExpr *DR = EvalVal(RetValExp))
783      Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
784        << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
785  }
786}
787
788/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
789///  check if the expression in a return statement evaluates to an address
790///  to a location on the stack.  The recursion is used to traverse the
791///  AST of the return expression, with recursion backtracking when we
792///  encounter a subexpression that (1) clearly does not lead to the address
793///  of a stack variable or (2) is something we cannot determine leads to
794///  the address of a stack variable based on such local checking.
795///
796///  EvalAddr processes expressions that are pointers that are used as
797///  references (and not L-values).  EvalVal handles all other values.
///  At the base case of the recursion is a check for a DeclRefExpr* that
///  refers to a stack variable.
800///
801///  This implementation handles:
802///
803///   * pointer-to-pointer casts
804///   * implicit conversions from array references to pointers
805///   * taking the address of fields
806///   * arbitrary interplay between "&" and "*" operators
807///   * pointer arithmetic from an address of a stack variable
808///   * taking the address of an array element where the array is on the stack
809static DeclRefExpr* EvalAddr(Expr *E) {
810  // We should only be called for evaluating pointer expressions.
811  assert((E->getType()->isPointerType() ||
812          E->getType()->isBlockPointerType() ||
813          E->getType()->isObjCQualifiedIdType()) &&
814         "EvalAddr only works on pointers");
815
816  // Our "symbolic interpreter" is just a dispatch off the currently
817  // viewed AST node.  We then recursively traverse the AST by calling
818  // EvalAddr and EvalVal appropriately.
819  switch (E->getStmtClass()) {
820  case Stmt::ParenExprClass:
821    // Ignore parentheses.
822    return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
823
824  case Stmt::UnaryOperatorClass: {
825    // The only unary operator that make sense to handle here
826    // is AddrOf.  All others don't make sense as pointers.
827    UnaryOperator *U = cast<UnaryOperator>(E);
828
829    if (U->getOpcode() == UnaryOperator::AddrOf)
830      return EvalVal(U->getSubExpr());
831    else
832      return NULL;
833  }
834
835  case Stmt::BinaryOperatorClass: {
836    // Handle pointer arithmetic.  All other binary operators are not valid
837    // in this context.
838    BinaryOperator *B = cast<BinaryOperator>(E);
839    BinaryOperator::Opcode op = B->getOpcode();
840
841    if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
842      return NULL;
843
844    Expr *Base = B->getLHS();
845
846    // Determine which argument is the real pointer base.  It could be
847    // the RHS argument instead of the LHS.
848    if (!Base->getType()->isPointerType()) Base = B->getRHS();
849
850    assert (Base->getType()->isPointerType());
851    return EvalAddr(Base);
852  }
853
854  // For conditional operators we need to see if either the LHS or RHS are
855  // valid DeclRefExpr*s.  If one of them is valid, we return it.
856  case Stmt::ConditionalOperatorClass: {
857    ConditionalOperator *C = cast<ConditionalOperator>(E);
858
859    // Handle the GNU extension for missing LHS.
860    if (Expr *lhsExpr = C->getLHS())
861      if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
862        return LHS;
863
864     return EvalAddr(C->getRHS());
865  }
866
867  // For casts, we need to handle conversions from arrays to
868  // pointer values, and pointer-to-pointer conversions.
869  case Stmt::ImplicitCastExprClass:
870  case Stmt::CStyleCastExprClass:
871  case Stmt::CXXFunctionalCastExprClass: {
872    Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
873    QualType T = SubExpr->getType();
874
875    if (SubExpr->getType()->isPointerType() ||
876        SubExpr->getType()->isBlockPointerType() ||
877        SubExpr->getType()->isObjCQualifiedIdType())
878      return EvalAddr(SubExpr);
879    else if (T->isArrayType())
880      return EvalVal(SubExpr);
881    else
882      return 0;
883  }
884
885  // C++ casts.  For dynamic casts, static casts, and const casts, we
886  // are always converting from a pointer-to-pointer, so we just blow
887  // through the cast.  In the case the dynamic cast doesn't fail (and
888  // return NULL), we take the conservative route and report cases
889  // where we return the address of a stack variable.  For Reinterpre
890  // FIXME: The comment about is wrong; we're not always converting
891  // from pointer to pointer. I'm guessing that this code should also
892  // handle references to objects.
893  case Stmt::CXXStaticCastExprClass:
894  case Stmt::CXXDynamicCastExprClass:
895  case Stmt::CXXConstCastExprClass:
896  case Stmt::CXXReinterpretCastExprClass: {
897      Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
898      if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
899        return EvalAddr(S);
900      else
901        return NULL;
902  }
903
904  // Everything else: we simply don't reason about them.
905  default:
906    return NULL;
907  }
908}
909
910
911///  EvalVal - This function is complements EvalAddr in the mutual recursion.
912///   See the comments for EvalAddr for more details.
913static DeclRefExpr* EvalVal(Expr *E) {
914
915  // We should only be called for evaluating non-pointer expressions, or
916  // expressions with a pointer type that are not used as references but instead
917  // are l-values (e.g., DeclRefExpr with a pointer type).
918
919  // Our "symbolic interpreter" is just a dispatch off the currently
920  // viewed AST node.  We then recursively traverse the AST by calling
921  // EvalAddr and EvalVal appropriately.
922  switch (E->getStmtClass()) {
923  case Stmt::DeclRefExprClass:
924  case Stmt::QualifiedDeclRefExprClass: {
925    // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
926    //  at code that refers to a variable's name.  We check if it has local
927    //  storage within the function, and if so, return the expression.
928    DeclRefExpr *DR = cast<DeclRefExpr>(E);
929
930    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
931      if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;
932
933    return NULL;
934  }
935
936  case Stmt::ParenExprClass:
937    // Ignore parentheses.
938    return EvalVal(cast<ParenExpr>(E)->getSubExpr());
939
940  case Stmt::UnaryOperatorClass: {
941    // The only unary operator that make sense to handle here
942    // is Deref.  All others don't resolve to a "name."  This includes
943    // handling all sorts of rvalues passed to a unary operator.
944    UnaryOperator *U = cast<UnaryOperator>(E);
945
946    if (U->getOpcode() == UnaryOperator::Deref)
947      return EvalAddr(U->getSubExpr());
948
949    return NULL;
950  }
951
952  case Stmt::ArraySubscriptExprClass: {
953    // Array subscripts are potential references to data on the stack.  We
954    // retrieve the DeclRefExpr* for the array variable if it indeed
955    // has local storage.
956    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
957  }
958
959  case Stmt::ConditionalOperatorClass: {
960    // For conditional operators we need to see if either the LHS or RHS are
961    // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
962    ConditionalOperator *C = cast<ConditionalOperator>(E);
963
964    // Handle the GNU extension for missing LHS.
965    if (Expr *lhsExpr = C->getLHS())
966      if (DeclRefExpr *LHS = EvalVal(lhsExpr))
967        return LHS;
968
969    return EvalVal(C->getRHS());
970  }
971
972  // Accesses to members are potential references to data on the stack.
973  case Stmt::MemberExprClass: {
974    MemberExpr *M = cast<MemberExpr>(E);
975
976    // Check for indirect access.  We only want direct field accesses.
977    if (!M->isArrow())
978      return EvalVal(M->getBase());
979    else
980      return NULL;
981  }
982
983  // Everything else: we simply don't reason about them.
984  default:
985    return NULL;
986  }
987}
988
989//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
990
991/// Check for comparisons of floating point operands using != and ==.
992/// Issue a warning if these are no self-comparisons, as they are not likely
993/// to do what the programmer intended.
994void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
995  bool EmitWarning = true;
996
997  Expr* LeftExprSansParen = lex->IgnoreParens();
998  Expr* RightExprSansParen = rex->IgnoreParens();
999
1000  // Special case: check for x == x (which is OK).
1001  // Do not emit warnings for such cases.
1002  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
1003    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
1004      if (DRL->getDecl() == DRR->getDecl())
1005        EmitWarning = false;
1006
1007
1008  // Special case: check for comparisons against literals that can be exactly
1009  //  represented by APFloat.  In such cases, do not emit a warning.  This
1010  //  is a heuristic: often comparison against such literals are used to
1011  //  detect if a value in a variable has not changed.  This clearly can
1012  //  lead to false negatives.
1013  if (EmitWarning) {
1014    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
1015      if (FLL->isExact())
1016        EmitWarning = false;
1017    }
1018    else
1019      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
1020        if (FLR->isExact())
1021          EmitWarning = false;
1022    }
1023  }
1024
1025  // Check for comparisons with builtin types.
1026  if (EmitWarning)
1027    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
1028      if (isCallBuiltin(CL))
1029        EmitWarning = false;
1030
1031  if (EmitWarning)
1032    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
1033      if (isCallBuiltin(CR))
1034        EmitWarning = false;
1035
1036  // Emit the diagnostic.
1037  if (EmitWarning)
1038    Diag(loc, diag::warn_floatingpoint_eq)
1039      << lex->getSourceRange() << rex->getSourceRange();
1040}
1041