SemaChecking.cpp revision 6cfda23b3768f93a6eb0b2a9135c8334a20125bb
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements extra semantic analysis beyond what is enforced
11//  by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/Decl.h"
18#include "clang/AST/Expr.h"
19#include "clang/AST/ExprCXX.h"
20#include "clang/Lex/Preprocessor.h"
21#include "clang/Lex/LiteralSupport.h"
22#include "clang/Basic/SourceManager.h"
23#include "clang/Basic/Diagnostic.h"
24#include "clang/Basic/LangOptions.h"
25#include "clang/Basic/TargetInfo.h"
26#include "llvm/ADT/OwningPtr.h"
27#include "llvm/ADT/SmallString.h"
28#include "llvm/ADT/StringExtras.h"
29#include "SemaUtil.h"
30using namespace clang;
31
32/// CheckFunctionCall - Check a direct function call for various correctness
33/// and safety properties not strictly enforced by the C type system.
34Action::ExprResult
35Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) {
36  llvm::OwningPtr<CallExpr> TheCall(TheCallRaw);
37  // Get the IdentifierInfo* for the called function.
38  IdentifierInfo *FnInfo = FDecl->getIdentifier();
39
40  switch (FnInfo->getBuiltinID()) {
41  case Builtin::BI__builtin___CFStringMakeConstantString:
42    assert(TheCall->getNumArgs() == 1 &&
43           "Wrong # arguments to builtin CFStringMakeConstantString");
44    if (CheckBuiltinCFStringArgument(TheCall->getArg(0)))
45      return true;
46    return TheCall.take();
47  case Builtin::BI__builtin_va_start:
48    if (SemaBuiltinVAStart(TheCall.get())) {
49      return true;
50    }
51    return TheCall.take();
52  case Builtin::BI__builtin_isgreater:
53  case Builtin::BI__builtin_isgreaterequal:
54  case Builtin::BI__builtin_isless:
55  case Builtin::BI__builtin_islessequal:
56  case Builtin::BI__builtin_islessgreater:
57  case Builtin::BI__builtin_isunordered:
58    if (SemaBuiltinUnorderedCompare(TheCall.get()))
59      return true;
60    return TheCall.take();
61  case Builtin::BI__builtin_return_address:
62  case Builtin::BI__builtin_frame_address:
63    if (SemaBuiltinStackAddress(TheCall.get()))
64      return true;
65    return TheCall.take();
66  case Builtin::BI__builtin_shufflevector:
67    return SemaBuiltinShuffleVector(TheCall.get());
68  }
69
70  // Search the KnownFunctionIDs for the identifier.
71  unsigned i = 0, e = id_num_known_functions;
72  for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
73  if (i == e) return TheCall.take();
74
75  // Printf checking.
76  if (i <= id_vprintf) {
77    // Retrieve the index of the format string parameter and determine
78    // if the function is passed a va_arg argument.
79    unsigned format_idx = 0;
80    bool HasVAListArg = false;
81
82    switch (i) {
83    default: assert(false && "No format string argument index.");
84    case id_printf:    format_idx = 0; break;
85    case id_fprintf:   format_idx = 1; break;
86    case id_sprintf:   format_idx = 1; break;
87    case id_snprintf:  format_idx = 2; break;
88    case id_asprintf:  format_idx = 1; break;
89    case id_vsnprintf: format_idx = 2; HasVAListArg = true; break;
90    case id_vasprintf: format_idx = 1; HasVAListArg = true; break;
91    case id_vfprintf:  format_idx = 1; HasVAListArg = true; break;
92    case id_vsprintf:  format_idx = 1; HasVAListArg = true; break;
93    case id_vprintf:   format_idx = 0; HasVAListArg = true; break;
94    }
95
96    CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx);
97  }
98
99  return TheCall.take();
100}
101
102/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
103/// CFString constructor is correct
104bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) {
105  Arg = Arg->IgnoreParenCasts();
106
107  StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
108
109  if (!Literal || Literal->isWide()) {
110    Diag(Arg->getLocStart(),
111         diag::err_cfstring_literal_not_string_constant,
112         Arg->getSourceRange());
113    return true;
114  }
115
116  const char *Data = Literal->getStrData();
117  unsigned Length = Literal->getByteLength();
118
119  for (unsigned i = 0; i < Length; ++i) {
120    if (!isascii(Data[i])) {
121      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
122           diag::warn_cfstring_literal_contains_non_ascii_character,
123           Arg->getSourceRange());
124      break;
125    }
126
127    if (!Data[i]) {
128      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
129           diag::warn_cfstring_literal_contains_nul_character,
130           Arg->getSourceRange());
131      break;
132    }
133  }
134
135  return false;
136}
137
138/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
139/// Emit an error and return true on failure, return false on success.
140bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
141  Expr *Fn = TheCall->getCallee();
142  if (TheCall->getNumArgs() > 2) {
143    Diag(TheCall->getArg(2)->getLocStart(),
144         diag::err_typecheck_call_too_many_args, Fn->getSourceRange(),
145         SourceRange(TheCall->getArg(2)->getLocStart(),
146                     (*(TheCall->arg_end()-1))->getLocEnd()));
147    return true;
148  }
149
150  // Determine whether the current function is variadic or not.
151  bool isVariadic;
152  if (CurFunctionDecl)
153    isVariadic =
154      cast<FunctionTypeProto>(CurFunctionDecl->getType())->isVariadic();
155  else
156    isVariadic = CurMethodDecl->isVariadic();
157
158  if (!isVariadic) {
159    Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
160    return true;
161  }
162
163  // Verify that the second argument to the builtin is the last argument of the
164  // current function or method.
165  bool SecondArgIsLastNamedArgument = false;
166  const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
167
168  if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
169    if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
170      // FIXME: This isn't correct for methods (results in bogus warning).
171      // Get the last formal in the current function.
172      const ParmVarDecl *LastArg;
173      if (CurFunctionDecl)
174        LastArg = *(CurFunctionDecl->param_end()-1);
175      else
176        LastArg = *(CurMethodDecl->param_end()-1);
177      SecondArgIsLastNamedArgument = PV == LastArg;
178    }
179  }
180
181  if (!SecondArgIsLastNamedArgument)
182    Diag(TheCall->getArg(1)->getLocStart(),
183         diag::warn_second_parameter_of_va_start_not_last_named_argument);
184  return false;
185}
186
187/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
188/// friends.  This is declared to take (...), so we have to check everything.
189bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
190  if (TheCall->getNumArgs() < 2)
191    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args);
192  if (TheCall->getNumArgs() > 2)
193    return Diag(TheCall->getArg(2)->getLocStart(),
194                diag::err_typecheck_call_too_many_args,
195                SourceRange(TheCall->getArg(2)->getLocStart(),
196                            (*(TheCall->arg_end()-1))->getLocEnd()));
197
198  Expr *OrigArg0 = TheCall->getArg(0);
199  Expr *OrigArg1 = TheCall->getArg(1);
200
201  // Do standard promotions between the two arguments, returning their common
202  // type.
203  QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
204
205  // If the common type isn't a real floating type, then the arguments were
206  // invalid for this operation.
207  if (!Res->isRealFloatingType())
208    return Diag(OrigArg0->getLocStart(),
209                diag::err_typecheck_call_invalid_ordered_compare,
210                OrigArg0->getType().getAsString(),
211                OrigArg1->getType().getAsString(),
212                SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()));
213
214  return false;
215}
216
217bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
218  // The signature for these builtins is exact; the only thing we need
219  // to check is that the argument is a constant.
220  SourceLocation Loc;
221  if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc)) {
222    return Diag(Loc, diag::err_stack_const_level, TheCall->getSourceRange());
223  }
224  return false;
225}
226
227/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
228// This is declared to take (...), so we have to check everything.
229Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
230  if (TheCall->getNumArgs() < 3)
231    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args,
232                TheCall->getSourceRange());
233
234  QualType FAType = TheCall->getArg(0)->getType();
235  QualType SAType = TheCall->getArg(1)->getType();
236
237  if (!FAType->isVectorType() || !SAType->isVectorType()) {
238    Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector,
239         SourceRange(TheCall->getArg(0)->getLocStart(),
240                     TheCall->getArg(1)->getLocEnd()));
241    return true;
242  }
243
244  if (FAType.getCanonicalType().getUnqualifiedType() !=
245      SAType.getCanonicalType().getUnqualifiedType()) {
246    Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector,
247         SourceRange(TheCall->getArg(0)->getLocStart(),
248                     TheCall->getArg(1)->getLocEnd()));
249    return true;
250  }
251
252  unsigned numElements = FAType->getAsVectorType()->getNumElements();
253  if (TheCall->getNumArgs() != numElements+2) {
254    if (TheCall->getNumArgs() < numElements+2)
255      Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args,
256           TheCall->getSourceRange());
257    else
258      Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args,
259           TheCall->getSourceRange());
260    return true;
261  }
262
263  for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
264    llvm::APSInt Result(32);
265    if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) {
266      Diag(TheCall->getLocStart(),
267           diag::err_shufflevector_nonconstant_argument,
268           TheCall->getArg(i)->getSourceRange());
269      return true;
270    }
271    if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) {
272      Diag(TheCall->getLocStart(),
273           diag::err_shufflevector_argument_too_large,
274           TheCall->getArg(i)->getSourceRange());
275      return true;
276    }
277  }
278
279  llvm::SmallVector<Expr*, 32> exprs;
280
281  for (unsigned i = 0; i < TheCall->getNumArgs(); i++) {
282    exprs.push_back(TheCall->getArg(i));
283    TheCall->setArg(i, 0);
284  }
285
286  ShuffleVectorExpr* E = new ShuffleVectorExpr(
287      exprs.begin(), numElements+2, FAType,
288      TheCall->getCallee()->getLocStart(),
289      TheCall->getRParenLoc());
290
291  return E;
292}
293
294/// CheckPrintfArguments - Check calls to printf (and similar functions) for
295/// correct use of format strings.
296///
297///  HasVAListArg - A predicate indicating whether the printf-like
298///    function is passed an explicit va_arg argument (e.g., vprintf)
299///
300///  format_idx - The index into Args for the format string.
301///
302/// Improper format strings to functions in the printf family can be
303/// the source of bizarre bugs and very serious security holes.  A
304/// good source of information is available in the following paper
305/// (which includes additional references):
306///
307///  FormatGuard: Automatic Protection From printf Format String
308///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
309///
310/// Functionality implemented:
311///
312///  We can statically check the following properties for string
313///  literal format strings for non v.*printf functions (where the
314///  arguments are passed directly):
315//
316///  (1) Are the number of format conversions equal to the number of
317///      data arguments?
318///
319///  (2) Does each format conversion correctly match the type of the
320///      corresponding data argument?  (TODO)
321///
322/// Moreover, for all printf functions we can:
323///
324///  (3) Check for a missing format string (when not caught by type checking).
325///
326///  (4) Check for no-operation flags; e.g. using "#" with format
327///      conversion 'c'  (TODO)
328///
329///  (5) Check the use of '%n', a major source of security holes.
330///
331///  (6) Check for malformed format conversions that don't specify anything.
332///
333///  (7) Check for empty format strings.  e.g: printf("");
334///
335///  (8) Check that the format string is a wide literal.
336///
337///  (9) Also check the arguments of functions with the __format__ attribute.
338///      (TODO).
339///
340/// All of these checks can be done by parsing the format string.
341///
342/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
343void
344Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
345                           unsigned format_idx) {
346  Expr *Fn = TheCall->getCallee();
347
348  // CHECK: printf-like function is called with no format string.
349  if (format_idx >= TheCall->getNumArgs()) {
350    Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string,
351         Fn->getSourceRange());
352    return;
353  }
354
355  Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
356
357  // CHECK: format string is not a string literal.
358  //
359  // Dynamically generated format strings are difficult to
360  // automatically vet at compile time.  Requiring that format strings
361  // are string literals: (1) permits the checking of format strings by
362  // the compiler and thereby (2) can practically remove the source of
363  // many format string exploits.
364  StringLiteral *FExpr = dyn_cast<StringLiteral>(OrigFormatExpr);
365  if (FExpr == NULL) {
366    // For vprintf* functions (i.e., HasVAListArg==true), we add a
367    // special check to see if the format string is a function parameter
368    // of the function calling the printf function.  If the function
369    // has an attribute indicating it is a printf-like function, then we
370    // should suppress warnings concerning non-literals being used in a call
371    // to a vprintf function.  For example:
372    //
373    // void
374    // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
375    //      va_list ap;
376    //      va_start(ap, fmt);
377    //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt".
378    //      ...
379    //
380    //
381    //  FIXME: We don't have full attribute support yet, so just check to see
382    //    if the argument is a DeclRefExpr that references a parameter.  We'll
383    //    add proper support for checking the attribute later.
384    if (HasVAListArg)
385      if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
386        if (isa<ParmVarDecl>(DR->getDecl()))
387          return;
388
389    Diag(TheCall->getArg(format_idx)->getLocStart(),
390         diag::warn_printf_not_string_constant, Fn->getSourceRange());
391    return;
392  }
393
394  // CHECK: is the format string a wide literal?
395  if (FExpr->isWide()) {
396    Diag(FExpr->getLocStart(),
397         diag::warn_printf_format_string_is_wide_literal, Fn->getSourceRange());
398    return;
399  }
400
401  // Str - The format string.  NOTE: this is NOT null-terminated!
402  const char * const Str = FExpr->getStrData();
403
404  // CHECK: empty format string?
405  const unsigned StrLen = FExpr->getByteLength();
406
407  if (StrLen == 0) {
408    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string,
409         Fn->getSourceRange());
410    return;
411  }
412
413  // We process the format string using a binary state machine.  The
414  // current state is stored in CurrentState.
415  enum {
416    state_OrdChr,
417    state_Conversion
418  } CurrentState = state_OrdChr;
419
420  // numConversions - The number of conversions seen so far.  This is
421  //  incremented as we traverse the format string.
422  unsigned numConversions = 0;
423
424  // numDataArgs - The number of data arguments after the format
425  //  string.  This can only be determined for non vprintf-like
426  //  functions.  For those functions, this value is 1 (the sole
427  //  va_arg argument).
428  unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1);
429
430  // Inspect the format string.
431  unsigned StrIdx = 0;
432
433  // LastConversionIdx - Index within the format string where we last saw
434  //  a '%' character that starts a new format conversion.
435  unsigned LastConversionIdx = 0;
436
437  for (; StrIdx < StrLen; ++StrIdx) {
438
439    // Is the number of detected conversion conversions greater than
440    // the number of matching data arguments?  If so, stop.
441    if (!HasVAListArg && numConversions > numDataArgs) break;
442
443    // Handle "\0"
444    if (Str[StrIdx] == '\0') {
445      // The string returned by getStrData() is not null-terminated,
446      // so the presence of a null character is likely an error.
447      Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
448           diag::warn_printf_format_string_contains_null_char,
449           Fn->getSourceRange());
450      return;
451    }
452
453    // Ordinary characters (not processing a format conversion).
454    if (CurrentState == state_OrdChr) {
455      if (Str[StrIdx] == '%') {
456        CurrentState = state_Conversion;
457        LastConversionIdx = StrIdx;
458      }
459      continue;
460    }
461
462    // Seen '%'.  Now processing a format conversion.
463    switch (Str[StrIdx]) {
464    // Handle dynamic precision or width specifier.
465    case '*': {
466      ++numConversions;
467
468      if (!HasVAListArg && numConversions > numDataArgs) {
469        SourceLocation Loc = FExpr->getLocStart();
470        Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
471
472        if (Str[StrIdx-1] == '.')
473          Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg,
474               Fn->getSourceRange());
475        else
476          Diag(Loc, diag::warn_printf_asterisk_width_missing_arg,
477               Fn->getSourceRange());
478
479        // Don't do any more checking.  We'll just emit spurious errors.
480        return;
481      }
482
483      // Perform type checking on width/precision specifier.
484      Expr *E = TheCall->getArg(format_idx+numConversions);
485      if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
486        if (BT->getKind() == BuiltinType::Int)
487          break;
488
489      SourceLocation Loc =
490        PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
491
492      if (Str[StrIdx-1] == '.')
493        Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type,
494             E->getType().getAsString(), E->getSourceRange());
495      else
496        Diag(Loc, diag::warn_printf_asterisk_width_wrong_type,
497             E->getType().getAsString(), E->getSourceRange());
498
499      break;
500    }
501
502    // Characters which can terminate a format conversion
503    // (e.g. "%d").  Characters that specify length modifiers or
504    // other flags are handled by the default case below.
505    //
506    // FIXME: additional checks will go into the following cases.
507    case 'i':
508    case 'd':
509    case 'o':
510    case 'u':
511    case 'x':
512    case 'X':
513    case 'D':
514    case 'O':
515    case 'U':
516    case 'e':
517    case 'E':
518    case 'f':
519    case 'F':
520    case 'g':
521    case 'G':
522    case 'a':
523    case 'A':
524    case 'c':
525    case 'C':
526    case 'S':
527    case 's':
528    case 'p':
529      ++numConversions;
530      CurrentState = state_OrdChr;
531      break;
532
533    // CHECK: Are we using "%n"?  Issue a warning.
534    case 'n': {
535      ++numConversions;
536      CurrentState = state_OrdChr;
537      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
538                                                      LastConversionIdx+1);
539
540      Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange());
541      break;
542    }
543
544    // Handle "%%"
545    case '%':
546      // Sanity check: Was the first "%" character the previous one?
547      // If not, we will assume that we have a malformed format
548      // conversion, and that the current "%" character is the start
549      // of a new conversion.
550      if (StrIdx - LastConversionIdx == 1)
551        CurrentState = state_OrdChr;
552      else {
553        // Issue a warning: invalid format conversion.
554        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
555                                                        LastConversionIdx+1);
556
557        Diag(Loc, diag::warn_printf_invalid_conversion,
558             std::string(Str+LastConversionIdx, Str+StrIdx),
559             Fn->getSourceRange());
560
561        // This conversion is broken.  Advance to the next format
562        // conversion.
563        LastConversionIdx = StrIdx;
564        ++numConversions;
565      }
566      break;
567
568    default:
569      // This case catches all other characters: flags, widths, etc.
570      // We should eventually process those as well.
571      break;
572    }
573  }
574
575  if (CurrentState == state_Conversion) {
576    // Issue a warning: invalid format conversion.
577    SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
578                                                    LastConversionIdx+1);
579
580    Diag(Loc, diag::warn_printf_invalid_conversion,
581         std::string(Str+LastConversionIdx,
582                     Str+std::min(LastConversionIdx+2, StrLen)),
583         Fn->getSourceRange());
584    return;
585  }
586
587  if (!HasVAListArg) {
588    // CHECK: Does the number of format conversions exceed the number
589    //        of data arguments?
590    if (numConversions > numDataArgs) {
591      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
592                                                      LastConversionIdx);
593
594      Diag(Loc, diag::warn_printf_insufficient_data_args,
595           Fn->getSourceRange());
596    }
597    // CHECK: Does the number of data arguments exceed the number of
598    //        format conversions in the format string?
599    else if (numConversions < numDataArgs)
600      Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
601           diag::warn_printf_too_many_data_args, Fn->getSourceRange());
602  }
603}
604
605//===--- CHECK: Return Address of Stack Variable --------------------------===//
606
607static DeclRefExpr* EvalVal(Expr *E);
608static DeclRefExpr* EvalAddr(Expr* E);
609
610/// CheckReturnStackAddr - Check if a return statement returns the address
611///   of a stack variable.
612void
613Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
614                           SourceLocation ReturnLoc) {
615
616  // Perform checking for returned stack addresses.
617  if (lhsType->isPointerType()) {
618    if (DeclRefExpr *DR = EvalAddr(RetValExp))
619      Diag(DR->getLocStart(), diag::warn_ret_stack_addr,
620           DR->getDecl()->getIdentifier()->getName(),
621           RetValExp->getSourceRange());
622  }
623  // Perform checking for stack values returned by reference.
624  else if (lhsType->isReferenceType()) {
625    // Check for an implicit cast to a reference.
626    if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp))
627      if (DeclRefExpr *DR = EvalVal(I->getSubExpr()))
628        Diag(DR->getLocStart(), diag::warn_ret_stack_ref,
629             DR->getDecl()->getIdentifier()->getName(),
630             RetValExp->getSourceRange());
631  }
632}
633
634/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
635///  check if the expression in a return statement evaluates to an address
636///  to a location on the stack.  The recursion is used to traverse the
637///  AST of the return expression, with recursion backtracking when we
638///  encounter a subexpression that (1) clearly does not lead to the address
639///  of a stack variable or (2) is something we cannot determine leads to
640///  the address of a stack variable based on such local checking.
641///
642///  EvalAddr processes expressions that are pointers that are used as
643///  references (and not L-values).  EvalVal handles all other values.
644///  At the base case of the recursion is a check for a DeclRefExpr* in
645///  the refers to a stack variable.
646///
647///  This implementation handles:
648///
649///   * pointer-to-pointer casts
650///   * implicit conversions from array references to pointers
651///   * taking the address of fields
652///   * arbitrary interplay between "&" and "*" operators
653///   * pointer arithmetic from an address of a stack variable
654///   * taking the address of an array element where the array is on the stack
655static DeclRefExpr* EvalAddr(Expr *E) {
656  // We should only be called for evaluating pointer expressions.
657  assert((E->getType()->isPointerType() ||
658          E->getType()->isObjCQualifiedIdType()) &&
659         "EvalAddr only works on pointers");
660
661  // Our "symbolic interpreter" is just a dispatch off the currently
662  // viewed AST node.  We then recursively traverse the AST by calling
663  // EvalAddr and EvalVal appropriately.
664  switch (E->getStmtClass()) {
665  case Stmt::ParenExprClass:
666    // Ignore parentheses.
667    return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
668
669  case Stmt::UnaryOperatorClass: {
670    // The only unary operator that make sense to handle here
671    // is AddrOf.  All others don't make sense as pointers.
672    UnaryOperator *U = cast<UnaryOperator>(E);
673
674    if (U->getOpcode() == UnaryOperator::AddrOf)
675      return EvalVal(U->getSubExpr());
676    else
677      return NULL;
678  }
679
680  case Stmt::BinaryOperatorClass: {
681    // Handle pointer arithmetic.  All other binary operators are not valid
682    // in this context.
683    BinaryOperator *B = cast<BinaryOperator>(E);
684    BinaryOperator::Opcode op = B->getOpcode();
685
686    if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
687      return NULL;
688
689    Expr *Base = B->getLHS();
690
691    // Determine which argument is the real pointer base.  It could be
692    // the RHS argument instead of the LHS.
693    if (!Base->getType()->isPointerType()) Base = B->getRHS();
694
695    assert (Base->getType()->isPointerType());
696    return EvalAddr(Base);
697  }
698
699  // For conditional operators we need to see if either the LHS or RHS are
700  // valid DeclRefExpr*s.  If one of them is valid, we return it.
701  case Stmt::ConditionalOperatorClass: {
702    ConditionalOperator *C = cast<ConditionalOperator>(E);
703
704    // Handle the GNU extension for missing LHS.
705    if (Expr *lhsExpr = C->getLHS())
706      if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
707        return LHS;
708
709     return EvalAddr(C->getRHS());
710  }
711
712  // For implicit casts, we need to handle conversions from arrays to
713  // pointer values, and implicit pointer-to-pointer conversions.
714  case Stmt::ImplicitCastExprClass: {
715    ImplicitCastExpr *IE = cast<ImplicitCastExpr>(E);
716    Expr* SubExpr = IE->getSubExpr();
717
718    if (SubExpr->getType()->isPointerType() ||
719        SubExpr->getType()->isObjCQualifiedIdType())
720      return EvalAddr(SubExpr);
721    else
722      return EvalVal(SubExpr);
723  }
724
725  // For casts, we handle pointer-to-pointer conversions (which
726  // is essentially a no-op from our mini-interpreter's standpoint).
727  // For other casts we abort.
728  case Stmt::CastExprClass: {
729    CastExpr *C = cast<CastExpr>(E);
730    Expr *SubExpr = C->getSubExpr();
731
732    if (SubExpr->getType()->isPointerType())
733      return EvalAddr(SubExpr);
734    else
735      return NULL;
736  }
737
738  // C++ casts.  For dynamic casts, static casts, and const casts, we
739  // are always converting from a pointer-to-pointer, so we just blow
740  // through the cast.  In the case the dynamic cast doesn't fail
741  // (and return NULL), we take the conservative route and report cases
742  // where we return the address of a stack variable.  For Reinterpre
743  case Stmt::CXXCastExprClass: {
744    CXXCastExpr *C = cast<CXXCastExpr>(E);
745
746    if (C->getOpcode() == CXXCastExpr::ReinterpretCast) {
747      Expr *S = C->getSubExpr();
748      if (S->getType()->isPointerType())
749        return EvalAddr(S);
750      else
751        return NULL;
752    }
753    else
754      return EvalAddr(C->getSubExpr());
755  }
756
757  // Everything else: we simply don't reason about them.
758  default:
759    return NULL;
760  }
761}
762
763
764///  EvalVal - This function is complements EvalAddr in the mutual recursion.
765///   See the comments for EvalAddr for more details.
766static DeclRefExpr* EvalVal(Expr *E) {
767
768  // We should only be called for evaluating non-pointer expressions, or
769  // expressions with a pointer type that are not used as references but instead
770  // are l-values (e.g., DeclRefExpr with a pointer type).
771
772  // Our "symbolic interpreter" is just a dispatch off the currently
773  // viewed AST node.  We then recursively traverse the AST by calling
774  // EvalAddr and EvalVal appropriately.
775  switch (E->getStmtClass()) {
776  case Stmt::DeclRefExprClass: {
777    // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
778    //  at code that refers to a variable's name.  We check if it has local
779    //  storage within the function, and if so, return the expression.
780    DeclRefExpr *DR = cast<DeclRefExpr>(E);
781
782    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
783      if(V->hasLocalStorage()) return DR;
784
785    return NULL;
786  }
787
788  case Stmt::ParenExprClass:
789    // Ignore parentheses.
790    return EvalVal(cast<ParenExpr>(E)->getSubExpr());
791
792  case Stmt::UnaryOperatorClass: {
793    // The only unary operator that make sense to handle here
794    // is Deref.  All others don't resolve to a "name."  This includes
795    // handling all sorts of rvalues passed to a unary operator.
796    UnaryOperator *U = cast<UnaryOperator>(E);
797
798    if (U->getOpcode() == UnaryOperator::Deref)
799      return EvalAddr(U->getSubExpr());
800
801    return NULL;
802  }
803
804  case Stmt::ArraySubscriptExprClass: {
805    // Array subscripts are potential references to data on the stack.  We
806    // retrieve the DeclRefExpr* for the array variable if it indeed
807    // has local storage.
808    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
809  }
810
811  case Stmt::ConditionalOperatorClass: {
812    // For conditional operators we need to see if either the LHS or RHS are
813    // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
814    ConditionalOperator *C = cast<ConditionalOperator>(E);
815
816    // Handle the GNU extension for missing LHS.
817    if (Expr *lhsExpr = C->getLHS())
818      if (DeclRefExpr *LHS = EvalVal(lhsExpr))
819        return LHS;
820
821    return EvalVal(C->getRHS());
822  }
823
824  // Accesses to members are potential references to data on the stack.
825  case Stmt::MemberExprClass: {
826    MemberExpr *M = cast<MemberExpr>(E);
827
828    // Check for indirect access.  We only want direct field accesses.
829    if (!M->isArrow())
830      return EvalVal(M->getBase());
831    else
832      return NULL;
833  }
834
835  // Everything else: we simply don't reason about them.
836  default:
837    return NULL;
838  }
839}
840
841//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
842
843/// Check for comparisons of floating point operands using != and ==.
844/// Issue a warning if these are no self-comparisons, as they are not likely
845/// to do what the programmer intended.
846void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
847  bool EmitWarning = true;
848
849  Expr* LeftExprSansParen = lex->IgnoreParens();
850  Expr* RightExprSansParen = rex->IgnoreParens();
851
852  // Special case: check for x == x (which is OK).
853  // Do not emit warnings for such cases.
854  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
855    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
856      if (DRL->getDecl() == DRR->getDecl())
857        EmitWarning = false;
858
859
860  // Special case: check for comparisons against literals that can be exactly
861  //  represented by APFloat.  In such cases, do not emit a warning.  This
862  //  is a heuristic: often comparison against such literals are used to
863  //  detect if a value in a variable has not changed.  This clearly can
864  //  lead to false negatives.
865  if (EmitWarning) {
866    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
867      if (FLL->isExact())
868        EmitWarning = false;
869    }
870    else
871      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
872        if (FLR->isExact())
873          EmitWarning = false;
874    }
875  }
876
877  // Check for comparisons with builtin types.
878  if (EmitWarning)
879    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
880      if (isCallBuiltin(CL))
881        EmitWarning = false;
882
883  if (EmitWarning)
884    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
885      if (isCallBuiltin(CR))
886        EmitWarning = false;
887
888  // Emit the diagnostic.
889  if (EmitWarning)
890    Diag(loc, diag::warn_floatingpoint_eq,
891         lex->getSourceRange(),rex->getSourceRange());
892}
893