SemaChecking.cpp revision 500d3297d2a21edeac4d46cbcbe21bc2352c2a28
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements extra semantic analysis beyond what is enforced
11//  by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/DeclObjC.h"
18#include "clang/AST/ExprCXX.h"
19#include "clang/AST/ExprObjC.h"
20#include "clang/Lex/Preprocessor.h"
21#include "SemaUtil.h"
22using namespace clang;
23
24/// CheckFunctionCall - Check a direct function call for various correctness
25/// and safety properties not strictly enforced by the C type system.
26Action::OwningExprResult
27Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
28  OwningExprResult TheCallResult(Owned(TheCall));
29  // Get the IdentifierInfo* for the called function.
30  IdentifierInfo *FnInfo = FDecl->getIdentifier();
31
32  // None of the checks below are needed for functions that don't have
33  // simple names (e.g., C++ conversion functions).
34  if (!FnInfo)
35    return move(TheCallResult);
36
37  switch (FnInfo->getBuiltinID()) {
38  case Builtin::BI__builtin___CFStringMakeConstantString:
39    assert(TheCall->getNumArgs() == 1 &&
40           "Wrong # arguments to builtin CFStringMakeConstantString");
41    if (CheckBuiltinCFStringArgument(TheCall->getArg(0)))
42      return ExprError();
43    return move(TheCallResult);
44  case Builtin::BI__builtin_stdarg_start:
45  case Builtin::BI__builtin_va_start:
46    if (SemaBuiltinVAStart(TheCall))
47      return ExprError();
48    return move(TheCallResult);
49  case Builtin::BI__builtin_isgreater:
50  case Builtin::BI__builtin_isgreaterequal:
51  case Builtin::BI__builtin_isless:
52  case Builtin::BI__builtin_islessequal:
53  case Builtin::BI__builtin_islessgreater:
54  case Builtin::BI__builtin_isunordered:
55    if (SemaBuiltinUnorderedCompare(TheCall))
56      return ExprError();
57    return move(TheCallResult);
58  case Builtin::BI__builtin_return_address:
59  case Builtin::BI__builtin_frame_address:
60    if (SemaBuiltinStackAddress(TheCall))
61      return ExprError();
62    return move(TheCallResult);
63  case Builtin::BI__builtin_shufflevector:
64    return SemaBuiltinShuffleVector(TheCall);
65    // TheCall will be freed by the smart pointer here, but that's fine, since
66    // SemaBuiltinShuffleVector guts it, but then doesn't release it.
67  case Builtin::BI__builtin_prefetch:
68    if (SemaBuiltinPrefetch(TheCall))
69      return ExprError();
70    return move(TheCallResult);
71  case Builtin::BI__builtin_object_size:
72    if (SemaBuiltinObjectSize(TheCall))
73      return ExprError();
74  }
75
76  // FIXME: This mechanism should be abstracted to be less fragile and
77  // more efficient. For example, just map function ids to custom
78  // handlers.
79
80  // Search the KnownFunctionIDs for the identifier.
81  unsigned i = 0, e = id_num_known_functions;
82  for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
83  if (i == e) return move(TheCallResult);
84
85  // Printf checking.
86  if (i <= id_vprintf) {
87    // Retrieve the index of the format string parameter and determine
88    // if the function is passed a va_arg argument.
89    unsigned format_idx = 0;
90    bool HasVAListArg = false;
91
92    switch (i) {
93    default: assert(false && "No format string argument index.");
94    case id_NSLog:         format_idx = 0; break;
95    case id_asprintf:      format_idx = 1; break;
96    case id_fprintf:       format_idx = 1; break;
97    case id_printf:        format_idx = 0; break;
98    case id_snprintf:      format_idx = 2; break;
99    case id_snprintf_chk:  format_idx = 4; break;
100    case id_sprintf:       format_idx = 1; break;
101    case id_sprintf_chk:   format_idx = 3; break;
102    case id_vasprintf:     format_idx = 1; HasVAListArg = true; break;
103    case id_vfprintf:      format_idx = 1; HasVAListArg = true; break;
104    case id_vsnprintf:     format_idx = 2; HasVAListArg = true; break;
105    case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break;
106    case id_vsprintf:      format_idx = 1; HasVAListArg = true; break;
107    case id_vsprintf_chk:  format_idx = 3; HasVAListArg = true; break;
108    case id_vprintf:       format_idx = 0; HasVAListArg = true; break;
109    }
110
111    CheckPrintfArguments(TheCall, HasVAListArg, format_idx);
112  }
113
114  return move(TheCallResult);
115}
116
117/// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
118/// CFString constructor is correct
119bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) {
120  Arg = Arg->IgnoreParenCasts();
121
122  StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
123
124  if (!Literal || Literal->isWide()) {
125    Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
126      << Arg->getSourceRange();
127    return true;
128  }
129
130  const char *Data = Literal->getStrData();
131  unsigned Length = Literal->getByteLength();
132
133  for (unsigned i = 0; i < Length; ++i) {
134    if (!isascii(Data[i])) {
135      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
136           diag::warn_cfstring_literal_contains_non_ascii_character)
137        << Arg->getSourceRange();
138      break;
139    }
140
141    if (!Data[i]) {
142      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
143           diag::warn_cfstring_literal_contains_nul_character)
144        << Arg->getSourceRange();
145      break;
146    }
147  }
148
149  return false;
150}
151
152/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
153/// Emit an error and return true on failure, return false on success.
154bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
155  Expr *Fn = TheCall->getCallee();
156  if (TheCall->getNumArgs() > 2) {
157    Diag(TheCall->getArg(2)->getLocStart(),
158         diag::err_typecheck_call_too_many_args)
159      << 0 /*function call*/ << Fn->getSourceRange()
160      << SourceRange(TheCall->getArg(2)->getLocStart(),
161                     (*(TheCall->arg_end()-1))->getLocEnd());
162    return true;
163  }
164
165  if (TheCall->getNumArgs() < 2) {
166    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
167      << 0 /*function call*/;
168  }
169
170  // Determine whether the current function is variadic or not.
171  bool isVariadic;
172  if (getCurFunctionDecl()) {
173    if (FunctionTypeProto* FTP =
174            dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType()))
175      isVariadic = FTP->isVariadic();
176    else
177      isVariadic = false;
178  } else {
179    isVariadic = getCurMethodDecl()->isVariadic();
180  }
181
182  if (!isVariadic) {
183    Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
184    return true;
185  }
186
187  // Verify that the second argument to the builtin is the last argument of the
188  // current function or method.
189  bool SecondArgIsLastNamedArgument = false;
190  const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
191
192  if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
193    if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
194      // FIXME: This isn't correct for methods (results in bogus warning).
195      // Get the last formal in the current function.
196      const ParmVarDecl *LastArg;
197      if (FunctionDecl *FD = getCurFunctionDecl())
198        LastArg = *(FD->param_end()-1);
199      else
200        LastArg = *(getCurMethodDecl()->param_end()-1);
201      SecondArgIsLastNamedArgument = PV == LastArg;
202    }
203  }
204
205  if (!SecondArgIsLastNamedArgument)
206    Diag(TheCall->getArg(1)->getLocStart(),
207         diag::warn_second_parameter_of_va_start_not_last_named_argument);
208  return false;
209}
210
211/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
212/// friends.  This is declared to take (...), so we have to check everything.
213bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
214  if (TheCall->getNumArgs() < 2)
215    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
216      << 0 /*function call*/;
217  if (TheCall->getNumArgs() > 2)
218    return Diag(TheCall->getArg(2)->getLocStart(),
219                diag::err_typecheck_call_too_many_args)
220      << 0 /*function call*/
221      << SourceRange(TheCall->getArg(2)->getLocStart(),
222                     (*(TheCall->arg_end()-1))->getLocEnd());
223
224  Expr *OrigArg0 = TheCall->getArg(0);
225  Expr *OrigArg1 = TheCall->getArg(1);
226
227  // Do standard promotions between the two arguments, returning their common
228  // type.
229  QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
230
231  // If the common type isn't a real floating type, then the arguments were
232  // invalid for this operation.
233  if (!Res->isRealFloatingType())
234    return Diag(OrigArg0->getLocStart(),
235                diag::err_typecheck_call_invalid_ordered_compare)
236      << OrigArg0->getType() << OrigArg1->getType()
237      << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
238
239  return false;
240}
241
242bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
243  // The signature for these builtins is exact; the only thing we need
244  // to check is that the argument is a constant.
245  SourceLocation Loc;
246  if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
247    return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
248
249  return false;
250}
251
252/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
253// This is declared to take (...), so we have to check everything.
254Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
255  if (TheCall->getNumArgs() < 3)
256    return ExprError(Diag(TheCall->getLocEnd(),
257                          diag::err_typecheck_call_too_few_args)
258      << 0 /*function call*/ << TheCall->getSourceRange());
259
260  QualType FAType = TheCall->getArg(0)->getType();
261  QualType SAType = TheCall->getArg(1)->getType();
262
263  if (!FAType->isVectorType() || !SAType->isVectorType()) {
264    Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
265      << SourceRange(TheCall->getArg(0)->getLocStart(),
266                     TheCall->getArg(1)->getLocEnd());
267    return ExprError();
268  }
269
270  if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
271      Context.getCanonicalType(SAType).getUnqualifiedType()) {
272    Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
273      << SourceRange(TheCall->getArg(0)->getLocStart(),
274                     TheCall->getArg(1)->getLocEnd());
275    return ExprError();
276  }
277
278  unsigned numElements = FAType->getAsVectorType()->getNumElements();
279  if (TheCall->getNumArgs() != numElements+2) {
280    if (TheCall->getNumArgs() < numElements+2)
281      return ExprError(Diag(TheCall->getLocEnd(),
282                            diag::err_typecheck_call_too_few_args)
283               << 0 /*function call*/ << TheCall->getSourceRange());
284    return ExprError(Diag(TheCall->getLocEnd(),
285                          diag::err_typecheck_call_too_many_args)
286             << 0 /*function call*/ << TheCall->getSourceRange());
287  }
288
289  for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
290    llvm::APSInt Result(32);
291    if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
292      return ExprError(Diag(TheCall->getLocStart(),
293                  diag::err_shufflevector_nonconstant_argument)
294                << TheCall->getArg(i)->getSourceRange());
295
296    if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
297      return ExprError(Diag(TheCall->getLocStart(),
298                  diag::err_shufflevector_argument_too_large)
299               << TheCall->getArg(i)->getSourceRange());
300  }
301
302  llvm::SmallVector<Expr*, 32> exprs;
303
304  for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
305    exprs.push_back(TheCall->getArg(i));
306    TheCall->setArg(i, 0);
307  }
308
309  return Owned(new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType,
310                                     TheCall->getCallee()->getLocStart(),
311                                     TheCall->getRParenLoc()));
312}
313
314/// SemaBuiltinPrefetch - Handle __builtin_prefetch.
315// This is declared to take (const void*, ...) and can take two
316// optional constant int args.
317bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
318  unsigned NumArgs = TheCall->getNumArgs();
319
320  if (NumArgs > 3)
321    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
322             << 0 /*function call*/ << TheCall->getSourceRange();
323
324  // Argument 0 is checked for us and the remaining arguments must be
325  // constant integers.
326  for (unsigned i = 1; i != NumArgs; ++i) {
327    Expr *Arg = TheCall->getArg(i);
328    QualType RWType = Arg->getType();
329
330    const BuiltinType *BT = RWType->getAsBuiltinType();
331    llvm::APSInt Result;
332    if (!BT || BT->getKind() != BuiltinType::Int ||
333        !Arg->isIntegerConstantExpr(Result, Context))
334      return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
335              << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
336
337    // FIXME: gcc issues a warning and rewrites these to 0. These
338    // seems especially odd for the third argument since the default
339    // is 3.
340    if (i == 1) {
341      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
342        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
343             << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
344    } else {
345      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
346        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
347            << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
348    }
349  }
350
351  return false;
352}
353
354/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
355/// int type). This simply type checks that type is one of the defined
356/// constants (0-3).
357bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
358  Expr *Arg = TheCall->getArg(1);
359  QualType ArgType = Arg->getType();
360  const BuiltinType *BT = ArgType->getAsBuiltinType();
361  llvm::APSInt Result(32);
362  if (!BT || BT->getKind() != BuiltinType::Int ||
363      !Arg->isIntegerConstantExpr(Result, Context)) {
364    return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
365             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
366  }
367
368  if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
369    return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
370             << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
371  }
372
373  return false;
374}
375
376// Handle i > 1 ? "x" : "y", recursivelly
377bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg,
378                                  unsigned format_idx) {
379
380  switch (E->getStmtClass()) {
381  case Stmt::ConditionalOperatorClass: {
382    ConditionalOperator *C = cast<ConditionalOperator>(E);
383    return SemaCheckStringLiteral(C->getLHS(), TheCall,
384                                  HasVAListArg, format_idx)
385        && SemaCheckStringLiteral(C->getRHS(), TheCall,
386                                  HasVAListArg, format_idx);
387  }
388
389  case Stmt::ImplicitCastExprClass: {
390    ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E);
391    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
392                                  format_idx);
393  }
394
395  case Stmt::ParenExprClass: {
396    ParenExpr *Expr = dyn_cast<ParenExpr>(E);
397    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
398                                  format_idx);
399  }
400
401  default: {
402    ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E);
403    StringLiteral *StrE = NULL;
404
405    if (ObjCFExpr)
406      StrE = ObjCFExpr->getString();
407    else
408      StrE = dyn_cast<StringLiteral>(E);
409
410    if (StrE) {
411      CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx);
412      return true;
413    }
414
415    return false;
416  }
417  }
418}
419
420
421/// CheckPrintfArguments - Check calls to printf (and similar functions) for
422/// correct use of format strings.
423///
424///  HasVAListArg - A predicate indicating whether the printf-like
425///    function is passed an explicit va_arg argument (e.g., vprintf)
426///
427///  format_idx - The index into Args for the format string.
428///
429/// Improper format strings to functions in the printf family can be
430/// the source of bizarre bugs and very serious security holes.  A
431/// good source of information is available in the following paper
432/// (which includes additional references):
433///
434///  FormatGuard: Automatic Protection From printf Format String
435///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
436///
437/// Functionality implemented:
438///
439///  We can statically check the following properties for string
440///  literal format strings for non v.*printf functions (where the
441///  arguments are passed directly):
442//
443///  (1) Are the number of format conversions equal to the number of
444///      data arguments?
445///
446///  (2) Does each format conversion correctly match the type of the
447///      corresponding data argument?  (TODO)
448///
449/// Moreover, for all printf functions we can:
450///
451///  (3) Check for a missing format string (when not caught by type checking).
452///
453///  (4) Check for no-operation flags; e.g. using "#" with format
454///      conversion 'c'  (TODO)
455///
456///  (5) Check the use of '%n', a major source of security holes.
457///
458///  (6) Check for malformed format conversions that don't specify anything.
459///
460///  (7) Check for empty format strings.  e.g: printf("");
461///
462///  (8) Check that the format string is a wide literal.
463///
464///  (9) Also check the arguments of functions with the __format__ attribute.
465///      (TODO).
466///
467/// All of these checks can be done by parsing the format string.
468///
469/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
470void
471Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
472                           unsigned format_idx) {
473  Expr *Fn = TheCall->getCallee();
474
475  // CHECK: printf-like function is called with no format string.
476  if (format_idx >= TheCall->getNumArgs()) {
477    Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
478      << Fn->getSourceRange();
479    return;
480  }
481
482  Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
483
484  // CHECK: format string is not a string literal.
485  //
486  // Dynamically generated format strings are difficult to
487  // automatically vet at compile time.  Requiring that format strings
488  // are string literals: (1) permits the checking of format strings by
489  // the compiler and thereby (2) can practically remove the source of
490  // many format string exploits.
491
492  // Format string can be either ObjC string (e.g. @"%d") or
493  // C string (e.g. "%d")
494  // ObjC string uses the same format specifiers as C string, so we can use
495  // the same format string checking logic for both ObjC and C strings.
496  bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx);
497
498  if (!isFExpr) {
499    // For vprintf* functions (i.e., HasVAListArg==true), we add a
500    // special check to see if the format string is a function parameter
501    // of the function calling the printf function.  If the function
502    // has an attribute indicating it is a printf-like function, then we
503    // should suppress warnings concerning non-literals being used in a call
504    // to a vprintf function.  For example:
505    //
506    // void
507    // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
508    //      va_list ap;
509    //      va_start(ap, fmt);
510    //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt".
511    //      ...
512    //
513    //
514    //  FIXME: We don't have full attribute support yet, so just check to see
515    //    if the argument is a DeclRefExpr that references a parameter.  We'll
516    //    add proper support for checking the attribute later.
517    if (HasVAListArg)
518      if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
519        if (isa<ParmVarDecl>(DR->getDecl()))
520          return;
521
522    Diag(TheCall->getArg(format_idx)->getLocStart(),
523         diag::warn_printf_not_string_constant)
524      << OrigFormatExpr->getSourceRange();
525    return;
526  }
527}
528
529void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
530      CallExpr *TheCall, bool HasVAListArg, unsigned format_idx) {
531
532  ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
533  // CHECK: is the format string a wide literal?
534  if (FExpr->isWide()) {
535    Diag(FExpr->getLocStart(),
536         diag::warn_printf_format_string_is_wide_literal)
537      << OrigFormatExpr->getSourceRange();
538    return;
539  }
540
541  // Str - The format string.  NOTE: this is NOT null-terminated!
542  const char * const Str = FExpr->getStrData();
543
544  // CHECK: empty format string?
545  const unsigned StrLen = FExpr->getByteLength();
546
547  if (StrLen == 0) {
548    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
549      << OrigFormatExpr->getSourceRange();
550    return;
551  }
552
553  // We process the format string using a binary state machine.  The
554  // current state is stored in CurrentState.
555  enum {
556    state_OrdChr,
557    state_Conversion
558  } CurrentState = state_OrdChr;
559
560  // numConversions - The number of conversions seen so far.  This is
561  //  incremented as we traverse the format string.
562  unsigned numConversions = 0;
563
564  // numDataArgs - The number of data arguments after the format
565  //  string.  This can only be determined for non vprintf-like
566  //  functions.  For those functions, this value is 1 (the sole
567  //  va_arg argument).
568  unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1);
569
570  // Inspect the format string.
571  unsigned StrIdx = 0;
572
573  // LastConversionIdx - Index within the format string where we last saw
574  //  a '%' character that starts a new format conversion.
575  unsigned LastConversionIdx = 0;
576
577  for (; StrIdx < StrLen; ++StrIdx) {
578
579    // Is the number of detected conversion conversions greater than
580    // the number of matching data arguments?  If so, stop.
581    if (!HasVAListArg && numConversions > numDataArgs) break;
582
583    // Handle "\0"
584    if (Str[StrIdx] == '\0') {
585      // The string returned by getStrData() is not null-terminated,
586      // so the presence of a null character is likely an error.
587      Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
588           diag::warn_printf_format_string_contains_null_char)
589        <<  OrigFormatExpr->getSourceRange();
590      return;
591    }
592
593    // Ordinary characters (not processing a format conversion).
594    if (CurrentState == state_OrdChr) {
595      if (Str[StrIdx] == '%') {
596        CurrentState = state_Conversion;
597        LastConversionIdx = StrIdx;
598      }
599      continue;
600    }
601
602    // Seen '%'.  Now processing a format conversion.
603    switch (Str[StrIdx]) {
604    // Handle dynamic precision or width specifier.
605    case '*': {
606      ++numConversions;
607
608      if (!HasVAListArg && numConversions > numDataArgs) {
609        SourceLocation Loc = FExpr->getLocStart();
610        Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
611
612        if (Str[StrIdx-1] == '.')
613          Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
614            << OrigFormatExpr->getSourceRange();
615        else
616          Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
617            << OrigFormatExpr->getSourceRange();
618
619        // Don't do any more checking.  We'll just emit spurious errors.
620        return;
621      }
622
623      // Perform type checking on width/precision specifier.
624      Expr *E = TheCall->getArg(format_idx+numConversions);
625      if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
626        if (BT->getKind() == BuiltinType::Int)
627          break;
628
629      SourceLocation Loc =
630        PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
631
632      if (Str[StrIdx-1] == '.')
633        Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
634          << E->getType() << E->getSourceRange();
635      else
636        Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
637          << E->getType() << E->getSourceRange();
638
639      break;
640    }
641
642    // Characters which can terminate a format conversion
643    // (e.g. "%d").  Characters that specify length modifiers or
644    // other flags are handled by the default case below.
645    //
646    // FIXME: additional checks will go into the following cases.
647    case 'i':
648    case 'd':
649    case 'o':
650    case 'u':
651    case 'x':
652    case 'X':
653    case 'D':
654    case 'O':
655    case 'U':
656    case 'e':
657    case 'E':
658    case 'f':
659    case 'F':
660    case 'g':
661    case 'G':
662    case 'a':
663    case 'A':
664    case 'c':
665    case 'C':
666    case 'S':
667    case 's':
668    case 'p':
669      ++numConversions;
670      CurrentState = state_OrdChr;
671      break;
672
673    // CHECK: Are we using "%n"?  Issue a warning.
674    case 'n': {
675      ++numConversions;
676      CurrentState = state_OrdChr;
677      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
678                                                      LastConversionIdx+1);
679
680      Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
681      break;
682    }
683
684    // Handle "%@"
685    case '@':
686      // %@ is allowed in ObjC format strings only.
687      if(ObjCFExpr != NULL)
688        CurrentState = state_OrdChr;
689      else {
690        // Issue a warning: invalid format conversion.
691        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
692                                                    LastConversionIdx+1);
693
694        Diag(Loc, diag::warn_printf_invalid_conversion)
695          <<  std::string(Str+LastConversionIdx,
696                          Str+std::min(LastConversionIdx+2, StrLen))
697          << OrigFormatExpr->getSourceRange();
698      }
699      ++numConversions;
700      break;
701
702    // Handle "%%"
703    case '%':
704      // Sanity check: Was the first "%" character the previous one?
705      // If not, we will assume that we have a malformed format
706      // conversion, and that the current "%" character is the start
707      // of a new conversion.
708      if (StrIdx - LastConversionIdx == 1)
709        CurrentState = state_OrdChr;
710      else {
711        // Issue a warning: invalid format conversion.
712        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
713                                                        LastConversionIdx+1);
714
715        Diag(Loc, diag::warn_printf_invalid_conversion)
716          << std::string(Str+LastConversionIdx, Str+StrIdx)
717          << OrigFormatExpr->getSourceRange();
718
719        // This conversion is broken.  Advance to the next format
720        // conversion.
721        LastConversionIdx = StrIdx;
722        ++numConversions;
723      }
724      break;
725
726    default:
727      // This case catches all other characters: flags, widths, etc.
728      // We should eventually process those as well.
729      break;
730    }
731  }
732
733  if (CurrentState == state_Conversion) {
734    // Issue a warning: invalid format conversion.
735    SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
736                                                    LastConversionIdx+1);
737
738    Diag(Loc, diag::warn_printf_invalid_conversion)
739      << std::string(Str+LastConversionIdx,
740                     Str+std::min(LastConversionIdx+2, StrLen))
741      << OrigFormatExpr->getSourceRange();
742    return;
743  }
744
745  if (!HasVAListArg) {
746    // CHECK: Does the number of format conversions exceed the number
747    //        of data arguments?
748    if (numConversions > numDataArgs) {
749      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
750                                                      LastConversionIdx);
751
752      Diag(Loc, diag::warn_printf_insufficient_data_args)
753        << OrigFormatExpr->getSourceRange();
754    }
755    // CHECK: Does the number of data arguments exceed the number of
756    //        format conversions in the format string?
757    else if (numConversions < numDataArgs)
758      Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
759           diag::warn_printf_too_many_data_args)
760        << OrigFormatExpr->getSourceRange();
761  }
762}
763
764//===--- CHECK: Return Address of Stack Variable --------------------------===//
765
766static DeclRefExpr* EvalVal(Expr *E);
767static DeclRefExpr* EvalAddr(Expr* E);
768
769/// CheckReturnStackAddr - Check if a return statement returns the address
770///   of a stack variable.
771void
772Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
773                           SourceLocation ReturnLoc) {
774
775  // Perform checking for returned stack addresses.
776  if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
777    if (DeclRefExpr *DR = EvalAddr(RetValExp))
778      Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
779       << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
780
781    // Skip over implicit cast expressions when checking for block expressions.
782    if (ImplicitCastExpr *IcExpr =
783          dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
784      RetValExp = IcExpr->getSubExpr();
785
786    if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
787      Diag(C->getLocStart(), diag::err_ret_local_block)
788        << C->getSourceRange();
789  }
790  // Perform checking for stack values returned by reference.
791  else if (lhsType->isReferenceType()) {
792    // Check for a reference to the stack
793    if (DeclRefExpr *DR = EvalVal(RetValExp))
794      Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
795        << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
796  }
797}
798
799/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
800///  check if the expression in a return statement evaluates to an address
801///  to a location on the stack.  The recursion is used to traverse the
802///  AST of the return expression, with recursion backtracking when we
803///  encounter a subexpression that (1) clearly does not lead to the address
804///  of a stack variable or (2) is something we cannot determine leads to
805///  the address of a stack variable based on such local checking.
806///
807///  EvalAddr processes expressions that are pointers that are used as
808///  references (and not L-values).  EvalVal handles all other values.
809///  At the base case of the recursion is a check for a DeclRefExpr* in
810///  the refers to a stack variable.
811///
812///  This implementation handles:
813///
814///   * pointer-to-pointer casts
815///   * implicit conversions from array references to pointers
816///   * taking the address of fields
817///   * arbitrary interplay between "&" and "*" operators
818///   * pointer arithmetic from an address of a stack variable
819///   * taking the address of an array element where the array is on the stack
820static DeclRefExpr* EvalAddr(Expr *E) {
821  // We should only be called for evaluating pointer expressions.
822  assert((E->getType()->isPointerType() ||
823          E->getType()->isBlockPointerType() ||
824          E->getType()->isObjCQualifiedIdType()) &&
825         "EvalAddr only works on pointers");
826
827  // Our "symbolic interpreter" is just a dispatch off the currently
828  // viewed AST node.  We then recursively traverse the AST by calling
829  // EvalAddr and EvalVal appropriately.
830  switch (E->getStmtClass()) {
831  case Stmt::ParenExprClass:
832    // Ignore parentheses.
833    return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
834
835  case Stmt::UnaryOperatorClass: {
836    // The only unary operator that make sense to handle here
837    // is AddrOf.  All others don't make sense as pointers.
838    UnaryOperator *U = cast<UnaryOperator>(E);
839
840    if (U->getOpcode() == UnaryOperator::AddrOf)
841      return EvalVal(U->getSubExpr());
842    else
843      return NULL;
844  }
845
846  case Stmt::BinaryOperatorClass: {
847    // Handle pointer arithmetic.  All other binary operators are not valid
848    // in this context.
849    BinaryOperator *B = cast<BinaryOperator>(E);
850    BinaryOperator::Opcode op = B->getOpcode();
851
852    if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
853      return NULL;
854
855    Expr *Base = B->getLHS();
856
857    // Determine which argument is the real pointer base.  It could be
858    // the RHS argument instead of the LHS.
859    if (!Base->getType()->isPointerType()) Base = B->getRHS();
860
861    assert (Base->getType()->isPointerType());
862    return EvalAddr(Base);
863  }
864
865  // For conditional operators we need to see if either the LHS or RHS are
866  // valid DeclRefExpr*s.  If one of them is valid, we return it.
867  case Stmt::ConditionalOperatorClass: {
868    ConditionalOperator *C = cast<ConditionalOperator>(E);
869
870    // Handle the GNU extension for missing LHS.
871    if (Expr *lhsExpr = C->getLHS())
872      if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
873        return LHS;
874
875     return EvalAddr(C->getRHS());
876  }
877
878  // For casts, we need to handle conversions from arrays to
879  // pointer values, and pointer-to-pointer conversions.
880  case Stmt::ImplicitCastExprClass:
881  case Stmt::CStyleCastExprClass:
882  case Stmt::CXXFunctionalCastExprClass: {
883    Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
884    QualType T = SubExpr->getType();
885
886    if (SubExpr->getType()->isPointerType() ||
887        SubExpr->getType()->isBlockPointerType() ||
888        SubExpr->getType()->isObjCQualifiedIdType())
889      return EvalAddr(SubExpr);
890    else if (T->isArrayType())
891      return EvalVal(SubExpr);
892    else
893      return 0;
894  }
895
896  // C++ casts.  For dynamic casts, static casts, and const casts, we
897  // are always converting from a pointer-to-pointer, so we just blow
898  // through the cast.  In the case the dynamic cast doesn't fail (and
899  // return NULL), we take the conservative route and report cases
900  // where we return the address of a stack variable.  For Reinterpre
901  // FIXME: The comment about is wrong; we're not always converting
902  // from pointer to pointer. I'm guessing that this code should also
903  // handle references to objects.
904  case Stmt::CXXStaticCastExprClass:
905  case Stmt::CXXDynamicCastExprClass:
906  case Stmt::CXXConstCastExprClass:
907  case Stmt::CXXReinterpretCastExprClass: {
908      Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
909      if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
910        return EvalAddr(S);
911      else
912        return NULL;
913  }
914
915  // Everything else: we simply don't reason about them.
916  default:
917    return NULL;
918  }
919}
920
921
922///  EvalVal - This function is complements EvalAddr in the mutual recursion.
923///   See the comments for EvalAddr for more details.
924static DeclRefExpr* EvalVal(Expr *E) {
925
926  // We should only be called for evaluating non-pointer expressions, or
927  // expressions with a pointer type that are not used as references but instead
928  // are l-values (e.g., DeclRefExpr with a pointer type).
929
930  // Our "symbolic interpreter" is just a dispatch off the currently
931  // viewed AST node.  We then recursively traverse the AST by calling
932  // EvalAddr and EvalVal appropriately.
933  switch (E->getStmtClass()) {
934  case Stmt::DeclRefExprClass:
935  case Stmt::QualifiedDeclRefExprClass: {
936    // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
937    //  at code that refers to a variable's name.  We check if it has local
938    //  storage within the function, and if so, return the expression.
939    DeclRefExpr *DR = cast<DeclRefExpr>(E);
940
941    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
942      if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;
943
944    return NULL;
945  }
946
947  case Stmt::ParenExprClass:
948    // Ignore parentheses.
949    return EvalVal(cast<ParenExpr>(E)->getSubExpr());
950
951  case Stmt::UnaryOperatorClass: {
952    // The only unary operator that make sense to handle here
953    // is Deref.  All others don't resolve to a "name."  This includes
954    // handling all sorts of rvalues passed to a unary operator.
955    UnaryOperator *U = cast<UnaryOperator>(E);
956
957    if (U->getOpcode() == UnaryOperator::Deref)
958      return EvalAddr(U->getSubExpr());
959
960    return NULL;
961  }
962
963  case Stmt::ArraySubscriptExprClass: {
964    // Array subscripts are potential references to data on the stack.  We
965    // retrieve the DeclRefExpr* for the array variable if it indeed
966    // has local storage.
967    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
968  }
969
970  case Stmt::ConditionalOperatorClass: {
971    // For conditional operators we need to see if either the LHS or RHS are
972    // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
973    ConditionalOperator *C = cast<ConditionalOperator>(E);
974
975    // Handle the GNU extension for missing LHS.
976    if (Expr *lhsExpr = C->getLHS())
977      if (DeclRefExpr *LHS = EvalVal(lhsExpr))
978        return LHS;
979
980    return EvalVal(C->getRHS());
981  }
982
983  // Accesses to members are potential references to data on the stack.
984  case Stmt::MemberExprClass: {
985    MemberExpr *M = cast<MemberExpr>(E);
986
987    // Check for indirect access.  We only want direct field accesses.
988    if (!M->isArrow())
989      return EvalVal(M->getBase());
990    else
991      return NULL;
992  }
993
994  // Everything else: we simply don't reason about them.
995  default:
996    return NULL;
997  }
998}
999
1000//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
1001
1002/// Check for comparisons of floating point operands using != and ==.
1003/// Issue a warning if these are no self-comparisons, as they are not likely
1004/// to do what the programmer intended.
1005void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
1006  bool EmitWarning = true;
1007
1008  Expr* LeftExprSansParen = lex->IgnoreParens();
1009  Expr* RightExprSansParen = rex->IgnoreParens();
1010
1011  // Special case: check for x == x (which is OK).
1012  // Do not emit warnings for such cases.
1013  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
1014    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
1015      if (DRL->getDecl() == DRR->getDecl())
1016        EmitWarning = false;
1017
1018
1019  // Special case: check for comparisons against literals that can be exactly
1020  //  represented by APFloat.  In such cases, do not emit a warning.  This
1021  //  is a heuristic: often comparison against such literals are used to
1022  //  detect if a value in a variable has not changed.  This clearly can
1023  //  lead to false negatives.
1024  if (EmitWarning) {
1025    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
1026      if (FLL->isExact())
1027        EmitWarning = false;
1028    }
1029    else
1030      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
1031        if (FLR->isExact())
1032          EmitWarning = false;
1033    }
1034  }
1035
1036  // Check for comparisons with builtin types.
1037  if (EmitWarning)
1038    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
1039      if (isCallBuiltin(CL))
1040        EmitWarning = false;
1041
1042  if (EmitWarning)
1043    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
1044      if (isCallBuiltin(CR))
1045        EmitWarning = false;
1046
1047  // Emit the diagnostic.
1048  if (EmitWarning)
1049    Diag(loc, diag::warn_floatingpoint_eq)
1050      << lex->getSourceRange() << rex->getSourceRange();
1051}
1052