SemaChecking.cpp revision d0d082f2eba4e3ed4eb467d76fd227c6dcd6cce7
1//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file implements extra semantic analysis beyond what is enforced
11//  by the C type system.
12//
13//===----------------------------------------------------------------------===//
14
15#include "Sema.h"
16#include "clang/AST/ASTContext.h"
17#include "clang/AST/DeclObjC.h"
18#include "clang/AST/ExprCXX.h"
19#include "clang/AST/ExprObjC.h"
20#include "clang/Lex/Preprocessor.h"
21using namespace clang;
22
23/// getLocationOfStringLiteralByte - Return a source location that points to the
24/// specified byte of the specified string literal.
25///
26/// Strings are amazingly complex.  They can be formed from multiple tokens and
27/// can have escape sequences in them in addition to the usual trigraph and
28/// escaped newline business.  This routine handles this complexity.
29///
30SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
31                                                    unsigned ByteNo) const {
32  assert(!SL->isWide() && "This doesn't work for wide strings yet");
33
34  llvm::SmallString<32> SpellingBuffer;
35
36  // Loop over all of the tokens in this string until we find the one that
37  // contains the byte we're looking for.
38  unsigned TokNo = 0;
39  while (1) {
40    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
41    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
42
43    // Get the spelling of the string so that we can get the data that makes up
44    // the string literal, not the identifier for the macro it is potentially
45    // expanded through.
46    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
47
48    // Re-lex the token to get its length and original spelling.
49    std::pair<FileID, unsigned> LocInfo =
50      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
51    std::pair<const char *,const char *> Buffer =
52      SourceMgr.getBufferData(LocInfo.first);
53    const char *StrData = Buffer.first+LocInfo.second;
54
55    // Create a langops struct and enable trigraphs.  This is sufficient for
56    // relexing tokens.
57    LangOptions LangOpts;
58    LangOpts.Trigraphs = true;
59
60    // Create a lexer starting at the beginning of this token.
61    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
62                   Buffer.second);
63    Token TheTok;
64    TheLexer.LexFromRawLexer(TheTok);
65
66    // Get the spelling of the token to remove trigraphs and escaped newlines.
67    SpellingBuffer.resize(TheTok.getLength());
68    const char *SpellingPtr = &SpellingBuffer[0];
69    unsigned TokLen = PP.getSpelling(TheTok, SpellingPtr);
70
71    // The length of the string is the token length minus the two quotes.
72    unsigned TokNumBytes = TokLen-2;
73
74    // If we found the token we're looking for, return the location.
75    // FIXME: This should consider character escapes!
76    if (ByteNo < TokNumBytes ||
77        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
78      // If the original token came from a macro expansion, just return the
79      // start of the token.  We don't want to magically jump to the spelling
80      // for a diagnostic.  We do the above business in case some tokens come
81      // from a macro expansion but others don't.
82      if (!StrTokLoc.isFileID()) return StrTokLoc;
83
84      // We advance +1 to step over the '"'.
85      return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
86    }
87
88    // Move to the next string token.
89    ++TokNo;
90    ByteNo -= TokNumBytes;
91  }
92}
93
94
95/// CheckFunctionCall - Check a direct function call for various correctness
96/// and safety properties not strictly enforced by the C type system.
97Action::OwningExprResult
98Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
99  OwningExprResult TheCallResult(Owned(TheCall));
100  // Get the IdentifierInfo* for the called function.
101  IdentifierInfo *FnInfo = FDecl->getIdentifier();
102
103  // None of the checks below are needed for functions that don't have
104  // simple names (e.g., C++ conversion functions).
105  if (!FnInfo)
106    return move(TheCallResult);
107
108  switch (FDecl->getBuiltinID(Context)) {
109  case Builtin::BI__builtin___CFStringMakeConstantString:
110    assert(TheCall->getNumArgs() == 1 &&
111           "Wrong # arguments to builtin CFStringMakeConstantString");
112    if (CheckObjCString(TheCall->getArg(0)))
113      return ExprError();
114    return move(TheCallResult);
115  case Builtin::BI__builtin_stdarg_start:
116  case Builtin::BI__builtin_va_start:
117    if (SemaBuiltinVAStart(TheCall))
118      return ExprError();
119    return move(TheCallResult);
120  case Builtin::BI__builtin_isgreater:
121  case Builtin::BI__builtin_isgreaterequal:
122  case Builtin::BI__builtin_isless:
123  case Builtin::BI__builtin_islessequal:
124  case Builtin::BI__builtin_islessgreater:
125  case Builtin::BI__builtin_isunordered:
126    if (SemaBuiltinUnorderedCompare(TheCall))
127      return ExprError();
128    return move(TheCallResult);
129  case Builtin::BI__builtin_return_address:
130  case Builtin::BI__builtin_frame_address:
131    if (SemaBuiltinStackAddress(TheCall))
132      return ExprError();
133    return move(TheCallResult);
134  case Builtin::BI__builtin_shufflevector:
135    return SemaBuiltinShuffleVector(TheCall);
136    // TheCall will be freed by the smart pointer here, but that's fine, since
137    // SemaBuiltinShuffleVector guts it, but then doesn't release it.
138  case Builtin::BI__builtin_prefetch:
139    if (SemaBuiltinPrefetch(TheCall))
140      return ExprError();
141    return move(TheCallResult);
142  case Builtin::BI__builtin_object_size:
143    if (SemaBuiltinObjectSize(TheCall))
144      return ExprError();
145  }
146
147  // FIXME: This mechanism should be abstracted to be less fragile and
148  // more efficient. For example, just map function ids to custom
149  // handlers.
150
151  // Printf checking.
152  if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
153    if (Format->getType() == "printf") {
154      bool HasVAListArg = false;
155      if (const FunctionTypeProto *Proto
156          = FDecl->getType()->getAsFunctionTypeProto())
157        HasVAListArg = !Proto->isVariadic();
158      CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
159                           Format->getFirstArg() - 1);
160    }
161  }
162
163  return move(TheCallResult);
164}
165
166/// CheckObjCString - Checks that the argument to the builtin
167/// CFString constructor is correct
168bool Sema::CheckObjCString(Expr *Arg) {
169  Arg = Arg->IgnoreParenCasts();
170  StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
171
172  if (!Literal || Literal->isWide()) {
173    Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
174      << Arg->getSourceRange();
175    return true;
176  }
177
178  const char *Data = Literal->getStrData();
179  unsigned Length = Literal->getByteLength();
180
181  for (unsigned i = 0; i < Length; ++i) {
182    if (!isascii(Data[i])) {
183      Diag(getLocationOfStringLiteralByte(Literal, i),
184           diag::warn_cfstring_literal_contains_non_ascii_character)
185        << Arg->getSourceRange();
186      break;
187    }
188
189    if (!Data[i]) {
190      Diag(getLocationOfStringLiteralByte(Literal, i),
191           diag::warn_cfstring_literal_contains_nul_character)
192        << Arg->getSourceRange();
193      break;
194    }
195  }
196
197  return false;
198}
199
200/// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
201/// Emit an error and return true on failure, return false on success.
202bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
203  Expr *Fn = TheCall->getCallee();
204  if (TheCall->getNumArgs() > 2) {
205    Diag(TheCall->getArg(2)->getLocStart(),
206         diag::err_typecheck_call_too_many_args)
207      << 0 /*function call*/ << Fn->getSourceRange()
208      << SourceRange(TheCall->getArg(2)->getLocStart(),
209                     (*(TheCall->arg_end()-1))->getLocEnd());
210    return true;
211  }
212
213  if (TheCall->getNumArgs() < 2) {
214    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
215      << 0 /*function call*/;
216  }
217
218  // Determine whether the current function is variadic or not.
219  bool isVariadic;
220  if (getCurFunctionDecl()) {
221    if (FunctionTypeProto* FTP =
222            dyn_cast<FunctionTypeProto>(getCurFunctionDecl()->getType()))
223      isVariadic = FTP->isVariadic();
224    else
225      isVariadic = false;
226  } else {
227    isVariadic = getCurMethodDecl()->isVariadic();
228  }
229
230  if (!isVariadic) {
231    Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
232    return true;
233  }
234
235  // Verify that the second argument to the builtin is the last argument of the
236  // current function or method.
237  bool SecondArgIsLastNamedArgument = false;
238  const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
239
240  if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
241    if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
242      // FIXME: This isn't correct for methods (results in bogus warning).
243      // Get the last formal in the current function.
244      const ParmVarDecl *LastArg;
245      if (FunctionDecl *FD = getCurFunctionDecl())
246        LastArg = *(FD->param_end()-1);
247      else
248        LastArg = *(getCurMethodDecl()->param_end()-1);
249      SecondArgIsLastNamedArgument = PV == LastArg;
250    }
251  }
252
253  if (!SecondArgIsLastNamedArgument)
254    Diag(TheCall->getArg(1)->getLocStart(),
255         diag::warn_second_parameter_of_va_start_not_last_named_argument);
256  return false;
257}
258
259/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
260/// friends.  This is declared to take (...), so we have to check everything.
261bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
262  if (TheCall->getNumArgs() < 2)
263    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
264      << 0 /*function call*/;
265  if (TheCall->getNumArgs() > 2)
266    return Diag(TheCall->getArg(2)->getLocStart(),
267                diag::err_typecheck_call_too_many_args)
268      << 0 /*function call*/
269      << SourceRange(TheCall->getArg(2)->getLocStart(),
270                     (*(TheCall->arg_end()-1))->getLocEnd());
271
272  Expr *OrigArg0 = TheCall->getArg(0);
273  Expr *OrigArg1 = TheCall->getArg(1);
274
275  // Do standard promotions between the two arguments, returning their common
276  // type.
277  QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
278
279  // If the common type isn't a real floating type, then the arguments were
280  // invalid for this operation.
281  if (!Res->isRealFloatingType())
282    return Diag(OrigArg0->getLocStart(),
283                diag::err_typecheck_call_invalid_ordered_compare)
284      << OrigArg0->getType() << OrigArg1->getType()
285      << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
286
287  return false;
288}
289
290bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
291  // The signature for these builtins is exact; the only thing we need
292  // to check is that the argument is a constant.
293  SourceLocation Loc;
294  if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
295    return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
296
297  return false;
298}
299
300/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
301// This is declared to take (...), so we have to check everything.
302Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
303  if (TheCall->getNumArgs() < 3)
304    return ExprError(Diag(TheCall->getLocEnd(),
305                          diag::err_typecheck_call_too_few_args)
306      << 0 /*function call*/ << TheCall->getSourceRange());
307
308  QualType FAType = TheCall->getArg(0)->getType();
309  QualType SAType = TheCall->getArg(1)->getType();
310
311  if (!FAType->isVectorType() || !SAType->isVectorType()) {
312    Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
313      << SourceRange(TheCall->getArg(0)->getLocStart(),
314                     TheCall->getArg(1)->getLocEnd());
315    return ExprError();
316  }
317
318  if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
319      Context.getCanonicalType(SAType).getUnqualifiedType()) {
320    Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
321      << SourceRange(TheCall->getArg(0)->getLocStart(),
322                     TheCall->getArg(1)->getLocEnd());
323    return ExprError();
324  }
325
326  unsigned numElements = FAType->getAsVectorType()->getNumElements();
327  if (TheCall->getNumArgs() != numElements+2) {
328    if (TheCall->getNumArgs() < numElements+2)
329      return ExprError(Diag(TheCall->getLocEnd(),
330                            diag::err_typecheck_call_too_few_args)
331               << 0 /*function call*/ << TheCall->getSourceRange());
332    return ExprError(Diag(TheCall->getLocEnd(),
333                          diag::err_typecheck_call_too_many_args)
334             << 0 /*function call*/ << TheCall->getSourceRange());
335  }
336
337  for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
338    llvm::APSInt Result(32);
339    if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
340      return ExprError(Diag(TheCall->getLocStart(),
341                  diag::err_shufflevector_nonconstant_argument)
342                << TheCall->getArg(i)->getSourceRange());
343
344    if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
345      return ExprError(Diag(TheCall->getLocStart(),
346                  diag::err_shufflevector_argument_too_large)
347               << TheCall->getArg(i)->getSourceRange());
348  }
349
350  llvm::SmallVector<Expr*, 32> exprs;
351
352  for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
353    exprs.push_back(TheCall->getArg(i));
354    TheCall->setArg(i, 0);
355  }
356
357  return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), numElements+2,
358                                            FAType,
359                                            TheCall->getCallee()->getLocStart(),
360                                            TheCall->getRParenLoc()));
361}
362
363/// SemaBuiltinPrefetch - Handle __builtin_prefetch.
364// This is declared to take (const void*, ...) and can take two
365// optional constant int args.
366bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
367  unsigned NumArgs = TheCall->getNumArgs();
368
369  if (NumArgs > 3)
370    return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
371             << 0 /*function call*/ << TheCall->getSourceRange();
372
373  // Argument 0 is checked for us and the remaining arguments must be
374  // constant integers.
375  for (unsigned i = 1; i != NumArgs; ++i) {
376    Expr *Arg = TheCall->getArg(i);
377    QualType RWType = Arg->getType();
378
379    const BuiltinType *BT = RWType->getAsBuiltinType();
380    llvm::APSInt Result;
381    if (!BT || BT->getKind() != BuiltinType::Int ||
382        !Arg->isIntegerConstantExpr(Result, Context))
383      return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
384              << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
385
386    // FIXME: gcc issues a warning and rewrites these to 0. These
387    // seems especially odd for the third argument since the default
388    // is 3.
389    if (i == 1) {
390      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
391        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
392             << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
393    } else {
394      if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
395        return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
396            << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
397    }
398  }
399
400  return false;
401}
402
403/// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
404/// int type). This simply type checks that type is one of the defined
405/// constants (0-3).
406bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
407  Expr *Arg = TheCall->getArg(1);
408  QualType ArgType = Arg->getType();
409  const BuiltinType *BT = ArgType->getAsBuiltinType();
410  llvm::APSInt Result(32);
411  if (!BT || BT->getKind() != BuiltinType::Int ||
412      !Arg->isIntegerConstantExpr(Result, Context)) {
413    return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
414             << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
415  }
416
417  if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
418    return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
419             << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
420  }
421
422  return false;
423}
424
425// Handle i > 1 ? "x" : "y", recursivelly
426bool Sema::SemaCheckStringLiteral(Expr *E, CallExpr *TheCall, bool HasVAListArg,
427                                  unsigned format_idx, unsigned firstDataArg) {
428
429  switch (E->getStmtClass()) {
430  case Stmt::ConditionalOperatorClass: {
431    ConditionalOperator *C = cast<ConditionalOperator>(E);
432    return SemaCheckStringLiteral(C->getLHS(), TheCall,
433                                  HasVAListArg, format_idx, firstDataArg)
434        && SemaCheckStringLiteral(C->getRHS(), TheCall,
435                                  HasVAListArg, format_idx, firstDataArg);
436  }
437
438  case Stmt::ImplicitCastExprClass: {
439    ImplicitCastExpr *Expr = dyn_cast<ImplicitCastExpr>(E);
440    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
441                                  format_idx, firstDataArg);
442  }
443
444  case Stmt::ParenExprClass: {
445    ParenExpr *Expr = dyn_cast<ParenExpr>(E);
446    return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
447                                  format_idx, firstDataArg);
448  }
449
450  default: {
451    ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E);
452    StringLiteral *StrE = NULL;
453
454    if (ObjCFExpr)
455      StrE = ObjCFExpr->getString();
456    else
457      StrE = dyn_cast<StringLiteral>(E);
458
459    if (StrE) {
460      CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
461                        firstDataArg);
462      return true;
463    }
464
465    return false;
466  }
467  }
468}
469
470
471/// CheckPrintfArguments - Check calls to printf (and similar functions) for
472/// correct use of format strings.
473///
474///  HasVAListArg - A predicate indicating whether the printf-like
475///    function is passed an explicit va_arg argument (e.g., vprintf)
476///
477///  format_idx - The index into Args for the format string.
478///
479/// Improper format strings to functions in the printf family can be
480/// the source of bizarre bugs and very serious security holes.  A
481/// good source of information is available in the following paper
482/// (which includes additional references):
483///
484///  FormatGuard: Automatic Protection From printf Format String
485///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
486///
487/// Functionality implemented:
488///
489///  We can statically check the following properties for string
490///  literal format strings for non v.*printf functions (where the
491///  arguments are passed directly):
492//
493///  (1) Are the number of format conversions equal to the number of
494///      data arguments?
495///
496///  (2) Does each format conversion correctly match the type of the
497///      corresponding data argument?  (TODO)
498///
499/// Moreover, for all printf functions we can:
500///
501///  (3) Check for a missing format string (when not caught by type checking).
502///
503///  (4) Check for no-operation flags; e.g. using "#" with format
504///      conversion 'c'  (TODO)
505///
506///  (5) Check the use of '%n', a major source of security holes.
507///
508///  (6) Check for malformed format conversions that don't specify anything.
509///
510///  (7) Check for empty format strings.  e.g: printf("");
511///
512///  (8) Check that the format string is a wide literal.
513///
514///  (9) Also check the arguments of functions with the __format__ attribute.
515///      (TODO).
516///
517/// All of these checks can be done by parsing the format string.
518///
519/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
520void
521Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
522                           unsigned format_idx, unsigned firstDataArg) {
523  Expr *Fn = TheCall->getCallee();
524
525  // CHECK: printf-like function is called with no format string.
526  if (format_idx >= TheCall->getNumArgs()) {
527    Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
528      << Fn->getSourceRange();
529    return;
530  }
531
532  Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
533
534  // CHECK: format string is not a string literal.
535  //
536  // Dynamically generated format strings are difficult to
537  // automatically vet at compile time.  Requiring that format strings
538  // are string literals: (1) permits the checking of format strings by
539  // the compiler and thereby (2) can practically remove the source of
540  // many format string exploits.
541
542  // Format string can be either ObjC string (e.g. @"%d") or
543  // C string (e.g. "%d")
544  // ObjC string uses the same format specifiers as C string, so we can use
545  // the same format string checking logic for both ObjC and C strings.
546  bool isFExpr = SemaCheckStringLiteral(OrigFormatExpr, TheCall,
547                                        HasVAListArg, format_idx,
548                                        firstDataArg);
549
550  if (!isFExpr) {
551    // For vprintf* functions (i.e., HasVAListArg==true), we add a
552    // special check to see if the format string is a function parameter
553    // of the function calling the printf function.  If the function
554    // has an attribute indicating it is a printf-like function, then we
555    // should suppress warnings concerning non-literals being used in a call
556    // to a vprintf function.  For example:
557    //
558    // void
559    // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
560    //      va_list ap;
561    //      va_start(ap, fmt);
562    //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt".
563    //      ...
564    //
565    //
566    //  FIXME: We don't have full attribute support yet, so just check to see
567    //    if the argument is a DeclRefExpr that references a parameter.  We'll
568    //    add proper support for checking the attribute later.
569    if (HasVAListArg)
570      if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
571        if (isa<ParmVarDecl>(DR->getDecl()))
572          return;
573
574    Diag(TheCall->getArg(format_idx)->getLocStart(),
575         diag::warn_printf_not_string_constant)
576      << OrigFormatExpr->getSourceRange();
577    return;
578  }
579}
580
581void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
582      CallExpr *TheCall, bool HasVAListArg, unsigned format_idx,
583                             unsigned firstDataArg) {
584
585  ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
586  // CHECK: is the format string a wide literal?
587  if (FExpr->isWide()) {
588    Diag(FExpr->getLocStart(),
589         diag::warn_printf_format_string_is_wide_literal)
590      << OrigFormatExpr->getSourceRange();
591    return;
592  }
593
594  // Str - The format string.  NOTE: this is NOT null-terminated!
595  const char * const Str = FExpr->getStrData();
596
597  // CHECK: empty format string?
598  const unsigned StrLen = FExpr->getByteLength();
599
600  if (StrLen == 0) {
601    Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
602      << OrigFormatExpr->getSourceRange();
603    return;
604  }
605
606  // We process the format string using a binary state machine.  The
607  // current state is stored in CurrentState.
608  enum {
609    state_OrdChr,
610    state_Conversion
611  } CurrentState = state_OrdChr;
612
613  // numConversions - The number of conversions seen so far.  This is
614  //  incremented as we traverse the format string.
615  unsigned numConversions = 0;
616
617  // numDataArgs - The number of data arguments after the format
618  //  string.  This can only be determined for non vprintf-like
619  //  functions.  For those functions, this value is 1 (the sole
620  //  va_arg argument).
621  unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg;
622
623  // Inspect the format string.
624  unsigned StrIdx = 0;
625
626  // LastConversionIdx - Index within the format string where we last saw
627  //  a '%' character that starts a new format conversion.
628  unsigned LastConversionIdx = 0;
629
630  for (; StrIdx < StrLen; ++StrIdx) {
631
632    // Is the number of detected conversion conversions greater than
633    // the number of matching data arguments?  If so, stop.
634    if (!HasVAListArg && numConversions > numDataArgs) break;
635
636    // Handle "\0"
637    if (Str[StrIdx] == '\0') {
638      // The string returned by getStrData() is not null-terminated,
639      // so the presence of a null character is likely an error.
640      Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
641           diag::warn_printf_format_string_contains_null_char)
642        <<  OrigFormatExpr->getSourceRange();
643      return;
644    }
645
646    // Ordinary characters (not processing a format conversion).
647    if (CurrentState == state_OrdChr) {
648      if (Str[StrIdx] == '%') {
649        CurrentState = state_Conversion;
650        LastConversionIdx = StrIdx;
651      }
652      continue;
653    }
654
655    // Seen '%'.  Now processing a format conversion.
656    switch (Str[StrIdx]) {
657    // Handle dynamic precision or width specifier.
658    case '*': {
659      ++numConversions;
660
661      if (!HasVAListArg && numConversions > numDataArgs) {
662        SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
663
664        if (Str[StrIdx-1] == '.')
665          Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
666            << OrigFormatExpr->getSourceRange();
667        else
668          Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
669            << OrigFormatExpr->getSourceRange();
670
671        // Don't do any more checking.  We'll just emit spurious errors.
672        return;
673      }
674
675      // Perform type checking on width/precision specifier.
676      Expr *E = TheCall->getArg(format_idx+numConversions);
677      if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
678        if (BT->getKind() == BuiltinType::Int)
679          break;
680
681      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
682
683      if (Str[StrIdx-1] == '.')
684        Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
685          << E->getType() << E->getSourceRange();
686      else
687        Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
688          << E->getType() << E->getSourceRange();
689
690      break;
691    }
692
693    // Characters which can terminate a format conversion
694    // (e.g. "%d").  Characters that specify length modifiers or
695    // other flags are handled by the default case below.
696    //
697    // FIXME: additional checks will go into the following cases.
698    case 'i':
699    case 'd':
700    case 'o':
701    case 'u':
702    case 'x':
703    case 'X':
704    case 'D':
705    case 'O':
706    case 'U':
707    case 'e':
708    case 'E':
709    case 'f':
710    case 'F':
711    case 'g':
712    case 'G':
713    case 'a':
714    case 'A':
715    case 'c':
716    case 'C':
717    case 'S':
718    case 's':
719    case 'p':
720      ++numConversions;
721      CurrentState = state_OrdChr;
722      break;
723
724    // CHECK: Are we using "%n"?  Issue a warning.
725    case 'n': {
726      ++numConversions;
727      CurrentState = state_OrdChr;
728      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
729                                                          LastConversionIdx);
730
731      Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
732      break;
733    }
734
735    // Handle "%@"
736    case '@':
737      // %@ is allowed in ObjC format strings only.
738      if(ObjCFExpr != NULL)
739        CurrentState = state_OrdChr;
740      else {
741        // Issue a warning: invalid format conversion.
742        SourceLocation Loc =
743          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
744
745        Diag(Loc, diag::warn_printf_invalid_conversion)
746          <<  std::string(Str+LastConversionIdx,
747                          Str+std::min(LastConversionIdx+2, StrLen))
748          << OrigFormatExpr->getSourceRange();
749      }
750      ++numConversions;
751      break;
752
753    // Handle "%%"
754    case '%':
755      // Sanity check: Was the first "%" character the previous one?
756      // If not, we will assume that we have a malformed format
757      // conversion, and that the current "%" character is the start
758      // of a new conversion.
759      if (StrIdx - LastConversionIdx == 1)
760        CurrentState = state_OrdChr;
761      else {
762        // Issue a warning: invalid format conversion.
763        SourceLocation Loc =
764          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
765
766        Diag(Loc, diag::warn_printf_invalid_conversion)
767          << std::string(Str+LastConversionIdx, Str+StrIdx)
768          << OrigFormatExpr->getSourceRange();
769
770        // This conversion is broken.  Advance to the next format
771        // conversion.
772        LastConversionIdx = StrIdx;
773        ++numConversions;
774      }
775      break;
776
777    default:
778      // This case catches all other characters: flags, widths, etc.
779      // We should eventually process those as well.
780      break;
781    }
782  }
783
784  if (CurrentState == state_Conversion) {
785    // Issue a warning: invalid format conversion.
786    SourceLocation Loc =
787      getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
788
789    Diag(Loc, diag::warn_printf_invalid_conversion)
790      << std::string(Str+LastConversionIdx,
791                     Str+std::min(LastConversionIdx+2, StrLen))
792      << OrigFormatExpr->getSourceRange();
793    return;
794  }
795
796  if (!HasVAListArg) {
797    // CHECK: Does the number of format conversions exceed the number
798    //        of data arguments?
799    if (numConversions > numDataArgs) {
800      SourceLocation Loc =
801        getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
802
803      Diag(Loc, diag::warn_printf_insufficient_data_args)
804        << OrigFormatExpr->getSourceRange();
805    }
806    // CHECK: Does the number of data arguments exceed the number of
807    //        format conversions in the format string?
808    else if (numConversions < numDataArgs)
809      Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
810           diag::warn_printf_too_many_data_args)
811        << OrigFormatExpr->getSourceRange();
812  }
813}
814
815//===--- CHECK: Return Address of Stack Variable --------------------------===//
816
817static DeclRefExpr* EvalVal(Expr *E);
818static DeclRefExpr* EvalAddr(Expr* E);
819
820/// CheckReturnStackAddr - Check if a return statement returns the address
821///   of a stack variable.
822void
823Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
824                           SourceLocation ReturnLoc) {
825
826  // Perform checking for returned stack addresses.
827  if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
828    if (DeclRefExpr *DR = EvalAddr(RetValExp))
829      Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
830       << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
831
832    // Skip over implicit cast expressions when checking for block expressions.
833    if (ImplicitCastExpr *IcExpr =
834          dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
835      RetValExp = IcExpr->getSubExpr();
836
837    if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
838      Diag(C->getLocStart(), diag::err_ret_local_block)
839        << C->getSourceRange();
840  }
841  // Perform checking for stack values returned by reference.
842  else if (lhsType->isReferenceType()) {
843    // Check for a reference to the stack
844    if (DeclRefExpr *DR = EvalVal(RetValExp))
845      Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
846        << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
847  }
848}
849
850/// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
851///  check if the expression in a return statement evaluates to an address
852///  to a location on the stack.  The recursion is used to traverse the
853///  AST of the return expression, with recursion backtracking when we
854///  encounter a subexpression that (1) clearly does not lead to the address
855///  of a stack variable or (2) is something we cannot determine leads to
856///  the address of a stack variable based on such local checking.
857///
858///  EvalAddr processes expressions that are pointers that are used as
859///  references (and not L-values).  EvalVal handles all other values.
860///  At the base case of the recursion is a check for a DeclRefExpr* in
861///  the refers to a stack variable.
862///
863///  This implementation handles:
864///
865///   * pointer-to-pointer casts
866///   * implicit conversions from array references to pointers
867///   * taking the address of fields
868///   * arbitrary interplay between "&" and "*" operators
869///   * pointer arithmetic from an address of a stack variable
870///   * taking the address of an array element where the array is on the stack
871static DeclRefExpr* EvalAddr(Expr *E) {
872  // We should only be called for evaluating pointer expressions.
873  assert((E->getType()->isPointerType() ||
874          E->getType()->isBlockPointerType() ||
875          E->getType()->isObjCQualifiedIdType()) &&
876         "EvalAddr only works on pointers");
877
878  // Our "symbolic interpreter" is just a dispatch off the currently
879  // viewed AST node.  We then recursively traverse the AST by calling
880  // EvalAddr and EvalVal appropriately.
881  switch (E->getStmtClass()) {
882  case Stmt::ParenExprClass:
883    // Ignore parentheses.
884    return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
885
886  case Stmt::UnaryOperatorClass: {
887    // The only unary operator that make sense to handle here
888    // is AddrOf.  All others don't make sense as pointers.
889    UnaryOperator *U = cast<UnaryOperator>(E);
890
891    if (U->getOpcode() == UnaryOperator::AddrOf)
892      return EvalVal(U->getSubExpr());
893    else
894      return NULL;
895  }
896
897  case Stmt::BinaryOperatorClass: {
898    // Handle pointer arithmetic.  All other binary operators are not valid
899    // in this context.
900    BinaryOperator *B = cast<BinaryOperator>(E);
901    BinaryOperator::Opcode op = B->getOpcode();
902
903    if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
904      return NULL;
905
906    Expr *Base = B->getLHS();
907
908    // Determine which argument is the real pointer base.  It could be
909    // the RHS argument instead of the LHS.
910    if (!Base->getType()->isPointerType()) Base = B->getRHS();
911
912    assert (Base->getType()->isPointerType());
913    return EvalAddr(Base);
914  }
915
916  // For conditional operators we need to see if either the LHS or RHS are
917  // valid DeclRefExpr*s.  If one of them is valid, we return it.
918  case Stmt::ConditionalOperatorClass: {
919    ConditionalOperator *C = cast<ConditionalOperator>(E);
920
921    // Handle the GNU extension for missing LHS.
922    if (Expr *lhsExpr = C->getLHS())
923      if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
924        return LHS;
925
926     return EvalAddr(C->getRHS());
927  }
928
929  // For casts, we need to handle conversions from arrays to
930  // pointer values, and pointer-to-pointer conversions.
931  case Stmt::ImplicitCastExprClass:
932  case Stmt::CStyleCastExprClass:
933  case Stmt::CXXFunctionalCastExprClass: {
934    Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
935    QualType T = SubExpr->getType();
936
937    if (SubExpr->getType()->isPointerType() ||
938        SubExpr->getType()->isBlockPointerType() ||
939        SubExpr->getType()->isObjCQualifiedIdType())
940      return EvalAddr(SubExpr);
941    else if (T->isArrayType())
942      return EvalVal(SubExpr);
943    else
944      return 0;
945  }
946
947  // C++ casts.  For dynamic casts, static casts, and const casts, we
948  // are always converting from a pointer-to-pointer, so we just blow
949  // through the cast.  In the case the dynamic cast doesn't fail (and
950  // return NULL), we take the conservative route and report cases
951  // where we return the address of a stack variable.  For Reinterpre
952  // FIXME: The comment about is wrong; we're not always converting
953  // from pointer to pointer. I'm guessing that this code should also
954  // handle references to objects.
955  case Stmt::CXXStaticCastExprClass:
956  case Stmt::CXXDynamicCastExprClass:
957  case Stmt::CXXConstCastExprClass:
958  case Stmt::CXXReinterpretCastExprClass: {
959      Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
960      if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
961        return EvalAddr(S);
962      else
963        return NULL;
964  }
965
966  // Everything else: we simply don't reason about them.
967  default:
968    return NULL;
969  }
970}
971
972
973///  EvalVal - This function is complements EvalAddr in the mutual recursion.
974///   See the comments for EvalAddr for more details.
975static DeclRefExpr* EvalVal(Expr *E) {
976
977  // We should only be called for evaluating non-pointer expressions, or
978  // expressions with a pointer type that are not used as references but instead
979  // are l-values (e.g., DeclRefExpr with a pointer type).
980
981  // Our "symbolic interpreter" is just a dispatch off the currently
982  // viewed AST node.  We then recursively traverse the AST by calling
983  // EvalAddr and EvalVal appropriately.
984  switch (E->getStmtClass()) {
985  case Stmt::DeclRefExprClass:
986  case Stmt::QualifiedDeclRefExprClass: {
987    // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
988    //  at code that refers to a variable's name.  We check if it has local
989    //  storage within the function, and if so, return the expression.
990    DeclRefExpr *DR = cast<DeclRefExpr>(E);
991
992    if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
993      if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;
994
995    return NULL;
996  }
997
998  case Stmt::ParenExprClass:
999    // Ignore parentheses.
1000    return EvalVal(cast<ParenExpr>(E)->getSubExpr());
1001
1002  case Stmt::UnaryOperatorClass: {
1003    // The only unary operator that make sense to handle here
1004    // is Deref.  All others don't resolve to a "name."  This includes
1005    // handling all sorts of rvalues passed to a unary operator.
1006    UnaryOperator *U = cast<UnaryOperator>(E);
1007
1008    if (U->getOpcode() == UnaryOperator::Deref)
1009      return EvalAddr(U->getSubExpr());
1010
1011    return NULL;
1012  }
1013
1014  case Stmt::ArraySubscriptExprClass: {
1015    // Array subscripts are potential references to data on the stack.  We
1016    // retrieve the DeclRefExpr* for the array variable if it indeed
1017    // has local storage.
1018    return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
1019  }
1020
1021  case Stmt::ConditionalOperatorClass: {
1022    // For conditional operators we need to see if either the LHS or RHS are
1023    // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
1024    ConditionalOperator *C = cast<ConditionalOperator>(E);
1025
1026    // Handle the GNU extension for missing LHS.
1027    if (Expr *lhsExpr = C->getLHS())
1028      if (DeclRefExpr *LHS = EvalVal(lhsExpr))
1029        return LHS;
1030
1031    return EvalVal(C->getRHS());
1032  }
1033
1034  // Accesses to members are potential references to data on the stack.
1035  case Stmt::MemberExprClass: {
1036    MemberExpr *M = cast<MemberExpr>(E);
1037
1038    // Check for indirect access.  We only want direct field accesses.
1039    if (!M->isArrow())
1040      return EvalVal(M->getBase());
1041    else
1042      return NULL;
1043  }
1044
1045  // Everything else: we simply don't reason about them.
1046  default:
1047    return NULL;
1048  }
1049}
1050
1051//===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
1052
1053/// Check for comparisons of floating point operands using != and ==.
1054/// Issue a warning if these are no self-comparisons, as they are not likely
1055/// to do what the programmer intended.
1056void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
1057  bool EmitWarning = true;
1058
1059  Expr* LeftExprSansParen = lex->IgnoreParens();
1060  Expr* RightExprSansParen = rex->IgnoreParens();
1061
1062  // Special case: check for x == x (which is OK).
1063  // Do not emit warnings for such cases.
1064  if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
1065    if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
1066      if (DRL->getDecl() == DRR->getDecl())
1067        EmitWarning = false;
1068
1069
1070  // Special case: check for comparisons against literals that can be exactly
1071  //  represented by APFloat.  In such cases, do not emit a warning.  This
1072  //  is a heuristic: often comparison against such literals are used to
1073  //  detect if a value in a variable has not changed.  This clearly can
1074  //  lead to false negatives.
1075  if (EmitWarning) {
1076    if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
1077      if (FLL->isExact())
1078        EmitWarning = false;
1079    }
1080    else
1081      if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
1082        if (FLR->isExact())
1083          EmitWarning = false;
1084    }
1085  }
1086
1087  // Check for comparisons with builtin types.
1088  if (EmitWarning)
1089    if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
1090      if (CL->isBuiltinCall(Context))
1091        EmitWarning = false;
1092
1093  if (EmitWarning)
1094    if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
1095      if (CR->isBuiltinCall(Context))
1096        EmitWarning = false;
1097
1098  // Emit the diagnostic.
1099  if (EmitWarning)
1100    Diag(loc, diag::warn_floatingpoint_eq)
1101      << lex->getSourceRange() << rex->getSourceRange();
1102}
1103