ScanfFormatString.cpp revision d02deebce5f1b283101e035a7f5d5bab0d2068ec
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfArgTypeResult;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Look for a ']' character which denotes the end of the scan list.
54  while (*I != ']') {
55    if (++I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  CS.setEndScanList(I);
62  return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68                                                const char *&Beg,
69                                                const char *E,
70                                                unsigned &argIndex,
71                                                const LangOptions &LO) {
72
73  using namespace clang::analyze_scanf;
74  const char *I = Beg;
75  const char *Start = 0;
76  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
77
78    // Look for a '%' character that indicates the start of a format specifier.
79  for ( ; I != E ; ++I) {
80    char c = *I;
81    if (c == '\0') {
82        // Detect spurious null characters, which are likely errors.
83      H.HandleNullChar(I);
84      return true;
85    }
86    if (c == '%') {
87      Start = I++;  // Record the start of the format specifier.
88      break;
89    }
90  }
91
92    // No format specifier found?
93  if (!Start)
94    return false;
95
96  if (I == E) {
97      // No more characters left?
98    H.HandleIncompleteSpecifier(Start, E - Start);
99    return true;
100  }
101
102  ScanfSpecifier FS;
103  if (ParseArgPosition(H, FS, Start, I, E))
104    return true;
105
106  if (I == E) {
107      // No more characters left?
108    H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  // Look for '*' flag if it is present.
113  if (*I == '*') {
114    FS.setSuppressAssignment(I);
115    if (++I == E) {
116      H.HandleIncompleteSpecifier(Start, E - Start);
117      return true;
118    }
119  }
120
121  // Look for the field width (if any).  Unlike printf, this is either
122  // a fixed integer or isn't present.
123  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
124  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
125    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
126    FS.setFieldWidth(Amt);
127
128    if (I == E) {
129      // No more characters left?
130      H.HandleIncompleteSpecifier(Start, E - Start);
131      return true;
132    }
133  }
134
135  // Look for the length modifier.
136  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
137      // No more characters left?
138    H.HandleIncompleteSpecifier(Start, E - Start);
139    return true;
140  }
141
142  // Detect spurious null characters, which are likely errors.
143  if (*I == '\0') {
144    H.HandleNullChar(I);
145    return true;
146  }
147
148  // Finally, look for the conversion specifier.
149  const char *conversionPosition = I++;
150  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
151  switch (*conversionPosition) {
152    default:
153      break;
154    case '%': k = ConversionSpecifier::PercentArg;   break;
155    case 'A': k = ConversionSpecifier::AArg; break;
156    case 'E': k = ConversionSpecifier::EArg; break;
157    case 'F': k = ConversionSpecifier::FArg; break;
158    case 'G': k = ConversionSpecifier::GArg; break;
159    case 'X': k = ConversionSpecifier::XArg; break;
160    case 'a': k = ConversionSpecifier::aArg; break;
161    case 'd': k = ConversionSpecifier::dArg; break;
162    case 'e': k = ConversionSpecifier::eArg; break;
163    case 'f': k = ConversionSpecifier::fArg; break;
164    case 'g': k = ConversionSpecifier::gArg; break;
165    case 'i': k = ConversionSpecifier::iArg; break;
166    case 'n': k = ConversionSpecifier::nArg; break;
167    case 'c': k = ConversionSpecifier::cArg; break;
168    case 'C': k = ConversionSpecifier::CArg; break;
169    case 'S': k = ConversionSpecifier::SArg; break;
170    case '[': k = ConversionSpecifier::ScanListArg; break;
171    case 'u': k = ConversionSpecifier::uArg; break;
172    case 'x': k = ConversionSpecifier::xArg; break;
173    case 'o': k = ConversionSpecifier::oArg; break;
174    case 's': k = ConversionSpecifier::sArg; break;
175    case 'p': k = ConversionSpecifier::pArg; break;
176  }
177  ScanfConversionSpecifier CS(conversionPosition, k);
178  if (k == ScanfConversionSpecifier::ScanListArg) {
179    if (!ParseScanList(H, CS, I, E))
180      return true;
181  }
182  FS.setConversionSpecifier(CS);
183  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
184      && !FS.usesPositionalArg())
185    FS.setArgIndex(argIndex++);
186
187  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
188  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
189
190  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
191    // Assume the conversion takes one argument.
192    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
193  }
194  return ScanfSpecifierResult(Start, FS);
195}
196
197ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
198  const ScanfConversionSpecifier &CS = getConversionSpecifier();
199
200  if (!CS.consumesDataArgument())
201    return ScanfArgTypeResult::Invalid();
202
203  switch(CS.getKind()) {
204    // Signed int.
205    case ConversionSpecifier::dArg:
206    case ConversionSpecifier::iArg:
207      switch (LM.getKind()) {
208        case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
209        case LengthModifier::AsChar:
210          return ArgTypeResult(ArgTypeResult::AnyCharTy);
211        case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
212        case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
213        case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy);
214        case LengthModifier::AsIntMax:
215          return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
216        case LengthModifier::AsSizeT:
217          // FIXME: ssize_t.
218          return ScanfArgTypeResult();
219        case LengthModifier::AsPtrDiff:
220          return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
221        case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid();
222        case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
223      }
224
225    // Unsigned int.
226    case ConversionSpecifier::oArg:
227    case ConversionSpecifier::uArg:
228    case ConversionSpecifier::xArg:
229    case ConversionSpecifier::XArg:
230      switch (LM.getKind()) {
231        case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
232        case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
233        case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
234        case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
235        case LengthModifier::AsLongLong:
236          return ArgTypeResult(Ctx.UnsignedLongLongTy);
237        case LengthModifier::AsIntMax:
238          return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
239        case LengthModifier::AsSizeT:
240          return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
241        case LengthModifier::AsPtrDiff:
242          // FIXME: Unsigned version of ptrdiff_t?
243          return ScanfArgTypeResult();
244        case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid();
245        case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
246      }
247
248    // Float.
249    case ConversionSpecifier::aArg:
250    case ConversionSpecifier::AArg:
251    case ConversionSpecifier::eArg:
252    case ConversionSpecifier::EArg:
253    case ConversionSpecifier::fArg:
254    case ConversionSpecifier::FArg:
255    case ConversionSpecifier::gArg:
256    case ConversionSpecifier::GArg:
257      switch (LM.getKind()) {
258        case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
259        case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
260        case LengthModifier::AsLongDouble:
261          return ArgTypeResult(Ctx.LongDoubleTy);
262        default:
263          return ScanfArgTypeResult::Invalid();
264      }
265
266    // Char, string and scanlist.
267    case ConversionSpecifier::cArg:
268    case ConversionSpecifier::sArg:
269    case ConversionSpecifier::ScanListArg:
270      switch (LM.getKind()) {
271        case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
272        case LengthModifier::AsLong:
273          return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
274        default:
275          return ScanfArgTypeResult::Invalid();
276      }
277    case ConversionSpecifier::CArg:
278    case ConversionSpecifier::SArg:
279      // FIXME: Mac OS X specific?
280      if (LM.getKind() == LengthModifier::None)
281        return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
282      return ScanfArgTypeResult::Invalid();
283
284    // Pointer.
285    case ConversionSpecifier::pArg:
286      return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
287
288    default:
289      break;
290  }
291
292  return ScanfArgTypeResult();
293}
294
295bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt)
296{
297  if (!QT->isPointerType())
298    return false;
299
300  QualType PT = QT->getPointeeType();
301  const BuiltinType *BT = PT->getAs<BuiltinType>();
302  if (!BT)
303    return false;
304
305  // Pointer to a character.
306  if (PT->isAnyCharacterType()) {
307    CS.setKind(ConversionSpecifier::sArg);
308    if (PT->isWideCharType())
309      LM.setKind(LengthModifier::AsWideChar);
310    else
311      LM.setKind(LengthModifier::None);
312    return true;
313  }
314
315  // Figure out the length modifier.
316  switch (BT->getKind()) {
317    // no modifier
318    case BuiltinType::UInt:
319    case BuiltinType::Int:
320    case BuiltinType::Float:
321      LM.setKind(LengthModifier::None);
322      break;
323
324    // hh
325    case BuiltinType::Char_U:
326    case BuiltinType::UChar:
327    case BuiltinType::Char_S:
328    case BuiltinType::SChar:
329      LM.setKind(LengthModifier::AsChar);
330      break;
331
332    // h
333    case BuiltinType::Short:
334    case BuiltinType::UShort:
335      LM.setKind(LengthModifier::AsShort);
336      break;
337
338    // l
339    case BuiltinType::Long:
340    case BuiltinType::ULong:
341    case BuiltinType::Double:
342      LM.setKind(LengthModifier::AsLong);
343      break;
344
345    // ll
346    case BuiltinType::LongLong:
347    case BuiltinType::ULongLong:
348      LM.setKind(LengthModifier::AsLongLong);
349      break;
350
351    // L
352    case BuiltinType::LongDouble:
353      LM.setKind(LengthModifier::AsLongDouble);
354      break;
355
356    // Don't know.
357    default:
358      return false;
359  }
360
361  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
362  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
363    const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
364    if (Identifier->getName() == "size_t") {
365      LM.setKind(LengthModifier::AsSizeT);
366    } else if (Identifier->getName() == "ssize_t") {
367      // Not C99, but common in Unix.
368      LM.setKind(LengthModifier::AsSizeT);
369    } else if (Identifier->getName() == "intmax_t") {
370      LM.setKind(LengthModifier::AsIntMax);
371    } else if (Identifier->getName() == "uintmax_t") {
372      LM.setKind(LengthModifier::AsIntMax);
373    } else if (Identifier->getName() == "ptrdiff_t") {
374      LM.setKind(LengthModifier::AsPtrDiff);
375    }
376  }
377
378  // Figure out the conversion specifier.
379  if (PT->isRealFloatingType())
380    CS.setKind(ConversionSpecifier::fArg);
381  else if (PT->isSignedIntegerType())
382    CS.setKind(ConversionSpecifier::dArg);
383  else if (PT->isUnsignedIntegerType()) {
384    // Preserve the original formatting, e.g. 'X', 'o'.
385    if (!CS.isUIntArg()) {
386      CS.setKind(ConversionSpecifier::uArg);
387    }
388  } else
389    llvm_unreachable("Unexpected type");
390
391  return true;
392}
393
394void ScanfSpecifier::toString(raw_ostream &os) const {
395  os << "%";
396
397  if (usesPositionalArg())
398    os << getPositionalArgIndex() << "$";
399  if (SuppressAssignment)
400    os << "*";
401
402  FieldWidth.toString(os);
403  os << LM.toString();
404  os << CS.toString();
405}
406
407bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
408                                                    const char *I,
409                                                    const char *E,
410                                                    const LangOptions &LO) {
411
412  unsigned argIndex = 0;
413
414  // Keep looking for a format specifier until we have exhausted the string.
415  while (I != E) {
416    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
417                                                          LO);
418    // Did a fail-stop error of any kind occur when parsing the specifier?
419    // If so, don't do any more processing.
420    if (FSR.shouldStop())
421      return true;;
422      // Did we exhaust the string or encounter an error that
423      // we can recover from?
424    if (!FSR.hasValue())
425      continue;
426      // We have a format specifier.  Pass it to the callback.
427    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
428                                I - FSR.getStart())) {
429      return true;
430    }
431  }
432  assert(I == E && "Format string not exhausted");
433  return false;
434}
435
436bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
437  switch (K) {
438    case InvalidTy:
439      llvm_unreachable("ArgTypeResult must be valid");
440    case UnknownTy:
441      return true;
442    case CStrTy:
443      return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
444    case WCStrTy:
445      return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
446    case PtrToArgTypeResultTy: {
447      const PointerType *PT = argTy->getAs<PointerType>();
448      if (!PT)
449        return false;
450      return A.matchesType(C, PT->getPointeeType());
451    }
452  }
453
454  return false; // Unreachable, but we still get a warning.
455}
456
457QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
458  switch (K) {
459    case InvalidTy:
460      llvm_unreachable("No representative type for Invalid ArgTypeResult");
461    case UnknownTy:
462      return QualType();
463    case CStrTy:
464      return C.getPointerType(C.CharTy);
465    case WCStrTy:
466      return C.getPointerType(C.getWCharType());
467    case PtrToArgTypeResultTy:
468      return C.getPointerType(A.getRepresentativeType(C));
469  }
470
471  return QualType(); // Not reachable.
472}
473
474std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
475  std::string S = getRepresentativeType(C).getAsString();
476  if (!Name)
477    return std::string("'") + S + "'";
478  return std::string("'") + Name + "' (aka '" + S + "')";
479}
480