ScanfFormatString.cpp revision 6fcd932dfd6835f70cc00d6f7c6789793f6d7b66
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfArgTypeResult;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Look for a ']' character which denotes the end of the scan list.
54  while (*I != ']') {
55    if (++I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  CS.setEndScanList(I);
62  return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68                                                const char *&Beg,
69                                                const char *E,
70                                                unsigned &argIndex) {
71
72  using namespace clang::analyze_scanf;
73  const char *I = Beg;
74  const char *Start = 0;
75  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
76
77    // Look for a '%' character that indicates the start of a format specifier.
78  for ( ; I != E ; ++I) {
79    char c = *I;
80    if (c == '\0') {
81        // Detect spurious null characters, which are likely errors.
82      H.HandleNullChar(I);
83      return true;
84    }
85    if (c == '%') {
86      Start = I++;  // Record the start of the format specifier.
87      break;
88    }
89  }
90
91    // No format specifier found?
92  if (!Start)
93    return false;
94
95  if (I == E) {
96      // No more characters left?
97    H.HandleIncompleteSpecifier(Start, E - Start);
98    return true;
99  }
100
101  ScanfSpecifier FS;
102  if (ParseArgPosition(H, FS, Start, I, E))
103    return true;
104
105  if (I == E) {
106      // No more characters left?
107    H.HandleIncompleteSpecifier(Start, E - Start);
108    return true;
109  }
110
111  // Look for '*' flag if it is present.
112  if (*I == '*') {
113    FS.setSuppressAssignment(I);
114    if (++I == E) {
115      H.HandleIncompleteSpecifier(Start, E - Start);
116      return true;
117    }
118  }
119
120  // Look for the field width (if any).  Unlike printf, this is either
121  // a fixed integer or isn't present.
122  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
123  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
124    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
125    FS.setFieldWidth(Amt);
126
127    if (I == E) {
128      // No more characters left?
129      H.HandleIncompleteSpecifier(Start, E - Start);
130      return true;
131    }
132  }
133
134  // Look for the length modifier.
135  if (ParseLengthModifier(FS, I, E) && I == E) {
136      // No more characters left?
137    H.HandleIncompleteSpecifier(Start, E - Start);
138    return true;
139  }
140
141  // Detect spurious null characters, which are likely errors.
142  if (*I == '\0') {
143    H.HandleNullChar(I);
144    return true;
145  }
146
147  // Finally, look for the conversion specifier.
148  const char *conversionPosition = I++;
149  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
150  switch (*conversionPosition) {
151    default:
152      break;
153    case '%': k = ConversionSpecifier::PercentArg;   break;
154    case 'A': k = ConversionSpecifier::AArg; break;
155    case 'E': k = ConversionSpecifier::EArg; break;
156    case 'F': k = ConversionSpecifier::FArg; break;
157    case 'G': k = ConversionSpecifier::GArg; break;
158    case 'X': k = ConversionSpecifier::XArg; break;
159    case 'a': k = ConversionSpecifier::aArg; break;
160    case 'd': k = ConversionSpecifier::dArg; break;
161    case 'e': k = ConversionSpecifier::eArg; break;
162    case 'f': k = ConversionSpecifier::fArg; break;
163    case 'g': k = ConversionSpecifier::gArg; break;
164    case 'i': k = ConversionSpecifier::iArg; break;
165    case 'n': k = ConversionSpecifier::nArg; break;
166    case 'c': k = ConversionSpecifier::cArg; break;
167    case 'C': k = ConversionSpecifier::CArg; break;
168    case 'S': k = ConversionSpecifier::SArg; break;
169    case '[': k = ConversionSpecifier::ScanListArg; break;
170    case 'u': k = ConversionSpecifier::uArg; break;
171    case 'x': k = ConversionSpecifier::xArg; break;
172    case 'o': k = ConversionSpecifier::oArg; break;
173    case 's': k = ConversionSpecifier::sArg; break;
174    case 'p': k = ConversionSpecifier::pArg; break;
175  }
176  ScanfConversionSpecifier CS(conversionPosition, k);
177  if (k == ScanfConversionSpecifier::ScanListArg) {
178    if (!ParseScanList(H, CS, I, E))
179      return true;
180  }
181  FS.setConversionSpecifier(CS);
182  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
183      && !FS.usesPositionalArg())
184    FS.setArgIndex(argIndex++);
185
186  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
187  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
188
189  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
190    // Assume the conversion takes one argument.
191    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
192  }
193  return ScanfSpecifierResult(Start, FS);
194}
195
196ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
197  const ScanfConversionSpecifier &CS = getConversionSpecifier();
198
199  if (!CS.consumesDataArgument())
200    return ScanfArgTypeResult::Invalid();
201
202  switch(CS.getKind()) {
203    // Signed int.
204    case ConversionSpecifier::dArg:
205    case ConversionSpecifier::iArg:
206      switch (LM.getKind()) {
207        case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
208        case LengthModifier::AsChar:
209          return ArgTypeResult(ArgTypeResult::AnyCharTy);
210        case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
211        case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
212        case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy);
213        case LengthModifier::AsIntMax:
214          return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
215        case LengthModifier::AsSizeT:
216          // FIXME: ssize_t.
217          return ScanfArgTypeResult();
218        case LengthModifier::AsPtrDiff:
219          return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
220        case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid();
221      }
222
223    // Unsigned int.
224    case ConversionSpecifier::oArg:
225    case ConversionSpecifier::uArg:
226    case ConversionSpecifier::xArg:
227    case ConversionSpecifier::XArg:
228      switch (LM.getKind()) {
229        case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
230        case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
231        case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
232        case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
233        case LengthModifier::AsLongLong:
234          return ArgTypeResult(Ctx.UnsignedLongLongTy);
235        case LengthModifier::AsIntMax:
236          return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
237        case LengthModifier::AsSizeT:
238          return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
239        case LengthModifier::AsPtrDiff:
240          // FIXME: Unsigned version of ptrdiff_t?
241          return ScanfArgTypeResult();
242        case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid();
243      }
244
245    // Float.
246    case ConversionSpecifier::aArg:
247    case ConversionSpecifier::AArg:
248    case ConversionSpecifier::eArg:
249    case ConversionSpecifier::EArg:
250    case ConversionSpecifier::fArg:
251    case ConversionSpecifier::FArg:
252    case ConversionSpecifier::gArg:
253    case ConversionSpecifier::GArg:
254      switch (LM.getKind()) {
255        case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
256        case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
257        case LengthModifier::AsLongDouble:
258          return ArgTypeResult(Ctx.LongDoubleTy);
259        default:
260          return ScanfArgTypeResult::Invalid();
261      }
262
263    // Char, string and scanlist.
264    case ConversionSpecifier::cArg:
265    case ConversionSpecifier::sArg:
266    case ConversionSpecifier::ScanListArg:
267      switch (LM.getKind()) {
268        case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
269        case LengthModifier::AsLong:
270          return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
271        default:
272          return ScanfArgTypeResult::Invalid();
273      }
274    case ConversionSpecifier::CArg:
275    case ConversionSpecifier::SArg:
276      // FIXME: Mac OS X specific?
277      return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
278
279    // Pointer.
280    case ConversionSpecifier::pArg:
281      return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
282
283    default:
284      break;
285  }
286
287  return ScanfArgTypeResult();
288}
289
290bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt)
291{
292  if (!QT->isPointerType())
293    return false;
294
295  QualType PT = QT->getPointeeType();
296  const BuiltinType *BT = PT->getAs<BuiltinType>();
297  if (!BT)
298    return false;
299
300  // Pointer to a character.
301  if (PT->isAnyCharacterType()) {
302    CS.setKind(ConversionSpecifier::sArg);
303    if (PT->isWideCharType())
304      LM.setKind(LengthModifier::AsWideChar);
305    else
306      LM.setKind(LengthModifier::None);
307    return true;
308  }
309
310  // Figure out the length modifier.
311  switch (BT->getKind()) {
312    // no modifier
313    case BuiltinType::UInt:
314    case BuiltinType::Int:
315    case BuiltinType::Float:
316      LM.setKind(LengthModifier::None);
317      break;
318
319    // hh
320    case BuiltinType::Char_U:
321    case BuiltinType::UChar:
322    case BuiltinType::Char_S:
323    case BuiltinType::SChar:
324      LM.setKind(LengthModifier::AsChar);
325      break;
326
327    // h
328    case BuiltinType::Short:
329    case BuiltinType::UShort:
330      LM.setKind(LengthModifier::AsShort);
331      break;
332
333    // l
334    case BuiltinType::Long:
335    case BuiltinType::ULong:
336    case BuiltinType::Double:
337      LM.setKind(LengthModifier::AsLong);
338      break;
339
340    // ll
341    case BuiltinType::LongLong:
342    case BuiltinType::ULongLong:
343      LM.setKind(LengthModifier::AsLongLong);
344      break;
345
346    // L
347    case BuiltinType::LongDouble:
348      LM.setKind(LengthModifier::AsLongDouble);
349      break;
350
351    // Don't know.
352    default:
353      return false;
354  }
355
356  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
357  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
358    const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
359    if (Identifier->getName() == "size_t") {
360      LM.setKind(LengthModifier::AsSizeT);
361    } else if (Identifier->getName() == "ssize_t") {
362      // Not C99, but common in Unix.
363      LM.setKind(LengthModifier::AsSizeT);
364    } else if (Identifier->getName() == "intmax_t") {
365      LM.setKind(LengthModifier::AsIntMax);
366    } else if (Identifier->getName() == "uintmax_t") {
367      LM.setKind(LengthModifier::AsIntMax);
368    } else if (Identifier->getName() == "ptrdiff_t") {
369      LM.setKind(LengthModifier::AsPtrDiff);
370    }
371  }
372
373  // Figure out the conversion specifier.
374  if (PT->isRealFloatingType())
375    CS.setKind(ConversionSpecifier::fArg);
376  else if (PT->isSignedIntegerType())
377    CS.setKind(ConversionSpecifier::dArg);
378  else if (PT->isUnsignedIntegerType()) {
379    // Preserve the original formatting, e.g. 'X', 'o'.
380    if (!CS.isUIntArg()) {
381      CS.setKind(ConversionSpecifier::uArg);
382    }
383  } else
384    llvm_unreachable("Unexpected type");
385
386  return true;
387}
388
389void ScanfSpecifier::toString(raw_ostream &os) const {
390  os << "%";
391
392  if (usesPositionalArg())
393    os << getPositionalArgIndex() << "$";
394  if (SuppressAssignment)
395    os << "*";
396
397  FieldWidth.toString(os);
398  os << LM.toString();
399  os << CS.toString();
400}
401
402bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
403                                                    const char *I,
404                                                    const char *E) {
405
406  unsigned argIndex = 0;
407
408  // Keep looking for a format specifier until we have exhausted the string.
409  while (I != E) {
410    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex);
411    // Did a fail-stop error of any kind occur when parsing the specifier?
412    // If so, don't do any more processing.
413    if (FSR.shouldStop())
414      return true;;
415      // Did we exhaust the string or encounter an error that
416      // we can recover from?
417    if (!FSR.hasValue())
418      continue;
419      // We have a format specifier.  Pass it to the callback.
420    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
421                                I - FSR.getStart())) {
422      return true;
423    }
424  }
425  assert(I == E && "Format string not exhausted");
426  return false;
427}
428
429bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
430  switch (K) {
431    case InvalidTy:
432      llvm_unreachable("ArgTypeResult must be valid");
433    case UnknownTy:
434      return true;
435    case CStrTy:
436      return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
437    case WCStrTy:
438      return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
439    case PtrToArgTypeResultTy: {
440      const PointerType *PT = argTy->getAs<PointerType>();
441      if (!PT)
442        return false;
443      return A.matchesType(C, PT->getPointeeType());
444    }
445  }
446
447  return false; // Unreachable, but we still get a warning.
448}
449
450QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
451  switch (K) {
452    case InvalidTy:
453      llvm_unreachable("No representative type for Invalid ArgTypeResult");
454    case UnknownTy:
455      return QualType();
456    case CStrTy:
457      return C.getPointerType(C.CharTy);
458    case WCStrTy:
459      return C.getPointerType(C.getWCharType());
460    case PtrToArgTypeResultTy:
461      return C.getPointerType(A.getRepresentativeType(C));
462  }
463
464  return QualType(); // Not reachable.
465}
466
467std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
468  std::string S = getRepresentativeType(C).getAsString();
469  if (!Name)
470    return std::string("'") + S + "'";
471  return std::string("'") + Name + "' (aka '" + S + "')";
472}
473