ScanfFormatString.cpp revision 9d24c2cbd9cf1b7c165ccb13221f2efb2f4b49b0
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfArgTypeResult;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Look for a ']' character which denotes the end of the scan list.
54  while (*I != ']') {
55    if (++I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  CS.setEndScanList(I);
62  return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68                                                const char *&Beg,
69                                                const char *E,
70                                                unsigned &argIndex,
71                                                const LangOptions &LO) {
72
73  using namespace clang::analyze_scanf;
74  const char *I = Beg;
75  const char *Start = 0;
76  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
77
78    // Look for a '%' character that indicates the start of a format specifier.
79  for ( ; I != E ; ++I) {
80    char c = *I;
81    if (c == '\0') {
82        // Detect spurious null characters, which are likely errors.
83      H.HandleNullChar(I);
84      return true;
85    }
86    if (c == '%') {
87      Start = I++;  // Record the start of the format specifier.
88      break;
89    }
90  }
91
92    // No format specifier found?
93  if (!Start)
94    return false;
95
96  if (I == E) {
97      // No more characters left?
98    H.HandleIncompleteSpecifier(Start, E - Start);
99    return true;
100  }
101
102  ScanfSpecifier FS;
103  if (ParseArgPosition(H, FS, Start, I, E))
104    return true;
105
106  if (I == E) {
107      // No more characters left?
108    H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  // Look for '*' flag if it is present.
113  if (*I == '*') {
114    FS.setSuppressAssignment(I);
115    if (++I == E) {
116      H.HandleIncompleteSpecifier(Start, E - Start);
117      return true;
118    }
119  }
120
121  // Look for the field width (if any).  Unlike printf, this is either
122  // a fixed integer or isn't present.
123  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
124  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
125    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
126    FS.setFieldWidth(Amt);
127
128    if (I == E) {
129      // No more characters left?
130      H.HandleIncompleteSpecifier(Start, E - Start);
131      return true;
132    }
133  }
134
135  // Look for the length modifier.
136  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
137      // No more characters left?
138    H.HandleIncompleteSpecifier(Start, E - Start);
139    return true;
140  }
141
142  // Detect spurious null characters, which are likely errors.
143  if (*I == '\0') {
144    H.HandleNullChar(I);
145    return true;
146  }
147
148  // Finally, look for the conversion specifier.
149  const char *conversionPosition = I++;
150  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
151  switch (*conversionPosition) {
152    default:
153      break;
154    case '%': k = ConversionSpecifier::PercentArg;   break;
155    case 'A': k = ConversionSpecifier::AArg; break;
156    case 'E': k = ConversionSpecifier::EArg; break;
157    case 'F': k = ConversionSpecifier::FArg; break;
158    case 'G': k = ConversionSpecifier::GArg; break;
159    case 'X': k = ConversionSpecifier::XArg; break;
160    case 'a': k = ConversionSpecifier::aArg; break;
161    case 'd': k = ConversionSpecifier::dArg; break;
162    case 'e': k = ConversionSpecifier::eArg; break;
163    case 'f': k = ConversionSpecifier::fArg; break;
164    case 'g': k = ConversionSpecifier::gArg; break;
165    case 'i': k = ConversionSpecifier::iArg; break;
166    case 'n': k = ConversionSpecifier::nArg; break;
167    case 'c': k = ConversionSpecifier::cArg; break;
168    case 'C': k = ConversionSpecifier::CArg; break;
169    case 'S': k = ConversionSpecifier::SArg; break;
170    case '[': k = ConversionSpecifier::ScanListArg; break;
171    case 'u': k = ConversionSpecifier::uArg; break;
172    case 'x': k = ConversionSpecifier::xArg; break;
173    case 'o': k = ConversionSpecifier::oArg; break;
174    case 's': k = ConversionSpecifier::sArg; break;
175    case 'p': k = ConversionSpecifier::pArg; break;
176  }
177  ScanfConversionSpecifier CS(conversionPosition, k);
178  if (k == ScanfConversionSpecifier::ScanListArg) {
179    if (ParseScanList(H, CS, I, E))
180      return true;
181  }
182  FS.setConversionSpecifier(CS);
183  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
184      && !FS.usesPositionalArg())
185    FS.setArgIndex(argIndex++);
186
187  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
188  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
189
190  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
191    // Assume the conversion takes one argument.
192    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
193  }
194  return ScanfSpecifierResult(Start, FS);
195}
196
197ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
198  const ScanfConversionSpecifier &CS = getConversionSpecifier();
199
200  if (!CS.consumesDataArgument())
201    return ScanfArgTypeResult::Invalid();
202
203  switch(CS.getKind()) {
204    // Signed int.
205    case ConversionSpecifier::dArg:
206    case ConversionSpecifier::iArg:
207      switch (LM.getKind()) {
208        case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
209        case LengthModifier::AsChar:
210          return ArgTypeResult(ArgTypeResult::AnyCharTy);
211        case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
212        case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
213        case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy);
214        case LengthModifier::AsIntMax:
215          return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
216        case LengthModifier::AsSizeT:
217          // FIXME: ssize_t.
218          return ScanfArgTypeResult();
219        case LengthModifier::AsPtrDiff:
220          return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
221        case LengthModifier::AsLongDouble:
222          // GNU extension.
223          return ArgTypeResult(Ctx.LongLongTy);
224        case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
225        case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
226      }
227
228    // Unsigned int.
229    case ConversionSpecifier::oArg:
230    case ConversionSpecifier::uArg:
231    case ConversionSpecifier::xArg:
232    case ConversionSpecifier::XArg:
233      switch (LM.getKind()) {
234        case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
235        case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
236        case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
237        case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
238        case LengthModifier::AsLongLong:
239          return ArgTypeResult(Ctx.UnsignedLongLongTy);
240        case LengthModifier::AsIntMax:
241          return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
242        case LengthModifier::AsSizeT:
243          return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
244        case LengthModifier::AsPtrDiff:
245          // FIXME: Unsigned version of ptrdiff_t?
246          return ScanfArgTypeResult();
247        case LengthModifier::AsLongDouble:
248          // GNU extension.
249          return ArgTypeResult(Ctx.UnsignedLongLongTy);
250        case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
251        case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
252      }
253
254    // Float.
255    case ConversionSpecifier::aArg:
256    case ConversionSpecifier::AArg:
257    case ConversionSpecifier::eArg:
258    case ConversionSpecifier::EArg:
259    case ConversionSpecifier::fArg:
260    case ConversionSpecifier::FArg:
261    case ConversionSpecifier::gArg:
262    case ConversionSpecifier::GArg:
263      switch (LM.getKind()) {
264        case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
265        case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
266        case LengthModifier::AsLongDouble:
267          return ArgTypeResult(Ctx.LongDoubleTy);
268        default:
269          return ScanfArgTypeResult::Invalid();
270      }
271
272    // Char, string and scanlist.
273    case ConversionSpecifier::cArg:
274    case ConversionSpecifier::sArg:
275    case ConversionSpecifier::ScanListArg:
276      switch (LM.getKind()) {
277        case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
278        case LengthModifier::AsLong:
279          return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
280        case LengthModifier::AsAllocate:
281        case LengthModifier::AsMAllocate:
282          return ScanfArgTypeResult(ArgTypeResult::CStrTy);
283        default:
284          return ScanfArgTypeResult::Invalid();
285      }
286    case ConversionSpecifier::CArg:
287    case ConversionSpecifier::SArg:
288      // FIXME: Mac OS X specific?
289      switch (LM.getKind()) {
290        case LengthModifier::None:
291          return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
292        case LengthModifier::AsAllocate:
293        case LengthModifier::AsMAllocate:
294          return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **");
295        default:
296          return ScanfArgTypeResult::Invalid();
297      }
298
299    // Pointer.
300    case ConversionSpecifier::pArg:
301      return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
302
303    default:
304      break;
305  }
306
307  return ScanfArgTypeResult();
308}
309
310bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt)
311{
312  if (!QT->isPointerType())
313    return false;
314
315  QualType PT = QT->getPointeeType();
316  const BuiltinType *BT = PT->getAs<BuiltinType>();
317  if (!BT)
318    return false;
319
320  // Pointer to a character.
321  if (PT->isAnyCharacterType()) {
322    CS.setKind(ConversionSpecifier::sArg);
323    if (PT->isWideCharType())
324      LM.setKind(LengthModifier::AsWideChar);
325    else
326      LM.setKind(LengthModifier::None);
327    return true;
328  }
329
330  // Figure out the length modifier.
331  switch (BT->getKind()) {
332    // no modifier
333    case BuiltinType::UInt:
334    case BuiltinType::Int:
335    case BuiltinType::Float:
336      LM.setKind(LengthModifier::None);
337      break;
338
339    // hh
340    case BuiltinType::Char_U:
341    case BuiltinType::UChar:
342    case BuiltinType::Char_S:
343    case BuiltinType::SChar:
344      LM.setKind(LengthModifier::AsChar);
345      break;
346
347    // h
348    case BuiltinType::Short:
349    case BuiltinType::UShort:
350      LM.setKind(LengthModifier::AsShort);
351      break;
352
353    // l
354    case BuiltinType::Long:
355    case BuiltinType::ULong:
356    case BuiltinType::Double:
357      LM.setKind(LengthModifier::AsLong);
358      break;
359
360    // ll
361    case BuiltinType::LongLong:
362    case BuiltinType::ULongLong:
363      LM.setKind(LengthModifier::AsLongLong);
364      break;
365
366    // L
367    case BuiltinType::LongDouble:
368      LM.setKind(LengthModifier::AsLongDouble);
369      break;
370
371    // Don't know.
372    default:
373      return false;
374  }
375
376  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
377  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
378    const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
379    if (Identifier->getName() == "size_t") {
380      LM.setKind(LengthModifier::AsSizeT);
381    } else if (Identifier->getName() == "ssize_t") {
382      // Not C99, but common in Unix.
383      LM.setKind(LengthModifier::AsSizeT);
384    } else if (Identifier->getName() == "intmax_t") {
385      LM.setKind(LengthModifier::AsIntMax);
386    } else if (Identifier->getName() == "uintmax_t") {
387      LM.setKind(LengthModifier::AsIntMax);
388    } else if (Identifier->getName() == "ptrdiff_t") {
389      LM.setKind(LengthModifier::AsPtrDiff);
390    }
391  }
392
393  // Figure out the conversion specifier.
394  if (PT->isRealFloatingType())
395    CS.setKind(ConversionSpecifier::fArg);
396  else if (PT->isSignedIntegerType())
397    CS.setKind(ConversionSpecifier::dArg);
398  else if (PT->isUnsignedIntegerType()) {
399    // Preserve the original formatting, e.g. 'X', 'o'.
400    if (!CS.isUIntArg()) {
401      CS.setKind(ConversionSpecifier::uArg);
402    }
403  } else
404    llvm_unreachable("Unexpected type");
405
406  return true;
407}
408
409void ScanfSpecifier::toString(raw_ostream &os) const {
410  os << "%";
411
412  if (usesPositionalArg())
413    os << getPositionalArgIndex() << "$";
414  if (SuppressAssignment)
415    os << "*";
416
417  FieldWidth.toString(os);
418  os << LM.toString();
419  os << CS.toString();
420}
421
422bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
423                                                    const char *I,
424                                                    const char *E,
425                                                    const LangOptions &LO) {
426
427  unsigned argIndex = 0;
428
429  // Keep looking for a format specifier until we have exhausted the string.
430  while (I != E) {
431    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
432                                                          LO);
433    // Did a fail-stop error of any kind occur when parsing the specifier?
434    // If so, don't do any more processing.
435    if (FSR.shouldStop())
436      return true;;
437      // Did we exhaust the string or encounter an error that
438      // we can recover from?
439    if (!FSR.hasValue())
440      continue;
441      // We have a format specifier.  Pass it to the callback.
442    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
443                                I - FSR.getStart())) {
444      return true;
445    }
446  }
447  assert(I == E && "Format string not exhausted");
448  return false;
449}
450
451bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
452  switch (K) {
453    case InvalidTy:
454      llvm_unreachable("ArgTypeResult must be valid");
455    case UnknownTy:
456      return true;
457    case CStrTy:
458      return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
459    case WCStrTy:
460      return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
461    case PtrToArgTypeResultTy: {
462      const PointerType *PT = argTy->getAs<PointerType>();
463      if (!PT)
464        return false;
465      return A.matchesType(C, PT->getPointeeType());
466    }
467  }
468
469  llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
470}
471
472QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
473  switch (K) {
474    case InvalidTy:
475      llvm_unreachable("No representative type for Invalid ArgTypeResult");
476    case UnknownTy:
477      return QualType();
478    case CStrTy:
479      return C.getPointerType(C.CharTy);
480    case WCStrTy:
481      return C.getPointerType(C.getWCharType());
482    case PtrToArgTypeResultTy:
483      return C.getPointerType(A.getRepresentativeType(C));
484  }
485
486  llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
487}
488
489std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
490  std::string S = getRepresentativeType(C).getAsString();
491  if (!Name)
492    return std::string("'") + S + "'";
493  return std::string("'") + Name + "' (aka '" + S + "')";
494}
495