ScanfFormatString.cpp revision 1ad23d62007162df82b58bca31b4aa277a5f6586
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgType;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfConversionSpecifier;
24using clang::analyze_scanf::ScanfSpecifier;
25using clang::UpdateOnReturn;
26using namespace clang;
27
28typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29        ScanfSpecifierResult;
30
31static bool ParseScanList(FormatStringHandler &H,
32                          ScanfConversionSpecifier &CS,
33                          const char *&Beg, const char *E) {
34  const char *I = Beg;
35  const char *start = I - 1;
36  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37
38  // No more characters?
39  if (I == E) {
40    H.HandleIncompleteScanList(start, I);
41    return true;
42  }
43
44  // Special case: ']' is the first character.
45  if (*I == ']') {
46    if (++I == E) {
47      H.HandleIncompleteScanList(start, I - 1);
48      return true;
49    }
50  }
51
52  // Look for a ']' character which denotes the end of the scan list.
53  while (*I != ']') {
54    if (++I == E) {
55      H.HandleIncompleteScanList(start, I - 1);
56      return true;
57    }
58  }
59
60  CS.setEndScanList(I);
61  return false;
62}
63
64// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
65// We can possibly refactor.
66static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
67                                                const char *&Beg,
68                                                const char *E,
69                                                unsigned &argIndex,
70                                                const LangOptions &LO) {
71
72  using namespace clang::analyze_scanf;
73  const char *I = Beg;
74  const char *Start = 0;
75  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
76
77    // Look for a '%' character that indicates the start of a format specifier.
78  for ( ; I != E ; ++I) {
79    char c = *I;
80    if (c == '\0') {
81        // Detect spurious null characters, which are likely errors.
82      H.HandleNullChar(I);
83      return true;
84    }
85    if (c == '%') {
86      Start = I++;  // Record the start of the format specifier.
87      break;
88    }
89  }
90
91    // No format specifier found?
92  if (!Start)
93    return false;
94
95  if (I == E) {
96      // No more characters left?
97    H.HandleIncompleteSpecifier(Start, E - Start);
98    return true;
99  }
100
101  ScanfSpecifier FS;
102  if (ParseArgPosition(H, FS, Start, I, E))
103    return true;
104
105  if (I == E) {
106      // No more characters left?
107    H.HandleIncompleteSpecifier(Start, E - Start);
108    return true;
109  }
110
111  // Look for '*' flag if it is present.
112  if (*I == '*') {
113    FS.setSuppressAssignment(I);
114    if (++I == E) {
115      H.HandleIncompleteSpecifier(Start, E - Start);
116      return true;
117    }
118  }
119
120  // Look for the field width (if any).  Unlike printf, this is either
121  // a fixed integer or isn't present.
122  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
123  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
124    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
125    FS.setFieldWidth(Amt);
126
127    if (I == E) {
128      // No more characters left?
129      H.HandleIncompleteSpecifier(Start, E - Start);
130      return true;
131    }
132  }
133
134  // Look for the length modifier.
135  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
136      // No more characters left?
137    H.HandleIncompleteSpecifier(Start, E - Start);
138    return true;
139  }
140
141  // Detect spurious null characters, which are likely errors.
142  if (*I == '\0') {
143    H.HandleNullChar(I);
144    return true;
145  }
146
147  // Finally, look for the conversion specifier.
148  const char *conversionPosition = I++;
149  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
150  switch (*conversionPosition) {
151    default:
152      break;
153    case '%': k = ConversionSpecifier::PercentArg;   break;
154    case 'A': k = ConversionSpecifier::AArg; break;
155    case 'E': k = ConversionSpecifier::EArg; break;
156    case 'F': k = ConversionSpecifier::FArg; break;
157    case 'G': k = ConversionSpecifier::GArg; break;
158    case 'X': k = ConversionSpecifier::XArg; break;
159    case 'a': k = ConversionSpecifier::aArg; break;
160    case 'd': k = ConversionSpecifier::dArg; break;
161    case 'e': k = ConversionSpecifier::eArg; break;
162    case 'f': k = ConversionSpecifier::fArg; break;
163    case 'g': k = ConversionSpecifier::gArg; break;
164    case 'i': k = ConversionSpecifier::iArg; break;
165    case 'n': k = ConversionSpecifier::nArg; break;
166    case 'c': k = ConversionSpecifier::cArg; break;
167    case 'C': k = ConversionSpecifier::CArg; break;
168    case 'S': k = ConversionSpecifier::SArg; break;
169    case '[': k = ConversionSpecifier::ScanListArg; break;
170    case 'u': k = ConversionSpecifier::uArg; break;
171    case 'x': k = ConversionSpecifier::xArg; break;
172    case 'o': k = ConversionSpecifier::oArg; break;
173    case 's': k = ConversionSpecifier::sArg; break;
174    case 'p': k = ConversionSpecifier::pArg; break;
175  }
176  ScanfConversionSpecifier CS(conversionPosition, k);
177  if (k == ScanfConversionSpecifier::ScanListArg) {
178    if (ParseScanList(H, CS, I, E))
179      return true;
180  }
181  FS.setConversionSpecifier(CS);
182  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
183      && !FS.usesPositionalArg())
184    FS.setArgIndex(argIndex++);
185
186  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
187  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
188
189  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
190    // Assume the conversion takes one argument.
191    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
192  }
193  return ScanfSpecifierResult(Start, FS);
194}
195
196ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
197  const ScanfConversionSpecifier &CS = getConversionSpecifier();
198
199  if (!CS.consumesDataArgument())
200    return ArgType::Invalid();
201
202  switch(CS.getKind()) {
203    // Signed int.
204    case ConversionSpecifier::dArg:
205    case ConversionSpecifier::iArg:
206      switch (LM.getKind()) {
207        case LengthModifier::None:
208          return ArgType::PtrTo(Ctx.IntTy);
209        case LengthModifier::AsChar:
210          return ArgType::PtrTo(ArgType::AnyCharTy);
211        case LengthModifier::AsShort:
212          return ArgType::PtrTo(Ctx.ShortTy);
213        case LengthModifier::AsLong:
214          return ArgType::PtrTo(Ctx.LongTy);
215        case LengthModifier::AsLongLong:
216        case LengthModifier::AsQuad:
217          return ArgType::PtrTo(Ctx.LongLongTy);
218        case LengthModifier::AsIntMax:
219          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
220        case LengthModifier::AsSizeT:
221          // FIXME: ssize_t.
222          return ArgType();
223        case LengthModifier::AsPtrDiff:
224          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
225        case LengthModifier::AsLongDouble:
226          // GNU extension.
227          return ArgType::PtrTo(Ctx.LongLongTy);
228        case LengthModifier::AsAllocate:
229          return ArgType::Invalid();
230        case LengthModifier::AsMAllocate:
231          return ArgType::Invalid();
232      }
233
234    // Unsigned int.
235    case ConversionSpecifier::oArg:
236    case ConversionSpecifier::uArg:
237    case ConversionSpecifier::xArg:
238    case ConversionSpecifier::XArg:
239      switch (LM.getKind()) {
240        case LengthModifier::None:
241          return ArgType::PtrTo(Ctx.UnsignedIntTy);
242        case LengthModifier::AsChar:
243          return ArgType::PtrTo(Ctx.UnsignedCharTy);
244        case LengthModifier::AsShort:
245          return ArgType::PtrTo(Ctx.UnsignedShortTy);
246        case LengthModifier::AsLong:
247          return ArgType::PtrTo(Ctx.UnsignedLongTy);
248        case LengthModifier::AsLongLong:
249        case LengthModifier::AsQuad:
250          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
251        case LengthModifier::AsIntMax:
252          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
253        case LengthModifier::AsSizeT:
254          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
255        case LengthModifier::AsPtrDiff:
256          // FIXME: Unsigned version of ptrdiff_t?
257          return ArgType();
258        case LengthModifier::AsLongDouble:
259          // GNU extension.
260          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
261        case LengthModifier::AsAllocate:
262          return ArgType::Invalid();
263        case LengthModifier::AsMAllocate:
264          return ArgType::Invalid();
265      }
266
267    // Float.
268    case ConversionSpecifier::aArg:
269    case ConversionSpecifier::AArg:
270    case ConversionSpecifier::eArg:
271    case ConversionSpecifier::EArg:
272    case ConversionSpecifier::fArg:
273    case ConversionSpecifier::FArg:
274    case ConversionSpecifier::gArg:
275    case ConversionSpecifier::GArg:
276      switch (LM.getKind()) {
277        case LengthModifier::None:
278          return ArgType::PtrTo(Ctx.FloatTy);
279        case LengthModifier::AsLong:
280          return ArgType::PtrTo(Ctx.DoubleTy);
281        case LengthModifier::AsLongDouble:
282          return ArgType::PtrTo(Ctx.LongDoubleTy);
283        default:
284          return ArgType::Invalid();
285      }
286
287    // Char, string and scanlist.
288    case ConversionSpecifier::cArg:
289    case ConversionSpecifier::sArg:
290    case ConversionSpecifier::ScanListArg:
291      switch (LM.getKind()) {
292        case LengthModifier::None:
293          return ArgType::PtrTo(ArgType::AnyCharTy);
294        case LengthModifier::AsLong:
295          return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
296        case LengthModifier::AsAllocate:
297        case LengthModifier::AsMAllocate:
298          return ArgType::PtrTo(ArgType::CStrTy);
299        default:
300          return ArgType::Invalid();
301      }
302    case ConversionSpecifier::CArg:
303    case ConversionSpecifier::SArg:
304      // FIXME: Mac OS X specific?
305      switch (LM.getKind()) {
306        case LengthModifier::None:
307          return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
308        case LengthModifier::AsAllocate:
309        case LengthModifier::AsMAllocate:
310          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
311        default:
312          return ArgType::Invalid();
313      }
314
315    // Pointer.
316    case ConversionSpecifier::pArg:
317      return ArgType::PtrTo(ArgType::CPointerTy);
318
319    // Write-back.
320    case ConversionSpecifier::nArg:
321      switch (LM.getKind()) {
322        case LengthModifier::None:
323          return ArgType::PtrTo(Ctx.IntTy);
324        case LengthModifier::AsChar:
325          return ArgType::PtrTo(Ctx.SignedCharTy);
326        case LengthModifier::AsShort:
327          return ArgType::PtrTo(Ctx.ShortTy);
328        case LengthModifier::AsLong:
329          return ArgType::PtrTo(Ctx.LongTy);
330        case LengthModifier::AsLongLong:
331        case LengthModifier::AsQuad:
332          return ArgType::PtrTo(Ctx.LongLongTy);
333        case LengthModifier::AsIntMax:
334          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
335        case LengthModifier::AsSizeT:
336          return ArgType(); // FIXME: ssize_t
337        case LengthModifier::AsPtrDiff:
338          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
339        case LengthModifier::AsLongDouble:
340          return ArgType(); // FIXME: Is this a known extension?
341        case LengthModifier::AsAllocate:
342        case LengthModifier::AsMAllocate:
343          return ArgType::Invalid();
344        }
345
346    default:
347      break;
348  }
349
350  return ArgType();
351}
352
353bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
354                             ASTContext &Ctx) {
355  if (!QT->isPointerType())
356    return false;
357
358  // %n is different from other conversion specifiers; don't try to fix it.
359  if (CS.getKind() == ConversionSpecifier::nArg)
360    return false;
361
362  QualType PT = QT->getPointeeType();
363
364  // If it's an enum, get its underlying type.
365  if (const EnumType *ETy = QT->getAs<EnumType>())
366    QT = ETy->getDecl()->getIntegerType();
367
368  const BuiltinType *BT = PT->getAs<BuiltinType>();
369  if (!BT)
370    return false;
371
372  // Pointer to a character.
373  if (PT->isAnyCharacterType()) {
374    CS.setKind(ConversionSpecifier::sArg);
375    if (PT->isWideCharType())
376      LM.setKind(LengthModifier::AsWideChar);
377    else
378      LM.setKind(LengthModifier::None);
379    return true;
380  }
381
382  // Figure out the length modifier.
383  switch (BT->getKind()) {
384    // no modifier
385    case BuiltinType::UInt:
386    case BuiltinType::Int:
387    case BuiltinType::Float:
388      LM.setKind(LengthModifier::None);
389      break;
390
391    // hh
392    case BuiltinType::Char_U:
393    case BuiltinType::UChar:
394    case BuiltinType::Char_S:
395    case BuiltinType::SChar:
396      LM.setKind(LengthModifier::AsChar);
397      break;
398
399    // h
400    case BuiltinType::Short:
401    case BuiltinType::UShort:
402      LM.setKind(LengthModifier::AsShort);
403      break;
404
405    // l
406    case BuiltinType::Long:
407    case BuiltinType::ULong:
408    case BuiltinType::Double:
409      LM.setKind(LengthModifier::AsLong);
410      break;
411
412    // ll
413    case BuiltinType::LongLong:
414    case BuiltinType::ULongLong:
415      LM.setKind(LengthModifier::AsLongLong);
416      break;
417
418    // L
419    case BuiltinType::LongDouble:
420      LM.setKind(LengthModifier::AsLongDouble);
421      break;
422
423    // Don't know.
424    default:
425      return false;
426  }
427
428  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
429  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
430    namedTypeToLengthModifier(PT, LM);
431
432  // If fixing the length modifier was enough, we are done.
433  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
434    const analyze_scanf::ArgType &AT = getArgType(Ctx);
435    if (AT.isValid() && AT.matchesType(Ctx, QT))
436      return true;
437  }
438
439  // Figure out the conversion specifier.
440  if (PT->isRealFloatingType())
441    CS.setKind(ConversionSpecifier::fArg);
442  else if (PT->isSignedIntegerType())
443    CS.setKind(ConversionSpecifier::dArg);
444  else if (PT->isUnsignedIntegerType())
445    CS.setKind(ConversionSpecifier::uArg);
446  else
447    llvm_unreachable("Unexpected type");
448
449  return true;
450}
451
452void ScanfSpecifier::toString(raw_ostream &os) const {
453  os << "%";
454
455  if (usesPositionalArg())
456    os << getPositionalArgIndex() << "$";
457  if (SuppressAssignment)
458    os << "*";
459
460  FieldWidth.toString(os);
461  os << LM.toString();
462  os << CS.toString();
463}
464
465bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
466                                                    const char *I,
467                                                    const char *E,
468                                                    const LangOptions &LO) {
469
470  unsigned argIndex = 0;
471
472  // Keep looking for a format specifier until we have exhausted the string.
473  while (I != E) {
474    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
475                                                          LO);
476    // Did a fail-stop error of any kind occur when parsing the specifier?
477    // If so, don't do any more processing.
478    if (FSR.shouldStop())
479      return true;
480      // Did we exhaust the string or encounter an error that
481      // we can recover from?
482    if (!FSR.hasValue())
483      continue;
484      // We have a format specifier.  Pass it to the callback.
485    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
486                                I - FSR.getStart())) {
487      return true;
488    }
489  }
490  assert(I == E && "Format string not exhausted");
491  return false;
492}
493