ScanfFormatString.cpp revision 033a9c0804f48119a03b73a2af42a04d4d0294ce
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfArgTypeResult;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
// Result type produced by ParseScanfSpecifier below: the generic
// SpecifierResult template instantiated for scanf specifiers.
typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Look for a ']' character which denotes the end of the scan list.
54  while (*I != ']') {
55    if (++I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  CS.setEndScanList(I);
62  return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68                                                const char *&Beg,
69                                                const char *E,
70                                                unsigned &argIndex,
71                                                const LangOptions &LO) {
72
73  using namespace clang::analyze_scanf;
74  const char *I = Beg;
75  const char *Start = 0;
76  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
77
78    // Look for a '%' character that indicates the start of a format specifier.
79  for ( ; I != E ; ++I) {
80    char c = *I;
81    if (c == '\0') {
82        // Detect spurious null characters, which are likely errors.
83      H.HandleNullChar(I);
84      return true;
85    }
86    if (c == '%') {
87      Start = I++;  // Record the start of the format specifier.
88      break;
89    }
90  }
91
92    // No format specifier found?
93  if (!Start)
94    return false;
95
96  if (I == E) {
97      // No more characters left?
98    H.HandleIncompleteSpecifier(Start, E - Start);
99    return true;
100  }
101
102  ScanfSpecifier FS;
103  if (ParseArgPosition(H, FS, Start, I, E))
104    return true;
105
106  if (I == E) {
107      // No more characters left?
108    H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  // Look for '*' flag if it is present.
113  if (*I == '*') {
114    FS.setSuppressAssignment(I);
115    if (++I == E) {
116      H.HandleIncompleteSpecifier(Start, E - Start);
117      return true;
118    }
119  }
120
121  // Look for the field width (if any).  Unlike printf, this is either
122  // a fixed integer or isn't present.
123  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
124  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
125    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
126    FS.setFieldWidth(Amt);
127
128    if (I == E) {
129      // No more characters left?
130      H.HandleIncompleteSpecifier(Start, E - Start);
131      return true;
132    }
133  }
134
135  // Look for the length modifier.
136  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
137      // No more characters left?
138    H.HandleIncompleteSpecifier(Start, E - Start);
139    return true;
140  }
141
142  // Detect spurious null characters, which are likely errors.
143  if (*I == '\0') {
144    H.HandleNullChar(I);
145    return true;
146  }
147
148  // Finally, look for the conversion specifier.
149  const char *conversionPosition = I++;
150  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
151  switch (*conversionPosition) {
152    default:
153      break;
154    case '%': k = ConversionSpecifier::PercentArg;   break;
155    case 'A': k = ConversionSpecifier::AArg; break;
156    case 'E': k = ConversionSpecifier::EArg; break;
157    case 'F': k = ConversionSpecifier::FArg; break;
158    case 'G': k = ConversionSpecifier::GArg; break;
159    case 'X': k = ConversionSpecifier::XArg; break;
160    case 'a': k = ConversionSpecifier::aArg; break;
161    case 'd': k = ConversionSpecifier::dArg; break;
162    case 'e': k = ConversionSpecifier::eArg; break;
163    case 'f': k = ConversionSpecifier::fArg; break;
164    case 'g': k = ConversionSpecifier::gArg; break;
165    case 'i': k = ConversionSpecifier::iArg; break;
166    case 'n': k = ConversionSpecifier::nArg; break;
167    case 'c': k = ConversionSpecifier::cArg; break;
168    case 'C': k = ConversionSpecifier::CArg; break;
169    case 'S': k = ConversionSpecifier::SArg; break;
170    case '[': k = ConversionSpecifier::ScanListArg; break;
171    case 'u': k = ConversionSpecifier::uArg; break;
172    case 'x': k = ConversionSpecifier::xArg; break;
173    case 'o': k = ConversionSpecifier::oArg; break;
174    case 's': k = ConversionSpecifier::sArg; break;
175    case 'p': k = ConversionSpecifier::pArg; break;
176  }
177  ScanfConversionSpecifier CS(conversionPosition, k);
178  if (k == ScanfConversionSpecifier::ScanListArg) {
179    if (ParseScanList(H, CS, I, E))
180      return true;
181  }
182  FS.setConversionSpecifier(CS);
183  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
184      && !FS.usesPositionalArg())
185    FS.setArgIndex(argIndex++);
186
187  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
188  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
189
190  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
191    // Assume the conversion takes one argument.
192    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
193  }
194  return ScanfSpecifierResult(Start, FS);
195}
196
197ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
198  const ScanfConversionSpecifier &CS = getConversionSpecifier();
199
200  if (!CS.consumesDataArgument())
201    return ScanfArgTypeResult::Invalid();
202
203  switch(CS.getKind()) {
204    // Signed int.
205    case ConversionSpecifier::dArg:
206    case ConversionSpecifier::iArg:
207      switch (LM.getKind()) {
208        case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
209        case LengthModifier::AsChar:
210          return ArgTypeResult(ArgTypeResult::AnyCharTy);
211        case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
212        case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
213        case LengthModifier::AsLongLong:
214        case LengthModifier::AsQuad:
215          return ArgTypeResult(Ctx.LongLongTy);
216        case LengthModifier::AsIntMax:
217          return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
218        case LengthModifier::AsSizeT:
219          // FIXME: ssize_t.
220          return ScanfArgTypeResult();
221        case LengthModifier::AsPtrDiff:
222          return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
223        case LengthModifier::AsLongDouble:
224          // GNU extension.
225          return ArgTypeResult(Ctx.LongLongTy);
226        case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
227        case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
228      }
229
230    // Unsigned int.
231    case ConversionSpecifier::oArg:
232    case ConversionSpecifier::uArg:
233    case ConversionSpecifier::xArg:
234    case ConversionSpecifier::XArg:
235      switch (LM.getKind()) {
236        case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
237        case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
238        case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
239        case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
240        case LengthModifier::AsLongLong:
241        case LengthModifier::AsQuad:
242          return ArgTypeResult(Ctx.UnsignedLongLongTy);
243        case LengthModifier::AsIntMax:
244          return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
245        case LengthModifier::AsSizeT:
246          return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
247        case LengthModifier::AsPtrDiff:
248          // FIXME: Unsigned version of ptrdiff_t?
249          return ScanfArgTypeResult();
250        case LengthModifier::AsLongDouble:
251          // GNU extension.
252          return ArgTypeResult(Ctx.UnsignedLongLongTy);
253        case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
254        case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
255      }
256
257    // Float.
258    case ConversionSpecifier::aArg:
259    case ConversionSpecifier::AArg:
260    case ConversionSpecifier::eArg:
261    case ConversionSpecifier::EArg:
262    case ConversionSpecifier::fArg:
263    case ConversionSpecifier::FArg:
264    case ConversionSpecifier::gArg:
265    case ConversionSpecifier::GArg:
266      switch (LM.getKind()) {
267        case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
268        case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
269        case LengthModifier::AsLongDouble:
270          return ArgTypeResult(Ctx.LongDoubleTy);
271        default:
272          return ScanfArgTypeResult::Invalid();
273      }
274
275    // Char, string and scanlist.
276    case ConversionSpecifier::cArg:
277    case ConversionSpecifier::sArg:
278    case ConversionSpecifier::ScanListArg:
279      switch (LM.getKind()) {
280        case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
281        case LengthModifier::AsLong:
282          return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
283        case LengthModifier::AsAllocate:
284        case LengthModifier::AsMAllocate:
285          return ScanfArgTypeResult(ArgTypeResult::CStrTy);
286        default:
287          return ScanfArgTypeResult::Invalid();
288      }
289    case ConversionSpecifier::CArg:
290    case ConversionSpecifier::SArg:
291      // FIXME: Mac OS X specific?
292      switch (LM.getKind()) {
293        case LengthModifier::None:
294          return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
295        case LengthModifier::AsAllocate:
296        case LengthModifier::AsMAllocate:
297          return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **");
298        default:
299          return ScanfArgTypeResult::Invalid();
300      }
301
302    // Pointer.
303    case ConversionSpecifier::pArg:
304      return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
305
306    default:
307      break;
308  }
309
310  return ScanfArgTypeResult();
311}
312
313bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
314                             ASTContext &Ctx) {
315  if (!QT->isPointerType())
316    return false;
317
318  QualType PT = QT->getPointeeType();
319
320  // If it's an enum, get its underlying type.
321  if (const EnumType *ETy = QT->getAs<EnumType>())
322    QT = ETy->getDecl()->getIntegerType();
323
324  const BuiltinType *BT = PT->getAs<BuiltinType>();
325  if (!BT)
326    return false;
327
328  // Pointer to a character.
329  if (PT->isAnyCharacterType()) {
330    CS.setKind(ConversionSpecifier::sArg);
331    if (PT->isWideCharType())
332      LM.setKind(LengthModifier::AsWideChar);
333    else
334      LM.setKind(LengthModifier::None);
335    return true;
336  }
337
338  // Figure out the length modifier.
339  switch (BT->getKind()) {
340    // no modifier
341    case BuiltinType::UInt:
342    case BuiltinType::Int:
343    case BuiltinType::Float:
344      LM.setKind(LengthModifier::None);
345      break;
346
347    // hh
348    case BuiltinType::Char_U:
349    case BuiltinType::UChar:
350    case BuiltinType::Char_S:
351    case BuiltinType::SChar:
352      LM.setKind(LengthModifier::AsChar);
353      break;
354
355    // h
356    case BuiltinType::Short:
357    case BuiltinType::UShort:
358      LM.setKind(LengthModifier::AsShort);
359      break;
360
361    // l
362    case BuiltinType::Long:
363    case BuiltinType::ULong:
364    case BuiltinType::Double:
365      LM.setKind(LengthModifier::AsLong);
366      break;
367
368    // ll
369    case BuiltinType::LongLong:
370    case BuiltinType::ULongLong:
371      LM.setKind(LengthModifier::AsLongLong);
372      break;
373
374    // L
375    case BuiltinType::LongDouble:
376      LM.setKind(LengthModifier::AsLongDouble);
377      break;
378
379    // Don't know.
380    default:
381      return false;
382  }
383
384  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
385  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
386    const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
387    if (Identifier->getName() == "size_t") {
388      LM.setKind(LengthModifier::AsSizeT);
389    } else if (Identifier->getName() == "ssize_t") {
390      // Not C99, but common in Unix.
391      LM.setKind(LengthModifier::AsSizeT);
392    } else if (Identifier->getName() == "intmax_t") {
393      LM.setKind(LengthModifier::AsIntMax);
394    } else if (Identifier->getName() == "uintmax_t") {
395      LM.setKind(LengthModifier::AsIntMax);
396    } else if (Identifier->getName() == "ptrdiff_t") {
397      LM.setKind(LengthModifier::AsPtrDiff);
398    }
399  }
400
401  // If fixing the length modifier was enough, we are done.
402  const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx);
403  if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
404    return true;
405
406  // Figure out the conversion specifier.
407  if (PT->isRealFloatingType())
408    CS.setKind(ConversionSpecifier::fArg);
409  else if (PT->isSignedIntegerType())
410    CS.setKind(ConversionSpecifier::dArg);
411  else if (PT->isUnsignedIntegerType())
412    CS.setKind(ConversionSpecifier::uArg);
413  else
414    llvm_unreachable("Unexpected type");
415
416  return true;
417}
418
419void ScanfSpecifier::toString(raw_ostream &os) const {
420  os << "%";
421
422  if (usesPositionalArg())
423    os << getPositionalArgIndex() << "$";
424  if (SuppressAssignment)
425    os << "*";
426
427  FieldWidth.toString(os);
428  os << LM.toString();
429  os << CS.toString();
430}
431
432bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
433                                                    const char *I,
434                                                    const char *E,
435                                                    const LangOptions &LO) {
436
437  unsigned argIndex = 0;
438
439  // Keep looking for a format specifier until we have exhausted the string.
440  while (I != E) {
441    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
442                                                          LO);
443    // Did a fail-stop error of any kind occur when parsing the specifier?
444    // If so, don't do any more processing.
445    if (FSR.shouldStop())
446      return true;;
447      // Did we exhaust the string or encounter an error that
448      // we can recover from?
449    if (!FSR.hasValue())
450      continue;
451      // We have a format specifier.  Pass it to the callback.
452    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
453                                I - FSR.getStart())) {
454      return true;
455    }
456  }
457  assert(I == E && "Format string not exhausted");
458  return false;
459}
460
461bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
462  switch (K) {
463    case InvalidTy:
464      llvm_unreachable("ArgTypeResult must be valid");
465    case UnknownTy:
466      return true;
467    case CStrTy:
468      return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
469    case WCStrTy:
470      return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
471    case PtrToArgTypeResultTy: {
472      const PointerType *PT = argTy->getAs<PointerType>();
473      if (!PT)
474        return false;
475      return A.matchesType(C, PT->getPointeeType());
476    }
477  }
478
479  llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
480}
481
482QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
483  switch (K) {
484    case InvalidTy:
485      llvm_unreachable("No representative type for Invalid ArgTypeResult");
486    case UnknownTy:
487      return QualType();
488    case CStrTy:
489      return C.getPointerType(C.CharTy);
490    case WCStrTy:
491      return C.getPointerType(C.getWCharType());
492    case PtrToArgTypeResultTy:
493      return C.getPointerType(A.getRepresentativeType(C));
494  }
495
496  llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
497}
498
499std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
500  std::string S = getRepresentativeType(C).getAsString();
501  if (!Name)
502    return std::string("'") + S + "'";
503  return std::string("'") + Name + "' (aka '" + S + "')";
504}
505