ScanfFormatString.cpp revision 32addd519c6699000ff79c387a1c87f0ab7c3698
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfArgTypeResult;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Look for a ']' character which denotes the end of the scan list.
54  while (*I != ']') {
55    if (++I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  CS.setEndScanList(I);
62  return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68                                                const char *&Beg,
69                                                const char *E,
70                                                unsigned &argIndex,
71                                                const LangOptions &LO) {
72
73  using namespace clang::analyze_scanf;
74  const char *I = Beg;
75  const char *Start = 0;
76  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
77
78    // Look for a '%' character that indicates the start of a format specifier.
79  for ( ; I != E ; ++I) {
80    char c = *I;
81    if (c == '\0') {
82        // Detect spurious null characters, which are likely errors.
83      H.HandleNullChar(I);
84      return true;
85    }
86    if (c == '%') {
87      Start = I++;  // Record the start of the format specifier.
88      break;
89    }
90  }
91
92    // No format specifier found?
93  if (!Start)
94    return false;
95
96  if (I == E) {
97      // No more characters left?
98    H.HandleIncompleteSpecifier(Start, E - Start);
99    return true;
100  }
101
102  ScanfSpecifier FS;
103  if (ParseArgPosition(H, FS, Start, I, E))
104    return true;
105
106  if (I == E) {
107      // No more characters left?
108    H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  // Look for '*' flag if it is present.
113  if (*I == '*') {
114    FS.setSuppressAssignment(I);
115    if (++I == E) {
116      H.HandleIncompleteSpecifier(Start, E - Start);
117      return true;
118    }
119  }
120
121  // Look for the field width (if any).  Unlike printf, this is either
122  // a fixed integer or isn't present.
123  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
124  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
125    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
126    FS.setFieldWidth(Amt);
127
128    if (I == E) {
129      // No more characters left?
130      H.HandleIncompleteSpecifier(Start, E - Start);
131      return true;
132    }
133  }
134
135  // Look for the length modifier.
136  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
137      // No more characters left?
138    H.HandleIncompleteSpecifier(Start, E - Start);
139    return true;
140  }
141
142  // Detect spurious null characters, which are likely errors.
143  if (*I == '\0') {
144    H.HandleNullChar(I);
145    return true;
146  }
147
148  // Finally, look for the conversion specifier.
149  const char *conversionPosition = I++;
150  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
151  switch (*conversionPosition) {
152    default:
153      break;
154    case '%': k = ConversionSpecifier::PercentArg;   break;
155    case 'A': k = ConversionSpecifier::AArg; break;
156    case 'E': k = ConversionSpecifier::EArg; break;
157    case 'F': k = ConversionSpecifier::FArg; break;
158    case 'G': k = ConversionSpecifier::GArg; break;
159    case 'X': k = ConversionSpecifier::XArg; break;
160    case 'a': k = ConversionSpecifier::aArg; break;
161    case 'd': k = ConversionSpecifier::dArg; break;
162    case 'e': k = ConversionSpecifier::eArg; break;
163    case 'f': k = ConversionSpecifier::fArg; break;
164    case 'g': k = ConversionSpecifier::gArg; break;
165    case 'i': k = ConversionSpecifier::iArg; break;
166    case 'n': k = ConversionSpecifier::nArg; break;
167    case 'c': k = ConversionSpecifier::cArg; break;
168    case 'C': k = ConversionSpecifier::CArg; break;
169    case 'S': k = ConversionSpecifier::SArg; break;
170    case '[': k = ConversionSpecifier::ScanListArg; break;
171    case 'u': k = ConversionSpecifier::uArg; break;
172    case 'x': k = ConversionSpecifier::xArg; break;
173    case 'o': k = ConversionSpecifier::oArg; break;
174    case 's': k = ConversionSpecifier::sArg; break;
175    case 'p': k = ConversionSpecifier::pArg; break;
176  }
177  ScanfConversionSpecifier CS(conversionPosition, k);
178  if (k == ScanfConversionSpecifier::ScanListArg) {
179    if (ParseScanList(H, CS, I, E))
180      return true;
181  }
182  FS.setConversionSpecifier(CS);
183  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
184      && !FS.usesPositionalArg())
185    FS.setArgIndex(argIndex++);
186
187  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
188  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
189
190  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
191    // Assume the conversion takes one argument.
192    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
193  }
194  return ScanfSpecifierResult(Start, FS);
195}
196
197ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
198  const ScanfConversionSpecifier &CS = getConversionSpecifier();
199
200  if (!CS.consumesDataArgument())
201    return ScanfArgTypeResult::Invalid();
202
203  switch(CS.getKind()) {
204    // Signed int.
205    case ConversionSpecifier::dArg:
206    case ConversionSpecifier::iArg:
207      switch (LM.getKind()) {
208        case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
209        case LengthModifier::AsChar:
210          return ArgTypeResult(ArgTypeResult::AnyCharTy);
211        case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
212        case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
213        case LengthModifier::AsLongLong:
214        case LengthModifier::AsQuad:
215          return ArgTypeResult(Ctx.LongLongTy);
216        case LengthModifier::AsIntMax:
217          return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
218        case LengthModifier::AsSizeT:
219          // FIXME: ssize_t.
220          return ScanfArgTypeResult();
221        case LengthModifier::AsPtrDiff:
222          return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
223        case LengthModifier::AsLongDouble:
224          // GNU extension.
225          return ArgTypeResult(Ctx.LongLongTy);
226        case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
227        case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
228      }
229
230    // Unsigned int.
231    case ConversionSpecifier::oArg:
232    case ConversionSpecifier::uArg:
233    case ConversionSpecifier::xArg:
234    case ConversionSpecifier::XArg:
235      switch (LM.getKind()) {
236        case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
237        case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
238        case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
239        case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
240        case LengthModifier::AsLongLong:
241        case LengthModifier::AsQuad:
242          return ArgTypeResult(Ctx.UnsignedLongLongTy);
243        case LengthModifier::AsIntMax:
244          return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
245        case LengthModifier::AsSizeT:
246          return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
247        case LengthModifier::AsPtrDiff:
248          // FIXME: Unsigned version of ptrdiff_t?
249          return ScanfArgTypeResult();
250        case LengthModifier::AsLongDouble:
251          // GNU extension.
252          return ArgTypeResult(Ctx.UnsignedLongLongTy);
253        case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
254        case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
255      }
256
257    // Float.
258    case ConversionSpecifier::aArg:
259    case ConversionSpecifier::AArg:
260    case ConversionSpecifier::eArg:
261    case ConversionSpecifier::EArg:
262    case ConversionSpecifier::fArg:
263    case ConversionSpecifier::FArg:
264    case ConversionSpecifier::gArg:
265    case ConversionSpecifier::GArg:
266      switch (LM.getKind()) {
267        case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
268        case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
269        case LengthModifier::AsLongDouble:
270          return ArgTypeResult(Ctx.LongDoubleTy);
271        default:
272          return ScanfArgTypeResult::Invalid();
273      }
274
275    // Char, string and scanlist.
276    case ConversionSpecifier::cArg:
277    case ConversionSpecifier::sArg:
278    case ConversionSpecifier::ScanListArg:
279      switch (LM.getKind()) {
280        case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
281        case LengthModifier::AsLong:
282          return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
283        case LengthModifier::AsAllocate:
284        case LengthModifier::AsMAllocate:
285          return ScanfArgTypeResult(ArgTypeResult::CStrTy);
286        default:
287          return ScanfArgTypeResult::Invalid();
288      }
289    case ConversionSpecifier::CArg:
290    case ConversionSpecifier::SArg:
291      // FIXME: Mac OS X specific?
292      switch (LM.getKind()) {
293        case LengthModifier::None:
294          return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
295        case LengthModifier::AsAllocate:
296        case LengthModifier::AsMAllocate:
297          return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **");
298        default:
299          return ScanfArgTypeResult::Invalid();
300      }
301
302    // Pointer.
303    case ConversionSpecifier::pArg:
304      return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
305
306    default:
307      break;
308  }
309
310  return ScanfArgTypeResult();
311}
312
313bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
314                             ASTContext &Ctx) {
315  if (!QT->isPointerType())
316    return false;
317
318  QualType PT = QT->getPointeeType();
319  const BuiltinType *BT = PT->getAs<BuiltinType>();
320  if (!BT)
321    return false;
322
323  // Pointer to a character.
324  if (PT->isAnyCharacterType()) {
325    CS.setKind(ConversionSpecifier::sArg);
326    if (PT->isWideCharType())
327      LM.setKind(LengthModifier::AsWideChar);
328    else
329      LM.setKind(LengthModifier::None);
330    return true;
331  }
332
333  // Figure out the length modifier.
334  switch (BT->getKind()) {
335    // no modifier
336    case BuiltinType::UInt:
337    case BuiltinType::Int:
338    case BuiltinType::Float:
339      LM.setKind(LengthModifier::None);
340      break;
341
342    // hh
343    case BuiltinType::Char_U:
344    case BuiltinType::UChar:
345    case BuiltinType::Char_S:
346    case BuiltinType::SChar:
347      LM.setKind(LengthModifier::AsChar);
348      break;
349
350    // h
351    case BuiltinType::Short:
352    case BuiltinType::UShort:
353      LM.setKind(LengthModifier::AsShort);
354      break;
355
356    // l
357    case BuiltinType::Long:
358    case BuiltinType::ULong:
359    case BuiltinType::Double:
360      LM.setKind(LengthModifier::AsLong);
361      break;
362
363    // ll
364    case BuiltinType::LongLong:
365    case BuiltinType::ULongLong:
366      LM.setKind(LengthModifier::AsLongLong);
367      break;
368
369    // L
370    case BuiltinType::LongDouble:
371      LM.setKind(LengthModifier::AsLongDouble);
372      break;
373
374    // Don't know.
375    default:
376      return false;
377  }
378
379  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
380  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
381    const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
382    if (Identifier->getName() == "size_t") {
383      LM.setKind(LengthModifier::AsSizeT);
384    } else if (Identifier->getName() == "ssize_t") {
385      // Not C99, but common in Unix.
386      LM.setKind(LengthModifier::AsSizeT);
387    } else if (Identifier->getName() == "intmax_t") {
388      LM.setKind(LengthModifier::AsIntMax);
389    } else if (Identifier->getName() == "uintmax_t") {
390      LM.setKind(LengthModifier::AsIntMax);
391    } else if (Identifier->getName() == "ptrdiff_t") {
392      LM.setKind(LengthModifier::AsPtrDiff);
393    }
394  }
395
396  // If fixing the length modifier was enough, we are done.
397  const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx);
398  if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
399    return true;
400
401  // Figure out the conversion specifier.
402  if (PT->isRealFloatingType())
403    CS.setKind(ConversionSpecifier::fArg);
404  else if (PT->isSignedIntegerType())
405    CS.setKind(ConversionSpecifier::dArg);
406  else if (PT->isUnsignedIntegerType())
407    CS.setKind(ConversionSpecifier::uArg);
408  else
409    llvm_unreachable("Unexpected type");
410
411  return true;
412}
413
414void ScanfSpecifier::toString(raw_ostream &os) const {
415  os << "%";
416
417  if (usesPositionalArg())
418    os << getPositionalArgIndex() << "$";
419  if (SuppressAssignment)
420    os << "*";
421
422  FieldWidth.toString(os);
423  os << LM.toString();
424  os << CS.toString();
425}
426
427bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
428                                                    const char *I,
429                                                    const char *E,
430                                                    const LangOptions &LO) {
431
432  unsigned argIndex = 0;
433
434  // Keep looking for a format specifier until we have exhausted the string.
435  while (I != E) {
436    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
437                                                          LO);
438    // Did a fail-stop error of any kind occur when parsing the specifier?
439    // If so, don't do any more processing.
440    if (FSR.shouldStop())
441      return true;;
442      // Did we exhaust the string or encounter an error that
443      // we can recover from?
444    if (!FSR.hasValue())
445      continue;
446      // We have a format specifier.  Pass it to the callback.
447    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
448                                I - FSR.getStart())) {
449      return true;
450    }
451  }
452  assert(I == E && "Format string not exhausted");
453  return false;
454}
455
456bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
457  switch (K) {
458    case InvalidTy:
459      llvm_unreachable("ArgTypeResult must be valid");
460    case UnknownTy:
461      return true;
462    case CStrTy:
463      return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
464    case WCStrTy:
465      return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
466    case PtrToArgTypeResultTy: {
467      const PointerType *PT = argTy->getAs<PointerType>();
468      if (!PT)
469        return false;
470      return A.matchesType(C, PT->getPointeeType());
471    }
472  }
473
474  llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
475}
476
477QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
478  switch (K) {
479    case InvalidTy:
480      llvm_unreachable("No representative type for Invalid ArgTypeResult");
481    case UnknownTy:
482      return QualType();
483    case CStrTy:
484      return C.getPointerType(C.CharTy);
485    case WCStrTy:
486      return C.getPointerType(C.getWCharType());
487    case PtrToArgTypeResultTy:
488      return C.getPointerType(A.getRepresentativeType(C));
489  }
490
491  llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
492}
493
494std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
495  std::string S = getRepresentativeType(C).getAsString();
496  if (!Name)
497    return std::string("'") + S + "'";
498  return std::string("'") + Name + "' (aka '" + S + "')";
499}
500