ScanfFormatString.cpp revision 275b6f52c7bcafc1f3cf291813b5c60ee776965a
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "clang/Basic/TargetInfo.h"
17#include "FormatStringParsing.h"
18
19using clang::analyze_format_string::ArgType;
20using clang::analyze_format_string::FormatStringHandler;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::ConversionSpecifier;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Look for a ']' character which denotes the end of the scan list.
54  while (*I != ']') {
55    if (++I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  CS.setEndScanList(I);
62  return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68                                                const char *&Beg,
69                                                const char *E,
70                                                unsigned &argIndex,
71                                                const LangOptions &LO,
72                                                const TargetInfo &Target) {
73
74  using namespace clang::analyze_scanf;
75  const char *I = Beg;
76  const char *Start = 0;
77  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
78
79    // Look for a '%' character that indicates the start of a format specifier.
80  for ( ; I != E ; ++I) {
81    char c = *I;
82    if (c == '\0') {
83        // Detect spurious null characters, which are likely errors.
84      H.HandleNullChar(I);
85      return true;
86    }
87    if (c == '%') {
88      Start = I++;  // Record the start of the format specifier.
89      break;
90    }
91  }
92
93    // No format specifier found?
94  if (!Start)
95    return false;
96
97  if (I == E) {
98      // No more characters left?
99    H.HandleIncompleteSpecifier(Start, E - Start);
100    return true;
101  }
102
103  ScanfSpecifier FS;
104  if (ParseArgPosition(H, FS, Start, I, E))
105    return true;
106
107  if (I == E) {
108      // No more characters left?
109    H.HandleIncompleteSpecifier(Start, E - Start);
110    return true;
111  }
112
113  // Look for '*' flag if it is present.
114  if (*I == '*') {
115    FS.setSuppressAssignment(I);
116    if (++I == E) {
117      H.HandleIncompleteSpecifier(Start, E - Start);
118      return true;
119    }
120  }
121
122  // Look for the field width (if any).  Unlike printf, this is either
123  // a fixed integer or isn't present.
124  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
125  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
126    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
127    FS.setFieldWidth(Amt);
128
129    if (I == E) {
130      // No more characters left?
131      H.HandleIncompleteSpecifier(Start, E - Start);
132      return true;
133    }
134  }
135
136  // Look for the length modifier.
137  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
138      // No more characters left?
139    H.HandleIncompleteSpecifier(Start, E - Start);
140    return true;
141  }
142
143  // Detect spurious null characters, which are likely errors.
144  if (*I == '\0') {
145    H.HandleNullChar(I);
146    return true;
147  }
148
149  // Finally, look for the conversion specifier.
150  const char *conversionPosition = I++;
151  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
152  switch (*conversionPosition) {
153    default:
154      break;
155    case '%': k = ConversionSpecifier::PercentArg;   break;
156    case 'A': k = ConversionSpecifier::AArg; break;
157    case 'E': k = ConversionSpecifier::EArg; break;
158    case 'F': k = ConversionSpecifier::FArg; break;
159    case 'G': k = ConversionSpecifier::GArg; break;
160    case 'X': k = ConversionSpecifier::XArg; break;
161    case 'a': k = ConversionSpecifier::aArg; break;
162    case 'd': k = ConversionSpecifier::dArg; break;
163    case 'e': k = ConversionSpecifier::eArg; break;
164    case 'f': k = ConversionSpecifier::fArg; break;
165    case 'g': k = ConversionSpecifier::gArg; break;
166    case 'i': k = ConversionSpecifier::iArg; break;
167    case 'n': k = ConversionSpecifier::nArg; break;
168    case 'c': k = ConversionSpecifier::cArg; break;
169    case 'C': k = ConversionSpecifier::CArg; break;
170    case 'S': k = ConversionSpecifier::SArg; break;
171    case '[': k = ConversionSpecifier::ScanListArg; break;
172    case 'u': k = ConversionSpecifier::uArg; break;
173    case 'x': k = ConversionSpecifier::xArg; break;
174    case 'o': k = ConversionSpecifier::oArg; break;
175    case 's': k = ConversionSpecifier::sArg; break;
176    case 'p': k = ConversionSpecifier::pArg; break;
177    // Apple extensions
178      // Apple-specific
179    case 'D':
180      if (Target.getTriple().isOSDarwin())
181        k = ConversionSpecifier::DArg;
182      break;
183    case 'O':
184      if (Target.getTriple().isOSDarwin())
185        k = ConversionSpecifier::OArg;
186      break;
187    case 'U':
188      if (Target.getTriple().isOSDarwin())
189        k = ConversionSpecifier::UArg;
190      break;
191  }
192  ScanfConversionSpecifier CS(conversionPosition, k);
193  if (k == ScanfConversionSpecifier::ScanListArg) {
194    if (ParseScanList(H, CS, I, E))
195      return true;
196  }
197  FS.setConversionSpecifier(CS);
198  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
199      && !FS.usesPositionalArg())
200    FS.setArgIndex(argIndex++);
201
202  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
203  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
204
205  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
206    // Assume the conversion takes one argument.
207    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
208  }
209  return ScanfSpecifierResult(Start, FS);
210}
211
212ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
213  const ScanfConversionSpecifier &CS = getConversionSpecifier();
214
215  if (!CS.consumesDataArgument())
216    return ArgType::Invalid();
217
218  switch(CS.getKind()) {
219    // Signed int.
220    case ConversionSpecifier::dArg:
221    case ConversionSpecifier::DArg:
222    case ConversionSpecifier::iArg:
223      switch (LM.getKind()) {
224        case LengthModifier::None:
225          return ArgType::PtrTo(Ctx.IntTy);
226        case LengthModifier::AsChar:
227          return ArgType::PtrTo(ArgType::AnyCharTy);
228        case LengthModifier::AsShort:
229          return ArgType::PtrTo(Ctx.ShortTy);
230        case LengthModifier::AsLong:
231          return ArgType::PtrTo(Ctx.LongTy);
232        case LengthModifier::AsLongLong:
233        case LengthModifier::AsQuad:
234          return ArgType::PtrTo(Ctx.LongLongTy);
235        case LengthModifier::AsIntMax:
236          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
237        case LengthModifier::AsSizeT:
238          // FIXME: ssize_t.
239          return ArgType();
240        case LengthModifier::AsPtrDiff:
241          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
242        case LengthModifier::AsLongDouble:
243          // GNU extension.
244          return ArgType::PtrTo(Ctx.LongLongTy);
245        case LengthModifier::AsAllocate:
246          return ArgType::Invalid();
247        case LengthModifier::AsMAllocate:
248          return ArgType::Invalid();
249      }
250
251    // Unsigned int.
252    case ConversionSpecifier::oArg:
253    case ConversionSpecifier::OArg:
254    case ConversionSpecifier::uArg:
255    case ConversionSpecifier::UArg:
256    case ConversionSpecifier::xArg:
257    case ConversionSpecifier::XArg:
258      switch (LM.getKind()) {
259        case LengthModifier::None:
260          return ArgType::PtrTo(Ctx.UnsignedIntTy);
261        case LengthModifier::AsChar:
262          return ArgType::PtrTo(Ctx.UnsignedCharTy);
263        case LengthModifier::AsShort:
264          return ArgType::PtrTo(Ctx.UnsignedShortTy);
265        case LengthModifier::AsLong:
266          return ArgType::PtrTo(Ctx.UnsignedLongTy);
267        case LengthModifier::AsLongLong:
268        case LengthModifier::AsQuad:
269          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
270        case LengthModifier::AsIntMax:
271          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
272        case LengthModifier::AsSizeT:
273          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
274        case LengthModifier::AsPtrDiff:
275          // FIXME: Unsigned version of ptrdiff_t?
276          return ArgType();
277        case LengthModifier::AsLongDouble:
278          // GNU extension.
279          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
280        case LengthModifier::AsAllocate:
281          return ArgType::Invalid();
282        case LengthModifier::AsMAllocate:
283          return ArgType::Invalid();
284      }
285
286    // Float.
287    case ConversionSpecifier::aArg:
288    case ConversionSpecifier::AArg:
289    case ConversionSpecifier::eArg:
290    case ConversionSpecifier::EArg:
291    case ConversionSpecifier::fArg:
292    case ConversionSpecifier::FArg:
293    case ConversionSpecifier::gArg:
294    case ConversionSpecifier::GArg:
295      switch (LM.getKind()) {
296        case LengthModifier::None:
297          return ArgType::PtrTo(Ctx.FloatTy);
298        case LengthModifier::AsLong:
299          return ArgType::PtrTo(Ctx.DoubleTy);
300        case LengthModifier::AsLongDouble:
301          return ArgType::PtrTo(Ctx.LongDoubleTy);
302        default:
303          return ArgType::Invalid();
304      }
305
306    // Char, string and scanlist.
307    case ConversionSpecifier::cArg:
308    case ConversionSpecifier::sArg:
309    case ConversionSpecifier::ScanListArg:
310      switch (LM.getKind()) {
311        case LengthModifier::None:
312          return ArgType::PtrTo(ArgType::AnyCharTy);
313        case LengthModifier::AsLong:
314          return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
315        case LengthModifier::AsAllocate:
316        case LengthModifier::AsMAllocate:
317          return ArgType::PtrTo(ArgType::CStrTy);
318        default:
319          return ArgType::Invalid();
320      }
321    case ConversionSpecifier::CArg:
322    case ConversionSpecifier::SArg:
323      // FIXME: Mac OS X specific?
324      switch (LM.getKind()) {
325        case LengthModifier::None:
326          return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
327        case LengthModifier::AsAllocate:
328        case LengthModifier::AsMAllocate:
329          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
330        default:
331          return ArgType::Invalid();
332      }
333
334    // Pointer.
335    case ConversionSpecifier::pArg:
336      return ArgType::PtrTo(ArgType::CPointerTy);
337
338    // Write-back.
339    case ConversionSpecifier::nArg:
340      switch (LM.getKind()) {
341        case LengthModifier::None:
342          return ArgType::PtrTo(Ctx.IntTy);
343        case LengthModifier::AsChar:
344          return ArgType::PtrTo(Ctx.SignedCharTy);
345        case LengthModifier::AsShort:
346          return ArgType::PtrTo(Ctx.ShortTy);
347        case LengthModifier::AsLong:
348          return ArgType::PtrTo(Ctx.LongTy);
349        case LengthModifier::AsLongLong:
350        case LengthModifier::AsQuad:
351          return ArgType::PtrTo(Ctx.LongLongTy);
352        case LengthModifier::AsIntMax:
353          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
354        case LengthModifier::AsSizeT:
355          return ArgType(); // FIXME: ssize_t
356        case LengthModifier::AsPtrDiff:
357          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
358        case LengthModifier::AsLongDouble:
359          return ArgType(); // FIXME: Is this a known extension?
360        case LengthModifier::AsAllocate:
361        case LengthModifier::AsMAllocate:
362          return ArgType::Invalid();
363        }
364
365    default:
366      break;
367  }
368
369  return ArgType();
370}
371
372bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
373                             ASTContext &Ctx) {
374  if (!QT->isPointerType())
375    return false;
376
377  // %n is different from other conversion specifiers; don't try to fix it.
378  if (CS.getKind() == ConversionSpecifier::nArg)
379    return false;
380
381  QualType PT = QT->getPointeeType();
382
383  // If it's an enum, get its underlying type.
384  if (const EnumType *ETy = QT->getAs<EnumType>())
385    QT = ETy->getDecl()->getIntegerType();
386
387  const BuiltinType *BT = PT->getAs<BuiltinType>();
388  if (!BT)
389    return false;
390
391  // Pointer to a character.
392  if (PT->isAnyCharacterType()) {
393    CS.setKind(ConversionSpecifier::sArg);
394    if (PT->isWideCharType())
395      LM.setKind(LengthModifier::AsWideChar);
396    else
397      LM.setKind(LengthModifier::None);
398    return true;
399  }
400
401  // Figure out the length modifier.
402  switch (BT->getKind()) {
403    // no modifier
404    case BuiltinType::UInt:
405    case BuiltinType::Int:
406    case BuiltinType::Float:
407      LM.setKind(LengthModifier::None);
408      break;
409
410    // hh
411    case BuiltinType::Char_U:
412    case BuiltinType::UChar:
413    case BuiltinType::Char_S:
414    case BuiltinType::SChar:
415      LM.setKind(LengthModifier::AsChar);
416      break;
417
418    // h
419    case BuiltinType::Short:
420    case BuiltinType::UShort:
421      LM.setKind(LengthModifier::AsShort);
422      break;
423
424    // l
425    case BuiltinType::Long:
426    case BuiltinType::ULong:
427    case BuiltinType::Double:
428      LM.setKind(LengthModifier::AsLong);
429      break;
430
431    // ll
432    case BuiltinType::LongLong:
433    case BuiltinType::ULongLong:
434      LM.setKind(LengthModifier::AsLongLong);
435      break;
436
437    // L
438    case BuiltinType::LongDouble:
439      LM.setKind(LengthModifier::AsLongDouble);
440      break;
441
442    // Don't know.
443    default:
444      return false;
445  }
446
447  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
448  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
449    namedTypeToLengthModifier(PT, LM);
450
451  // If fixing the length modifier was enough, we are done.
452  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
453    const analyze_scanf::ArgType &AT = getArgType(Ctx);
454    if (AT.isValid() && AT.matchesType(Ctx, QT))
455      return true;
456  }
457
458  // Figure out the conversion specifier.
459  if (PT->isRealFloatingType())
460    CS.setKind(ConversionSpecifier::fArg);
461  else if (PT->isSignedIntegerType())
462    CS.setKind(ConversionSpecifier::dArg);
463  else if (PT->isUnsignedIntegerType())
464    CS.setKind(ConversionSpecifier::uArg);
465  else
466    llvm_unreachable("Unexpected type");
467
468  return true;
469}
470
471void ScanfSpecifier::toString(raw_ostream &os) const {
472  os << "%";
473
474  if (usesPositionalArg())
475    os << getPositionalArgIndex() << "$";
476  if (SuppressAssignment)
477    os << "*";
478
479  FieldWidth.toString(os);
480  os << LM.toString();
481  os << CS.toString();
482}
483
484bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
485                                                    const char *I,
486                                                    const char *E,
487                                                    const LangOptions &LO,
488                                                    const TargetInfo &Target) {
489
490  unsigned argIndex = 0;
491
492  // Keep looking for a format specifier until we have exhausted the string.
493  while (I != E) {
494    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
495                                                          LO, Target);
496    // Did a fail-stop error of any kind occur when parsing the specifier?
497    // If so, don't do any more processing.
498    if (FSR.shouldStop())
499      return true;
500      // Did we exhaust the string or encounter an error that
501      // we can recover from?
502    if (!FSR.hasValue())
503      continue;
504      // We have a format specifier.  Pass it to the callback.
505    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
506                                I - FSR.getStart())) {
507      return true;
508    }
509  }
510  assert(I == E && "Format string not exhausted");
511  return false;
512}
513