ScanfFormatString.cpp revision 651f13cea278ec967336033dd032faef0e9fc2ec
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17#include "clang/Basic/TargetInfo.h"
18
19using clang::analyze_format_string::ArgType;
20using clang::analyze_format_string::FormatStringHandler;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::ConversionSpecifier;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Look for a ']' character which denotes the end of the scan list.
54  while (*I != ']') {
55    if (++I == E) {
56      H.HandleIncompleteScanList(start, I - 1);
57      return true;
58    }
59  }
60
61  CS.setEndScanList(I);
62  return false;
63}
64
65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66// We can possibly refactor.
67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
68                                                const char *&Beg,
69                                                const char *E,
70                                                unsigned &argIndex,
71                                                const LangOptions &LO,
72                                                const TargetInfo &Target) {
73
74  using namespace clang::analyze_scanf;
75  const char *I = Beg;
76  const char *Start = 0;
77  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
78
79    // Look for a '%' character that indicates the start of a format specifier.
80  for ( ; I != E ; ++I) {
81    char c = *I;
82    if (c == '\0') {
83        // Detect spurious null characters, which are likely errors.
84      H.HandleNullChar(I);
85      return true;
86    }
87    if (c == '%') {
88      Start = I++;  // Record the start of the format specifier.
89      break;
90    }
91  }
92
93    // No format specifier found?
94  if (!Start)
95    return false;
96
97  if (I == E) {
98      // No more characters left?
99    H.HandleIncompleteSpecifier(Start, E - Start);
100    return true;
101  }
102
103  ScanfSpecifier FS;
104  if (ParseArgPosition(H, FS, Start, I, E))
105    return true;
106
107  if (I == E) {
108      // No more characters left?
109    H.HandleIncompleteSpecifier(Start, E - Start);
110    return true;
111  }
112
113  // Look for '*' flag if it is present.
114  if (*I == '*') {
115    FS.setSuppressAssignment(I);
116    if (++I == E) {
117      H.HandleIncompleteSpecifier(Start, E - Start);
118      return true;
119    }
120  }
121
122  // Look for the field width (if any).  Unlike printf, this is either
123  // a fixed integer or isn't present.
124  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
125  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
126    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
127    FS.setFieldWidth(Amt);
128
129    if (I == E) {
130      // No more characters left?
131      H.HandleIncompleteSpecifier(Start, E - Start);
132      return true;
133    }
134  }
135
136  // Look for the length modifier.
137  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
138      // No more characters left?
139    H.HandleIncompleteSpecifier(Start, E - Start);
140    return true;
141  }
142
143  // Detect spurious null characters, which are likely errors.
144  if (*I == '\0') {
145    H.HandleNullChar(I);
146    return true;
147  }
148
149  // Finally, look for the conversion specifier.
150  const char *conversionPosition = I++;
151  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
152  switch (*conversionPosition) {
153    default:
154      break;
155    case '%': k = ConversionSpecifier::PercentArg;   break;
156    case 'A': k = ConversionSpecifier::AArg; break;
157    case 'E': k = ConversionSpecifier::EArg; break;
158    case 'F': k = ConversionSpecifier::FArg; break;
159    case 'G': k = ConversionSpecifier::GArg; break;
160    case 'X': k = ConversionSpecifier::XArg; break;
161    case 'a': k = ConversionSpecifier::aArg; break;
162    case 'd': k = ConversionSpecifier::dArg; break;
163    case 'e': k = ConversionSpecifier::eArg; break;
164    case 'f': k = ConversionSpecifier::fArg; break;
165    case 'g': k = ConversionSpecifier::gArg; break;
166    case 'i': k = ConversionSpecifier::iArg; break;
167    case 'n': k = ConversionSpecifier::nArg; break;
168    case 'c': k = ConversionSpecifier::cArg; break;
169    case 'C': k = ConversionSpecifier::CArg; break;
170    case 'S': k = ConversionSpecifier::SArg; break;
171    case '[': k = ConversionSpecifier::ScanListArg; break;
172    case 'u': k = ConversionSpecifier::uArg; break;
173    case 'x': k = ConversionSpecifier::xArg; break;
174    case 'o': k = ConversionSpecifier::oArg; break;
175    case 's': k = ConversionSpecifier::sArg; break;
176    case 'p': k = ConversionSpecifier::pArg; break;
177    // Apple extensions
178      // Apple-specific
179    case 'D':
180      if (Target.getTriple().isOSDarwin())
181        k = ConversionSpecifier::DArg;
182      break;
183    case 'O':
184      if (Target.getTriple().isOSDarwin())
185        k = ConversionSpecifier::OArg;
186      break;
187    case 'U':
188      if (Target.getTriple().isOSDarwin())
189        k = ConversionSpecifier::UArg;
190      break;
191  }
192  ScanfConversionSpecifier CS(conversionPosition, k);
193  if (k == ScanfConversionSpecifier::ScanListArg) {
194    if (ParseScanList(H, CS, I, E))
195      return true;
196  }
197  FS.setConversionSpecifier(CS);
198  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
199      && !FS.usesPositionalArg())
200    FS.setArgIndex(argIndex++);
201
202  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
203  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
204
205  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
206    // Assume the conversion takes one argument.
207    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
208  }
209  return ScanfSpecifierResult(Start, FS);
210}
211
/// Compute the argument type that this scanf conversion expects.
///
/// The result is chosen from the conversion specifier kind together with the
/// length modifier (LM).  Every concrete type returned here is wrapped with
/// ArgType::PtrTo.
///
/// \param Ctx AST context supplying the builtin and typedef'd types.
/// \returns ArgType::Invalid() when the conversion does not consume a data
/// argument or the length modifier is rejected for the conversion; a
/// default-constructed ArgType where no specific type is produced (the FIXME
/// cases below and any conversion kind that is not listed).
ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
  const ScanfConversionSpecifier &CS = getConversionSpecifier();

  if (!CS.consumesDataArgument())
    return ArgType::Invalid();

  switch(CS.getKind()) {
    // Signed int.
    case ConversionSpecifier::dArg:
    case ConversionSpecifier::DArg:
    case ConversionSpecifier::iArg:
      // NOTE(review): this inner switch has no default label.  If a
      // LengthModifier kind were not listed, control would leave the inner
      // switch and fall into the next group of case labels -- confirm that
      // every kind is covered.  The same applies to the unsigned and
      // write-back groups below.
      switch (LM.getKind()) {
        case LengthModifier::None:
          return ArgType::PtrTo(Ctx.IntTy);
        case LengthModifier::AsChar:
          return ArgType::PtrTo(ArgType::AnyCharTy);
        case LengthModifier::AsShort:
          return ArgType::PtrTo(Ctx.ShortTy);
        case LengthModifier::AsLong:
          return ArgType::PtrTo(Ctx.LongTy);
        case LengthModifier::AsLongLong:
        case LengthModifier::AsQuad:
          return ArgType::PtrTo(Ctx.LongLongTy);
        case LengthModifier::AsInt64:
          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
        case LengthModifier::AsIntMax:
          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
        case LengthModifier::AsSizeT:
          // FIXME: ssize_t.
          return ArgType();
        case LengthModifier::AsPtrDiff:
          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
        case LengthModifier::AsLongDouble:
          // GNU extension.
          return ArgType::PtrTo(Ctx.LongLongTy);
        case LengthModifier::AsAllocate:
        case LengthModifier::AsMAllocate:
        case LengthModifier::AsInt32:
        case LengthModifier::AsInt3264:
          return ArgType::Invalid();
      }

    // Unsigned int.
    case ConversionSpecifier::oArg:
    case ConversionSpecifier::OArg:
    case ConversionSpecifier::uArg:
    case ConversionSpecifier::UArg:
    case ConversionSpecifier::xArg:
    case ConversionSpecifier::XArg:
      switch (LM.getKind()) {
        case LengthModifier::None:
          return ArgType::PtrTo(Ctx.UnsignedIntTy);
        case LengthModifier::AsChar:
          return ArgType::PtrTo(Ctx.UnsignedCharTy);
        case LengthModifier::AsShort:
          return ArgType::PtrTo(Ctx.UnsignedShortTy);
        case LengthModifier::AsLong:
          return ArgType::PtrTo(Ctx.UnsignedLongTy);
        case LengthModifier::AsLongLong:
        case LengthModifier::AsQuad:
          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
        case LengthModifier::AsInt64:
          return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
        case LengthModifier::AsIntMax:
          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
        case LengthModifier::AsSizeT:
          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
        case LengthModifier::AsPtrDiff:
          // FIXME: Unsigned version of ptrdiff_t?
          return ArgType();
        case LengthModifier::AsLongDouble:
          // GNU extension.
          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
        case LengthModifier::AsAllocate:
        case LengthModifier::AsMAllocate:
        case LengthModifier::AsInt32:
        case LengthModifier::AsInt3264:
          return ArgType::Invalid();
      }

    // Float.
    case ConversionSpecifier::aArg:
    case ConversionSpecifier::AArg:
    case ConversionSpecifier::eArg:
    case ConversionSpecifier::EArg:
    case ConversionSpecifier::fArg:
    case ConversionSpecifier::FArg:
    case ConversionSpecifier::gArg:
    case ConversionSpecifier::GArg:
      // Only no modifier, 'l' and 'L' are accepted; everything else is
      // rejected by the default label.
      switch (LM.getKind()) {
        case LengthModifier::None:
          return ArgType::PtrTo(Ctx.FloatTy);
        case LengthModifier::AsLong:
          return ArgType::PtrTo(Ctx.DoubleTy);
        case LengthModifier::AsLongDouble:
          return ArgType::PtrTo(Ctx.LongDoubleTy);
        default:
          return ArgType::Invalid();
      }

    // Char, string and scanlist.
    case ConversionSpecifier::cArg:
    case ConversionSpecifier::sArg:
    case ConversionSpecifier::ScanListArg:
      switch (LM.getKind()) {
        case LengthModifier::None:
          return ArgType::PtrTo(ArgType::AnyCharTy);
        case LengthModifier::AsLong:
          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
        case LengthModifier::AsAllocate:
        case LengthModifier::AsMAllocate:
          // With an allocating modifier the argument is a pointer to a
          // C string rather than a pointer to a character.
          return ArgType::PtrTo(ArgType::CStrTy);
        default:
          return ArgType::Invalid();
      }
    case ConversionSpecifier::CArg:
    case ConversionSpecifier::SArg:
      // FIXME: Mac OS X specific?
      switch (LM.getKind()) {
        case LengthModifier::None:
          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
        case LengthModifier::AsAllocate:
        case LengthModifier::AsMAllocate:
          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
        default:
          return ArgType::Invalid();
      }

    // Pointer.
    case ConversionSpecifier::pArg:
      // The length modifier is not consulted for %p.
      return ArgType::PtrTo(ArgType::CPointerTy);

    // Write-back.
    case ConversionSpecifier::nArg:
      switch (LM.getKind()) {
        case LengthModifier::None:
          return ArgType::PtrTo(Ctx.IntTy);
        case LengthModifier::AsChar:
          return ArgType::PtrTo(Ctx.SignedCharTy);
        case LengthModifier::AsShort:
          return ArgType::PtrTo(Ctx.ShortTy);
        case LengthModifier::AsLong:
          return ArgType::PtrTo(Ctx.LongTy);
        case LengthModifier::AsLongLong:
        case LengthModifier::AsQuad:
          return ArgType::PtrTo(Ctx.LongLongTy);
        case LengthModifier::AsInt64:
          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
        case LengthModifier::AsIntMax:
          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
        case LengthModifier::AsSizeT:
          return ArgType(); // FIXME: ssize_t
        case LengthModifier::AsPtrDiff:
          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
        case LengthModifier::AsLongDouble:
          return ArgType(); // FIXME: Is this a known extension?
        case LengthModifier::AsAllocate:
        case LengthModifier::AsMAllocate:
        case LengthModifier::AsInt32:
        case LengthModifier::AsInt3264:
          return ArgType::Invalid();
        }

    // Any conversion kind not handled above ends up at the final return.
    default:
      break;
  }

  return ArgType();
}
381
382bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
383                             const LangOptions &LangOpt,
384                             ASTContext &Ctx) {
385
386  // %n is different from other conversion specifiers; don't try to fix it.
387  if (CS.getKind() == ConversionSpecifier::nArg)
388    return false;
389
390  if (!QT->isPointerType())
391    return false;
392
393  QualType PT = QT->getPointeeType();
394
395  // If it's an enum, get its underlying type.
396  if (const EnumType *ETy = PT->getAs<EnumType>())
397    PT = ETy->getDecl()->getIntegerType();
398
399  const BuiltinType *BT = PT->getAs<BuiltinType>();
400  if (!BT)
401    return false;
402
403  // Pointer to a character.
404  if (PT->isAnyCharacterType()) {
405    CS.setKind(ConversionSpecifier::sArg);
406    if (PT->isWideCharType())
407      LM.setKind(LengthModifier::AsWideChar);
408    else
409      LM.setKind(LengthModifier::None);
410
411    // If we know the target array length, we can use it as a field width.
412    if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
413      if (CAT->getSizeModifier() == ArrayType::Normal)
414        FieldWidth = OptionalAmount(OptionalAmount::Constant,
415                                    CAT->getSize().getZExtValue() - 1,
416                                    "", 0, false);
417
418    }
419    return true;
420  }
421
422  // Figure out the length modifier.
423  switch (BT->getKind()) {
424    // no modifier
425    case BuiltinType::UInt:
426    case BuiltinType::Int:
427    case BuiltinType::Float:
428      LM.setKind(LengthModifier::None);
429      break;
430
431    // hh
432    case BuiltinType::Char_U:
433    case BuiltinType::UChar:
434    case BuiltinType::Char_S:
435    case BuiltinType::SChar:
436      LM.setKind(LengthModifier::AsChar);
437      break;
438
439    // h
440    case BuiltinType::Short:
441    case BuiltinType::UShort:
442      LM.setKind(LengthModifier::AsShort);
443      break;
444
445    // l
446    case BuiltinType::Long:
447    case BuiltinType::ULong:
448    case BuiltinType::Double:
449      LM.setKind(LengthModifier::AsLong);
450      break;
451
452    // ll
453    case BuiltinType::LongLong:
454    case BuiltinType::ULongLong:
455      LM.setKind(LengthModifier::AsLongLong);
456      break;
457
458    // L
459    case BuiltinType::LongDouble:
460      LM.setKind(LengthModifier::AsLongDouble);
461      break;
462
463    // Don't know.
464    default:
465      return false;
466  }
467
468  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
469  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
470    namedTypeToLengthModifier(PT, LM);
471
472  // If fixing the length modifier was enough, we are done.
473  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
474    const analyze_scanf::ArgType &AT = getArgType(Ctx);
475    if (AT.isValid() && AT.matchesType(Ctx, QT))
476      return true;
477  }
478
479  // Figure out the conversion specifier.
480  if (PT->isRealFloatingType())
481    CS.setKind(ConversionSpecifier::fArg);
482  else if (PT->isSignedIntegerType())
483    CS.setKind(ConversionSpecifier::dArg);
484  else if (PT->isUnsignedIntegerType())
485    CS.setKind(ConversionSpecifier::uArg);
486  else
487    llvm_unreachable("Unexpected type");
488
489  return true;
490}
491
492void ScanfSpecifier::toString(raw_ostream &os) const {
493  os << "%";
494
495  if (usesPositionalArg())
496    os << getPositionalArgIndex() << "$";
497  if (SuppressAssignment)
498    os << "*";
499
500  FieldWidth.toString(os);
501  os << LM.toString();
502  os << CS.toString();
503}
504
505bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
506                                                    const char *I,
507                                                    const char *E,
508                                                    const LangOptions &LO,
509                                                    const TargetInfo &Target) {
510
511  unsigned argIndex = 0;
512
513  // Keep looking for a format specifier until we have exhausted the string.
514  while (I != E) {
515    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
516                                                          LO, Target);
517    // Did a fail-stop error of any kind occur when parsing the specifier?
518    // If so, don't do any more processing.
519    if (FSR.shouldStop())
520      return true;
521      // Did we exhaust the string or encounter an error that
522      // we can recover from?
523    if (!FSR.hasValue())
524      continue;
525      // We have a format specifier.  Pass it to the callback.
526    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
527                                I - FSR.getStart())) {
528      return true;
529    }
530  }
531  assert(I == E && "Format string not exhausted");
532  return false;
533}
534