ScanfFormatString.cpp revision f762905bdefad77f91c7c6782a9c17e6b274d393
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfConversionSpecifier;
24using clang::analyze_scanf::ScanfSpecifier;
25
// Result type returned by ParseScanfSpecifier().  In this file it is built
// either from a bool or from a (start pointer, ScanfSpecifier) pair, and it is
// queried through shouldStop(), hasValue(), getValue() and getStart().
typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
        ScanfSpecifierResult;
28
29static bool ParseScanList(FormatStringHandler &H,
30                          ScanfConversionSpecifier &CS,
31                          const char *&Beg, const char *E) {
32  const char *I = Beg;
33  const char *start = I - 1;
34  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
35
36  // No more characters?
37  if (I == E) {
38    H.HandleIncompleteScanList(start, I);
39    return true;
40  }
41
42  // Special case: ']' is the first character.
43  if (*I == ']') {
44    if (++I == E) {
45      H.HandleIncompleteScanList(start, I - 1);
46      return true;
47    }
48  }
49
50  // Look for a ']' character which denotes the end of the scan list.
51  while (*I != ']') {
52    if (++I == E) {
53      H.HandleIncompleteScanList(start, I - 1);
54      return true;
55    }
56  }
57
58  CS.setEndScanList(I);
59  return false;
60}
61
62// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
63// We can possibly refactor.
64static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
65                                                const char *&Beg,
66                                                const char *E,
67                                                unsigned &argIndex) {
68
69  using namespace clang::analyze_scanf;
70  const char *I = Beg;
71  const char *Start = 0;
72  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
73
74    // Look for a '%' character that indicates the start of a format specifier.
75  for ( ; I != E ; ++I) {
76    char c = *I;
77    if (c == '\0') {
78        // Detect spurious null characters, which are likely errors.
79      H.HandleNullChar(I);
80      return true;
81    }
82    if (c == '%') {
83      Start = I++;  // Record the start of the format specifier.
84      break;
85    }
86  }
87
88    // No format specifier found?
89  if (!Start)
90    return false;
91
92  if (I == E) {
93      // No more characters left?
94    H.HandleIncompleteSpecifier(Start, E - Start);
95    return true;
96  }
97
98  ScanfSpecifier FS;
99  if (ParseArgPosition(H, FS, Start, I, E))
100    return true;
101
102  if (I == E) {
103      // No more characters left?
104    H.HandleIncompleteSpecifier(Start, E - Start);
105    return true;
106  }
107
108  // Look for '*' flag if it is present.
109  if (*I == '*') {
110    FS.setSuppressAssignment(I);
111    if (++I == E) {
112      H.HandleIncompleteSpecifier(Start, E - Start);
113      return true;
114    }
115  }
116
117  // Look for the field width (if any).  Unlike printf, this is either
118  // a fixed integer or isn't present.
119  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
120  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
121    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
122    FS.setFieldWidth(Amt);
123
124    if (I == E) {
125      // No more characters left?
126      H.HandleIncompleteSpecifier(Start, E - Start);
127      return true;
128    }
129  }
130
131  // Look for the length modifier.
132  if (ParseLengthModifier(FS, I, E) && I == E) {
133      // No more characters left?
134    H.HandleIncompleteSpecifier(Start, E - Start);
135    return true;
136  }
137
138  // Detect spurious null characters, which are likely errors.
139  if (*I == '\0') {
140    H.HandleNullChar(I);
141    return true;
142  }
143
144  // Finally, look for the conversion specifier.
145  const char *conversionPosition = I++;
146  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
147  switch (*conversionPosition) {
148    default:
149      break;
150    case '%': k = ConversionSpecifier::PercentArg;   break;
151    case 'A': k = ConversionSpecifier::AArg; break;
152    case 'E': k = ConversionSpecifier::EArg; break;
153    case 'F': k = ConversionSpecifier::FArg; break;
154    case 'G': k = ConversionSpecifier::GArg; break;
155    case 'X': k = ConversionSpecifier::XArg; break;
156    case 'a': k = ConversionSpecifier::aArg; break;
157    case 'd': k = ConversionSpecifier::dArg; break;
158    case 'e': k = ConversionSpecifier::eArg; break;
159    case 'f': k = ConversionSpecifier::fArg; break;
160    case 'g': k = ConversionSpecifier::gArg; break;
161    case 'i': k = ConversionSpecifier::iArg; break;
162    case 'n': k = ConversionSpecifier::nArg; break;
163    case 'c': k = ConversionSpecifier::cArg; break;
164    case 'C': k = ConversionSpecifier::CArg; break;
165    case 'S': k = ConversionSpecifier::SArg; break;
166    case '[': k = ConversionSpecifier::ScanListArg; break;
167    case 'u': k = ConversionSpecifier::uArg; break;
168    case 'x': k = ConversionSpecifier::xArg; break;
169    case 'o': k = ConversionSpecifier::oArg; break;
170    case 's': k = ConversionSpecifier::sArg; break;
171    case 'p': k = ConversionSpecifier::pArg; break;
172  }
173  ScanfConversionSpecifier CS(conversionPosition, k);
174  if (k == ScanfConversionSpecifier::ScanListArg) {
175    if (!ParseScanList(H, CS, I, E))
176      return true;
177  }
178  FS.setConversionSpecifier(CS);
179  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
180      && !FS.usesPositionalArg())
181    FS.setArgIndex(argIndex++);
182
183  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
184  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
185
186  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
187    // Assume the conversion takes one argument.
188    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
189  }
190  return ScanfSpecifierResult(Start, FS);
191}
192
193bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
194                                                    const char *I,
195                                                    const char *E) {
196
197  unsigned argIndex = 0;
198
199  // Keep looking for a format specifier until we have exhausted the string.
200  while (I != E) {
201    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex);
202    // Did a fail-stop error of any kind occur when parsing the specifier?
203    // If so, don't do any more processing.
204    if (FSR.shouldStop())
205      return true;;
206      // Did we exhaust the string or encounter an error that
207      // we can recover from?
208    if (!FSR.hasValue())
209      continue;
210      // We have a format specifier.  Pass it to the callback.
211    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
212                                I - FSR.getStart())) {
213      return true;
214    }
215  }
216  assert(I == E && "Format string not exhausted");
217  return false;
218}
219
220ArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
221  // FIXME: Fill in.
222  return ArgTypeResult();
223}
224
225
226
227