PrintfFormatString.cpp revision f911eba72e6d7275e5cfdb79ab23fb2aa9cc01d0
//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16#include "clang/AST/ASTContext.h"
17
18using clang::analyze_printf::FormatSpecifier;
19using clang::analyze_printf::OptionalAmount;
20using clang::analyze_printf::ArgTypeResult;
21using namespace clang;
22
23namespace {
24class FormatSpecifierResult {
25  FormatSpecifier FS;
26  const char *Start;
27  bool Stop;
28public:
29  FormatSpecifierResult(bool stop = false)
30    : Start(0), Stop(stop) {}
31  FormatSpecifierResult(const char *start,
32                        const FormatSpecifier &fs)
33    : FS(fs), Start(start), Stop(false) {}
34
35
36  const char *getStart() const { return Start; }
37  bool shouldStop() const { return Stop; }
38  bool hasValue() const { return Start != 0; }
39  const FormatSpecifier &getValue() const {
40    assert(hasValue());
41    return FS;
42  }
43  const FormatSpecifier &getValue() { return FS; }
44};
45} // end anonymous namespace
46
/// Helper that, when destroyed, assigns the then-current value of one object
/// to another.  Both objects are held by reference, so the value copied is the
/// one the source has at destruction time, not at construction time.
template <typename T>
class UpdateOnReturn {
public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Target(valueToUpdate), Source(valueToCopy) {}

  // Perform the deferred assignment.
  ~UpdateOnReturn() { Target = Source; }

private:
  T &Target;        // Object written to on destruction.
  const T &Source;  // Object whose final value is read on destruction.
};
59
60static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
61  const char *I = Beg;
62  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
63
64  bool foundDigits = false;
65  unsigned accumulator = 0;
66
67  for ( ; I != E; ++I) {
68    char c = *I;
69    if (c >= '0' && c <= '9') {
70      foundDigits = true;
71      accumulator += (accumulator * 10) + (c - '0');
72      continue;
73    }
74
75    if (foundDigits)
76      return OptionalAmount(accumulator, Beg);
77
78    if (c == '*') {
79      ++I;
80      return OptionalAmount(OptionalAmount::Arg, Beg);
81    }
82
83    break;
84  }
85
86  return OptionalAmount();
87}
88
89static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
90                                                  const char *&Beg, const char *E) {
91
92  using namespace clang::analyze_printf;
93
94  const char *I = Beg;
95  const char *Start = 0;
96  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
97
98  // Look for a '%' character that indicates the start of a format specifier.
99  for ( ; I != E ; ++I) {
100    char c = *I;
101    if (c == '\0') {
102      // Detect spurious null characters, which are likely errors.
103      H.HandleNullChar(I);
104      return true;
105    }
106    if (c == '%') {
107      Start = I++;  // Record the start of the format specifier.
108      break;
109    }
110  }
111
112  // No format specifier found?
113  if (!Start)
114    return false;
115
116  if (I == E) {
117    // No more characters left?
118    H.HandleIncompleteFormatSpecifier(Start, E - Start);
119    return true;
120  }
121
122  FormatSpecifier FS;
123
124  // Look for flags (if any).
125  bool hasMore = true;
126  for ( ; I != E; ++I) {
127    switch (*I) {
128      default: hasMore = false; break;
129      case '-': FS.setIsLeftJustified(); break;
130      case '+': FS.setHasPlusPrefix(); break;
131      case ' ': FS.setHasSpacePrefix(); break;
132      case '#': FS.setHasAlternativeForm(); break;
133      case '0': FS.setHasLeadingZeros(); break;
134    }
135    if (!hasMore)
136      break;
137  }
138
139  if (I == E) {
140    // No more characters left?
141    H.HandleIncompleteFormatSpecifier(Start, E - Start);
142    return true;
143  }
144
145  // Look for the field width (if any).
146  FS.setFieldWidth(ParseAmount(I, E));
147
148  if (I == E) {
149    // No more characters left?
150    H.HandleIncompleteFormatSpecifier(Start, E - Start);
151    return true;
152  }
153
154  // Look for the precision (if any).
155  if (*I == '.') {
156    ++I;
157    if (I == E) {
158      H.HandleIncompleteFormatSpecifier(Start, E - Start);
159      return true;
160    }
161
162    FS.setPrecision(ParseAmount(I, E));
163
164    if (I == E) {
165      // No more characters left?
166      H.HandleIncompleteFormatSpecifier(Start, E - Start);
167      return true;
168    }
169  }
170
171  // Look for the length modifier.
172  LengthModifier lm = None;
173  switch (*I) {
174    default:
175      break;
176    case 'h':
177      ++I;
178      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
179      break;
180    case 'l':
181      ++I;
182      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
183      break;
184    case 'j': lm = AsIntMax;     ++I; break;
185    case 'z': lm = AsSizeT;      ++I; break;
186    case 't': lm = AsPtrDiff;    ++I; break;
187    case 'L': lm = AsLongDouble; ++I; break;
188    case 'q': lm = AsLongLong;   ++I; break;
189  }
190  FS.setLengthModifier(lm);
191
192  if (I == E) {
193    // No more characters left?
194    H.HandleIncompleteFormatSpecifier(Start, E - Start);
195    return true;
196  }
197
198  if (*I == '\0') {
199	// Detect spurious null characters, which are likely errors.
200	H.HandleNullChar(I);
201	return true;
202  }
203
204  // Finally, look for the conversion specifier.
205  const char *conversionPosition = I++;
206  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
207  switch (*conversionPosition) {
208    default:
209      break;
210    // C99: 7.19.6.1 (section 8).
211    case 'd': k = ConversionSpecifier::dArg; break;
212    case 'i': k = ConversionSpecifier::iArg; break;
213    case 'o': k = ConversionSpecifier::oArg; break;
214    case 'u': k = ConversionSpecifier::uArg; break;
215    case 'x': k = ConversionSpecifier::xArg; break;
216    case 'X': k = ConversionSpecifier::XArg; break;
217    case 'f': k = ConversionSpecifier::fArg; break;
218    case 'F': k = ConversionSpecifier::FArg; break;
219    case 'e': k = ConversionSpecifier::eArg; break;
220    case 'E': k = ConversionSpecifier::EArg; break;
221    case 'g': k = ConversionSpecifier::gArg; break;
222    case 'G': k = ConversionSpecifier::GArg; break;
223    case 'a': k = ConversionSpecifier::aArg; break;
224    case 'A': k = ConversionSpecifier::AArg; break;
225    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
226    case 's': k = ConversionSpecifier::CStrArg;      break;
227    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
228    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
229    case '%': k = ConversionSpecifier::PercentArg;   break;
230    // Objective-C.
231    case '@': k = ConversionSpecifier::ObjCObjArg; break;
232	// Glibc specific.
233    case 'm': k = ConversionSpecifier::PrintErrno; break;
234  }
235  FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
236
237  if (k == ConversionSpecifier::InvalidSpecifier) {
238    H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
239    return false; // Keep processing format specifiers.
240  }
241  return FormatSpecifierResult(Start, FS);
242}
243
244bool clang::ParseFormatString(FormatStringHandler &H,
245                       const char *I, const char *E) {
246  // Keep looking for a format specifier until we have exhausted the string.
247  while (I != E) {
248    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
249    // Did a fail-stop error of any kind occur when parsing the specifier?
250    // If so, don't do any more processing.
251    if (FSR.shouldStop())
252      return true;;
253    // Did we exhaust the string or encounter an error that
254    // we can recover from?
255    if (!FSR.hasValue())
256      continue;
257    // We have a format specifier.  Pass it to the callback.
258    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
259                                 I - FSR.getStart()))
260      return true;
261  }
262  assert(I == E && "Format string not exhausted");
263  return false;
264}
265
266FormatStringHandler::~FormatStringHandler() {}
267
268//===----------------------------------------------------------------------===//
269// Methods on FormatSpecifier.
270//===----------------------------------------------------------------------===//
271
272ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
273  if (!CS.consumesDataArgument())
274    return ArgTypeResult::Invalid();
275
276  if (CS.isIntArg())
277    switch (LM) {
278      case AsLongDouble:
279        return ArgTypeResult::Invalid();
280      case None: return Ctx.IntTy;
281      case AsChar: return Ctx.SignedCharTy;
282      case AsShort: return Ctx.ShortTy;
283      case AsLong: return Ctx.LongTy;
284      case AsLongLong: return Ctx.LongLongTy;
285      case AsIntMax:
286        // FIXME: Return unknown for now.
287        return ArgTypeResult();
288      case AsSizeT: return Ctx.getSizeType();
289      case AsPtrDiff: return Ctx.getPointerDiffType();
290    }
291
292  if (CS.isUIntArg())
293    switch (LM) {
294      case AsLongDouble:
295        return ArgTypeResult::Invalid();
296      case None: return Ctx.UnsignedIntTy;
297      case AsChar: return Ctx.UnsignedCharTy;
298      case AsShort: return Ctx.UnsignedShortTy;
299      case AsLong: return Ctx.UnsignedLongTy;
300      case AsLongLong: return Ctx.UnsignedLongLongTy;
301      case AsIntMax:
302        // FIXME: Return unknown for now.
303        return ArgTypeResult();
304      case AsSizeT:
305        // FIXME: How to get the corresponding unsigned
306        // version of size_t?
307        return ArgTypeResult();
308      case AsPtrDiff:
309        // FIXME: How to get the corresponding unsigned
310        // version of ptrdiff_t?
311        return ArgTypeResult();
312    }
313
314  if (CS.isDoubleArg()) {
315    if (LM == AsLongDouble)
316      return Ctx.LongDoubleTy;
317    return Ctx.DoubleTy;
318  }
319
320  // FIXME: Handle other cases.
321  return ArgTypeResult();
322}
323
324