PrintfFormatString.cpp revision 2e8f8dc12c16b7a499d4898ad55cafe1c71b1e25
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16#include "clang/AST/ASTContext.h"
17
18using clang::analyze_printf::FormatSpecifier;
19using clang::analyze_printf::OptionalAmount;
20using clang::analyze_printf::ArgTypeResult;
21using clang::analyze_printf::FormatStringHandler;
22using namespace clang;
23
24namespace {
25class FormatSpecifierResult {
26  FormatSpecifier FS;
27  const char *Start;
28  bool Stop;
29public:
30  FormatSpecifierResult(bool stop = false)
31    : Start(0), Stop(stop) {}
32  FormatSpecifierResult(const char *start,
33                        const FormatSpecifier &fs)
34    : FS(fs), Start(start), Stop(false) {}
35
36
37  const char *getStart() const { return Start; }
38  bool shouldStop() const { return Stop; }
39  bool hasValue() const { return Start != 0; }
40  const FormatSpecifier &getValue() const {
41    assert(hasValue());
42    return FS;
43  }
44  const FormatSpecifier &getValue() { return FS; }
45};
46} // end anonymous namespace
47
/// Scope guard that assigns one variable to another when it is destroyed.
///
/// Both arguments are captured by reference, so the value written is whatever
/// the source variable holds at destruction time, not what it held when the
/// guard was constructed.
template <typename T>
class UpdateOnReturn {
  T &Destination;
  const T &Source;
public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Destination(valueToUpdate), Source(valueToCopy) {}

  // Perform the deferred assignment.
  ~UpdateOnReturn() { Destination = Source; }
};
60
61static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
62  const char *I = Beg;
63  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64
65  bool foundDigits = false;
66  unsigned accumulator = 0;
67
68  for ( ; I != E; ++I) {
69    char c = *I;
70    if (c >= '0' && c <= '9') {
71      foundDigits = true;
72      accumulator += (accumulator * 10) + (c - '0');
73      continue;
74    }
75
76    if (foundDigits)
77      return OptionalAmount(accumulator, Beg);
78
79    if (c == '*') {
80      ++I;
81      return OptionalAmount(OptionalAmount::Arg, Beg);
82    }
83
84    break;
85  }
86
87  return OptionalAmount();
88}
89
90static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
91                                                  const char *&Beg,
92                                                  const char *E) {
93
94  using namespace clang::analyze_printf;
95
96  const char *I = Beg;
97  const char *Start = 0;
98  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
99
100  // Look for a '%' character that indicates the start of a format specifier.
101  for ( ; I != E ; ++I) {
102    char c = *I;
103    if (c == '\0') {
104      // Detect spurious null characters, which are likely errors.
105      H.HandleNullChar(I);
106      return true;
107    }
108    if (c == '%') {
109      Start = I++;  // Record the start of the format specifier.
110      break;
111    }
112  }
113
114  // No format specifier found?
115  if (!Start)
116    return false;
117
118  if (I == E) {
119    // No more characters left?
120    H.HandleIncompleteFormatSpecifier(Start, E - Start);
121    return true;
122  }
123
124  FormatSpecifier FS;
125
126  // Look for flags (if any).
127  bool hasMore = true;
128  for ( ; I != E; ++I) {
129    switch (*I) {
130      default: hasMore = false; break;
131      case '-': FS.setIsLeftJustified(); break;
132      case '+': FS.setHasPlusPrefix(); break;
133      case ' ': FS.setHasSpacePrefix(); break;
134      case '#': FS.setHasAlternativeForm(); break;
135      case '0': FS.setHasLeadingZeros(); break;
136    }
137    if (!hasMore)
138      break;
139  }
140
141  if (I == E) {
142    // No more characters left?
143    H.HandleIncompleteFormatSpecifier(Start, E - Start);
144    return true;
145  }
146
147  // Look for the field width (if any).
148  FS.setFieldWidth(ParseAmount(I, E));
149
150  if (I == E) {
151    // No more characters left?
152    H.HandleIncompleteFormatSpecifier(Start, E - Start);
153    return true;
154  }
155
156  // Look for the precision (if any).
157  if (*I == '.') {
158    ++I;
159    if (I == E) {
160      H.HandleIncompleteFormatSpecifier(Start, E - Start);
161      return true;
162    }
163
164    FS.setPrecision(ParseAmount(I, E));
165
166    if (I == E) {
167      // No more characters left?
168      H.HandleIncompleteFormatSpecifier(Start, E - Start);
169      return true;
170    }
171  }
172
173  // Look for the length modifier.
174  LengthModifier lm = None;
175  switch (*I) {
176    default:
177      break;
178    case 'h':
179      ++I;
180      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
181      break;
182    case 'l':
183      ++I;
184      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
185      break;
186    case 'j': lm = AsIntMax;     ++I; break;
187    case 'z': lm = AsSizeT;      ++I; break;
188    case 't': lm = AsPtrDiff;    ++I; break;
189    case 'L': lm = AsLongDouble; ++I; break;
190    case 'q': lm = AsLongLong;   ++I; break;
191  }
192  FS.setLengthModifier(lm);
193
194  if (I == E) {
195    // No more characters left?
196    H.HandleIncompleteFormatSpecifier(Start, E - Start);
197    return true;
198  }
199
200  if (*I == '\0') {
201    // Detect spurious null characters, which are likely errors.
202    H.HandleNullChar(I);
203    return true;
204  }
205
206  // Finally, look for the conversion specifier.
207  const char *conversionPosition = I++;
208  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
209  switch (*conversionPosition) {
210    default:
211      break;
212    // C99: 7.19.6.1 (section 8).
213    case 'd': k = ConversionSpecifier::dArg; break;
214    case 'i': k = ConversionSpecifier::iArg; break;
215    case 'o': k = ConversionSpecifier::oArg; break;
216    case 'u': k = ConversionSpecifier::uArg; break;
217    case 'x': k = ConversionSpecifier::xArg; break;
218    case 'X': k = ConversionSpecifier::XArg; break;
219    case 'f': k = ConversionSpecifier::fArg; break;
220    case 'F': k = ConversionSpecifier::FArg; break;
221    case 'e': k = ConversionSpecifier::eArg; break;
222    case 'E': k = ConversionSpecifier::EArg; break;
223    case 'g': k = ConversionSpecifier::gArg; break;
224    case 'G': k = ConversionSpecifier::GArg; break;
225    case 'a': k = ConversionSpecifier::aArg; break;
226    case 'A': k = ConversionSpecifier::AArg; break;
227    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
228    case 's': k = ConversionSpecifier::CStrArg;      break;
229    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
230    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
231    case '%': k = ConversionSpecifier::PercentArg;   break;
232    // Objective-C.
233    case '@': k = ConversionSpecifier::ObjCObjArg; break;
234    // Glibc specific.
235    case 'm': k = ConversionSpecifier::PrintErrno; break;
236  }
237  FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
238
239  if (k == ConversionSpecifier::InvalidSpecifier) {
240    H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
241    return false; // Keep processing format specifiers.
242  }
243  return FormatSpecifierResult(Start, FS);
244}
245
246bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
247                       const char *I, const char *E) {
248  // Keep looking for a format specifier until we have exhausted the string.
249  while (I != E) {
250    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
251    // Did a fail-stop error of any kind occur when parsing the specifier?
252    // If so, don't do any more processing.
253    if (FSR.shouldStop())
254      return true;;
255    // Did we exhaust the string or encounter an error that
256    // we can recover from?
257    if (!FSR.hasValue())
258      continue;
259    // We have a format specifier.  Pass it to the callback.
260    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
261                                 I - FSR.getStart()))
262      return true;
263  }
264  assert(I == E && "Format string not exhausted");
265  return false;
266}
267
268FormatStringHandler::~FormatStringHandler() {}
269
270//===----------------------------------------------------------------------===//
271// Methods on FormatSpecifier.
272//===----------------------------------------------------------------------===//
273
274ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
275  if (!CS.consumesDataArgument())
276    return ArgTypeResult::Invalid();
277
278  if (CS.isIntArg())
279    switch (LM) {
280      case AsLongDouble:
281        return ArgTypeResult::Invalid();
282      case None: return Ctx.IntTy;
283      case AsChar: return Ctx.SignedCharTy;
284      case AsShort: return Ctx.ShortTy;
285      case AsLong: return Ctx.LongTy;
286      case AsLongLong: return Ctx.LongLongTy;
287      case AsIntMax:
288        // FIXME: Return unknown for now.
289        return ArgTypeResult();
290      case AsSizeT: return Ctx.getSizeType();
291      case AsPtrDiff: return Ctx.getPointerDiffType();
292    }
293
294  if (CS.isUIntArg())
295    switch (LM) {
296      case AsLongDouble:
297        return ArgTypeResult::Invalid();
298      case None: return Ctx.UnsignedIntTy;
299      case AsChar: return Ctx.UnsignedCharTy;
300      case AsShort: return Ctx.UnsignedShortTy;
301      case AsLong: return Ctx.UnsignedLongTy;
302      case AsLongLong: return Ctx.UnsignedLongLongTy;
303      case AsIntMax:
304        // FIXME: Return unknown for now.
305        return ArgTypeResult();
306      case AsSizeT:
307        // FIXME: How to get the corresponding unsigned
308        // version of size_t?
309        return ArgTypeResult();
310      case AsPtrDiff:
311        // FIXME: How to get the corresponding unsigned
312        // version of ptrdiff_t?
313        return ArgTypeResult();
314    }
315
316  if (CS.isDoubleArg()) {
317    if (LM == AsLongDouble)
318      return Ctx.LongDoubleTy;
319    return Ctx.DoubleTy;
320  }
321
322  // FIXME: Handle other cases.
323  return ArgTypeResult();
324}
325
326