PrintfFormatString.cpp revision e7ffcad1e083f395a0c3cc45d4ad8437e84eb8aa
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16
17using namespace clang;
18using namespace clang::analyze_printf;
19
20namespace {
21class FormatSpecifierResult {
22  FormatSpecifier FS;
23  const char *Start;
24  bool HasError;
25public:
26  FormatSpecifierResult(bool err = false)
27    : Start(0), HasError(err) {}
28  FormatSpecifierResult(const char *start,
29                        const FormatSpecifier &fs)
30    : FS(fs), Start(start), HasError(false) {}
31
32
33  const char *getStart() const { return Start; }
34  bool hasError() const { return HasError; }
35  bool hasValue() const { return Start != 0; }
36  const FormatSpecifier &getValue() const {
37    assert(hasValue());
38    return FS;
39  }
40  const FormatSpecifier &getValue() { return FS; }
41};
42} // end anonymous namespace
43
/// Scope guard that copies one variable into another when it is destroyed.
///
/// Both variables are held by reference, so the value written to the
/// destination is whatever the source holds at destruction time, not at
/// construction time.  The parsers in this file use it to publish a local
/// cursor back to a caller-owned pointer on every return path.
template <typename T>
class UpdateOnReturn {
  T &Destination;       // Variable assigned to in the destructor.
  const T &Source;      // Variable whose final value is copied out.
public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Destination(valueToUpdate),
      Source(valueToCopy) {
  }

  ~UpdateOnReturn() { Destination = Source; }
};
56
57static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
58  const char *I = Beg;
59  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
60
61  bool foundDigits = false;
62  unsigned accumulator = 0;
63
64  for ( ; I != E; ++I) {
65    char c = *I;
66    if (c >= '0' && c <= '9') {
67      foundDigits = true;
68      accumulator += (accumulator * 10) + (c - '0');
69      continue;
70    }
71
72    if (foundDigits)
73      return OptionalAmount(accumulator, Beg);
74
75    if (c == '*') {
76      ++I;
77      return OptionalAmount(OptionalAmount::Arg, Beg);
78    }
79
80    break;
81  }
82
83  return OptionalAmount();
84}
85
86static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
87                                                  const char *&Beg, const char *E) {
88
89  const char *I = Beg;
90  const char *Start = 0;
91  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
92
93  // Look for a '%' character that indicates the start of a format specifier.
94  for ( ; I != E ; ++I) {
95    char c = *I;
96    if (c == '\0') {
97      // Detect spurious null characters, which are likely errors.
98      H.HandleNullChar(I);
99      return true;
100    }
101    if (c == '%') {
102      Start = I++;  // Record the start of the format specifier.
103      break;
104    }
105  }
106
107  // No format specifier found?
108  if (!Start)
109    return false;
110
111  if (I == E) {
112    // No more characters left?
113    H.HandleIncompleteFormatSpecifier(Start, E);
114    return true;
115  }
116
117  FormatSpecifier FS;
118
119  // Look for flags (if any).
120  bool hasMore = true;
121  for ( ; I != E; ++I) {
122    switch (*I) {
123      default: hasMore = false; break;
124      case '-': FS.setIsLeftJustified(); break;
125      case '+': FS.setHasPlusPrefix(); break;
126      case ' ': FS.setHasSpacePrefix(); break;
127      case '#': FS.setHasAlternativeForm(); break;
128      case '0': FS.setHasLeadingZeros(); break;
129    }
130    if (!hasMore)
131      break;
132  }
133
134  if (I == E) {
135    // No more characters left?
136    H.HandleIncompleteFormatSpecifier(Start, E);
137    return true;
138  }
139
140  // Look for the field width (if any).
141  FS.setFieldWidth(ParseAmount(I, E));
142
143  if (I == E) {
144    // No more characters left?
145    H.HandleIncompleteFormatSpecifier(Start, E);
146    return true;
147  }
148
149  // Look for the precision (if any).
150  if (*I == '.') {
151    const char *startPrecision = I++;
152    if (I == E) {
153      H.HandleIncompletePrecision(I - 1);
154      return true;
155    }
156
157    FS.setPrecision(ParseAmount(I, E));
158
159    if (I == E) {
160      // No more characters left?
161      H.HandleIncompletePrecision(startPrecision);
162      return true;
163    }
164  }
165
166  // Look for the length modifier.
167  LengthModifier lm = None;
168  switch (*I) {
169    default:
170      break;
171    case 'h':
172      ++I;
173      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
174      break;
175    case 'l':
176      ++I;
177      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
178      break;
179    case 'j': lm = AsIntMax;     ++I; break;
180    case 'z': lm = AsSizeT;      ++I; break;
181    case 't': lm = AsPtrDiff;    ++I; break;
182    case 'L': lm = AsLongDouble; ++I; break;
183  }
184  FS.setLengthModifier(lm);
185
186  if (I == E) {
187    // No more characters left?
188    H.HandleIncompleteFormatSpecifier(Start, E);
189    return true;
190  }
191
192  // Finally, look for the conversion specifier.
193  const char *conversionPosition = I++;
194  ConversionSpecifier::Kind k;
195  switch (*conversionPosition) {
196    default:
197      H.HandleInvalidConversionSpecifier(conversionPosition);
198      return true;
199    // C99: 7.19.6.1 (section 8).
200    case 'd': k = ConversionSpecifier::dArg; break;
201    case 'i': k = ConversionSpecifier::iArg; break;
202    case 'o': k = ConversionSpecifier::oArg; break;
203    case 'u': k = ConversionSpecifier::uArg; break;
204    case 'x': k = ConversionSpecifier::xArg; break;
205    case 'X': k = ConversionSpecifier::XArg; break;
206    case 'f': k = ConversionSpecifier::fArg; break;
207    case 'F': k = ConversionSpecifier::FArg; break;
208    case 'e': k = ConversionSpecifier::eArg; break;
209    case 'E': k = ConversionSpecifier::EArg; break;
210    case 'g': k = ConversionSpecifier::gArg; break;
211    case 'G': k = ConversionSpecifier::GArg; break;
212    case 'a': k = ConversionSpecifier::aArg; break;
213    case 'A': k = ConversionSpecifier::AArg; break;
214    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
215    case 's': k = ConversionSpecifier::CStrArg;      break;
216    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
217    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
218    case '%': k = ConversionSpecifier::PercentArg;   break;
219    // Objective-C.
220    case '@': k = ConversionSpecifier::ObjCObjArg; break;
221  }
222  FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
223  return FormatSpecifierResult(Start, FS);
224}
225
226namespace clang { namespace analyze_printf {
227bool ParseFormatString(FormatStringHandler &H,
228                       const char *I, const char *E) {
229  // Keep looking for a format specifier until we have exhausted the string.
230  while (I != E) {
231    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
232    // Did an error of any kind occur when parsing the specifier?  If so,
233    // don't do any more processing.
234    if (FSR.hasError())
235      return true;;
236    // Done processing the string?
237    if (!FSR.hasValue())
238      break;
239    // We have a format specifier.  Pass it to the callback.
240    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
241                                 I - FSR.getStart()))
242      return false;
243  }
244  assert(I == E && "Format string not exhausted");
245  return false;
246}
247
248FormatStringHandler::~FormatStringHandler() {}
249}} // end namespace clang::analyze_printf
250