PrintfFormatString.cpp revision 4dcb18ff9d92c66c78077ac5cae4b83af37292e4
//= PrintfFormatString.cpp - Analysis of printf format strings ---*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
//
//===----------------------------------------------------------------------===//
1413a253aae77594bb3fd804417e4aa3d4ffe0229bChris Lattner
15551ccae044b0ff658fe629dd67edd5ffe75d10e8Reid Spencer#include "clang/Analysis/Analyses/PrintfFormatString.h"
161f6efa3996dd1929fbc129203ce5009b620e6969Michael J. Spencer
17d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruthusing clang::analyze_printf::FormatSpecifier;
181f6efa3996dd1929fbc129203ce5009b620e6969Michael J. Spencerusing clang::analyze_printf::OptionalAmount;
19d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruthusing namespace clang;
204b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth
212cdd21c2e4d855500dfb53f77aa74da53ccf9de6Chris Lattnernamespace {
22d0fde30ce850b78371fd1386338350591f9ff494Brian Gaekeclass FormatSpecifierResult {
23e7d3af54737b7787c92ea03588dd4301b29d0899Julien Lerouge  FormatSpecifier FS;
2461ffc0c7fd9a23dc423305f144948fbae9956bf6Owen Anderson  const char *Start;
254b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth  bool Stop;
2613a253aae77594bb3fd804417e4aa3d4ffe0229bChris Lattnerpublic:
27a9d1f2c559ef4b2549e29288fe6944e68913ba0fOwen Anderson  FormatSpecifierResult(bool stop = false)
2817aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner    : Start(0), Stop(stop) {}
2917aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner  FormatSpecifierResult(const char *start,
30d9ea85ab01fb0f2929ed50223d3758dceea8bcbdChris Lattner                        const FormatSpecifier &fs)
31d9ea85ab01fb0f2929ed50223d3758dceea8bcbdChris Lattner    : FS(fs), Start(start), Stop(false) {}
3217aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner
3317aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner
3417aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner  const char *getStart() const { return Start; }
35c1b5d092a0f89db5356ae79d8cc4213118f230ddChris Lattner  bool shouldStop() const { return Stop; }
364b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth  bool hasValue() const { return Start != 0; }
3717aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner  const FormatSpecifier &getValue() const {
38a9d1f2c559ef4b2549e29288fe6944e68913ba0fOwen Anderson    assert(hasValue());
39e7d3af54737b7787c92ea03588dd4301b29d0899Julien Lerouge    return FS;
404b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth  }
414b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth  const FormatSpecifier &getValue() { return FS; }
4217aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner};
43a9d1f2c559ef4b2549e29288fe6944e68913ba0fOwen Anderson} // end anonymous namespace
44e7d3af54737b7787c92ea03588dd4301b29d0899Julien Lerouge
/// Scope guard that assigns one variable to another when it is destroyed.
///
/// Both arguments are held by reference, so the value written into
/// \p valueToUpdate is whatever \p valueToCopy holds at the moment the guard
/// is destroyed, not what it held at construction.  Both referenced objects
/// must outlive the guard.
template <typename T>
class UpdateOnReturn {
  T &ValueToUpdate;
  const T &ValueToCopy;

  // A copied guard would perform the write-back a second time, so copying is
  // disabled.
  UpdateOnReturn(const UpdateOnReturn &);            // DO NOT IMPLEMENT
  UpdateOnReturn &operator=(const UpdateOnReturn &); // DO NOT IMPLEMENT
public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {}

  ~UpdateOnReturn() {
    ValueToUpdate = ValueToCopy;
  }
};
57
58static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
59  const char *I = Beg;
60  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
61
62  bool foundDigits = false;
63  unsigned accumulator = 0;
64
65  for ( ; I != E; ++I) {
66    char c = *I;
67    if (c >= '0' && c <= '9') {
68      foundDigits = true;
69      accumulator += (accumulator * 10) + (c - '0');
70      continue;
71    }
72
73    if (foundDigits)
74      return OptionalAmount(accumulator, Beg);
75
76    if (c == '*') {
77      ++I;
78      return OptionalAmount(OptionalAmount::Arg, Beg);
79    }
80
81    break;
82  }
83
84  return OptionalAmount();
85}
86
87static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
88                                                  const char *&Beg, const char *E) {
89
90  using namespace clang::analyze_printf;
91
92  const char *I = Beg;
93  const char *Start = 0;
94  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
95
96  // Look for a '%' character that indicates the start of a format specifier.
97  for ( ; I != E ; ++I) {
98    char c = *I;
99    if (c == '\0') {
100      // Detect spurious null characters, which are likely errors.
101      H.HandleNullChar(I);
102      return true;
103    }
104    if (c == '%') {
105      Start = I++;  // Record the start of the format specifier.
106      break;
107    }
108  }
109
110  // No format specifier found?
111  if (!Start)
112    return false;
113
114  if (I == E) {
115    // No more characters left?
116    H.HandleIncompleteFormatSpecifier(Start, E - Start);
117    return true;
118  }
119
120  FormatSpecifier FS;
121
122  // Look for flags (if any).
123  bool hasMore = true;
124  for ( ; I != E; ++I) {
125    switch (*I) {
126      default: hasMore = false; break;
127      case '-': FS.setIsLeftJustified(); break;
128      case '+': FS.setHasPlusPrefix(); break;
129      case ' ': FS.setHasSpacePrefix(); break;
130      case '#': FS.setHasAlternativeForm(); break;
131      case '0': FS.setHasLeadingZeros(); break;
132    }
133    if (!hasMore)
134      break;
135  }
136
137  if (I == E) {
138    // No more characters left?
139    H.HandleIncompleteFormatSpecifier(Start, E - Start);
140    return true;
141  }
142
143  // Look for the field width (if any).
144  FS.setFieldWidth(ParseAmount(I, E));
145
146  if (I == E) {
147    // No more characters left?
148    H.HandleIncompleteFormatSpecifier(Start, E - Start);
149    return true;
150  }
151
152  // Look for the precision (if any).
153  if (*I == '.') {
154    ++I;
155    if (I == E) {
156      H.HandleIncompleteFormatSpecifier(Start, E - Start);
157      return true;
158    }
159
160    FS.setPrecision(ParseAmount(I, E));
161
162    if (I == E) {
163      // No more characters left?
164      H.HandleIncompleteFormatSpecifier(Start, E - Start);
165      return true;
166    }
167  }
168
169  // Look for the length modifier.
170  LengthModifier lm = None;
171  switch (*I) {
172    default:
173      break;
174    case 'h':
175      ++I;
176      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
177      break;
178    case 'l':
179      ++I;
180      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
181      break;
182    case 'j': lm = AsIntMax;     ++I; break;
183    case 'z': lm = AsSizeT;      ++I; break;
184    case 't': lm = AsPtrDiff;    ++I; break;
185    case 'L': lm = AsLongDouble; ++I; break;
186  }
187  FS.setLengthModifier(lm);
188
189  if (I == E) {
190    // No more characters left?
191    H.HandleIncompleteFormatSpecifier(Start, E - Start);
192    return true;
193  }
194
195  if (*I == '\0') {
196	// Detect spurious null characters, which are likely errors.
197	H.HandleNullChar(I);
198	return true;
199  }
200
201  // Finally, look for the conversion specifier.
202  const char *conversionPosition = I++;
203  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
204  switch (*conversionPosition) {
205    default:
206      break;
207    // C99: 7.19.6.1 (section 8).
208    case 'd': k = ConversionSpecifier::dArg; break;
209    case 'i': k = ConversionSpecifier::iArg; break;
210    case 'o': k = ConversionSpecifier::oArg; break;
211    case 'u': k = ConversionSpecifier::uArg; break;
212    case 'x': k = ConversionSpecifier::xArg; break;
213    case 'X': k = ConversionSpecifier::XArg; break;
214    case 'f': k = ConversionSpecifier::fArg; break;
215    case 'F': k = ConversionSpecifier::FArg; break;
216    case 'e': k = ConversionSpecifier::eArg; break;
217    case 'E': k = ConversionSpecifier::EArg; break;
218    case 'g': k = ConversionSpecifier::gArg; break;
219    case 'G': k = ConversionSpecifier::GArg; break;
220    case 'a': k = ConversionSpecifier::aArg; break;
221    case 'A': k = ConversionSpecifier::AArg; break;
222    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
223    case 's': k = ConversionSpecifier::CStrArg;      break;
224    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
225    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
226    case '%': k = ConversionSpecifier::PercentArg;   break;
227    // Objective-C.
228    case '@': k = ConversionSpecifier::ObjCObjArg; break;
229	// Glibc specific.
230    case 'm': k = ConversionSpecifier::PrintErrno; break;
231  }
232  FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
233
234  if (k == ConversionSpecifier::InvalidSpecifier) {
235    H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
236    return false; // Keep processing format specifiers.
237  }
238  return FormatSpecifierResult(Start, FS);
239}
240
241bool clang::ParseFormatString(FormatStringHandler &H,
242                       const char *I, const char *E) {
243  // Keep looking for a format specifier until we have exhausted the string.
244  while (I != E) {
245    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
246    // Did a fail-stop error of any kind occur when parsing the specifier?
247    // If so, don't do any more processing.
248    if (FSR.shouldStop())
249      return true;;
250    // Did we exhaust the string or encounter an error that
251    // we can recover from?
252    if (!FSR.hasValue())
253      continue;
254    // We have a format specifier.  Pass it to the callback.
255    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
256                                 I - FSR.getStart()))
257      return true;
258  }
259  assert(I == E && "Format string not exhausted");
260  return false;
261}
262
263FormatStringHandler::~FormatStringHandler() {}
264