PrintfFormatString.cpp revision e812a3122c72da6cf7e69c0c68939ca4bf1cbfc4
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16
17using namespace clang;
18using namespace printf;
19
20namespace {
21class FormatSpecifierResult {
22  FormatSpecifier FS;
23  const char *Start;
24  bool HasError;
25public:
26  FormatSpecifierResult(bool err = false)
27    : Start(0), HasError(err) {}
28  FormatSpecifierResult(const char *start,
29                        const printf::FormatSpecifier &fs)
30    : FS(fs), Start(start), HasError(false) {}
31
32
33  const char *getStart() const { return Start; }
34  bool hasError() const { return HasError; }
35  bool hasValue() const { return Start != 0; }
36  const FormatSpecifier &getValue() const {
37    assert(hasValue());
38    return FS;
39  }
40  const printf::FormatSpecifier &getValue() { return FS; }
41};
42} // end anonymous namespace
43
/// Scope guard that assigns one variable to another when it is destroyed.
///
/// Both variables are held by reference, so the value written is whatever the
/// source holds at destruction time rather than at construction time.
template <typename T>
class UpdateOnReturn {
  T &Target;
  const T &Source;
public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Target(valueToUpdate), Source(valueToCopy) {}

  ~UpdateOnReturn() { Target = Source; }
};
56
57static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
58  const char *I = Beg;
59  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
60
61  bool foundDigits = false;
62  unsigned accumulator = 0;
63
64  for ( ; I != E; ++I) {
65    char c = *I;
66    if (c >= '0' && c <= '9') {
67      foundDigits = true;
68      accumulator += (accumulator * 10) + (c - '0');
69      continue;
70    }
71
72    if (foundDigits)
73      return OptionalAmount(accumulator);
74
75    if (c == '*')
76      return OptionalAmount(OptionalAmount::Arg);
77
78    break;
79  }
80
81  return OptionalAmount();
82}
83
84static FormatSpecifierResult ParseFormatSpecifier(printf::FormatStringHandler &H,
85                                                  const char *&Beg, const char *E) {
86
87  const char *I = Beg;
88  const char *Start = 0;
89  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
90
91  // Look for a '%' character that indicates the start of a format specifier.
92  while (I != E) {
93    char c = *I;
94    ++I;
95    if (c == '\0') {
96      // Detect spurious null characters, which are likely errors.
97      H.HandleNullChar(I);
98      return true;
99    }
100    if (c == '%') {
101      Start = I;  // Record the start of the format specifier.
102      break;
103    }
104  }
105
106  // No format specifier found?
107  if (!Start)
108    return false;
109
110  if (I == E) {
111    // No more characters left?
112    H.HandleIncompleteFormatSpecifier(Start, E);
113    return true;
114  }
115
116  FormatSpecifier FS;
117
118  // Look for flags (if any).
119  bool hasMore = true;
120  for ( ; I != E; ++I) {
121    switch (*I) {
122      default: hasMore = false; break;
123      case '-': FS.setIsLeftJustified(); break;
124      case '+': FS.setHasPlusPrefix(); break;
125      case ' ': FS.setHasSpacePrefix(); break;
126      case '#': FS.setHasAlternativeForm(); break;
127      case '0': FS.setHasLeadingZeros(); break;
128    }
129    if (!hasMore)
130      break;
131  }
132
133  if (I == E) {
134    // No more characters left?
135    H.HandleIncompleteFormatSpecifier(Start, E);
136    return true;
137  }
138
139  // Look for the field width (if any).
140  FS.setFieldWidth(ParseAmount(I, E));
141
142  if (I == E) {
143    // No more characters left?
144    H.HandleIncompleteFormatSpecifier(Start, E);
145    return true;
146  }
147
148  // Look for the precision (if any).
149  if (*I == '.') {
150    const char *startPrecision = I++;
151    if (I == E) {
152      H.HandleIncompletePrecision(I - 1);
153      return true;
154    }
155
156    FS.setPrecision(ParseAmount(I, E));
157
158    if (I == E) {
159      // No more characters left?
160      H.HandleIncompletePrecision(startPrecision);
161      return true;
162    }
163  }
164
165  // Look for the length modifier.
166  LengthModifier lm = None;
167  switch (*I) {
168    default:
169      break;
170    case 'h':
171      ++I;
172      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
173      break;
174    case 'l':
175      ++I;
176      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
177      break;
178    case 'j': lm = AsIntMax;     ++I; break;
179    case 'z': lm = AsSizeT;      ++I; break;
180    case 't': lm = AsPtrDiff;    ++I; break;
181    case 'L': lm = AsLongDouble; ++I; break;
182  }
183  FS.setLengthModifier(lm);
184
185  if (I == E) {
186    // No more characters left?
187    H.HandleIncompleteFormatSpecifier(Start, E);
188    return true;
189  }
190
191  // Finally, look for the conversion specifier.
192  ConversionSpecifier::Kind cs;
193  switch (*I) {
194    default:
195      H.HandleInvalidConversionSpecifier(I);
196      return true;
197    // C99: 7.19.6.1 (section 8).
198    case 'd': cs = ConversionSpecifier::dArg; break;
199    case 'i': cs = ConversionSpecifier::iArg; break;
200    case 'o': cs = ConversionSpecifier::oArg; break;
201    case 'u': cs = ConversionSpecifier::uArg; break;
202    case 'x': cs = ConversionSpecifier::xArg; break;
203    case 'X': cs = ConversionSpecifier::XArg; break;
204    case 'f': cs = ConversionSpecifier::fArg; break;
205    case 'F': cs = ConversionSpecifier::FArg; break;
206    case 'e': cs = ConversionSpecifier::eArg; break;
207    case 'E': cs = ConversionSpecifier::EArg; break;
208    case 'g': cs = ConversionSpecifier::gArg; break;
209    case 'G': cs = ConversionSpecifier::GArg; break;
210    case 'a': cs = ConversionSpecifier::aArg; break;
211    case 'A': cs = ConversionSpecifier::AArg; break;
212    case 'c': cs = ConversionSpecifier::IntAsCharArg; break;
213    case 's': cs = ConversionSpecifier::CStrArg;      break;
214    case 'p': cs = ConversionSpecifier::VoidPtrArg;   break;
215    case 'n': cs = ConversionSpecifier::OutIntPtrArg; break;
216    case '%': cs = ConversionSpecifier::PercentArg;   break;
217    // Objective-C.
218    case '@': cs = ConversionSpecifier::ObjCObjArg; break;
219  }
220  FS.setConversionSpecifier(cs);
221  return FormatSpecifierResult(Start, FS);
222}
223
224bool ParseFormatSring(FormatStringHandler &H, const char *I, const char *E) {
225  // Keep looking for a format specifier until we have exhausted the string.
226  while (I != E) {
227    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
228    // Did an error of any kind occur when parsing the specifier?  If so,
229    // don't do any more processing.
230    if (FSR.hasError())
231      return true;;
232    // Done processing the string?
233    if (!FSR.hasValue())
234      break;
235    // We have a format specifier.  Pass it to the callback.
236    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), I))
237      return false;
238  }
239  assert(I == E && "Format string not exhausted");
240  return false;
241}
242
243FormatStringHandler::~FormatStringHandler() {}
244