PrintfFormatString.cpp revision 7f70dc85d5055c19c8003f43a59135de211ad1b9
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16#include "clang/AST/ASTContext.h"
17
18using clang::analyze_printf::FormatSpecifier;
19using clang::analyze_printf::OptionalAmount;
20using clang::analyze_printf::ArgTypeResult;
21using clang::analyze_printf::FormatStringHandler;
22using namespace clang;
23
24namespace {
25class FormatSpecifierResult {
26  FormatSpecifier FS;
27  const char *Start;
28  bool Stop;
29public:
30  FormatSpecifierResult(bool stop = false)
31    : Start(0), Stop(stop) {}
32  FormatSpecifierResult(const char *start,
33                        const FormatSpecifier &fs)
34    : FS(fs), Start(start), Stop(false) {}
35
36
37  const char *getStart() const { return Start; }
38  bool shouldStop() const { return Stop; }
39  bool hasValue() const { return Start != 0; }
40  const FormatSpecifier &getValue() const {
41    assert(hasValue());
42    return FS;
43  }
44  const FormatSpecifier &getValue() { return FS; }
45};
46} // end anonymous namespace
47
/// Scope guard that assigns one variable to another when it is destroyed.
///
/// Both constructor arguments are held by reference, so the value written is
/// whatever the source holds at destruction time, not at construction time.
template <typename T>
class UpdateOnReturn {
public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Dest(valueToUpdate), Src(valueToCopy) {}

  ~UpdateOnReturn() { Dest = Src; }

private:
  T &Dest;       // Variable overwritten by the destructor.
  const T &Src;  // Variable read by the destructor.
};
60
61//===----------------------------------------------------------------------===//
62// Methods for parsing format strings.
63//===----------------------------------------------------------------------===//
64
65static OptionalAmount ParseAmount(const char *&Beg, const char *E,
66                                  unsigned &argIndex) {
67  const char *I = Beg;
68  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
69
70  bool foundDigits = false;
71  unsigned accumulator = 0;
72
73  for ( ; I != E; ++I) {
74    char c = *I;
75    if (c >= '0' && c <= '9') {
76      foundDigits = true;
77      accumulator += (accumulator * 10) + (c - '0');
78      continue;
79    }
80
81    if (foundDigits)
82      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
83
84    if (c == '*') {
85      ++I;
86      return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
87    }
88
89    break;
90  }
91
92  return OptionalAmount();
93}
94
95static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
96                                                  const char *&Beg,
97                                                  const char *E,
98                                                  unsigned &argIndex) {
99
100  using namespace clang::analyze_printf;
101
102  const char *I = Beg;
103  const char *Start = 0;
104  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
105
106  // Look for a '%' character that indicates the start of a format specifier.
107  for ( ; I != E ; ++I) {
108    char c = *I;
109    if (c == '\0') {
110      // Detect spurious null characters, which are likely errors.
111      H.HandleNullChar(I);
112      return true;
113    }
114    if (c == '%') {
115      Start = I++;  // Record the start of the format specifier.
116      break;
117    }
118  }
119
120  // No format specifier found?
121  if (!Start)
122    return false;
123
124  if (I == E) {
125    // No more characters left?
126    H.HandleIncompleteFormatSpecifier(Start, E - Start);
127    return true;
128  }
129
130  FormatSpecifier FS;
131
132  // Look for flags (if any).
133  bool hasMore = true;
134  for ( ; I != E; ++I) {
135    switch (*I) {
136      default: hasMore = false; break;
137      case '-': FS.setIsLeftJustified(); break;
138      case '+': FS.setHasPlusPrefix(); break;
139      case ' ': FS.setHasSpacePrefix(); break;
140      case '#': FS.setHasAlternativeForm(); break;
141      case '0': FS.setHasLeadingZeros(); break;
142    }
143    if (!hasMore)
144      break;
145  }
146
147  if (I == E) {
148    // No more characters left?
149    H.HandleIncompleteFormatSpecifier(Start, E - Start);
150    return true;
151  }
152
153  // Look for the field width (if any).
154  FS.setFieldWidth(ParseAmount(I, E, argIndex));
155
156  if (I == E) {
157    // No more characters left?
158    H.HandleIncompleteFormatSpecifier(Start, E - Start);
159    return true;
160  }
161
162  // Look for the precision (if any).
163  if (*I == '.') {
164    ++I;
165    if (I == E) {
166      H.HandleIncompleteFormatSpecifier(Start, E - Start);
167      return true;
168    }
169
170    FS.setPrecision(ParseAmount(I, E, argIndex));
171
172    if (I == E) {
173      // No more characters left?
174      H.HandleIncompleteFormatSpecifier(Start, E - Start);
175      return true;
176    }
177  }
178
179  // Look for the length modifier.
180  LengthModifier lm = None;
181  switch (*I) {
182    default:
183      break;
184    case 'h':
185      ++I;
186      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
187      break;
188    case 'l':
189      ++I;
190      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
191      break;
192    case 'j': lm = AsIntMax;     ++I; break;
193    case 'z': lm = AsSizeT;      ++I; break;
194    case 't': lm = AsPtrDiff;    ++I; break;
195    case 'L': lm = AsLongDouble; ++I; break;
196    case 'q': lm = AsLongLong;   ++I; break;
197  }
198  FS.setLengthModifier(lm);
199
200  if (I == E) {
201    // No more characters left?
202    H.HandleIncompleteFormatSpecifier(Start, E - Start);
203    return true;
204  }
205
206  if (*I == '\0') {
207    // Detect spurious null characters, which are likely errors.
208    H.HandleNullChar(I);
209    return true;
210  }
211
212  // Finally, look for the conversion specifier.
213  const char *conversionPosition = I++;
214  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
215  switch (*conversionPosition) {
216    default:
217      break;
218    // C99: 7.19.6.1 (section 8).
219    case '%': k = ConversionSpecifier::PercentArg;   break;
220    case 'A': k = ConversionSpecifier::AArg; break;
221    case 'E': k = ConversionSpecifier::EArg; break;
222    case 'F': k = ConversionSpecifier::FArg; break;
223    case 'G': k = ConversionSpecifier::GArg; break;
224    case 'X': k = ConversionSpecifier::XArg; break;
225    case 'a': k = ConversionSpecifier::aArg; break;
226    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
227    case 'd': k = ConversionSpecifier::dArg; break;
228    case 'e': k = ConversionSpecifier::eArg; break;
229    case 'f': k = ConversionSpecifier::fArg; break;
230    case 'g': k = ConversionSpecifier::gArg; break;
231    case 'i': k = ConversionSpecifier::iArg; break;
232    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
233    case 'o': k = ConversionSpecifier::oArg; break;
234    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
235    case 's': k = ConversionSpecifier::CStrArg;      break;
236    case 'u': k = ConversionSpecifier::uArg; break;
237    case 'x': k = ConversionSpecifier::xArg; break;
238    // Mac OS X (unicode) specific
239    case 'C': k = ConversionSpecifier::CArg; break;
240    case 'S': k = ConversionSpecifier::UnicodeStrArg; break;
241    // Objective-C.
242    case '@': k = ConversionSpecifier::ObjCObjArg; break;
243    // Glibc specific.
244    case 'm': k = ConversionSpecifier::PrintErrno; break;
245  }
246  ConversionSpecifier CS(conversionPosition, k);
247  FS.setConversionSpecifier(CS);
248  if (CS.consumesDataArgument())
249    FS.setArgIndex(argIndex++);
250
251  if (k == ConversionSpecifier::InvalidSpecifier) {
252    // Assume the conversion takes one argument.
253    return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
254  }
255  return FormatSpecifierResult(Start, FS);
256}
257
258bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
259                       const char *I, const char *E) {
260
261  unsigned argIndex = 0;
262
263  // Keep looking for a format specifier until we have exhausted the string.
264  while (I != E) {
265    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
266    // Did a fail-stop error of any kind occur when parsing the specifier?
267    // If so, don't do any more processing.
268    if (FSR.shouldStop())
269      return true;;
270    // Did we exhaust the string or encounter an error that
271    // we can recover from?
272    if (!FSR.hasValue())
273      continue;
274    // We have a format specifier.  Pass it to the callback.
275    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
276                                 I - FSR.getStart()))
277      return true;
278  }
279  assert(I == E && "Format string not exhausted");
280  return false;
281}
282
283FormatStringHandler::~FormatStringHandler() {}
284
285//===----------------------------------------------------------------------===//
286// Methods on ArgTypeResult.
287//===----------------------------------------------------------------------===//
288
289bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
290  assert(isValid());
291
292  if (K == UnknownTy)
293    return true;
294
295  if (K == SpecificTy) {
296    argTy = C.getCanonicalType(argTy).getUnqualifiedType();
297
298    if (T == argTy)
299      return true;
300
301    if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
302      switch (BT->getKind()) {
303        default:
304          break;
305        case BuiltinType::Char_S:
306        case BuiltinType::SChar:
307          return T == C.UnsignedCharTy;
308        case BuiltinType::Char_U:
309        case BuiltinType::UChar:
310          return T == C.SignedCharTy;
311        case BuiltinType::Short:
312          return T == C.UnsignedShortTy;
313        case BuiltinType::UShort:
314          return T == C.ShortTy;
315        case BuiltinType::Int:
316          return T == C.UnsignedIntTy;
317        case BuiltinType::UInt:
318          return T == C.IntTy;
319        case BuiltinType::Long:
320          return T == C.UnsignedLongTy;
321        case BuiltinType::ULong:
322          return T == C.LongTy;
323        case BuiltinType::LongLong:
324          return T == C.UnsignedLongLongTy;
325        case BuiltinType::ULongLong:
326          return T == C.LongLongTy;
327      }
328
329    return false;
330  }
331
332  if (K == CStrTy) {
333    const PointerType *PT = argTy->getAs<PointerType>();
334    if (!PT)
335      return false;
336
337    QualType pointeeTy = PT->getPointeeType();
338
339    if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
340      switch (BT->getKind()) {
341        case BuiltinType::Void:
342        case BuiltinType::Char_U:
343        case BuiltinType::UChar:
344        case BuiltinType::Char_S:
345        case BuiltinType::SChar:
346          return true;
347        default:
348          break;
349      }
350
351    return false;
352  }
353
354  if (K == WCStrTy) {
355    const PointerType *PT = argTy->getAs<PointerType>();
356    if (!PT)
357      return false;
358
359    QualType pointeeTy =
360      C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
361
362    return pointeeTy == C.getWCharType();
363  }
364
365  return false;
366}
367
368QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
369  assert(isValid());
370  if (K == SpecificTy)
371    return T;
372  if (K == CStrTy)
373    return C.getPointerType(C.CharTy);
374  if (K == WCStrTy)
375    return C.getPointerType(C.getWCharType());
376  if (K == ObjCPointerTy)
377    return C.ObjCBuiltinIdTy;
378
379  return QualType();
380}
381
382//===----------------------------------------------------------------------===//
383// Methods on OptionalAmount.
384//===----------------------------------------------------------------------===//
385
386ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
387  return Ctx.IntTy;
388}
389
390//===----------------------------------------------------------------------===//
391// Methods on FormatSpecifier.
392//===----------------------------------------------------------------------===//
393
394ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
395  if (!CS.consumesDataArgument())
396    return ArgTypeResult::Invalid();
397
398  if (CS.isIntArg())
399    switch (LM) {
400      case AsLongDouble:
401        return ArgTypeResult::Invalid();
402      case None: return Ctx.IntTy;
403      case AsChar: return Ctx.SignedCharTy;
404      case AsShort: return Ctx.ShortTy;
405      case AsLong: return Ctx.LongTy;
406      case AsLongLong: return Ctx.LongLongTy;
407      case AsIntMax:
408        // FIXME: Return unknown for now.
409        return ArgTypeResult();
410      case AsSizeT: return Ctx.getSizeType();
411      case AsPtrDiff: return Ctx.getPointerDiffType();
412    }
413
414  if (CS.isUIntArg())
415    switch (LM) {
416      case AsLongDouble:
417        return ArgTypeResult::Invalid();
418      case None: return Ctx.UnsignedIntTy;
419      case AsChar: return Ctx.UnsignedCharTy;
420      case AsShort: return Ctx.UnsignedShortTy;
421      case AsLong: return Ctx.UnsignedLongTy;
422      case AsLongLong: return Ctx.UnsignedLongLongTy;
423      case AsIntMax:
424        // FIXME: Return unknown for now.
425        return ArgTypeResult();
426      case AsSizeT:
427        // FIXME: How to get the corresponding unsigned
428        // version of size_t?
429        return ArgTypeResult();
430      case AsPtrDiff:
431        // FIXME: How to get the corresponding unsigned
432        // version of ptrdiff_t?
433        return ArgTypeResult();
434    }
435
436  if (CS.isDoubleArg()) {
437    if (LM == AsLongDouble)
438      return Ctx.LongDoubleTy;
439    return Ctx.DoubleTy;
440  }
441
442  switch (CS.getKind()) {
443    case ConversionSpecifier::CStrArg:
444      return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy                                            : ArgTypeResult::CStrTy);
445    case ConversionSpecifier::UnicodeStrArg:
446      // FIXME: This appears to be Mac OS X specific.
447      return ArgTypeResult::WCStrTy;
448    case ConversionSpecifier::CArg:
449      return Ctx.WCharTy;
450    default:
451      break;
452  }
453
454  // FIXME: Handle other cases.
455  return ArgTypeResult();
456}
457
458