PrintfFormatString.cpp revision 5c8a08c5da6439047a8521f1b4ea6a61438917b4
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16#include "clang/AST/ASTContext.h"
17
18using clang::analyze_printf::FormatSpecifier;
19using clang::analyze_printf::OptionalAmount;
20using clang::analyze_printf::ArgTypeResult;
21using clang::analyze_printf::FormatStringHandler;
22using namespace clang;
23
24namespace {
25class FormatSpecifierResult {
26  FormatSpecifier FS;
27  const char *Start;
28  bool Stop;
29public:
30  FormatSpecifierResult(bool stop = false)
31    : Start(0), Stop(stop) {}
32  FormatSpecifierResult(const char *start,
33                        const FormatSpecifier &fs)
34    : FS(fs), Start(start), Stop(false) {}
35
36
37  const char *getStart() const { return Start; }
38  bool shouldStop() const { return Stop; }
39  bool hasValue() const { return Start != 0; }
40  const FormatSpecifier &getValue() const {
41    assert(hasValue());
42    return FS;
43  }
44  const FormatSpecifier &getValue() { return FS; }
45};
46} // end anonymous namespace
47
/// Scope guard that assigns one variable to another when the guard is
/// destroyed.
///
/// Both variables are held by reference, so the value written back is
/// whatever the source holds at destruction time, not at construction time.
template <class T>
class UpdateOnReturn {
  T &Target;        // Variable that receives the value.
  const T &Source;  // Variable whose final value is written into Target.

public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Target(valueToUpdate),
      Source(valueToCopy) {
  }

  ~UpdateOnReturn() { Target = Source; }
};
60
61//===----------------------------------------------------------------------===//
62// Methods for parsing format strings.
63//===----------------------------------------------------------------------===//
64
65static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
66  const char *I = Beg;
67  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
68
69  bool foundDigits = false;
70  unsigned accumulator = 0;
71
72  for ( ; I != E; ++I) {
73    char c = *I;
74    if (c >= '0' && c <= '9') {
75      foundDigits = true;
76      accumulator += (accumulator * 10) + (c - '0');
77      continue;
78    }
79
80    if (foundDigits)
81      return OptionalAmount(accumulator, Beg);
82
83    if (c == '*') {
84      ++I;
85      return OptionalAmount(OptionalAmount::Arg, Beg);
86    }
87
88    break;
89  }
90
91  return OptionalAmount();
92}
93
94static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
95                                                  const char *&Beg,
96                                                  const char *E) {
97
98  using namespace clang::analyze_printf;
99
100  const char *I = Beg;
101  const char *Start = 0;
102  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
103
104  // Look for a '%' character that indicates the start of a format specifier.
105  for ( ; I != E ; ++I) {
106    char c = *I;
107    if (c == '\0') {
108      // Detect spurious null characters, which are likely errors.
109      H.HandleNullChar(I);
110      return true;
111    }
112    if (c == '%') {
113      Start = I++;  // Record the start of the format specifier.
114      break;
115    }
116  }
117
118  // No format specifier found?
119  if (!Start)
120    return false;
121
122  if (I == E) {
123    // No more characters left?
124    H.HandleIncompleteFormatSpecifier(Start, E - Start);
125    return true;
126  }
127
128  FormatSpecifier FS;
129
130  // Look for flags (if any).
131  bool hasMore = true;
132  for ( ; I != E; ++I) {
133    switch (*I) {
134      default: hasMore = false; break;
135      case '-': FS.setIsLeftJustified(); break;
136      case '+': FS.setHasPlusPrefix(); break;
137      case ' ': FS.setHasSpacePrefix(); break;
138      case '#': FS.setHasAlternativeForm(); break;
139      case '0': FS.setHasLeadingZeros(); break;
140    }
141    if (!hasMore)
142      break;
143  }
144
145  if (I == E) {
146    // No more characters left?
147    H.HandleIncompleteFormatSpecifier(Start, E - Start);
148    return true;
149  }
150
151  // Look for the field width (if any).
152  FS.setFieldWidth(ParseAmount(I, E));
153
154  if (I == E) {
155    // No more characters left?
156    H.HandleIncompleteFormatSpecifier(Start, E - Start);
157    return true;
158  }
159
160  // Look for the precision (if any).
161  if (*I == '.') {
162    ++I;
163    if (I == E) {
164      H.HandleIncompleteFormatSpecifier(Start, E - Start);
165      return true;
166    }
167
168    FS.setPrecision(ParseAmount(I, E));
169
170    if (I == E) {
171      // No more characters left?
172      H.HandleIncompleteFormatSpecifier(Start, E - Start);
173      return true;
174    }
175  }
176
177  // Look for the length modifier.
178  LengthModifier lm = None;
179  switch (*I) {
180    default:
181      break;
182    case 'h':
183      ++I;
184      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
185      break;
186    case 'l':
187      ++I;
188      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
189      break;
190    case 'j': lm = AsIntMax;     ++I; break;
191    case 'z': lm = AsSizeT;      ++I; break;
192    case 't': lm = AsPtrDiff;    ++I; break;
193    case 'L': lm = AsLongDouble; ++I; break;
194    case 'q': lm = AsLongLong;   ++I; break;
195  }
196  FS.setLengthModifier(lm);
197
198  if (I == E) {
199    // No more characters left?
200    H.HandleIncompleteFormatSpecifier(Start, E - Start);
201    return true;
202  }
203
204  if (*I == '\0') {
205    // Detect spurious null characters, which are likely errors.
206    H.HandleNullChar(I);
207    return true;
208  }
209
210  // Finally, look for the conversion specifier.
211  const char *conversionPosition = I++;
212  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
213  switch (*conversionPosition) {
214    default:
215      break;
216    // C99: 7.19.6.1 (section 8).
217    case 'd': k = ConversionSpecifier::dArg; break;
218    case 'i': k = ConversionSpecifier::iArg; break;
219    case 'o': k = ConversionSpecifier::oArg; break;
220    case 'u': k = ConversionSpecifier::uArg; break;
221    case 'x': k = ConversionSpecifier::xArg; break;
222    case 'X': k = ConversionSpecifier::XArg; break;
223    case 'f': k = ConversionSpecifier::fArg; break;
224    case 'F': k = ConversionSpecifier::FArg; break;
225    case 'e': k = ConversionSpecifier::eArg; break;
226    case 'E': k = ConversionSpecifier::EArg; break;
227    case 'g': k = ConversionSpecifier::gArg; break;
228    case 'G': k = ConversionSpecifier::GArg; break;
229    case 'a': k = ConversionSpecifier::aArg; break;
230    case 'A': k = ConversionSpecifier::AArg; break;
231    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
232    case 's': k = ConversionSpecifier::CStrArg;      break;
233    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
234    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
235    case '%': k = ConversionSpecifier::PercentArg;   break;
236    // Objective-C.
237    case '@': k = ConversionSpecifier::ObjCObjArg; break;
238    // Glibc specific.
239    case 'm': k = ConversionSpecifier::PrintErrno; break;
240  }
241  FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
242
243  if (k == ConversionSpecifier::InvalidSpecifier) {
244    H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
245    return false; // Keep processing format specifiers.
246  }
247  return FormatSpecifierResult(Start, FS);
248}
249
250bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
251                       const char *I, const char *E) {
252  // Keep looking for a format specifier until we have exhausted the string.
253  while (I != E) {
254    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
255    // Did a fail-stop error of any kind occur when parsing the specifier?
256    // If so, don't do any more processing.
257    if (FSR.shouldStop())
258      return true;;
259    // Did we exhaust the string or encounter an error that
260    // we can recover from?
261    if (!FSR.hasValue())
262      continue;
263    // We have a format specifier.  Pass it to the callback.
264    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
265                                 I - FSR.getStart()))
266      return true;
267  }
268  assert(I == E && "Format string not exhausted");
269  return false;
270}
271
272FormatStringHandler::~FormatStringHandler() {}
273
274//===----------------------------------------------------------------------===//
275// Methods on ArgTypeResult.
276//===----------------------------------------------------------------------===//
277
278bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
279  assert(isValid());
280
281  if (K == UnknownTy)
282    return true;
283
284  if (K == SpecificTy) {
285    argTy = C.getCanonicalType(argTy).getUnqualifiedType();
286
287    if (T == argTy)
288      return true;
289
290    if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
291      switch (BT->getKind()) {
292        default:
293          break;
294        case BuiltinType::Char_S:
295        case BuiltinType::SChar:
296          return T == C.UnsignedCharTy;
297        case BuiltinType::Char_U:
298        case BuiltinType::UChar:
299          return T == C.SignedCharTy;
300        case BuiltinType::Short:
301          return T == C.UnsignedShortTy;
302        case BuiltinType::UShort:
303          return T == C.ShortTy;
304        case BuiltinType::Int:
305          return T == C.UnsignedIntTy;
306        case BuiltinType::UInt:
307          return T == C.IntTy;
308        case BuiltinType::Long:
309          return T == C.UnsignedLongTy;
310        case BuiltinType::ULong:
311          return T == C.LongTy;
312        case BuiltinType::LongLong:
313          return T == C.UnsignedLongLongTy;
314        case BuiltinType::ULongLong:
315          return T == C.LongLongTy;
316      }
317
318    return false;
319  }
320
321  if (K == CStrTy) {
322    const PointerType *PT = argTy->getAs<PointerType>();
323    if (!PT)
324      return false;
325
326    QualType pointeeTy = PT->getPointeeType();
327
328    if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
329      switch (BT->getKind()) {
330        case BuiltinType::Void:
331        case BuiltinType::Char_U:
332        case BuiltinType::UChar:
333        case BuiltinType::Char_S:
334        case BuiltinType::SChar:
335          return true;
336        default:
337          break;
338      }
339
340    return false;
341  }
342
343  if (K == WCStrTy) {
344    const PointerType *PT = argTy->getAs<PointerType>();
345    if (!PT)
346      return false;
347
348    QualType pointeeTy = PT->getPointeeType();
349    return pointeeTy == C.WCharTy;
350  }
351
352  return false;
353}
354
355QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
356  assert(isValid());
357  if (K == SpecificTy)
358    return T;
359  if (K == CStrTy)
360    return C.getPointerType(C.CharTy);
361  if (K == WCStrTy)
362    return C.getPointerType(C.WCharTy);
363  if (K == ObjCPointerTy)
364    return C.ObjCBuiltinIdTy;
365
366  return QualType();
367}
368
369//===----------------------------------------------------------------------===//
370// Methods on OptionalAmount.
371//===----------------------------------------------------------------------===//
372
373ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
374  return Ctx.IntTy;
375}
376
377//===----------------------------------------------------------------------===//
378// Methods on FormatSpecifier.
379//===----------------------------------------------------------------------===//
380
381ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
382  if (!CS.consumesDataArgument())
383    return ArgTypeResult::Invalid();
384
385  if (CS.isIntArg())
386    switch (LM) {
387      case AsLongDouble:
388        return ArgTypeResult::Invalid();
389      case None: return Ctx.IntTy;
390      case AsChar: return Ctx.SignedCharTy;
391      case AsShort: return Ctx.ShortTy;
392      case AsLong: return Ctx.LongTy;
393      case AsLongLong: return Ctx.LongLongTy;
394      case AsIntMax:
395        // FIXME: Return unknown for now.
396        return ArgTypeResult();
397      case AsSizeT: return Ctx.getSizeType();
398      case AsPtrDiff: return Ctx.getPointerDiffType();
399    }
400
401  if (CS.isUIntArg())
402    switch (LM) {
403      case AsLongDouble:
404        return ArgTypeResult::Invalid();
405      case None: return Ctx.UnsignedIntTy;
406      case AsChar: return Ctx.UnsignedCharTy;
407      case AsShort: return Ctx.UnsignedShortTy;
408      case AsLong: return Ctx.UnsignedLongTy;
409      case AsLongLong: return Ctx.UnsignedLongLongTy;
410      case AsIntMax:
411        // FIXME: Return unknown for now.
412        return ArgTypeResult();
413      case AsSizeT:
414        // FIXME: How to get the corresponding unsigned
415        // version of size_t?
416        return ArgTypeResult();
417      case AsPtrDiff:
418        // FIXME: How to get the corresponding unsigned
419        // version of ptrdiff_t?
420        return ArgTypeResult();
421    }
422
423  if (CS.isDoubleArg()) {
424    if (LM == AsLongDouble)
425      return Ctx.LongDoubleTy;
426    return Ctx.DoubleTy;
427  }
428
429  if (CS.getKind() == ConversionSpecifier::CStrArg)
430    return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy
431                                          : ArgTypeResult::CStrTy);
432
433  // FIXME: Handle other cases.
434  return ArgTypeResult();
435}
436
437