PrintfFormatString.cpp revision 62f46195bb61f91f9b3b476f4dcfa10126c18ff2
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/PrintfFormatString.h"
16#include "clang/AST/ASTContext.h"
17
18using clang::analyze_printf::ArgTypeResult;
19using clang::analyze_printf::FormatSpecifier;
20using clang::analyze_printf::FormatStringHandler;
21using clang::analyze_printf::OptionalAmount;
22using clang::analyze_printf::PositionContext;
23
24using namespace clang;
25
26namespace {
27class FormatSpecifierResult {
28  FormatSpecifier FS;
29  const char *Start;
30  bool Stop;
31public:
32  FormatSpecifierResult(bool stop = false)
33    : Start(0), Stop(stop) {}
34  FormatSpecifierResult(const char *start,
35                        const FormatSpecifier &fs)
36    : FS(fs), Start(start), Stop(false) {}
37
38
39  const char *getStart() const { return Start; }
40  bool shouldStop() const { return Stop; }
41  bool hasValue() const { return Start != 0; }
42  const FormatSpecifier &getValue() const {
43    assert(hasValue());
44    return FS;
45  }
46  const FormatSpecifier &getValue() { return FS; }
47};
48} // end anonymous namespace
49
// Scope guard that assigns one variable to another when the guard is
// destroyed.  Both variables are held by reference, so the value that gets
// assigned is whatever 'valueToCopy' holds at destruction time, not what it
// held when the guard was created.
template <typename T>
class UpdateOnReturn {
  T &Destination;
  const T &Source;

public:
  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
    : Destination(valueToUpdate), Source(valueToCopy) {}

  // Perform the deferred assignment.
  ~UpdateOnReturn() { Destination = Source; }
};
62
63//===----------------------------------------------------------------------===//
64// Methods for parsing format strings.
65//===----------------------------------------------------------------------===//
66
67static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
68  const char *I = Beg;
69  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
70
71  unsigned accumulator = 0;
72
73  for ( ; I != E; ++I) {
74    char c = *I;
75    if (c >= '0' && c <= '9') {
76      // Ignore '0' on the first character.
77      if (c == '0' && I == Beg)
78        break;
79      accumulator += (accumulator * 10) + (c - '0');
80      continue;
81    }
82
83    if (accumulator)
84      return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
85
86    break;
87  }
88
89  return OptionalAmount();
90}
91
92static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
93                                             unsigned &argIndex) {
94  if (*Beg == '*') {
95    ++Beg;
96    return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
97  }
98
99  return ParseAmount(Beg, E);
100}
101
102static OptionalAmount ParsePositionAmount(FormatStringHandler &H,
103                                          const char *Start,
104                                          const char *&Beg, const char *E,
105                                          PositionContext p) {
106  if (*Beg == '*') {
107    const char *I = Beg + 1;
108    const OptionalAmount &Amt = ParseAmount(I, E);
109
110    if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
111      H.HandleInvalidPosition(Beg, I - Beg, p);
112      return OptionalAmount(false);
113    }
114
115    if (I== E) {
116      // No more characters left?
117      H.HandleIncompleteFormatSpecifier(Start, E - Start);
118      return OptionalAmount(false);
119    }
120
121    if (*I == '$') {
122      const char *Tmp = Beg;
123      Beg = ++I;
124      return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
125                            Tmp);
126    }
127
128    H.HandleInvalidPosition(Beg, I - Beg, p);
129    return OptionalAmount(false);
130  }
131
132  return ParseAmount(Beg, E);
133}
134
135static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
136                           const char *Start, const char *&Beg, const char *E,
137                           unsigned *argIndex) {
138  if (argIndex) {
139    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
140  }
141  else {
142    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
143                                                  analyze_printf::PrecisionPos);
144    if (Amt.isInvalid())
145      return true;
146    FS.setPrecision(Amt);
147  }
148  return false;
149}
150
151static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
152                            const char *Start, const char *&Beg, const char *E,
153                            unsigned *argIndex) {
154  // FIXME: Support negative field widths.
155  if (argIndex) {
156    FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
157  }
158  else {
159    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
160                                                 analyze_printf::FieldWidthPos);
161    if (Amt.isInvalid())
162      return true;
163    FS.setFieldWidth(Amt);
164  }
165  return false;
166}
167
168
169static bool ParseArgPosition(FormatStringHandler &H,
170                             FormatSpecifier &FS, const char *Start,
171                             const char *&Beg, const char *E) {
172
173  using namespace clang::analyze_printf;
174  const char *I = Beg;
175
176  const OptionalAmount &Amt = ParseAmount(I, E);
177
178  if (I == E) {
179    // No more characters left?
180    H.HandleIncompleteFormatSpecifier(Start, E - Start);
181    return true;
182  }
183
184  if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
185    FS.setArgIndex(Amt.getConstantAmount() - 1);
186    FS.setUsesPositionalArg();
187    // Update the caller's pointer if we decided to consume
188    // these characters.
189    Beg = I;
190    return false;
191  }
192
193  // Special case: '%0$', since this is an easy mistake.
194  if (*I == '0' && (I+1) != E && *(I+1) == '$') {
195    H.HandleZeroPosition(Start, I - Start + 2);
196    return true;
197  }
198
199  return false;
200}
201
202static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
203                                                  const char *&Beg,
204                                                  const char *E,
205                                                  unsigned &argIndex) {
206
207  using namespace clang::analyze_printf;
208
209  const char *I = Beg;
210  const char *Start = 0;
211  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
212
213  // Look for a '%' character that indicates the start of a format specifier.
214  for ( ; I != E ; ++I) {
215    char c = *I;
216    if (c == '\0') {
217      // Detect spurious null characters, which are likely errors.
218      H.HandleNullChar(I);
219      return true;
220    }
221    if (c == '%') {
222      Start = I++;  // Record the start of the format specifier.
223      break;
224    }
225  }
226
227  // No format specifier found?
228  if (!Start)
229    return false;
230
231  if (I == E) {
232    // No more characters left?
233    H.HandleIncompleteFormatSpecifier(Start, E - Start);
234    return true;
235  }
236
237  FormatSpecifier FS;
238  if (ParseArgPosition(H, FS, Start, I, E))
239    return true;
240
241  if (I == E) {
242    // No more characters left?
243    H.HandleIncompleteFormatSpecifier(Start, E - Start);
244    return true;
245  }
246
247  // Look for flags (if any).
248  bool hasMore = true;
249  for ( ; I != E; ++I) {
250    switch (*I) {
251      default: hasMore = false; break;
252      case '-': FS.setIsLeftJustified(); break;
253      case '+': FS.setHasPlusPrefix(); break;
254      case ' ': FS.setHasSpacePrefix(); break;
255      case '#': FS.setHasAlternativeForm(); break;
256      case '0': FS.setHasLeadingZeros(); break;
257    }
258    if (!hasMore)
259      break;
260  }
261
262  if (I == E) {
263    // No more characters left?
264    H.HandleIncompleteFormatSpecifier(Start, E - Start);
265    return true;
266  }
267
268  // Look for the field width (if any).
269  if (ParseFieldWidth(H, FS, Start, I, E,
270                      FS.usesPositionalArg() ? 0 : &argIndex))
271    return true;
272
273  if (I == E) {
274    // No more characters left?
275    H.HandleIncompleteFormatSpecifier(Start, E - Start);
276    return true;
277  }
278
279  // Look for the precision (if any).
280  if (*I == '.') {
281    ++I;
282    if (I == E) {
283      H.HandleIncompleteFormatSpecifier(Start, E - Start);
284      return true;
285    }
286
287    if (ParsePrecision(H, FS, Start, I, E,
288                       FS.usesPositionalArg() ? 0 : &argIndex))
289      return true;
290
291    if (I == E) {
292      // No more characters left?
293      H.HandleIncompleteFormatSpecifier(Start, E - Start);
294      return true;
295    }
296  }
297
298  // Look for the length modifier.
299  LengthModifier lm = None;
300  switch (*I) {
301    default:
302      break;
303    case 'h':
304      ++I;
305      lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort;
306      break;
307    case 'l':
308      ++I;
309      lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong;
310      break;
311    case 'j': lm = AsIntMax;     ++I; break;
312    case 'z': lm = AsSizeT;      ++I; break;
313    case 't': lm = AsPtrDiff;    ++I; break;
314    case 'L': lm = AsLongDouble; ++I; break;
315    case 'q': lm = AsLongLong;   ++I; break;
316  }
317  FS.setLengthModifier(lm);
318
319  if (I == E) {
320    // No more characters left?
321    H.HandleIncompleteFormatSpecifier(Start, E - Start);
322    return true;
323  }
324
325  if (*I == '\0') {
326    // Detect spurious null characters, which are likely errors.
327    H.HandleNullChar(I);
328    return true;
329  }
330
331  // Finally, look for the conversion specifier.
332  const char *conversionPosition = I++;
333  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
334  switch (*conversionPosition) {
335    default:
336      break;
337    // C99: 7.19.6.1 (section 8).
338    case '%': k = ConversionSpecifier::PercentArg;   break;
339    case 'A': k = ConversionSpecifier::AArg; break;
340    case 'E': k = ConversionSpecifier::EArg; break;
341    case 'F': k = ConversionSpecifier::FArg; break;
342    case 'G': k = ConversionSpecifier::GArg; break;
343    case 'X': k = ConversionSpecifier::XArg; break;
344    case 'a': k = ConversionSpecifier::aArg; break;
345    case 'c': k = ConversionSpecifier::IntAsCharArg; break;
346    case 'd': k = ConversionSpecifier::dArg; break;
347    case 'e': k = ConversionSpecifier::eArg; break;
348    case 'f': k = ConversionSpecifier::fArg; break;
349    case 'g': k = ConversionSpecifier::gArg; break;
350    case 'i': k = ConversionSpecifier::iArg; break;
351    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
352    case 'o': k = ConversionSpecifier::oArg; break;
353    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
354    case 's': k = ConversionSpecifier::CStrArg;      break;
355    case 'u': k = ConversionSpecifier::uArg; break;
356    case 'x': k = ConversionSpecifier::xArg; break;
357    // Mac OS X (unicode) specific
358    case 'C': k = ConversionSpecifier::CArg; break;
359    case 'S': k = ConversionSpecifier::UnicodeStrArg; break;
360    // Objective-C.
361    case '@': k = ConversionSpecifier::ObjCObjArg; break;
362    // Glibc specific.
363    case 'm': k = ConversionSpecifier::PrintErrno; break;
364  }
365  ConversionSpecifier CS(conversionPosition, k);
366  FS.setConversionSpecifier(CS);
367  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
368    FS.setArgIndex(argIndex++);
369
370  if (k == ConversionSpecifier::InvalidSpecifier) {
371    // Assume the conversion takes one argument.
372    return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
373  }
374  return FormatSpecifierResult(Start, FS);
375}
376
377bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
378                       const char *I, const char *E) {
379
380  unsigned argIndex = 0;
381
382  // Keep looking for a format specifier until we have exhausted the string.
383  while (I != E) {
384    const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
385    // Did a fail-stop error of any kind occur when parsing the specifier?
386    // If so, don't do any more processing.
387    if (FSR.shouldStop())
388      return true;;
389    // Did we exhaust the string or encounter an error that
390    // we can recover from?
391    if (!FSR.hasValue())
392      continue;
393    // We have a format specifier.  Pass it to the callback.
394    if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
395                                 I - FSR.getStart()))
396      return true;
397  }
398  assert(I == E && "Format string not exhausted");
399  return false;
400}
401
402FormatStringHandler::~FormatStringHandler() {}
403
404//===----------------------------------------------------------------------===//
405// Methods on ArgTypeResult.
406//===----------------------------------------------------------------------===//
407
408bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
409  assert(isValid());
410
411  if (K == UnknownTy)
412    return true;
413
414  if (K == SpecificTy) {
415    argTy = C.getCanonicalType(argTy).getUnqualifiedType();
416
417    if (T == argTy)
418      return true;
419
420    if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
421      switch (BT->getKind()) {
422        default:
423          break;
424        case BuiltinType::Char_S:
425        case BuiltinType::SChar:
426          return T == C.UnsignedCharTy;
427        case BuiltinType::Char_U:
428        case BuiltinType::UChar:
429          return T == C.SignedCharTy;
430        case BuiltinType::Short:
431          return T == C.UnsignedShortTy;
432        case BuiltinType::UShort:
433          return T == C.ShortTy;
434        case BuiltinType::Int:
435          return T == C.UnsignedIntTy;
436        case BuiltinType::UInt:
437          return T == C.IntTy;
438        case BuiltinType::Long:
439          return T == C.UnsignedLongTy;
440        case BuiltinType::ULong:
441          return T == C.LongTy;
442        case BuiltinType::LongLong:
443          return T == C.UnsignedLongLongTy;
444        case BuiltinType::ULongLong:
445          return T == C.LongLongTy;
446      }
447
448    return false;
449  }
450
451  if (K == CStrTy) {
452    const PointerType *PT = argTy->getAs<PointerType>();
453    if (!PT)
454      return false;
455
456    QualType pointeeTy = PT->getPointeeType();
457
458    if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
459      switch (BT->getKind()) {
460        case BuiltinType::Void:
461        case BuiltinType::Char_U:
462        case BuiltinType::UChar:
463        case BuiltinType::Char_S:
464        case BuiltinType::SChar:
465          return true;
466        default:
467          break;
468      }
469
470    return false;
471  }
472
473  if (K == WCStrTy) {
474    const PointerType *PT = argTy->getAs<PointerType>();
475    if (!PT)
476      return false;
477
478    QualType pointeeTy =
479      C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
480
481    return pointeeTy == C.getWCharType();
482  }
483
484  return false;
485}
486
487QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
488  assert(isValid());
489  if (K == SpecificTy)
490    return T;
491  if (K == CStrTy)
492    return C.getPointerType(C.CharTy);
493  if (K == WCStrTy)
494    return C.getPointerType(C.getWCharType());
495  if (K == ObjCPointerTy)
496    return C.ObjCBuiltinIdTy;
497
498  return QualType();
499}
500
501//===----------------------------------------------------------------------===//
502// Methods on OptionalAmount.
503//===----------------------------------------------------------------------===//
504
505ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
506  return Ctx.IntTy;
507}
508
509//===----------------------------------------------------------------------===//
510// Methods on FormatSpecifier.
511//===----------------------------------------------------------------------===//
512
513ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
514  if (!CS.consumesDataArgument())
515    return ArgTypeResult::Invalid();
516
517  if (CS.isIntArg())
518    switch (LM) {
519      case AsLongDouble:
520        return ArgTypeResult::Invalid();
521      case None: return Ctx.IntTy;
522      case AsChar: return Ctx.SignedCharTy;
523      case AsShort: return Ctx.ShortTy;
524      case AsLong: return Ctx.LongTy;
525      case AsLongLong: return Ctx.LongLongTy;
526      case AsIntMax:
527        // FIXME: Return unknown for now.
528        return ArgTypeResult();
529      case AsSizeT: return Ctx.getSizeType();
530      case AsPtrDiff: return Ctx.getPointerDiffType();
531    }
532
533  if (CS.isUIntArg())
534    switch (LM) {
535      case AsLongDouble:
536        return ArgTypeResult::Invalid();
537      case None: return Ctx.UnsignedIntTy;
538      case AsChar: return Ctx.UnsignedCharTy;
539      case AsShort: return Ctx.UnsignedShortTy;
540      case AsLong: return Ctx.UnsignedLongTy;
541      case AsLongLong: return Ctx.UnsignedLongLongTy;
542      case AsIntMax:
543        // FIXME: Return unknown for now.
544        return ArgTypeResult();
545      case AsSizeT:
546        // FIXME: How to get the corresponding unsigned
547        // version of size_t?
548        return ArgTypeResult();
549      case AsPtrDiff:
550        // FIXME: How to get the corresponding unsigned
551        // version of ptrdiff_t?
552        return ArgTypeResult();
553    }
554
555  if (CS.isDoubleArg()) {
556    if (LM == AsLongDouble)
557      return Ctx.LongDoubleTy;
558    return Ctx.DoubleTy;
559  }
560
561  switch (CS.getKind()) {
562    case ConversionSpecifier::CStrArg:
563      return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy                                            : ArgTypeResult::CStrTy);
564    case ConversionSpecifier::UnicodeStrArg:
565      // FIXME: This appears to be Mac OS X specific.
566      return ArgTypeResult::WCStrTy;
567    case ConversionSpecifier::CArg:
568      return Ctx.WCharTy;
569    default:
570      break;
571  }
572
573  // FIXME: Handle other cases.
574  return ArgTypeResult();
575}
576
577