PrintfFormatString.cpp revision 92a6febe130dd9ad726983835297e11b2fa3b93f
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Handling of format strings in printf and friends.  The structure of format
11// strings for fprintf() is described in C99 7.19.6.1.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17
18using clang::analyze_format_string::ArgTypeResult;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_printf::ConversionSpecifier;
23using clang::analyze_printf::PrintfSpecifier;
24
25using namespace clang;
26
27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28        PrintfSpecifierResult;
29
30//===----------------------------------------------------------------------===//
31// Methods for parsing format strings.
32//===----------------------------------------------------------------------===//
33
34using analyze_format_string::ParseNonPositionAmount;
35
36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37                           const char *Start, const char *&Beg, const char *E,
38                           unsigned *argIndex) {
39  if (argIndex) {
40    FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
41  }
42  else {
43    const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44                                           analyze_format_string::PrecisionPos);
45    if (Amt.isInvalid())
46      return true;
47    FS.setPrecision(Amt);
48  }
49  return false;
50}
51
52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
53                                                  const char *&Beg,
54                                                  const char *E,
55                                                  unsigned &argIndex) {
56
57  using namespace clang::analyze_printf;
58
59  const char *I = Beg;
60  const char *Start = 0;
61  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
62
63  // Look for a '%' character that indicates the start of a format specifier.
64  for ( ; I != E ; ++I) {
65    char c = *I;
66    if (c == '\0') {
67      // Detect spurious null characters, which are likely errors.
68      H.HandleNullChar(I);
69      return true;
70    }
71    if (c == '%') {
72      Start = I++;  // Record the start of the format specifier.
73      break;
74    }
75  }
76
77  // No format specifier found?
78  if (!Start)
79    return false;
80
81  if (I == E) {
82    // No more characters left?
83    H.HandleIncompleteSpecifier(Start, E - Start);
84    return true;
85  }
86
87  PrintfSpecifier FS;
88  if (ParseArgPosition(H, FS, Start, I, E))
89    return true;
90
91  if (I == E) {
92    // No more characters left?
93    H.HandleIncompleteSpecifier(Start, E - Start);
94    return true;
95  }
96
97  // Look for flags (if any).
98  bool hasMore = true;
99  for ( ; I != E; ++I) {
100    switch (*I) {
101      default: hasMore = false; break;
102      case '-': FS.setIsLeftJustified(I); break;
103      case '+': FS.setHasPlusPrefix(I); break;
104      case ' ': FS.setHasSpacePrefix(I); break;
105      case '#': FS.setHasAlternativeForm(I); break;
106      case '0': FS.setHasLeadingZeros(I); break;
107    }
108    if (!hasMore)
109      break;
110  }
111
112  if (I == E) {
113    // No more characters left?
114    H.HandleIncompleteSpecifier(Start, E - Start);
115    return true;
116  }
117
118  // Look for the field width (if any).
119  if (ParseFieldWidth(H, FS, Start, I, E,
120                      FS.usesPositionalArg() ? 0 : &argIndex))
121    return true;
122
123  if (I == E) {
124    // No more characters left?
125    H.HandleIncompleteSpecifier(Start, E - Start);
126    return true;
127  }
128
129  // Look for the precision (if any).
130  if (*I == '.') {
131    ++I;
132    if (I == E) {
133      H.HandleIncompleteSpecifier(Start, E - Start);
134      return true;
135    }
136
137    if (ParsePrecision(H, FS, Start, I, E,
138                       FS.usesPositionalArg() ? 0 : &argIndex))
139      return true;
140
141    if (I == E) {
142      // No more characters left?
143      H.HandleIncompleteSpecifier(Start, E - Start);
144      return true;
145    }
146  }
147
148  // Look for the length modifier.
149  if (ParseLengthModifier(FS, I, E) && I == E) {
150    // No more characters left?
151    H.HandleIncompleteSpecifier(Start, E - Start);
152    return true;
153  }
154
155  if (*I == '\0') {
156    // Detect spurious null characters, which are likely errors.
157    H.HandleNullChar(I);
158    return true;
159  }
160
161  // Finally, look for the conversion specifier.
162  const char *conversionPosition = I++;
163  ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
164  switch (*conversionPosition) {
165    default:
166      break;
167    // C99: 7.19.6.1 (section 8).
168    case '%': k = ConversionSpecifier::PercentArg;   break;
169    case 'A': k = ConversionSpecifier::AArg; break;
170    case 'E': k = ConversionSpecifier::EArg; break;
171    case 'F': k = ConversionSpecifier::FArg; break;
172    case 'G': k = ConversionSpecifier::GArg; break;
173    case 'X': k = ConversionSpecifier::XArg; break;
174    case 'a': k = ConversionSpecifier::aArg; break;
175    case 'c': k = ConversionSpecifier::cArg; break;
176    case 'd': k = ConversionSpecifier::dArg; break;
177    case 'e': k = ConversionSpecifier::eArg; break;
178    case 'f': k = ConversionSpecifier::fArg; break;
179    case 'g': k = ConversionSpecifier::gArg; break;
180    case 'i': k = ConversionSpecifier::iArg; break;
181    case 'n': k = ConversionSpecifier::OutIntPtrArg; break;
182    case 'o': k = ConversionSpecifier::oArg; break;
183    case 'p': k = ConversionSpecifier::VoidPtrArg;   break;
184    case 's': k = ConversionSpecifier::CStrArg;      break;
185    case 'u': k = ConversionSpecifier::uArg; break;
186    case 'x': k = ConversionSpecifier::xArg; break;
187    // Mac OS X (unicode) specific
188    case 'C': k = ConversionSpecifier::CArg; break;
189    case 'S': k = ConversionSpecifier::UnicodeStrArg; break;
190    // Objective-C.
191    case '@': k = ConversionSpecifier::ObjCObjArg; break;
192    // Glibc specific.
193    case 'm': k = ConversionSpecifier::PrintErrno; break;
194  }
195  ConversionSpecifier CS(conversionPosition, k);
196  FS.setConversionSpecifier(CS);
197  if (CS.consumesDataArgument() && !FS.usesPositionalArg())
198    FS.setArgIndex(argIndex++);
199
200  if (k == ConversionSpecifier::InvalidSpecifier) {
201    // Assume the conversion takes one argument.
202    return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
203  }
204  return PrintfSpecifierResult(Start, FS);
205}
206
207bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
208                                                     const char *I,
209                                                     const char *E) {
210
211  unsigned argIndex = 0;
212
213  // Keep looking for a format specifier until we have exhausted the string.
214  while (I != E) {
215    const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex);
216    // Did a fail-stop error of any kind occur when parsing the specifier?
217    // If so, don't do any more processing.
218    if (FSR.shouldStop())
219      return true;;
220    // Did we exhaust the string or encounter an error that
221    // we can recover from?
222    if (!FSR.hasValue())
223      continue;
224    // We have a format specifier.  Pass it to the callback.
225    if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
226                                 I - FSR.getStart()))
227      return true;
228  }
229  assert(I == E && "Format string not exhausted");
230  return false;
231}
232
233//===----------------------------------------------------------------------===//
234// Methods on ConversionSpecifier.
235//===----------------------------------------------------------------------===//
236const char *ConversionSpecifier::toString() const {
237  switch (kind) {
238  case dArg: return "d";
239  case iArg: return "i";
240  case oArg: return "o";
241  case uArg: return "u";
242  case xArg: return "x";
243  case XArg: return "X";
244  case fArg: return "f";
245  case FArg: return "F";
246  case eArg: return "e";
247  case EArg: return "E";
248  case gArg: return "g";
249  case GArg: return "G";
250  case aArg: return "a";
251  case AArg: return "A";
252  case cArg:     return "c";
253  case CStrArg:          return "s";
254  case VoidPtrArg:       return "p";
255  case OutIntPtrArg:     return "n";
256  case PercentArg:       return "%";
257  case InvalidSpecifier: return NULL;
258
259  // MacOS X unicode extensions.
260  case CArg:          return "C";
261  case UnicodeStrArg: return "S";
262
263  // Objective-C specific specifiers.
264  case ObjCObjArg: return "@";
265
266  // GlibC specific specifiers.
267  case PrintErrno: return "m";
268  }
269  return NULL;
270}
271
272//===----------------------------------------------------------------------===//
273// Methods on PrintfSpecifier.
274//===----------------------------------------------------------------------===//
275
276ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
277  if (!CS.consumesDataArgument())
278    return ArgTypeResult::Invalid();
279
280  if (CS.isIntArg())
281    switch (LM.getKind()) {
282      case LengthModifier::AsLongDouble:
283        return ArgTypeResult::Invalid();
284      case LengthModifier::None: return Ctx.IntTy;
285      case LengthModifier::AsChar: return Ctx.SignedCharTy;
286      case LengthModifier::AsShort: return Ctx.ShortTy;
287      case LengthModifier::AsLong: return Ctx.LongTy;
288      case LengthModifier::AsLongLong: return Ctx.LongLongTy;
289      case LengthModifier::AsIntMax:
290        // FIXME: Return unknown for now.
291        return ArgTypeResult();
292      case LengthModifier::AsSizeT: return Ctx.getSizeType();
293      case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
294    }
295
296  if (CS.isUIntArg())
297    switch (LM.getKind()) {
298      case LengthModifier::AsLongDouble:
299        return ArgTypeResult::Invalid();
300      case LengthModifier::None: return Ctx.UnsignedIntTy;
301      case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
302      case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
303      case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
304      case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
305      case LengthModifier::AsIntMax:
306        // FIXME: Return unknown for now.
307        return ArgTypeResult();
308      case LengthModifier::AsSizeT:
309        // FIXME: How to get the corresponding unsigned
310        // version of size_t?
311        return ArgTypeResult();
312      case LengthModifier::AsPtrDiff:
313        // FIXME: How to get the corresponding unsigned
314        // version of ptrdiff_t?
315        return ArgTypeResult();
316    }
317
318  if (CS.isDoubleArg()) {
319    if (LM.getKind() == LengthModifier::AsLongDouble)
320      return Ctx.LongDoubleTy;
321    return Ctx.DoubleTy;
322  }
323
324  switch (CS.getKind()) {
325    case ConversionSpecifier::CStrArg:
326      return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
327          ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
328    case ConversionSpecifier::UnicodeStrArg:
329      // FIXME: This appears to be Mac OS X specific.
330      return ArgTypeResult::WCStrTy;
331    case ConversionSpecifier::CArg:
332      return Ctx.WCharTy;
333    case ConversionSpecifier::VoidPtrArg:
334      return ArgTypeResult::CPointerTy;
335    default:
336      break;
337  }
338
339  // FIXME: Handle other cases.
340  return ArgTypeResult();
341}
342
343bool PrintfSpecifier::fixType(QualType QT) {
344  // Handle strings first (char *, wchar_t *)
345  if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
346    CS.setKind(ConversionSpecifier::CStrArg);
347
348    // Disable irrelevant flags
349    HasAlternativeForm = 0;
350    HasLeadingZeroes = 0;
351
352    // Set the long length modifier for wide characters
353    if (QT->getPointeeType()->isWideCharType())
354      LM.setKind(LengthModifier::AsWideChar);
355
356    return true;
357  }
358
359  // We can only work with builtin types.
360  if (!QT->isBuiltinType())
361    return false;
362
363  // Everything else should be a base type
364  const BuiltinType *BT = QT->getAs<BuiltinType>();
365
366  // Set length modifier
367  switch (BT->getKind()) {
368  default:
369    // The rest of the conversions are either optional or for non-builtin types
370    LM.setKind(LengthModifier::None);
371    break;
372
373  case BuiltinType::WChar:
374  case BuiltinType::Long:
375  case BuiltinType::ULong:
376    LM.setKind(LengthModifier::AsLong);
377    break;
378
379  case BuiltinType::LongLong:
380  case BuiltinType::ULongLong:
381    LM.setKind(LengthModifier::AsLongLong);
382    break;
383
384  case BuiltinType::LongDouble:
385    LM.setKind(LengthModifier::AsLongDouble);
386    break;
387  }
388
389  // Set conversion specifier and disable any flags which do not apply to it.
390  if (QT->isAnyCharacterType()) {
391    CS.setKind(ConversionSpecifier::cArg);
392    Precision.setHowSpecified(OptionalAmount::NotSpecified);
393    HasAlternativeForm = 0;
394    HasLeadingZeroes = 0;
395    HasPlusPrefix = 0;
396  }
397  // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
398  else if (QT->isRealFloatingType()) {
399    CS.setKind(ConversionSpecifier::fArg);
400  }
401  else if (QT->isPointerType()) {
402    CS.setKind(ConversionSpecifier::VoidPtrArg);
403    Precision.setHowSpecified(OptionalAmount::NotSpecified);
404    HasAlternativeForm = 0;
405    HasLeadingZeroes = 0;
406    HasPlusPrefix = 0;
407  }
408  else if (QT->isSignedIntegerType()) {
409    CS.setKind(ConversionSpecifier::dArg);
410    HasAlternativeForm = 0;
411  }
412  else if (QT->isUnsignedIntegerType()) {
413    CS.setKind(ConversionSpecifier::uArg);
414    HasAlternativeForm = 0;
415    HasPlusPrefix = 0;
416  }
417  else {
418    return false;
419  }
420
421  return true;
422}
423
424void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
425  // Whilst some features have no defined order, we are using the order
426  // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1)
427  os << "%";
428
429  // Positional args
430  if (usesPositionalArg()) {
431    os << getPositionalArgIndex() << "$";
432  }
433
434  // Conversion flags
435  if (IsLeftJustified)    os << "-";
436  if (HasPlusPrefix)      os << "+";
437  if (HasSpacePrefix)     os << " ";
438  if (HasAlternativeForm) os << "#";
439  if (HasLeadingZeroes)   os << "0";
440
441  // Minimum field width
442  FieldWidth.toString(os);
443  // Precision
444  Precision.toString(os);
445  // Length modifier
446  os << LM.toString();
447  // Conversion specifier
448  os << CS.toString();
449}
450
451bool PrintfSpecifier::hasValidPlusPrefix() const {
452  if (!HasPlusPrefix)
453    return true;
454
455  // The plus prefix only makes sense for signed conversions
456  switch (CS.getKind()) {
457  case ConversionSpecifier::dArg:
458  case ConversionSpecifier::iArg:
459  case ConversionSpecifier::fArg:
460  case ConversionSpecifier::FArg:
461  case ConversionSpecifier::eArg:
462  case ConversionSpecifier::EArg:
463  case ConversionSpecifier::gArg:
464  case ConversionSpecifier::GArg:
465  case ConversionSpecifier::aArg:
466  case ConversionSpecifier::AArg:
467    return true;
468
469  default:
470    return false;
471  }
472}
473
474bool PrintfSpecifier::hasValidAlternativeForm() const {
475  if (!HasAlternativeForm)
476    return true;
477
478  // Alternate form flag only valid with the oxaAeEfFgG conversions
479  switch (CS.getKind()) {
480  case ConversionSpecifier::oArg:
481  case ConversionSpecifier::xArg:
482  case ConversionSpecifier::aArg:
483  case ConversionSpecifier::AArg:
484  case ConversionSpecifier::eArg:
485  case ConversionSpecifier::EArg:
486  case ConversionSpecifier::fArg:
487  case ConversionSpecifier::FArg:
488  case ConversionSpecifier::gArg:
489  case ConversionSpecifier::GArg:
490    return true;
491
492  default:
493    return false;
494  }
495}
496
497bool PrintfSpecifier::hasValidLeadingZeros() const {
498  if (!HasLeadingZeroes)
499    return true;
500
501  // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
502  switch (CS.getKind()) {
503  case ConversionSpecifier::dArg:
504  case ConversionSpecifier::iArg:
505  case ConversionSpecifier::oArg:
506  case ConversionSpecifier::uArg:
507  case ConversionSpecifier::xArg:
508  case ConversionSpecifier::XArg:
509  case ConversionSpecifier::aArg:
510  case ConversionSpecifier::AArg:
511  case ConversionSpecifier::eArg:
512  case ConversionSpecifier::EArg:
513  case ConversionSpecifier::fArg:
514  case ConversionSpecifier::FArg:
515  case ConversionSpecifier::gArg:
516  case ConversionSpecifier::GArg:
517    return true;
518
519  default:
520    return false;
521  }
522}
523
524bool PrintfSpecifier::hasValidSpacePrefix() const {
525  if (!HasSpacePrefix)
526    return true;
527
528  // The space prefix only makes sense for signed conversions
529  switch (CS.getKind()) {
530  case ConversionSpecifier::dArg:
531  case ConversionSpecifier::iArg:
532  case ConversionSpecifier::fArg:
533  case ConversionSpecifier::FArg:
534  case ConversionSpecifier::eArg:
535  case ConversionSpecifier::EArg:
536  case ConversionSpecifier::gArg:
537  case ConversionSpecifier::GArg:
538  case ConversionSpecifier::aArg:
539  case ConversionSpecifier::AArg:
540    return true;
541
542  default:
543    return false;
544  }
545}
546
547bool PrintfSpecifier::hasValidLeftJustified() const {
548  if (!IsLeftJustified)
549    return true;
550
551  // The left justified flag is valid for all conversions except n
552  switch (CS.getKind()) {
553  case ConversionSpecifier::OutIntPtrArg:
554    return false;
555
556  default:
557    return true;
558  }
559}
560
561bool PrintfSpecifier::hasValidLengthModifier() const {
562  switch (LM.getKind()) {
563  case LengthModifier::None:
564    return true;
565
566  // Handle most integer flags
567  case LengthModifier::AsChar:
568  case LengthModifier::AsShort:
569  case LengthModifier::AsLongLong:
570  case LengthModifier::AsIntMax:
571  case LengthModifier::AsSizeT:
572  case LengthModifier::AsPtrDiff:
573    switch (CS.getKind()) {
574    case ConversionSpecifier::dArg:
575    case ConversionSpecifier::iArg:
576    case ConversionSpecifier::oArg:
577    case ConversionSpecifier::uArg:
578    case ConversionSpecifier::xArg:
579    case ConversionSpecifier::XArg:
580    case ConversionSpecifier::OutIntPtrArg:
581      return true;
582    default:
583      return false;
584    }
585
586  // Handle 'l' flag
587  case LengthModifier::AsLong:
588    switch (CS.getKind()) {
589    case ConversionSpecifier::dArg:
590    case ConversionSpecifier::iArg:
591    case ConversionSpecifier::oArg:
592    case ConversionSpecifier::uArg:
593    case ConversionSpecifier::xArg:
594    case ConversionSpecifier::XArg:
595    case ConversionSpecifier::aArg:
596    case ConversionSpecifier::AArg:
597    case ConversionSpecifier::fArg:
598    case ConversionSpecifier::FArg:
599    case ConversionSpecifier::eArg:
600    case ConversionSpecifier::EArg:
601    case ConversionSpecifier::gArg:
602    case ConversionSpecifier::GArg:
603    case ConversionSpecifier::OutIntPtrArg:
604    case ConversionSpecifier::cArg:
605    case ConversionSpecifier::CStrArg:
606      return true;
607    default:
608      return false;
609    }
610
611  case LengthModifier::AsLongDouble:
612    switch (CS.getKind()) {
613    case ConversionSpecifier::aArg:
614    case ConversionSpecifier::AArg:
615    case ConversionSpecifier::fArg:
616    case ConversionSpecifier::FArg:
617    case ConversionSpecifier::eArg:
618    case ConversionSpecifier::EArg:
619    case ConversionSpecifier::gArg:
620    case ConversionSpecifier::GArg:
621      return true;
622    default:
623      return false;
624    }
625  }
626  return false;
627}
628
629bool PrintfSpecifier::hasValidPrecision() const {
630  if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
631    return true;
632
633  // Precision is only valid with the diouxXaAeEfFgGs conversions
634  switch (CS.getKind()) {
635  case ConversionSpecifier::dArg:
636  case ConversionSpecifier::iArg:
637  case ConversionSpecifier::oArg:
638  case ConversionSpecifier::uArg:
639  case ConversionSpecifier::xArg:
640  case ConversionSpecifier::XArg:
641  case ConversionSpecifier::aArg:
642  case ConversionSpecifier::AArg:
643  case ConversionSpecifier::eArg:
644  case ConversionSpecifier::EArg:
645  case ConversionSpecifier::fArg:
646  case ConversionSpecifier::FArg:
647  case ConversionSpecifier::gArg:
648  case ConversionSpecifier::GArg:
649  case ConversionSpecifier::CStrArg:
650    return true;
651
652  default:
653    return false;
654  }
655}
656bool PrintfSpecifier::hasValidFieldWidth() const {
657  if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
658      return true;
659
660  // The field width is valid for all conversions except n
661  switch (CS.getKind()) {
662  case ConversionSpecifier::OutIntPtrArg:
663    return false;
664
665  default:
666    return true;
667  }
668}
669