FormatString.h revision 4684778993c667246039b4664acbce59dc99440c
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
30/// Class representing optional flags with location and representation
31/// information.
/// A boolean flag that also remembers the textual representation it was
/// created with and, once recorded, a position pointer supplied by the caller.
class OptionalFlag {
public:
  /// \param Representation text returned verbatim by toString(); the pointer
  /// is stored, not copied.
  ///
  /// The position starts out null so that the assertion in getPosition()
  /// reliably catches a read that happens before setPosition().
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  Const so that it can be
  /// called through a const reference to the flag.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records the position associated with this flag; it must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position.  Asserts that setPosition() was called.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
60
61/// Represents the length modifier in a format string in scanf/printf.
/// The length modifier of a single scanf/printf conversion: which modifier
/// it is, plus a pointer to where it starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an absent modifier: kind None with a null start pointer.
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Pointer to the start of the modifier (null when default-constructed).
  const char *getStart() const { return Position; }

  /// Length of the modifier in characters: 0 for None, 2 for the doubled
  /// forms ('hh' and 'll'), and 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
110
111class ConversionSpecifier {
112public:
113  enum Kind {
114    InvalidSpecifier = 0,
115      // C99 conversion specifiers.
116    cArg,
117    dArg,
118    iArg,
119    IntArgBeg = cArg, IntArgEnd = iArg,
120
121    oArg,
122    uArg,
123    xArg,
124    XArg,
125    UIntArgBeg = oArg, UIntArgEnd = XArg,
126
127    fArg,
128    FArg,
129    eArg,
130    EArg,
131    gArg,
132    GArg,
133    aArg,
134    AArg,
135    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
136
137    sArg,
138    pArg,
139    nArg,
140    PercentArg,
141    CArg,
142    SArg,
143
144    // ** Printf-specific **
145
146    // Objective-C specific specifiers.
147    ObjCObjArg,  // '@'
148    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
149
150    // GlibC specific specifiers.
151    PrintErrno,   // 'm'
152
153    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
154
155    // ** Scanf-specific **
156    ScanListArg, // '['
157    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
158  };
159
160  ConversionSpecifier(bool isPrintf)
161    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
162
163  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
164    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
165
166  const char *getStart() const {
167    return Position;
168  }
169
170  StringRef getCharacters() const {
171    return StringRef(getStart(), getLength());
172  }
173
174  bool consumesDataArgument() const {
175    switch (kind) {
176      case PrintErrno:
177        assert(IsPrintf);
178        return false;
179      case PercentArg:
180        return false;
181      default:
182        return true;
183    }
184  }
185
186  Kind getKind() const { return kind; }
187  void setKind(Kind k) { kind = k; }
188  unsigned getLength() const {
189    return EndScanList ? EndScanList - Position : 1;
190  }
191
192  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
193  const char *toString() const;
194
195  bool isPrintfKind() const { return IsPrintf; }
196
197protected:
198  bool IsPrintf;
199  const char *Position;
200  const char *EndScanList;
201  Kind kind;
202};
203
204class ArgTypeResult {
205public:
206  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
207              AnyCharTy, CStrTy, WCStrTy, WIntTy };
208private:
209  const Kind K;
210  QualType T;
211  const char *Name;
212  ArgTypeResult(bool) : K(InvalidTy), Name(0) {}
213public:
214  ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {}
215  ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {}
216  ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {}
217  ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n)  {}
218  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
219
220  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
221
222  bool isValid() const { return K != InvalidTy; }
223
224  const QualType *getSpecificType() const {
225    return K == SpecificTy ? &T : 0;
226  }
227
228  bool matchesType(ASTContext &C, QualType argTy) const;
229
230  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
231
232  QualType getRepresentativeType(ASTContext &C) const;
233
234  std::string getRepresentativeTypeName(ASTContext &C) const;
235};
236
237class OptionalAmount {
238public:
239  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
240
241  OptionalAmount(HowSpecified howSpecified,
242                 unsigned amount,
243                 const char *amountStart,
244                 unsigned amountLength,
245                 bool usesPositionalArg)
246  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
247  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
248
249  OptionalAmount(bool valid = true)
250  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
251  UsesPositionalArg(0), UsesDotPrefix(0) {}
252
253  bool isInvalid() const {
254    return hs == Invalid;
255  }
256
257  HowSpecified getHowSpecified() const { return hs; }
258  void setHowSpecified(HowSpecified h) { hs = h; }
259
260  bool hasDataArgument() const { return hs == Arg; }
261
262  unsigned getArgIndex() const {
263    assert(hasDataArgument());
264    return amt;
265  }
266
267  unsigned getConstantAmount() const {
268    assert(hs == Constant);
269    return amt;
270  }
271
272  const char *getStart() const {
273      // We include the . character if it is given.
274    return start - UsesDotPrefix;
275  }
276
277  unsigned getConstantLength() const {
278    assert(hs == Constant);
279    return length + UsesDotPrefix;
280  }
281
282  ArgTypeResult getArgType(ASTContext &Ctx) const;
283
284  void toString(raw_ostream &os) const;
285
286  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
287  unsigned getPositionalArgIndex() const {
288    assert(hasDataArgument());
289    return amt + 1;
290  }
291
292  bool usesDotPrefix() const { return UsesDotPrefix; }
293  void setUsesDotPrefix() { UsesDotPrefix = true; }
294
295private:
296  const char *start;
297  unsigned length;
298  HowSpecified hs;
299  unsigned amt;
300  bool UsesPositionalArg : 1;
301  bool UsesDotPrefix;
302};
303
304
305class FormatSpecifier {
306protected:
307  LengthModifier LM;
308  OptionalAmount FieldWidth;
309  ConversionSpecifier CS;
310  /// Positional arguments, an IEEE extension:
311  ///  IEEE Std 1003.1, 2004 Edition
312  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
313  bool UsesPositionalArg;
314  unsigned argIndex;
315public:
316  FormatSpecifier(bool isPrintf)
317    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
318
319  void setLengthModifier(LengthModifier lm) {
320    LM = lm;
321  }
322
323  void setUsesPositionalArg() { UsesPositionalArg = true; }
324
325  void setArgIndex(unsigned i) {
326    argIndex = i;
327  }
328
329  unsigned getArgIndex() const {
330    return argIndex;
331  }
332
333  unsigned getPositionalArgIndex() const {
334    return argIndex + 1;
335  }
336
337  const LengthModifier &getLengthModifier() const {
338    return LM;
339  }
340
341  const OptionalAmount &getFieldWidth() const {
342    return FieldWidth;
343  }
344
345  void setFieldWidth(const OptionalAmount &Amt) {
346    FieldWidth = Amt;
347  }
348
349  bool usesPositionalArg() const { return UsesPositionalArg; }
350
351  bool hasValidLengthModifier() const;
352
353  bool hasStandardLengthModifier() const;
354
355  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
356
357  bool hasStandardLengthConversionCombination() const;
358
359  /// For a TypedefType QT, if it is a named integer type such as size_t,
360  /// assign the appropriate value to LM and return true.
361  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
362};
363
364} // end analyze_format_string namespace
365
366//===----------------------------------------------------------------------===//
367/// Pieces specific to fprintf format strings.
368
369namespace analyze_printf {
370
371class PrintfConversionSpecifier :
372  public analyze_format_string::ConversionSpecifier  {
373public:
374  PrintfConversionSpecifier()
375    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
376
377  PrintfConversionSpecifier(const char *pos, Kind k)
378    : ConversionSpecifier(true, pos, k) {}
379
380  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
381  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
382  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
383                                    kind <= DoubleArgEnd; }
384  unsigned getLength() const {
385      // Conversion specifiers currently only are represented by
386      // single characters, but we be flexible.
387    return 1;
388  }
389
390  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
391    return CS->isPrintfKind();
392  }
393};
394
395using analyze_format_string::ArgTypeResult;
396using analyze_format_string::LengthModifier;
397using analyze_format_string::OptionalAmount;
398using analyze_format_string::OptionalFlag;
399
400class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
401  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
402  OptionalFlag IsLeftJustified; // '-'
403  OptionalFlag HasPlusPrefix; // '+'
404  OptionalFlag HasSpacePrefix; // ' '
405  OptionalFlag HasAlternativeForm; // '#'
406  OptionalFlag HasLeadingZeroes; // '0'
407  OptionalAmount Precision;
408public:
409  PrintfSpecifier() :
410    FormatSpecifier(/* isPrintf = */ true),
411    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
412    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
413
414  static PrintfSpecifier Parse(const char *beg, const char *end);
415
416    // Methods for incrementally constructing the PrintfSpecifier.
417  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
418    CS = cs;
419  }
420  void setHasThousandsGrouping(const char *position) {
421    HasThousandsGrouping = true;
422    HasThousandsGrouping.setPosition(position);
423  }
424  void setIsLeftJustified(const char *position) {
425    IsLeftJustified = true;
426    IsLeftJustified.setPosition(position);
427  }
428  void setHasPlusPrefix(const char *position) {
429    HasPlusPrefix = true;
430    HasPlusPrefix.setPosition(position);
431  }
432  void setHasSpacePrefix(const char *position) {
433    HasSpacePrefix = true;
434    HasSpacePrefix.setPosition(position);
435  }
436  void setHasAlternativeForm(const char *position) {
437    HasAlternativeForm = true;
438    HasAlternativeForm.setPosition(position);
439  }
440  void setHasLeadingZeros(const char *position) {
441    HasLeadingZeroes = true;
442    HasLeadingZeroes.setPosition(position);
443  }
444  void setUsesPositionalArg() { UsesPositionalArg = true; }
445
446    // Methods for querying the format specifier.
447
448  const PrintfConversionSpecifier &getConversionSpecifier() const {
449    return cast<PrintfConversionSpecifier>(CS);
450  }
451
452  void setPrecision(const OptionalAmount &Amt) {
453    Precision = Amt;
454    Precision.setUsesDotPrefix();
455  }
456
457  const OptionalAmount &getPrecision() const {
458    return Precision;
459  }
460
461  bool consumesDataArgument() const {
462    return getConversionSpecifier().consumesDataArgument();
463  }
464
465  /// \brief Returns the builtin type that a data argument
466  /// paired with this format specifier should have.  This method
467  /// will return null if the format specifier does not have
468  /// a matching data argument or the matching argument matches
469  /// more than one type.
470  ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
471
472  const OptionalFlag &hasThousandsGrouping() const {
473      return HasThousandsGrouping;
474  }
475  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
476  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
477  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
478  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
479  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
480  bool usesPositionalArg() const { return UsesPositionalArg; }
481
482  /// Changes the specifier and length according to a QualType, retaining any
483  /// flags or options. Returns true on success, or false when a conversion
484  /// was not successful.
485  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
486               bool IsObjCLiteral);
487
488  void toString(raw_ostream &os) const;
489
490  // Validation methods - to check if any element results in undefined behavior
491  bool hasValidPlusPrefix() const;
492  bool hasValidAlternativeForm() const;
493  bool hasValidLeadingZeros() const;
494  bool hasValidSpacePrefix() const;
495  bool hasValidLeftJustified() const;
496  bool hasValidThousandsGroupingPrefix() const;
497
498  bool hasValidPrecision() const;
499  bool hasValidFieldWidth() const;
500};
501}  // end analyze_printf namespace
502
503//===----------------------------------------------------------------------===//
504/// Pieces specific to fscanf format strings.
505
506namespace analyze_scanf {
507
508class ScanfConversionSpecifier :
509    public analyze_format_string::ConversionSpecifier  {
510public:
511  ScanfConversionSpecifier()
512    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
513
514  ScanfConversionSpecifier(const char *pos, Kind k)
515    : ConversionSpecifier(false, pos, k) {}
516
517  void setEndScanList(const char *pos) { EndScanList = pos; }
518
519  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
520    return !CS->isPrintfKind();
521  }
522};
523
524using analyze_format_string::ArgTypeResult;
525using analyze_format_string::LengthModifier;
526using analyze_format_string::OptionalAmount;
527using analyze_format_string::OptionalFlag;
528
529class ScanfArgTypeResult : public ArgTypeResult {
530public:
531  enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy };
532private:
533  Kind K;
534  ArgTypeResult A;
535  const char *Name;
536  QualType getRepresentativeType(ASTContext &C) const;
537public:
538  ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
539  ScanfArgTypeResult(ArgTypeResult a, const char *n = 0)
540      : K(PtrToArgTypeResultTy), A(a), Name(n) {
541    assert(A.isValid());
542  }
543
544  static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); }
545
546  bool isValid() const { return K != InvalidTy; }
547
548  bool matchesType(ASTContext& C, QualType argTy) const;
549
550  std::string getRepresentativeTypeName(ASTContext& C) const;
551};
552
553class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
554  OptionalFlag SuppressAssignment; // '*'
555public:
556  ScanfSpecifier() :
557    FormatSpecifier(/* isPrintf = */ false),
558    SuppressAssignment("*") {}
559
560  void setSuppressAssignment(const char *position) {
561    SuppressAssignment = true;
562    SuppressAssignment.setPosition(position);
563  }
564
565  const OptionalFlag &getSuppressAssignment() const {
566    return SuppressAssignment;
567  }
568
569  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
570    CS = cs;
571  }
572
573  const ScanfConversionSpecifier &getConversionSpecifier() const {
574    return cast<ScanfConversionSpecifier>(CS);
575  }
576
577  bool consumesDataArgument() const {
578    return CS.consumesDataArgument() && !SuppressAssignment;
579  }
580
581  ScanfArgTypeResult getArgType(ASTContext &Ctx) const;
582
583  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
584
585  void toString(raw_ostream &os) const;
586
587  static ScanfSpecifier Parse(const char *beg, const char *end);
588};
589
590} // end analyze_scanf namespace
591
592//===----------------------------------------------------------------------===//
593// Parsing and processing of format strings (both fprintf and fscanf).
594
595namespace analyze_format_string {
596
/// Identifies which part of a specifier a position belongs to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0, ///< The field width.
  PrecisionPos = 1   ///< The precision.
};
598
599class FormatStringHandler {
600public:
601  FormatStringHandler() {}
602  virtual ~FormatStringHandler();
603
604  virtual void HandleNullChar(const char *nullCharacter) {}
605
606  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
607
608  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
609                                     PositionContext p) {}
610
611  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
612
613  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
614                                         unsigned specifierLen) {}
615
616  // Printf-specific handlers.
617
618  virtual bool HandleInvalidPrintfConversionSpecifier(
619                                      const analyze_printf::PrintfSpecifier &FS,
620                                      const char *startSpecifier,
621                                      unsigned specifierLen) {
622    return true;
623  }
624
625  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
626                                     const char *startSpecifier,
627                                     unsigned specifierLen) {
628    return true;
629  }
630
631    // Scanf-specific handlers.
632
633  virtual bool HandleInvalidScanfConversionSpecifier(
634                                        const analyze_scanf::ScanfSpecifier &FS,
635                                        const char *startSpecifier,
636                                        unsigned specifierLen) {
637    return true;
638  }
639
640  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
641                                    const char *startSpecifier,
642                                    unsigned specifierLen) {
643    return true;
644  }
645
646  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
647};
648
649bool ParsePrintfString(FormatStringHandler &H,
650                       const char *beg, const char *end, const LangOptions &LO);
651
652bool ParseScanfString(FormatStringHandler &H,
653                      const char *beg, const char *end, const LangOptions &LO);
654
655} // end analyze_format_string namespace
656} // end clang namespace
657#endif
658