FormatString.h revision 37969b7e14d6a4dfd934ef6d3738cc90b832ec1d
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// Represents an optional flag of a format specifier: whether it is set,
/// its textual representation, and the location recorded for it.
class OptionalFlag {
public:
  /// Creates a cleared flag with no recorded position.
  /// \param Representation the string returned by toString().
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.
  bool isSet() const { return flag; }
  /// Marks the flag as set.
  void set() { flag = true; }
  /// Marks the flag as not set.
  void clear() { flag = false; }

  /// Records the location of the flag; \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded location. Asserts that setPosition() was called;
  /// the pointer is null-initialised so this check is well-defined.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation supplied at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool-like qualities.
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;
  }

private:
  const char *representation;
  const char *position; // Null until setPosition() is called.
  bool flag;
};
60
/// Length modifier of a scanf/printf format directive: its kind and the
/// location at which it starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll', 'q' (BSD, deprecated)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates a modifier of kind None with a null start position.
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k starting at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position given at construction (may be null).
  const char *getStart() const { return Position; }

  /// Returns 0 for None, 2 for AsChar and AsLongLong, and 1 for every
  /// other kind.
  /// NOTE(review): 'q' is also mapped to AsLongLong but is one character;
  /// confirm callers account for that.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    const bool isTwoChars = (kind == AsLongLong || kind == AsChar);
    return isTwoChars ? 2 : 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
109
110class ConversionSpecifier {
111public:
112  enum Kind {
113    InvalidSpecifier = 0,
114      // C99 conversion specifiers.
115    cArg,
116    dArg,
117    iArg,
118    IntArgBeg = cArg, IntArgEnd = iArg,
119
120    oArg,
121    uArg,
122    xArg,
123    XArg,
124    UIntArgBeg = oArg, UIntArgEnd = XArg,
125
126    fArg,
127    FArg,
128    eArg,
129    EArg,
130    gArg,
131    GArg,
132    aArg,
133    AArg,
134    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
135
136    sArg,
137    pArg,
138    nArg,
139    PercentArg,
140    CArg,
141    SArg,
142
143    // ** Printf-specific **
144
145    // Objective-C specific specifiers.
146    ObjCObjArg,  // '@'
147    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
148
149    // GlibC specific specifiers.
150    PrintErrno,   // 'm'
151
152    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
153
154    // ** Scanf-specific **
155    ScanListArg, // '['
156    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
157  };
158
159  ConversionSpecifier(bool isPrintf)
160    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
161
162  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
163    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
164
165  const char *getStart() const {
166    return Position;
167  }
168
169  StringRef getCharacters() const {
170    return StringRef(getStart(), getLength());
171  }
172
173  bool consumesDataArgument() const {
174    switch (kind) {
175      case PrintErrno:
176        assert(IsPrintf);
177      case PercentArg:
178        return false;
179      default:
180        return true;
181    }
182  }
183
184  Kind getKind() const { return kind; }
185  void setKind(Kind k) { kind = k; }
186  unsigned getLength() const {
187    return EndScanList ? EndScanList - Position : 1;
188  }
189
190  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
191  const char *toString() const;
192
193  bool isPrintfKind() const { return IsPrintf; }
194
195protected:
196  bool IsPrintf;
197  const char *Position;
198  const char *EndScanList;
199  Kind kind;
200};
201
202class ArgTypeResult {
203public:
204  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
205              AnyCharTy, CStrTy, WCStrTy, WIntTy };
206private:
207  const Kind K;
208  QualType T;
209  const char *Name;
210  ArgTypeResult(bool) : K(InvalidTy), Name(0) {}
211public:
212  ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {}
213  ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {}
214  ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {}
215  ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n)  {}
216  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
217
218  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
219
220  bool isValid() const { return K != InvalidTy; }
221
222  const QualType *getSpecificType() const {
223    return K == SpecificTy ? &T : 0;
224  }
225
226  bool matchesType(ASTContext &C, QualType argTy) const;
227
228  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
229
230  QualType getRepresentativeType(ASTContext &C) const;
231
232  std::string getRepresentativeTypeName(ASTContext &C) const;
233};
234
235class OptionalAmount {
236public:
237  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
238
239  OptionalAmount(HowSpecified howSpecified,
240                 unsigned amount,
241                 const char *amountStart,
242                 unsigned amountLength,
243                 bool usesPositionalArg)
244  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
245  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
246
247  OptionalAmount(bool valid = true)
248  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
249  UsesPositionalArg(0), UsesDotPrefix(0) {}
250
251  bool isInvalid() const {
252    return hs == Invalid;
253  }
254
255  HowSpecified getHowSpecified() const { return hs; }
256  void setHowSpecified(HowSpecified h) { hs = h; }
257
258  bool hasDataArgument() const { return hs == Arg; }
259
260  unsigned getArgIndex() const {
261    assert(hasDataArgument());
262    return amt;
263  }
264
265  unsigned getConstantAmount() const {
266    assert(hs == Constant);
267    return amt;
268  }
269
270  const char *getStart() const {
271      // We include the . character if it is given.
272    return start - UsesDotPrefix;
273  }
274
275  unsigned getConstantLength() const {
276    assert(hs == Constant);
277    return length + UsesDotPrefix;
278  }
279
280  ArgTypeResult getArgType(ASTContext &Ctx) const;
281
282  void toString(raw_ostream &os) const;
283
284  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
285  unsigned getPositionalArgIndex() const {
286    assert(hasDataArgument());
287    return amt + 1;
288  }
289
290  bool usesDotPrefix() const { return UsesDotPrefix; }
291  void setUsesDotPrefix() { UsesDotPrefix = true; }
292
293private:
294  const char *start;
295  unsigned length;
296  HowSpecified hs;
297  unsigned amt;
298  bool UsesPositionalArg : 1;
299  bool UsesDotPrefix;
300};
301
302
303class FormatSpecifier {
304protected:
305  LengthModifier LM;
306  OptionalAmount FieldWidth;
307  ConversionSpecifier CS;
308    /// Positional arguments, an IEEE extension:
309    ///  IEEE Std 1003.1, 2004 Edition
310    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
311  bool UsesPositionalArg;
312  unsigned argIndex;
313public:
314  FormatSpecifier(bool isPrintf)
315    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
316
317  void setLengthModifier(LengthModifier lm) {
318    LM = lm;
319  }
320
321  void setUsesPositionalArg() { UsesPositionalArg = true; }
322
323  void setArgIndex(unsigned i) {
324    argIndex = i;
325  }
326
327  unsigned getArgIndex() const {
328    return argIndex;
329  }
330
331  unsigned getPositionalArgIndex() const {
332    return argIndex + 1;
333  }
334
335  const LengthModifier &getLengthModifier() const {
336    return LM;
337  }
338
339  const OptionalAmount &getFieldWidth() const {
340    return FieldWidth;
341  }
342
343  void setFieldWidth(const OptionalAmount &Amt) {
344    FieldWidth = Amt;
345  }
346
347  bool usesPositionalArg() const { return UsesPositionalArg; }
348
349  bool hasValidLengthModifier() const;
350};
351
352} // end analyze_format_string namespace
353
354//===----------------------------------------------------------------------===//
355/// Pieces specific to fprintf format strings.
356
357namespace analyze_printf {
358
359class PrintfConversionSpecifier :
360  public analyze_format_string::ConversionSpecifier  {
361public:
362  PrintfConversionSpecifier()
363    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
364
365  PrintfConversionSpecifier(const char *pos, Kind k)
366    : ConversionSpecifier(true, pos, k) {}
367
368  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
369  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
370  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
371                                    kind <= DoubleArgBeg; }
372  unsigned getLength() const {
373      // Conversion specifiers currently only are represented by
374      // single characters, but we be flexible.
375    return 1;
376  }
377
378  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
379    return CS->isPrintfKind();
380  }
381};
382
383using analyze_format_string::ArgTypeResult;
384using analyze_format_string::LengthModifier;
385using analyze_format_string::OptionalAmount;
386using analyze_format_string::OptionalFlag;
387
388class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
389  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
390  OptionalFlag IsLeftJustified; // '-'
391  OptionalFlag HasPlusPrefix; // '+'
392  OptionalFlag HasSpacePrefix; // ' '
393  OptionalFlag HasAlternativeForm; // '#'
394  OptionalFlag HasLeadingZeroes; // '0'
395  OptionalAmount Precision;
396public:
397  PrintfSpecifier() :
398    FormatSpecifier(/* isPrintf = */ true),
399    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
400    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
401
402  static PrintfSpecifier Parse(const char *beg, const char *end);
403
404    // Methods for incrementally constructing the PrintfSpecifier.
405  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
406    CS = cs;
407  }
408  void setHasThousandsGrouping(const char *position) {
409    HasThousandsGrouping = true;
410    HasThousandsGrouping.setPosition(position);
411  }
412  void setIsLeftJustified(const char *position) {
413    IsLeftJustified = true;
414    IsLeftJustified.setPosition(position);
415  }
416  void setHasPlusPrefix(const char *position) {
417    HasPlusPrefix = true;
418    HasPlusPrefix.setPosition(position);
419  }
420  void setHasSpacePrefix(const char *position) {
421    HasSpacePrefix = true;
422    HasSpacePrefix.setPosition(position);
423  }
424  void setHasAlternativeForm(const char *position) {
425    HasAlternativeForm = true;
426    HasAlternativeForm.setPosition(position);
427  }
428  void setHasLeadingZeros(const char *position) {
429    HasLeadingZeroes = true;
430    HasLeadingZeroes.setPosition(position);
431  }
432  void setUsesPositionalArg() { UsesPositionalArg = true; }
433
434    // Methods for querying the format specifier.
435
436  const PrintfConversionSpecifier &getConversionSpecifier() const {
437    return cast<PrintfConversionSpecifier>(CS);
438  }
439
440  void setPrecision(const OptionalAmount &Amt) {
441    Precision = Amt;
442    Precision.setUsesDotPrefix();
443  }
444
445  const OptionalAmount &getPrecision() const {
446    return Precision;
447  }
448
449  bool consumesDataArgument() const {
450    return getConversionSpecifier().consumesDataArgument();
451  }
452
453  /// \brief Returns the builtin type that a data argument
454  /// paired with this format specifier should have.  This method
455  /// will return null if the format specifier does not have
456  /// a matching data argument or the matching argument matches
457  /// more than one type.
458  ArgTypeResult getArgType(ASTContext &Ctx) const;
459
460  const OptionalFlag &hasThousandsGrouping() const {
461      return HasThousandsGrouping;
462  }
463  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
464  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
465  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
466  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
467  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
468  bool usesPositionalArg() const { return UsesPositionalArg; }
469
470    /// Changes the specifier and length according to a QualType, retaining any
471    /// flags or options. Returns true on success, or false when a conversion
472    /// was not successful.
473  bool fixType(QualType QT, const LangOptions &LangOpt);
474
475  void toString(raw_ostream &os) const;
476
477  // Validation methods - to check if any element results in undefined behavior
478  bool hasValidPlusPrefix() const;
479  bool hasValidAlternativeForm() const;
480  bool hasValidLeadingZeros() const;
481  bool hasValidSpacePrefix() const;
482  bool hasValidLeftJustified() const;
483  bool hasValidThousandsGroupingPrefix() const;
484
485  bool hasValidPrecision() const;
486  bool hasValidFieldWidth() const;
487};
488}  // end analyze_printf namespace
489
490//===----------------------------------------------------------------------===//
491/// Pieces specific to fscanf format strings.
492
493namespace analyze_scanf {
494
495class ScanfConversionSpecifier :
496    public analyze_format_string::ConversionSpecifier  {
497public:
498  ScanfConversionSpecifier()
499    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
500
501  ScanfConversionSpecifier(const char *pos, Kind k)
502    : ConversionSpecifier(false, pos, k) {}
503
504  void setEndScanList(const char *pos) { EndScanList = pos; }
505
506  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
507    return !CS->isPrintfKind();
508  }
509};
510
511using analyze_format_string::ArgTypeResult;
512using analyze_format_string::LengthModifier;
513using analyze_format_string::OptionalAmount;
514using analyze_format_string::OptionalFlag;
515
516class ScanfArgTypeResult : public ArgTypeResult {
517public:
518  enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy };
519private:
520  Kind K;
521  ArgTypeResult A;
522  const char *Name;
523  QualType getRepresentativeType(ASTContext &C) const;
524public:
525  ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
526  ScanfArgTypeResult(ArgTypeResult a, const char *n = 0)
527      : K(PtrToArgTypeResultTy), A(a), Name(n) {
528    assert(A.isValid());
529  }
530
531  static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); }
532
533  bool isValid() const { return K != InvalidTy; }
534
535  bool matchesType(ASTContext& C, QualType argTy) const;
536
537  std::string getRepresentativeTypeName(ASTContext& C) const;
538};
539
540class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
541  OptionalFlag SuppressAssignment; // '*'
542public:
543  ScanfSpecifier() :
544    FormatSpecifier(/* isPrintf = */ false),
545    SuppressAssignment("*") {}
546
547  void setSuppressAssignment(const char *position) {
548    SuppressAssignment = true;
549    SuppressAssignment.setPosition(position);
550  }
551
552  const OptionalFlag &getSuppressAssignment() const {
553    return SuppressAssignment;
554  }
555
556  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
557    CS = cs;
558  }
559
560  const ScanfConversionSpecifier &getConversionSpecifier() const {
561    return cast<ScanfConversionSpecifier>(CS);
562  }
563
564  bool consumesDataArgument() const {
565    return CS.consumesDataArgument() && !SuppressAssignment;
566  }
567
568  ScanfArgTypeResult getArgType(ASTContext &Ctx) const;
569
570  bool fixType(QualType QT, const LangOptions &LangOpt);
571
572  void toString(raw_ostream &os) const;
573
574  static ScanfSpecifier Parse(const char *beg, const char *end);
575};
576
577} // end analyze_scanf namespace
578
579//===----------------------------------------------------------------------===//
580// Parsing and processing of format strings (both fprintf and fscanf).
581
582namespace analyze_format_string {
583
/// Context value passed to FormatStringHandler::HandleInvalidPosition.
/// Presumably says whether the offending position belonged to a field
/// width or to a precision - confirm against the parser.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
585
586class FormatStringHandler {
587public:
588  FormatStringHandler() {}
589  virtual ~FormatStringHandler();
590
591  virtual void HandleNullChar(const char *nullCharacter) {}
592
593  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
594                                     PositionContext p) {}
595
596  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
597
598  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
599                                         unsigned specifierLen) {}
600
601  // Printf-specific handlers.
602
603  virtual bool HandleInvalidPrintfConversionSpecifier(
604                                      const analyze_printf::PrintfSpecifier &FS,
605                                      const char *startSpecifier,
606                                      unsigned specifierLen) {
607    return true;
608  }
609
610  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
611                                     const char *startSpecifier,
612                                     unsigned specifierLen) {
613    return true;
614  }
615
616    // Scanf-specific handlers.
617
618  virtual bool HandleInvalidScanfConversionSpecifier(
619                                        const analyze_scanf::ScanfSpecifier &FS,
620                                        const char *startSpecifier,
621                                        unsigned specifierLen) {
622    return true;
623  }
624
625  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
626                                    const char *startSpecifier,
627                                    unsigned specifierLen) {
628    return true;
629  }
630
631  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
632};
633
634bool ParsePrintfString(FormatStringHandler &H,
635                       const char *beg, const char *end, const LangOptions &LO);
636
637bool ParseScanfString(FormatStringHandler &H,
638                      const char *beg, const char *end, const LangOptions &LO);
639
640} // end analyze_format_string namespace
641} // end clang namespace
642#endif
643