FormatString.h revision f85626453123f9691bcef13cff963f556e209c27
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// Class representing optional flags with location and representation
/// information.
///
/// A flag bundles three pieces of state: the textual spelling it was created
/// with (e.g. "-" or "#"), whether it is currently set, and the position
/// recorded through setPosition().
class OptionalFlag {
public:
  /// Creates an unset flag whose spelling is \p Representation.  No position
  /// is recorded until setPosition() is called, so the stored position starts
  /// out null instead of indeterminate.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  Const so that it can be
  /// queried through a const reference.
  bool isSet() const { return flag; }

  /// Marks the flag as set.
  void set() { flag = true; }

  /// Marks the flag as not set.  The recorded position is left untouched.
  void clear() { flag = false; }

  /// Records the position associated with this flag.  \p position must be
  /// non-null (checked with an assertion).
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the position recorded by setPosition().  Asserts that a
  /// position has actually been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the spelling the flag was constructed with.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;  // Null until setPosition() has been called.
  bool flag;
};
60
/// Represents the length modifier in a format string in scanf/printf.
///
/// Stores the kind of modifier together with a pointer to where the modifier
/// starts.
class LengthModifier {
public:
  /// The recognised modifiers.  The trailing comment on each enumerator
  /// shows its spelling in a format string.
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds an empty modifier: kind None and a null start pointer.
  LengthModifier() : Position(0), kind(None) {}

  /// Builds a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the pointer to the first character of the modifier.
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier occupies: 0 for None,
  /// 2 for the two-character spellings ('hh' and 'll'), and 1 otherwise.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
110
111class ConversionSpecifier {
112public:
113  enum Kind {
114    InvalidSpecifier = 0,
115      // C99 conversion specifiers.
116    cArg,
117    dArg,
118    iArg,
119    IntArgBeg = cArg, IntArgEnd = iArg,
120
121    oArg,
122    uArg,
123    xArg,
124    XArg,
125    UIntArgBeg = oArg, UIntArgEnd = XArg,
126
127    fArg,
128    FArg,
129    eArg,
130    EArg,
131    gArg,
132    GArg,
133    aArg,
134    AArg,
135    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
136
137    sArg,
138    pArg,
139    nArg,
140    PercentArg,
141    CArg,
142    SArg,
143
144    // ** Printf-specific **
145
146    // Objective-C specific specifiers.
147    ObjCObjArg,  // '@'
148    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
149
150    // GlibC specific specifiers.
151    PrintErrno,   // 'm'
152
153    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
154
155    // ** Scanf-specific **
156    ScanListArg, // '['
157    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
158  };
159
160  ConversionSpecifier(bool isPrintf)
161    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
162
163  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
164    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
165
166  const char *getStart() const {
167    return Position;
168  }
169
170  StringRef getCharacters() const {
171    return StringRef(getStart(), getLength());
172  }
173
174  bool consumesDataArgument() const {
175    switch (kind) {
176      case PrintErrno:
177        assert(IsPrintf);
178      case PercentArg:
179        return false;
180      default:
181        return true;
182    }
183  }
184
185  Kind getKind() const { return kind; }
186  void setKind(Kind k) { kind = k; }
187  unsigned getLength() const {
188    return EndScanList ? EndScanList - Position : 1;
189  }
190
191  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
192  const char *toString() const;
193
194  bool isPrintfKind() const { return IsPrintf; }
195
196protected:
197  bool IsPrintf;
198  const char *Position;
199  const char *EndScanList;
200  Kind kind;
201};
202
203class ArgTypeResult {
204public:
205  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
206              AnyCharTy, CStrTy, WCStrTy, WIntTy };
207private:
208  const Kind K;
209  QualType T;
210  const char *Name;
211  ArgTypeResult(bool) : K(InvalidTy), Name(0) {}
212public:
213  ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {}
214  ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {}
215  ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {}
216  ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n)  {}
217  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
218
219  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
220
221  bool isValid() const { return K != InvalidTy; }
222
223  const QualType *getSpecificType() const {
224    return K == SpecificTy ? &T : 0;
225  }
226
227  bool matchesType(ASTContext &C, QualType argTy) const;
228
229  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
230
231  QualType getRepresentativeType(ASTContext &C) const;
232
233  std::string getRepresentativeTypeName(ASTContext &C) const;
234};
235
236class OptionalAmount {
237public:
238  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
239
240  OptionalAmount(HowSpecified howSpecified,
241                 unsigned amount,
242                 const char *amountStart,
243                 unsigned amountLength,
244                 bool usesPositionalArg)
245  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
246  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
247
248  OptionalAmount(bool valid = true)
249  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
250  UsesPositionalArg(0), UsesDotPrefix(0) {}
251
252  bool isInvalid() const {
253    return hs == Invalid;
254  }
255
256  HowSpecified getHowSpecified() const { return hs; }
257  void setHowSpecified(HowSpecified h) { hs = h; }
258
259  bool hasDataArgument() const { return hs == Arg; }
260
261  unsigned getArgIndex() const {
262    assert(hasDataArgument());
263    return amt;
264  }
265
266  unsigned getConstantAmount() const {
267    assert(hs == Constant);
268    return amt;
269  }
270
271  const char *getStart() const {
272      // We include the . character if it is given.
273    return start - UsesDotPrefix;
274  }
275
276  unsigned getConstantLength() const {
277    assert(hs == Constant);
278    return length + UsesDotPrefix;
279  }
280
281  ArgTypeResult getArgType(ASTContext &Ctx) const;
282
283  void toString(raw_ostream &os) const;
284
285  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
286  unsigned getPositionalArgIndex() const {
287    assert(hasDataArgument());
288    return amt + 1;
289  }
290
291  bool usesDotPrefix() const { return UsesDotPrefix; }
292  void setUsesDotPrefix() { UsesDotPrefix = true; }
293
294private:
295  const char *start;
296  unsigned length;
297  HowSpecified hs;
298  unsigned amt;
299  bool UsesPositionalArg : 1;
300  bool UsesDotPrefix;
301};
302
303
304class FormatSpecifier {
305protected:
306  LengthModifier LM;
307  OptionalAmount FieldWidth;
308  ConversionSpecifier CS;
309  /// Positional arguments, an IEEE extension:
310  ///  IEEE Std 1003.1, 2004 Edition
311  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
312  bool UsesPositionalArg;
313  unsigned argIndex;
314public:
315  FormatSpecifier(bool isPrintf)
316    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
317
318  void setLengthModifier(LengthModifier lm) {
319    LM = lm;
320  }
321
322  void setUsesPositionalArg() { UsesPositionalArg = true; }
323
324  void setArgIndex(unsigned i) {
325    argIndex = i;
326  }
327
328  unsigned getArgIndex() const {
329    return argIndex;
330  }
331
332  unsigned getPositionalArgIndex() const {
333    return argIndex + 1;
334  }
335
336  const LengthModifier &getLengthModifier() const {
337    return LM;
338  }
339
340  const OptionalAmount &getFieldWidth() const {
341    return FieldWidth;
342  }
343
344  void setFieldWidth(const OptionalAmount &Amt) {
345    FieldWidth = Amt;
346  }
347
348  bool usesPositionalArg() const { return UsesPositionalArg; }
349
350  bool hasValidLengthModifier() const;
351
352  bool hasStandardLengthModifier() const;
353
354  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
355
356  bool hasStandardLengthConversionCombination() const;
357};
358
359} // end analyze_format_string namespace
360
361//===----------------------------------------------------------------------===//
362/// Pieces specific to fprintf format strings.
363
364namespace analyze_printf {
365
366class PrintfConversionSpecifier :
367  public analyze_format_string::ConversionSpecifier  {
368public:
369  PrintfConversionSpecifier()
370    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
371
372  PrintfConversionSpecifier(const char *pos, Kind k)
373    : ConversionSpecifier(true, pos, k) {}
374
375  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
376  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
377  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
378                                    kind <= DoubleArgEnd; }
379  unsigned getLength() const {
380      // Conversion specifiers currently only are represented by
381      // single characters, but we be flexible.
382    return 1;
383  }
384
385  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
386    return CS->isPrintfKind();
387  }
388};
389
390using analyze_format_string::ArgTypeResult;
391using analyze_format_string::LengthModifier;
392using analyze_format_string::OptionalAmount;
393using analyze_format_string::OptionalFlag;
394
395class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
396  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
397  OptionalFlag IsLeftJustified; // '-'
398  OptionalFlag HasPlusPrefix; // '+'
399  OptionalFlag HasSpacePrefix; // ' '
400  OptionalFlag HasAlternativeForm; // '#'
401  OptionalFlag HasLeadingZeroes; // '0'
402  OptionalAmount Precision;
403public:
404  PrintfSpecifier() :
405    FormatSpecifier(/* isPrintf = */ true),
406    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
407    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
408
409  static PrintfSpecifier Parse(const char *beg, const char *end);
410
411    // Methods for incrementally constructing the PrintfSpecifier.
412  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
413    CS = cs;
414  }
415  void setHasThousandsGrouping(const char *position) {
416    HasThousandsGrouping = true;
417    HasThousandsGrouping.setPosition(position);
418  }
419  void setIsLeftJustified(const char *position) {
420    IsLeftJustified = true;
421    IsLeftJustified.setPosition(position);
422  }
423  void setHasPlusPrefix(const char *position) {
424    HasPlusPrefix = true;
425    HasPlusPrefix.setPosition(position);
426  }
427  void setHasSpacePrefix(const char *position) {
428    HasSpacePrefix = true;
429    HasSpacePrefix.setPosition(position);
430  }
431  void setHasAlternativeForm(const char *position) {
432    HasAlternativeForm = true;
433    HasAlternativeForm.setPosition(position);
434  }
435  void setHasLeadingZeros(const char *position) {
436    HasLeadingZeroes = true;
437    HasLeadingZeroes.setPosition(position);
438  }
439  void setUsesPositionalArg() { UsesPositionalArg = true; }
440
441    // Methods for querying the format specifier.
442
443  const PrintfConversionSpecifier &getConversionSpecifier() const {
444    return cast<PrintfConversionSpecifier>(CS);
445  }
446
447  void setPrecision(const OptionalAmount &Amt) {
448    Precision = Amt;
449    Precision.setUsesDotPrefix();
450  }
451
452  const OptionalAmount &getPrecision() const {
453    return Precision;
454  }
455
456  bool consumesDataArgument() const {
457    return getConversionSpecifier().consumesDataArgument();
458  }
459
460  /// \brief Returns the builtin type that a data argument
461  /// paired with this format specifier should have.  This method
462  /// will return null if the format specifier does not have
463  /// a matching data argument or the matching argument matches
464  /// more than one type.
465  ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
466
467  const OptionalFlag &hasThousandsGrouping() const {
468      return HasThousandsGrouping;
469  }
470  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
471  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
472  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
473  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
474  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
475  bool usesPositionalArg() const { return UsesPositionalArg; }
476
477  /// Changes the specifier and length according to a QualType, retaining any
478  /// flags or options. Returns true on success, or false when a conversion
479  /// was not successful.
480  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
481               bool IsObjCLiteral);
482
483  void toString(raw_ostream &os) const;
484
485  // Validation methods - to check if any element results in undefined behavior
486  bool hasValidPlusPrefix() const;
487  bool hasValidAlternativeForm() const;
488  bool hasValidLeadingZeros() const;
489  bool hasValidSpacePrefix() const;
490  bool hasValidLeftJustified() const;
491  bool hasValidThousandsGroupingPrefix() const;
492
493  bool hasValidPrecision() const;
494  bool hasValidFieldWidth() const;
495};
496}  // end analyze_printf namespace
497
498//===----------------------------------------------------------------------===//
499/// Pieces specific to fscanf format strings.
500
501namespace analyze_scanf {
502
503class ScanfConversionSpecifier :
504    public analyze_format_string::ConversionSpecifier  {
505public:
506  ScanfConversionSpecifier()
507    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
508
509  ScanfConversionSpecifier(const char *pos, Kind k)
510    : ConversionSpecifier(false, pos, k) {}
511
512  void setEndScanList(const char *pos) { EndScanList = pos; }
513
514  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
515    return !CS->isPrintfKind();
516  }
517};
518
519using analyze_format_string::ArgTypeResult;
520using analyze_format_string::LengthModifier;
521using analyze_format_string::OptionalAmount;
522using analyze_format_string::OptionalFlag;
523
524class ScanfArgTypeResult : public ArgTypeResult {
525public:
526  enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy };
527private:
528  Kind K;
529  ArgTypeResult A;
530  const char *Name;
531  QualType getRepresentativeType(ASTContext &C) const;
532public:
533  ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
534  ScanfArgTypeResult(ArgTypeResult a, const char *n = 0)
535      : K(PtrToArgTypeResultTy), A(a), Name(n) {
536    assert(A.isValid());
537  }
538
539  static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); }
540
541  bool isValid() const { return K != InvalidTy; }
542
543  bool matchesType(ASTContext& C, QualType argTy) const;
544
545  std::string getRepresentativeTypeName(ASTContext& C) const;
546};
547
548class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
549  OptionalFlag SuppressAssignment; // '*'
550public:
551  ScanfSpecifier() :
552    FormatSpecifier(/* isPrintf = */ false),
553    SuppressAssignment("*") {}
554
555  void setSuppressAssignment(const char *position) {
556    SuppressAssignment = true;
557    SuppressAssignment.setPosition(position);
558  }
559
560  const OptionalFlag &getSuppressAssignment() const {
561    return SuppressAssignment;
562  }
563
564  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
565    CS = cs;
566  }
567
568  const ScanfConversionSpecifier &getConversionSpecifier() const {
569    return cast<ScanfConversionSpecifier>(CS);
570  }
571
572  bool consumesDataArgument() const {
573    return CS.consumesDataArgument() && !SuppressAssignment;
574  }
575
576  ScanfArgTypeResult getArgType(ASTContext &Ctx) const;
577
578  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
579
580  void toString(raw_ostream &os) const;
581
582  static ScanfSpecifier Parse(const char *beg, const char *end);
583};
584
585} // end analyze_scanf namespace
586
587//===----------------------------------------------------------------------===//
588// Parsing and processing of format strings (both fprintf and fscanf).
589
590namespace analyze_format_string {
591
/// Identifies which part of a specifier a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
593
594class FormatStringHandler {
595public:
596  FormatStringHandler() {}
597  virtual ~FormatStringHandler();
598
599  virtual void HandleNullChar(const char *nullCharacter) {}
600
601  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
602
603  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
604                                     PositionContext p) {}
605
606  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
607
608  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
609                                         unsigned specifierLen) {}
610
611  // Printf-specific handlers.
612
613  virtual bool HandleInvalidPrintfConversionSpecifier(
614                                      const analyze_printf::PrintfSpecifier &FS,
615                                      const char *startSpecifier,
616                                      unsigned specifierLen) {
617    return true;
618  }
619
620  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
621                                     const char *startSpecifier,
622                                     unsigned specifierLen) {
623    return true;
624  }
625
626    // Scanf-specific handlers.
627
628  virtual bool HandleInvalidScanfConversionSpecifier(
629                                        const analyze_scanf::ScanfSpecifier &FS,
630                                        const char *startSpecifier,
631                                        unsigned specifierLen) {
632    return true;
633  }
634
635  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
636                                    const char *startSpecifier,
637                                    unsigned specifierLen) {
638    return true;
639  }
640
641  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
642};
643
644bool ParsePrintfString(FormatStringHandler &H,
645                       const char *beg, const char *end, const LangOptions &LO);
646
647bool ParseScanfString(FormatStringHandler &H,
648                      const char *beg, const char *end, const LangOptions &LO);
649
650} // end analyze_format_string namespace
651} // end clang namespace
652#endif
653