FormatString.h revision 6ecb950c65329f8d6ce9ad0514632df35a5ab61f
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// An optional boolean flag of a format specifier.
///
/// Besides its on/off state the flag carries the textual representation
/// supplied at construction and, once setPosition() has been called, a
/// pointer recording where the flag was seen.
class OptionalFlag {
public:
  /// Creates a cleared flag whose toString() yields \p Representation.
  /// The position starts out null until setPosition() is called, so that the
  /// assertion in getPosition() tests a well-defined value.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  This is const so that it can
  /// be called through a const reference to the flag.
  bool isSet() const { return flag; }
  /// Turns the flag on.
  void set() { flag = true; }
  /// Turns the flag off.
  void clear() { flag = false; }

  /// Records the position of the flag; \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string given at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool-like qualities.

  /// Yields the current state of the flag.
  operator bool() const { return flag; }

  /// Sets the state of the flag to \p rhs and returns this object.
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;
  }

private:
  const char *representation;
  const char *position;
  bool flag;
};
60
/// Represents the length modifier in a format string in scanf/printf,
/// together with the position at which it starts.
class LengthModifier {
public:
  /// The kinds of length modifier.  The trailing comment on each enumerator
  /// gives its spelling in a format string.
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll', 'q' (BSD, deprecated)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates a modifier of kind None with a null start position.
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position given at construction.
  const char *getStart() const { return Position; }

  /// Returns the number of characters in the modifier's spelling: 0 for
  /// None, 2 for AsChar and AsLongLong, and 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
107
108class ConversionSpecifier {
109public:
110  enum Kind {
111    InvalidSpecifier = 0,
112      // C99 conversion specifiers.
113    cArg,
114    dArg,
115    iArg,
116    IntArgBeg = cArg, IntArgEnd = iArg,
117
118    oArg,
119    uArg,
120    xArg,
121    XArg,
122    UIntArgBeg = oArg, UIntArgEnd = XArg,
123
124    fArg,
125    FArg,
126    eArg,
127    EArg,
128    gArg,
129    GArg,
130    aArg,
131    AArg,
132    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
133
134    sArg,
135    pArg,
136    nArg,
137    PercentArg,
138    CArg,
139    SArg,
140
141    // ** Printf-specific **
142
143    // Objective-C specific specifiers.
144    ObjCObjArg,  // '@'
145    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
146
147    // GlibC specific specifiers.
148    PrintErrno,   // 'm'
149
150    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno
151  };
152
153  ConversionSpecifier(bool isPrintf)
154    : IsPrintf(isPrintf), Position(0), kind(InvalidSpecifier) {}
155
156  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
157    : IsPrintf(isPrintf), Position(pos), kind(k) {}
158
159  const char *getStart() const {
160    return Position;
161  }
162
163  llvm::StringRef getCharacters() const {
164    return llvm::StringRef(getStart(), getLength());
165  }
166
167  Kind getKind() const { return kind; }
168  void setKind(Kind k) { kind = k; }
169  unsigned getLength() const {
170      // Conversion specifiers currently only are represented by
171      // single characters, but we be flexible.
172    return 1;
173  }
174  const char *toString() const;
175
176  bool isPrintfKind() const { return IsPrintf; }
177
178protected:
179  bool IsPrintf;
180  const char *Position;
181  Kind kind;
182};
183
184class ArgTypeResult {
185public:
186  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
187    CStrTy, WCStrTy };
188private:
189  const Kind K;
190  QualType T;
191  ArgTypeResult(bool) : K(InvalidTy) {}
192public:
193  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
194  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
195  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
196
197  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
198
199  bool isValid() const { return K != InvalidTy; }
200
201  const QualType *getSpecificType() const {
202    return K == SpecificTy ? &T : 0;
203  }
204
205  bool matchesType(ASTContext &C, QualType argTy) const;
206
207  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
208
209  QualType getRepresentativeType(ASTContext &C) const;
210};
211
212class OptionalAmount {
213public:
214  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
215
216  OptionalAmount(HowSpecified howSpecified,
217                 unsigned amount,
218                 const char *amountStart,
219                 unsigned amountLength,
220                 bool usesPositionalArg)
221  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
222  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
223
224  OptionalAmount(bool valid = true)
225  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
226  UsesPositionalArg(0), UsesDotPrefix(0) {}
227
228  bool isInvalid() const {
229    return hs == Invalid;
230  }
231
232  HowSpecified getHowSpecified() const { return hs; }
233  void setHowSpecified(HowSpecified h) { hs = h; }
234
235  bool hasDataArgument() const { return hs == Arg; }
236
237  unsigned getArgIndex() const {
238    assert(hasDataArgument());
239    return amt;
240  }
241
242  unsigned getConstantAmount() const {
243    assert(hs == Constant);
244    return amt;
245  }
246
247  const char *getStart() const {
248      // We include the . character if it is given.
249    return start - UsesDotPrefix;
250  }
251
252  unsigned getConstantLength() const {
253    assert(hs == Constant);
254    return length + UsesDotPrefix;
255  }
256
257  ArgTypeResult getArgType(ASTContext &Ctx) const;
258
259  void toString(llvm::raw_ostream &os) const;
260
261  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
262  unsigned getPositionalArgIndex() const {
263    assert(hasDataArgument());
264    return amt + 1;
265  }
266
267  bool usesDotPrefix() const { return UsesDotPrefix; }
268  void setUsesDotPrefix() { UsesDotPrefix = true; }
269
270private:
271  const char *start;
272  unsigned length;
273  HowSpecified hs;
274  unsigned amt;
275  bool UsesPositionalArg : 1;
276  bool UsesDotPrefix;
277};
278
279
280class FormatSpecifier {
281protected:
282  LengthModifier LM;
283  OptionalAmount FieldWidth;
284    /// Positional arguments, an IEEE extension:
285    ///  IEEE Std 1003.1, 2004 Edition
286    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
287  bool UsesPositionalArg;
288  unsigned argIndex;
289public:
290  FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {}
291
292  void setLengthModifier(LengthModifier lm) {
293    LM = lm;
294  }
295
296  void setUsesPositionalArg() { UsesPositionalArg = true; }
297
298  void setArgIndex(unsigned i) {
299    argIndex = i;
300  }
301
302  unsigned getArgIndex() const {
303    return argIndex;
304  }
305
306  unsigned getPositionalArgIndex() const {
307    return argIndex + 1;
308  }
309
310  const LengthModifier &getLengthModifier() const {
311    return LM;
312  }
313
314  const OptionalAmount &getFieldWidth() const {
315    return FieldWidth;
316  }
317
318  void setFieldWidth(const OptionalAmount &Amt) {
319    FieldWidth = Amt;
320  }
321
322  bool usesPositionalArg() const { return UsesPositionalArg; }
323};
324
325} // end analyze_format_string namespace
326
327//===----------------------------------------------------------------------===//
328/// Pieces specific to fprintf format strings.
329
330namespace analyze_printf {
331
332class PrintfConversionSpecifier :
333  public analyze_format_string::ConversionSpecifier  {
334public:
335  PrintfConversionSpecifier()
336    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
337
338  PrintfConversionSpecifier(const char *pos, Kind k)
339    : ConversionSpecifier(true, pos, k) {}
340
341  bool consumesDataArgument() const {
342    switch (kind) {
343      case PercentArg:
344      case PrintErrno:
345        return false;
346      default:
347        return true;
348    }
349  }
350
351  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
352  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
353  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
354  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
355                                    kind <= DoubleArgBeg; }
356  unsigned getLength() const {
357      // Conversion specifiers currently only are represented by
358      // single characters, but we be flexible.
359    return 1;
360  }
361
362  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
363    return CS->isPrintfKind();
364  }
365};
366
367using analyze_format_string::ArgTypeResult;
368using analyze_format_string::LengthModifier;
369using analyze_format_string::OptionalAmount;
370using analyze_format_string::OptionalFlag;
371
372class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
373  OptionalFlag IsLeftJustified; // '-'
374  OptionalFlag HasPlusPrefix; // '+'
375  OptionalFlag HasSpacePrefix; // ' '
376  OptionalFlag HasAlternativeForm; // '#'
377  OptionalFlag HasLeadingZeroes; // '0'
378  analyze_format_string::ConversionSpecifier CS;
379  OptionalAmount Precision;
380public:
381  PrintfSpecifier() :
382    IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
383    HasAlternativeForm("#"), HasLeadingZeroes("0"), CS(/* isPrintf = */ true) {}
384
385  static PrintfSpecifier Parse(const char *beg, const char *end);
386
387    // Methods for incrementally constructing the PrintfSpecifier.
388  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
389    CS = cs;
390  }
391  void setIsLeftJustified(const char *position) {
392    IsLeftJustified = true;
393    IsLeftJustified.setPosition(position);
394  }
395  void setHasPlusPrefix(const char *position) {
396    HasPlusPrefix = true;
397    HasPlusPrefix.setPosition(position);
398  }
399  void setHasSpacePrefix(const char *position) {
400    HasSpacePrefix = true;
401    HasSpacePrefix.setPosition(position);
402  }
403  void setHasAlternativeForm(const char *position) {
404    HasAlternativeForm = true;
405    HasAlternativeForm.setPosition(position);
406  }
407  void setHasLeadingZeros(const char *position) {
408    HasLeadingZeroes = true;
409    HasLeadingZeroes.setPosition(position);
410  }
411  void setUsesPositionalArg() { UsesPositionalArg = true; }
412
413    // Methods for querying the format specifier.
414
415  const PrintfConversionSpecifier &getConversionSpecifier() const {
416    return cast<PrintfConversionSpecifier>(CS);
417  }
418
419  void setPrecision(const OptionalAmount &Amt) {
420    Precision = Amt;
421    Precision.setUsesDotPrefix();
422  }
423
424  const OptionalAmount &getPrecision() const {
425    return Precision;
426  }
427
428  bool consumesDataArgument() const {
429    return getConversionSpecifier().consumesDataArgument();
430  }
431
432  /// \brief Returns the builtin type that a data argument
433  /// paired with this format specifier should have.  This method
434  /// will return null if the format specifier does not have
435  /// a matching data argument or the matching argument matches
436  /// more than one type.
437  ArgTypeResult getArgType(ASTContext &Ctx) const;
438
439  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
440  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
441  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
442  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
443  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
444  bool usesPositionalArg() const { return UsesPositionalArg; }
445
446    /// Changes the specifier and length according to a QualType, retaining any
447    /// flags or options. Returns true on success, or false when a conversion
448    /// was not successful.
449  bool fixType(QualType QT);
450
451  void toString(llvm::raw_ostream &os) const;
452
453    // Validation methods - to check if any element results in undefined behavior
454  bool hasValidPlusPrefix() const;
455  bool hasValidAlternativeForm() const;
456  bool hasValidLeadingZeros() const;
457  bool hasValidSpacePrefix() const;
458  bool hasValidLeftJustified() const;
459
460  bool hasValidLengthModifier() const;
461  bool hasValidPrecision() const;
462  bool hasValidFieldWidth() const;
463};
464}  // end analyze_printf namespace
465
466//===----------------------------------------------------------------------===//
467/// Pieces specific to fscanf format strings.
468
469namespace analyze_scanf {
470
471class ScanfConversionSpecifier {
472public:
473  enum Kind {
474    InvalidSpecifier = 0,
475      // C99 conversion specifiers.
476    dArg, // 'd'
477    iArg, // 'i',
478    oArg, // 'o',
479    uArg, // 'u',
480    xArg, // 'x',
481    XArg, // 'X',
482    fArg, // 'f',
483    FArg, // 'F',
484    eArg, // 'e',
485    EArg, // 'E',
486    gArg, // 'g',
487    GArg, // 'G',
488    aArg, // 'a',
489    AArg, // 'A',
490    sArg, // 's', // match sequence of non-write-space characters
491    pArg,        // 'p'
492    cArg,              // 'c', differs from printf, writes array of characters
493    nArg,  // 'n', differs from printf, writes back args consumed
494    PercentArg,        // '%'
495    ScanListArg,       // '[' followed by scan list
496      // IEEE Std 1003.1 extensions.
497    CArg, // 'C', same as writing 'lc'
498    SArg, // 'S', same as writing 'ls'
499      // Specifier ranges.
500    IntArgBeg = dArg,
501    IntArgEnd = iArg,
502    UIntArgBeg = oArg,
503    UIntArgEnd = XArg,
504    DoubleArgBeg = fArg,
505    DoubleArgEnd = AArg
506  };
507
508  ScanfConversionSpecifier()
509  : Position(0), EndScanList(0), kind(InvalidSpecifier) {}
510
511  ScanfConversionSpecifier(const char *pos, Kind k)
512  : Position(pos), EndScanList(0), kind(k) {}
513
514  const char *getStart() const {
515    return Position;
516  }
517
518  void setEndScanList(const char *pos) { EndScanList = pos; }
519
520  llvm::StringRef getCharacters() const {
521    return llvm::StringRef(getStart(), getLength());
522  }
523
524  bool consumesDataArgument() const {
525    return kind != PercentArg;
526  }
527
528  bool isIntArg() const { return kind >= dArg && kind <= iArg; }
529  bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
530  bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
531  Kind getKind() const { return kind; }
532  void setKind(Kind k) { kind = k; }
533
534  unsigned getLength() const {
535    return EndScanList ? EndScanList - Position : 1;
536  }
537
538  const char *toString() const;
539
540private:
541  const char *Position;
542  const char *EndScanList;
543  Kind kind;
544};
545
546using analyze_format_string::LengthModifier;
547using analyze_format_string::OptionalAmount;
548using analyze_format_string::OptionalFlag;
549
550class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
551  OptionalFlag SuppressAssignment; // '*'
552  ScanfConversionSpecifier CS;
553public:
554  ScanfSpecifier() : SuppressAssignment("*") {}
555
556  void setSuppressAssignment(const char *position) {
557    SuppressAssignment = true;
558    SuppressAssignment.setPosition(position);
559  }
560
561  const OptionalFlag &getSuppressAssignment() const {
562    return SuppressAssignment;
563  }
564
565  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
566    CS = cs;
567  }
568
569  const ScanfConversionSpecifier &getConversionSpecifier() const {
570    return CS;
571  }
572
573  bool consumesDataArgument() const {
574    return CS.consumesDataArgument() && !SuppressAssignment;
575  }
576
577  static ScanfSpecifier Parse(const char *beg, const char *end);
578
579};
580
581} // end analyze_scanf namespace
582
583//===----------------------------------------------------------------------===//
584// Parsing and processing of format strings (both fprintf and fscanf).
585
586namespace analyze_format_string {
587
/// Passed to FormatStringHandler::HandleInvalidPosition.  The enumerator
/// names suggest it says whether the position belonged to a field width or
/// to a precision; confirm against the parser.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
589
590class FormatStringHandler {
591public:
592  FormatStringHandler() {}
593  virtual ~FormatStringHandler();
594
595  virtual void HandleNullChar(const char *nullCharacter) {}
596
597  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
598                                     PositionContext p) {}
599
600  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
601
602  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
603                                         unsigned specifierLen) {}
604
605  // Printf-specific handlers.
606
607  virtual bool HandleInvalidPrintfConversionSpecifier(
608                                      const analyze_printf::PrintfSpecifier &FS,
609                                      const char *startSpecifier,
610                                      unsigned specifierLen) {
611    return true;
612  }
613
614  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
615                                     const char *startSpecifier,
616                                     unsigned specifierLen) {
617    return true;
618  }
619
620    // Scanf-specific handlers.
621
622  virtual bool HandleInvalidScanfConversionSpecifier(
623                                        const analyze_scanf::ScanfSpecifier &FS,
624                                        const char *startSpecifier,
625                                        unsigned specifierLen) {
626    return true;
627  }
628
629  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
630                                    const char *startSpecifier,
631                                    unsigned specifierLen) {
632    return true;
633  }
634
635  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
636};
637
638bool ParsePrintfString(FormatStringHandler &H,
639                       const char *beg, const char *end);
640
641bool ParseScanfString(FormatStringHandler &H,
642                       const char *beg, const char *end);
643
644} // end analyze_format_string namespace
645} // end clang namespace
646#endif
647