FormatString.h revision 826a3457f737f1fc45a22954fd1bfde38160c165
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// An optional boolean flag of a format specifier.  Besides its on/off state
/// the flag carries a fixed textual representation (returned by toString())
/// and, once setPosition() has been called, a pointer recording where the flag
/// was seen.
class OptionalFlag {
public:
  /// Creates an unset flag whose textual form is \p Representation.
  ///
  /// The position starts out null so that getPosition() trips its assertion
  /// when no position was ever recorded, rather than reading an indeterminate
  /// pointer.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  Declared const so it can be
  /// used through const references to the flag.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag was found; \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the textual representation passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators for bool-like behavior.
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
60
/// Represents the length modifier in a format string in scanf/printf: which
/// modifier was written (its Kind) and a pointer to where it starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll', 'q' (BSD, deprecated)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an empty modifier (kind None, null start position).
  LengthModifier()
    : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k)
    : Position(pos), kind(k) {}

  const char *getStart() const {
    return Position;
  }

  /// Returns the number of characters the modifier occupies: 0 for None,
  /// 2 for 'hh' and 'll', and 1 for everything else.
  ///
  /// AsLongLong is shared by 'll' and the single-character BSD spelling 'q',
  /// so the kind alone cannot give the length; the character at the start
  /// position is consulted to tell the two apart.
  /// NOTE(review): assumes a non-null Position points at the first character
  /// of the modifier, as getStart() suggests -- confirm against the parser.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar)
      return 2;
    if (kind == AsLongLong) {
      const bool isQuadSpelling = Position && *Position == 'q';
      return isQuadSpelling ? 1 : 2;
    }
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
107
108class ArgTypeResult {
109public:
110  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
111    CStrTy, WCStrTy };
112private:
113  const Kind K;
114  QualType T;
115  ArgTypeResult(bool) : K(InvalidTy) {}
116public:
117  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
118  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
119  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
120
121  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
122
123  bool isValid() const { return K != InvalidTy; }
124
125  const QualType *getSpecificType() const {
126    return K == SpecificTy ? &T : 0;
127  }
128
129  bool matchesType(ASTContext &C, QualType argTy) const;
130
131  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
132
133  QualType getRepresentativeType(ASTContext &C) const;
134};
135
136class OptionalAmount {
137public:
138  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
139
140  OptionalAmount(HowSpecified howSpecified,
141                 unsigned amount,
142                 const char *amountStart,
143                 unsigned amountLength,
144                 bool usesPositionalArg)
145  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
146  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
147
148  OptionalAmount(bool valid = true)
149  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
150  UsesPositionalArg(0), UsesDotPrefix(0) {}
151
152  bool isInvalid() const {
153    return hs == Invalid;
154  }
155
156  HowSpecified getHowSpecified() const { return hs; }
157  void setHowSpecified(HowSpecified h) { hs = h; }
158
159  bool hasDataArgument() const { return hs == Arg; }
160
161  unsigned getArgIndex() const {
162    assert(hasDataArgument());
163    return amt;
164  }
165
166  unsigned getConstantAmount() const {
167    assert(hs == Constant);
168    return amt;
169  }
170
171  const char *getStart() const {
172      // We include the . character if it is given.
173    return start - UsesDotPrefix;
174  }
175
176  unsigned getConstantLength() const {
177    assert(hs == Constant);
178    return length + UsesDotPrefix;
179  }
180
181  ArgTypeResult getArgType(ASTContext &Ctx) const;
182
183  void toString(llvm::raw_ostream &os) const;
184
185  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
186  unsigned getPositionalArgIndex() const {
187    assert(hasDataArgument());
188    return amt + 1;
189  }
190
191  bool usesDotPrefix() const { return UsesDotPrefix; }
192  void setUsesDotPrefix() { UsesDotPrefix = true; }
193
194private:
195  const char *start;
196  unsigned length;
197  HowSpecified hs;
198  unsigned amt;
199  bool UsesPositionalArg : 1;
200  bool UsesDotPrefix;
201};
202
203
204class FormatSpecifier {
205protected:
206  LengthModifier LM;
207  OptionalAmount FieldWidth;
208    /// Positional arguments, an IEEE extension:
209    ///  IEEE Std 1003.1, 2004 Edition
210    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
211  bool UsesPositionalArg;
212  unsigned argIndex;
213public:
214  FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {}
215
216  void setLengthModifier(LengthModifier lm) {
217    LM = lm;
218  }
219
220  void setUsesPositionalArg() { UsesPositionalArg = true; }
221
222  void setArgIndex(unsigned i) {
223      // assert(CS.consumesDataArgument());
224    argIndex = i;
225  }
226
227  unsigned getArgIndex() const {
228      //assert(CS.consumesDataArgument());
229    return argIndex;
230  }
231
232  unsigned getPositionalArgIndex() const {
233      //assert(CS.consumesDataArgument());
234    return argIndex + 1;
235  }
236
237  const LengthModifier &getLengthModifier() const {
238    return LM;
239  }
240
241  const OptionalAmount &getFieldWidth() const {
242    return FieldWidth;
243  }
244
245  void setFieldWidth(const OptionalAmount &Amt) {
246    FieldWidth = Amt;
247  }
248
249  bool usesPositionalArg() const { return UsesPositionalArg; }
250};
251
252} // end analyze_format_string namespace
253
254//===----------------------------------------------------------------------===//
255/// Pieces specific to fprintf format strings.
256
257namespace analyze_printf {
258
259class ConversionSpecifier {
260public:
261  enum Kind {
262    InvalidSpecifier = 0,
263      // C99 conversion specifiers.
264    dArg, // 'd'
265    IntAsCharArg,  // 'c'
266    iArg, // 'i',
267    oArg, // 'o',
268    uArg, // 'u',
269    xArg, // 'x',
270    XArg, // 'X',
271    fArg, // 'f',
272    FArg, // 'F',
273    eArg, // 'e',
274    EArg, // 'E',
275    gArg, // 'g',
276    GArg, // 'G',
277    aArg, // 'a',
278    AArg, // 'A',
279    CStrArg,       // 's'
280    VoidPtrArg,    // 'p'
281    OutIntPtrArg,  // 'n'
282    PercentArg,    // '%'
283      // MacOS X unicode extensions.
284    CArg, // 'C'
285    UnicodeStrArg, // 'S'
286      // Objective-C specific specifiers.
287    ObjCObjArg,    // '@'
288      // GlibC specific specifiers.
289    PrintErrno,    // 'm'
290      // Specifier ranges.
291    IntArgBeg = dArg,
292    IntArgEnd = iArg,
293    UIntArgBeg = oArg,
294    UIntArgEnd = XArg,
295    DoubleArgBeg = fArg,
296    DoubleArgEnd = AArg,
297    C99Beg = IntArgBeg,
298    C99End = DoubleArgEnd,
299    ObjCBeg = ObjCObjArg,
300    ObjCEnd = ObjCObjArg
301  };
302
303  ConversionSpecifier()
304  : Position(0), kind(InvalidSpecifier) {}
305
306  ConversionSpecifier(const char *pos, Kind k)
307  : Position(pos), kind(k) {}
308
309  const char *getStart() const {
310    return Position;
311  }
312
313  llvm::StringRef getCharacters() const {
314    return llvm::StringRef(getStart(), getLength());
315  }
316
317  bool consumesDataArgument() const {
318    switch (kind) {
319      case PercentArg:
320      case PrintErrno:
321        return false;
322      default:
323        return true;
324    }
325  }
326
327  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
328  bool isIntArg() const { return kind >= dArg && kind <= iArg; }
329  bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
330  bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
331  Kind getKind() const { return kind; }
332  void setKind(Kind k) { kind = k; }
333  unsigned getLength() const {
334      // Conversion specifiers currently only are represented by
335      // single characters, but we be flexible.
336    return 1;
337  }
338  const char *toString() const;
339
340private:
341  const char *Position;
342  Kind kind;
343};
344
345using analyze_format_string::ArgTypeResult;
346using analyze_format_string::LengthModifier;
347using analyze_format_string::OptionalAmount;
348using analyze_format_string::OptionalFlag;
349
350class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
351  OptionalFlag IsLeftJustified; // '-'
352  OptionalFlag HasPlusPrefix; // '+'
353  OptionalFlag HasSpacePrefix; // ' '
354  OptionalFlag HasAlternativeForm; // '#'
355  OptionalFlag HasLeadingZeroes; // '0'
356  ConversionSpecifier CS;
357  OptionalAmount Precision;
358public:
359  PrintfSpecifier() :
360  IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
361  HasAlternativeForm("#"), HasLeadingZeroes("0") {}
362
363  static PrintfSpecifier Parse(const char *beg, const char *end);
364
365    // Methods for incrementally constructing the PrintfSpecifier.
366  void setConversionSpecifier(const ConversionSpecifier &cs) {
367    CS = cs;
368  }
369  void setIsLeftJustified(const char *position) {
370    IsLeftJustified = true;
371    IsLeftJustified.setPosition(position);
372  }
373  void setHasPlusPrefix(const char *position) {
374    HasPlusPrefix = true;
375    HasPlusPrefix.setPosition(position);
376  }
377  void setHasSpacePrefix(const char *position) {
378    HasSpacePrefix = true;
379    HasSpacePrefix.setPosition(position);
380  }
381  void setHasAlternativeForm(const char *position) {
382    HasAlternativeForm = true;
383    HasAlternativeForm.setPosition(position);
384  }
385  void setHasLeadingZeros(const char *position) {
386    HasLeadingZeroes = true;
387    HasLeadingZeroes.setPosition(position);
388  }
389  void setUsesPositionalArg() { UsesPositionalArg = true; }
390
391    // Methods for querying the format specifier.
392
393  const ConversionSpecifier &getConversionSpecifier() const {
394    return CS;
395  }
396
397  void setPrecision(const OptionalAmount &Amt) {
398    Precision = Amt;
399    Precision.setUsesDotPrefix();
400  }
401
402  const OptionalAmount &getPrecision() const {
403    return Precision;
404  }
405
406    /// \brief Returns the builtin type that a data argument
407    /// paired with this format specifier should have.  This method
408    /// will return null if the format specifier does not have
409    /// a matching data argument or the matching argument matches
410    /// more than one type.
411  ArgTypeResult getArgType(ASTContext &Ctx) const;
412
413  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
414  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
415  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
416  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
417  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
418  bool usesPositionalArg() const { return UsesPositionalArg; }
419
420    /// Changes the specifier and length according to a QualType, retaining any
421    /// flags or options. Returns true on success, or false when a conversion
422    /// was not successful.
423  bool fixType(QualType QT);
424
425  void toString(llvm::raw_ostream &os) const;
426
427    // Validation methods - to check if any element results in undefined behavior
428  bool hasValidPlusPrefix() const;
429  bool hasValidAlternativeForm() const;
430  bool hasValidLeadingZeros() const;
431  bool hasValidSpacePrefix() const;
432  bool hasValidLeftJustified() const;
433
434  bool hasValidLengthModifier() const;
435  bool hasValidPrecision() const;
436  bool hasValidFieldWidth() const;
437};
438}  // end analyze_printf namespace
439
440//===----------------------------------------------------------------------===//
441/// Pieces specific to fscanf format strings.
442
443namespace analyze_scanf {
444
445class ConversionSpecifier {
446public:
447  enum Kind {
448    InvalidSpecifier = 0,
449      // C99 conversion specifiers.
450    dArg, // 'd'
451    iArg, // 'i',
452    oArg, // 'o',
453    uArg, // 'u',
454    xArg, // 'x',
455    XArg, // 'X',
456    fArg, // 'f',
457    FArg, // 'F',
458    eArg, // 'e',
459    EArg, // 'E',
460    gArg, // 'g',
461    GArg, // 'G',
462    aArg, // 'a',
463    AArg, // 'A',
464    sArg, // 's', // match sequence of non-write-space characters
465    VoidPtrArg,        // 'p'
466    cArg,              // 'c', differs from printf, writes array of characters
467    ConsumedSoFarArg,  // 'n', differs from printf, writes back args consumed
468    PercentArg,        // '%'
469    ScanListArg,       // '[' followed by scan list
470      // IEEE Std 1003.1 extensions.
471    CArg, // 'C', same as writing 'lc'
472    SArg, // 'S', same as writing 'ls'
473      // Specifier ranges.
474    IntArgBeg = dArg,
475    IntArgEnd = iArg,
476    UIntArgBeg = oArg,
477    UIntArgEnd = XArg,
478    DoubleArgBeg = fArg,
479    DoubleArgEnd = AArg
480  };
481
482  ConversionSpecifier()
483  : Position(0), EndScanList(0), kind(InvalidSpecifier) {}
484
485  ConversionSpecifier(const char *pos, Kind k)
486  : Position(pos), EndScanList(0), kind(k) {}
487
488  const char *getStart() const {
489    return Position;
490  }
491
492  void setEndScanList(const char *pos) { EndScanList = pos; }
493
494  llvm::StringRef getCharacters() const {
495    return llvm::StringRef(getStart(), getLength());
496  }
497
498  bool consumesDataArgument() const {
499    return kind != PercentArg;
500  }
501
502  bool isIntArg() const { return kind >= dArg && kind <= iArg; }
503  bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
504  bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
505  Kind getKind() const { return kind; }
506  void setKind(Kind k) { kind = k; }
507
508  unsigned getLength() const {
509    return EndScanList ? EndScanList - Position : 1;
510  }
511
512  const char *toString() const;
513
514private:
515  const char *Position;
516  const char *EndScanList;
517  Kind kind;
518};
519
520using analyze_format_string::LengthModifier;
521using analyze_format_string::OptionalAmount;
522using analyze_format_string::OptionalFlag;
523
524class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
525  OptionalFlag SuppressAssignment; // '*'
526  ConversionSpecifier CS;
527public:
528  ScanfSpecifier() : SuppressAssignment("*") {}
529
530  void setSuppressAssignment(const char *position) {
531    SuppressAssignment = true;
532    SuppressAssignment.setPosition(position);
533  }
534
535  const OptionalFlag &getSuppressAssignment() const {
536    return SuppressAssignment;
537  }
538
539  void setConversionSpecifier(const ConversionSpecifier &cs) {
540    CS = cs;
541  }
542
543  const ConversionSpecifier &getConversionSpecifier() const {
544    return CS;
545  }
546
547  bool consumesDataArgument() const {
548    return CS.consumesDataArgument() && !SuppressAssignment;
549  }
550
551  static ScanfSpecifier Parse(const char *beg, const char *end);
552
553};
554
555} // end analyze_scanf namespace
556
557//===----------------------------------------------------------------------===//
558// Parsing and processing of format strings (both fprintf and fscanf).
559
560namespace analyze_format_string {
561
/// Identifies which amount a position refers to when it is passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
563
564class FormatStringHandler {
565public:
566  FormatStringHandler() {}
567  virtual ~FormatStringHandler();
568
569  virtual void HandleNullChar(const char *nullCharacter) {}
570
571  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
572                                     PositionContext p) {}
573
574  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
575
576  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
577                                         unsigned specifierLen) {}
578
579  // Printf-specific handlers.
580
581  virtual bool HandleInvalidPrintfConversionSpecifier(
582                                      const analyze_printf::PrintfSpecifier &FS,
583                                      const char *startSpecifier,
584                                      unsigned specifierLen) {
585    return true;
586  }
587
588  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
589                                     const char *startSpecifier,
590                                     unsigned specifierLen) {
591    return true;
592  }
593
594    // Scanf-specific handlers.
595
596  virtual bool HandleInvalidScanfConversionSpecifier(
597                                        const analyze_scanf::ScanfSpecifier &FS,
598                                        const char *startSpecifier,
599                                        unsigned specifierLen) {
600    return true;
601  }
602
603  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
604                                    const char *startSpecifier,
605                                    unsigned specifierLen) {
606    return true;
607  }
608
609  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
610};
611
612bool ParsePrintfString(FormatStringHandler &H,
613                       const char *beg, const char *end);
614
615bool ParseScanfString(FormatStringHandler &H,
616                       const char *beg, const char *end);
617
618} // end analyze_format_string namespace
619} // end clang namespace
620#endif
621