FormatString.h revision d39d23e610c2a7815515d60c5a538d65d05e8bdc
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// Class representing optional flags with location and representation
/// information.
///
/// A flag stores three things: whether it is set, the position at which it
/// was seen (supplied through setPosition()), and a fixed textual
/// representation supplied at construction.
class OptionalFlag {
public:
  /// Creates a cleared flag whose textual form is \p Representation.  The
  /// position starts out null so that getPosition() reliably asserts until
  /// setPosition() has been called.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  Const so that it can be
  /// used through a const reference to the flag.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag appears; \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the textual representation given at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
60
/// Describes the length modifier (for example 'h' or 'll') attached to a
/// conversion in a scanf/printf format string, together with the pointer at
/// which it starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds an empty modifier: kind None and a null start pointer.
  LengthModifier() : Position(0), kind(None) {}

  /// Builds a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start pointer given at construction.
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier occupies: 0 for None,
  /// 2 for the doubled forms ('hh' and 'll'), and 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Textual form of the modifier (defined out of line).
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
110
111class ConversionSpecifier {
112public:
113  enum Kind {
114    InvalidSpecifier = 0,
115      // C99 conversion specifiers.
116    cArg,
117    dArg,
118    iArg,
119    IntArgBeg = cArg, IntArgEnd = iArg,
120
121    oArg,
122    uArg,
123    xArg,
124    XArg,
125    UIntArgBeg = oArg, UIntArgEnd = XArg,
126
127    fArg,
128    FArg,
129    eArg,
130    EArg,
131    gArg,
132    GArg,
133    aArg,
134    AArg,
135    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
136
137    sArg,
138    pArg,
139    nArg,
140    PercentArg,
141    CArg,
142    SArg,
143
144    // ** Printf-specific **
145
146    // Objective-C specific specifiers.
147    ObjCObjArg,  // '@'
148    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
149
150    // GlibC specific specifiers.
151    PrintErrno,   // 'm'
152
153    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
154
155    // ** Scanf-specific **
156    ScanListArg, // '['
157    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
158  };
159
160  ConversionSpecifier(bool isPrintf)
161    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
162
163  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
164    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
165
166  const char *getStart() const {
167    return Position;
168  }
169
170  StringRef getCharacters() const {
171    return StringRef(getStart(), getLength());
172  }
173
174  bool consumesDataArgument() const {
175    switch (kind) {
176      case PrintErrno:
177        assert(IsPrintf);
178        return false;
179      case PercentArg:
180        return false;
181      default:
182        return true;
183    }
184  }
185
186  Kind getKind() const { return kind; }
187  void setKind(Kind k) { kind = k; }
188  unsigned getLength() const {
189    return EndScanList ? EndScanList - Position : 1;
190  }
191
192  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
193  const char *toString() const;
194
195  bool isPrintfKind() const { return IsPrintf; }
196
197protected:
198  bool IsPrintf;
199  const char *Position;
200  const char *EndScanList;
201  Kind kind;
202};
203
204class ArgTypeResult {
205public:
206  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
207              AnyCharTy, CStrTy, WCStrTy, WIntTy };
208private:
209  const Kind K;
210  QualType T;
211  const char *Name;
212  ArgTypeResult(bool) : K(InvalidTy), Name(0) {}
213public:
214  ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {}
215  ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {}
216  ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {}
217  ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n)  {}
218  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
219
220  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
221
222  bool isValid() const { return K != InvalidTy; }
223
224  const QualType *getSpecificType() const {
225    return K == SpecificTy ? &T : 0;
226  }
227
228  bool matchesType(ASTContext &C, QualType argTy) const;
229
230  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
231
232  QualType getRepresentativeType(ASTContext &C) const;
233
234  std::string getRepresentativeTypeName(ASTContext &C) const;
235};
236
237class OptionalAmount {
238public:
239  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
240
241  OptionalAmount(HowSpecified howSpecified,
242                 unsigned amount,
243                 const char *amountStart,
244                 unsigned amountLength,
245                 bool usesPositionalArg)
246  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
247  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
248
249  OptionalAmount(bool valid = true)
250  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
251  UsesPositionalArg(0), UsesDotPrefix(0) {}
252
253  bool isInvalid() const {
254    return hs == Invalid;
255  }
256
257  HowSpecified getHowSpecified() const { return hs; }
258  void setHowSpecified(HowSpecified h) { hs = h; }
259
260  bool hasDataArgument() const { return hs == Arg; }
261
262  unsigned getArgIndex() const {
263    assert(hasDataArgument());
264    return amt;
265  }
266
267  unsigned getConstantAmount() const {
268    assert(hs == Constant);
269    return amt;
270  }
271
272  const char *getStart() const {
273      // We include the . character if it is given.
274    return start - UsesDotPrefix;
275  }
276
277  unsigned getConstantLength() const {
278    assert(hs == Constant);
279    return length + UsesDotPrefix;
280  }
281
282  ArgTypeResult getArgType(ASTContext &Ctx) const;
283
284  void toString(raw_ostream &os) const;
285
286  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
287  unsigned getPositionalArgIndex() const {
288    assert(hasDataArgument());
289    return amt + 1;
290  }
291
292  bool usesDotPrefix() const { return UsesDotPrefix; }
293  void setUsesDotPrefix() { UsesDotPrefix = true; }
294
295private:
296  const char *start;
297  unsigned length;
298  HowSpecified hs;
299  unsigned amt;
300  bool UsesPositionalArg : 1;
301  bool UsesDotPrefix;
302};
303
304
305class FormatSpecifier {
306protected:
307  LengthModifier LM;
308  OptionalAmount FieldWidth;
309  ConversionSpecifier CS;
310  /// Positional arguments, an IEEE extension:
311  ///  IEEE Std 1003.1, 2004 Edition
312  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
313  bool UsesPositionalArg;
314  unsigned argIndex;
315public:
316  FormatSpecifier(bool isPrintf)
317    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
318
319  void setLengthModifier(LengthModifier lm) {
320    LM = lm;
321  }
322
323  void setUsesPositionalArg() { UsesPositionalArg = true; }
324
325  void setArgIndex(unsigned i) {
326    argIndex = i;
327  }
328
329  unsigned getArgIndex() const {
330    return argIndex;
331  }
332
333  unsigned getPositionalArgIndex() const {
334    return argIndex + 1;
335  }
336
337  const LengthModifier &getLengthModifier() const {
338    return LM;
339  }
340
341  const OptionalAmount &getFieldWidth() const {
342    return FieldWidth;
343  }
344
345  void setFieldWidth(const OptionalAmount &Amt) {
346    FieldWidth = Amt;
347  }
348
349  bool usesPositionalArg() const { return UsesPositionalArg; }
350
351  bool hasValidLengthModifier() const;
352
353  bool hasStandardLengthModifier() const;
354
355  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
356
357  bool hasStandardLengthConversionCombination() const;
358};
359
360} // end analyze_format_string namespace
361
362//===----------------------------------------------------------------------===//
363/// Pieces specific to fprintf format strings.
364
365namespace analyze_printf {
366
367class PrintfConversionSpecifier :
368  public analyze_format_string::ConversionSpecifier  {
369public:
370  PrintfConversionSpecifier()
371    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
372
373  PrintfConversionSpecifier(const char *pos, Kind k)
374    : ConversionSpecifier(true, pos, k) {}
375
376  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
377  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
378  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
379                                    kind <= DoubleArgEnd; }
380  unsigned getLength() const {
381      // Conversion specifiers currently only are represented by
382      // single characters, but we be flexible.
383    return 1;
384  }
385
386  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
387    return CS->isPrintfKind();
388  }
389};
390
391using analyze_format_string::ArgTypeResult;
392using analyze_format_string::LengthModifier;
393using analyze_format_string::OptionalAmount;
394using analyze_format_string::OptionalFlag;
395
396class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
397  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
398  OptionalFlag IsLeftJustified; // '-'
399  OptionalFlag HasPlusPrefix; // '+'
400  OptionalFlag HasSpacePrefix; // ' '
401  OptionalFlag HasAlternativeForm; // '#'
402  OptionalFlag HasLeadingZeroes; // '0'
403  OptionalAmount Precision;
404public:
405  PrintfSpecifier() :
406    FormatSpecifier(/* isPrintf = */ true),
407    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
408    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
409
410  static PrintfSpecifier Parse(const char *beg, const char *end);
411
412    // Methods for incrementally constructing the PrintfSpecifier.
413  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
414    CS = cs;
415  }
416  void setHasThousandsGrouping(const char *position) {
417    HasThousandsGrouping = true;
418    HasThousandsGrouping.setPosition(position);
419  }
420  void setIsLeftJustified(const char *position) {
421    IsLeftJustified = true;
422    IsLeftJustified.setPosition(position);
423  }
424  void setHasPlusPrefix(const char *position) {
425    HasPlusPrefix = true;
426    HasPlusPrefix.setPosition(position);
427  }
428  void setHasSpacePrefix(const char *position) {
429    HasSpacePrefix = true;
430    HasSpacePrefix.setPosition(position);
431  }
432  void setHasAlternativeForm(const char *position) {
433    HasAlternativeForm = true;
434    HasAlternativeForm.setPosition(position);
435  }
436  void setHasLeadingZeros(const char *position) {
437    HasLeadingZeroes = true;
438    HasLeadingZeroes.setPosition(position);
439  }
440  void setUsesPositionalArg() { UsesPositionalArg = true; }
441
442    // Methods for querying the format specifier.
443
444  const PrintfConversionSpecifier &getConversionSpecifier() const {
445    return cast<PrintfConversionSpecifier>(CS);
446  }
447
448  void setPrecision(const OptionalAmount &Amt) {
449    Precision = Amt;
450    Precision.setUsesDotPrefix();
451  }
452
453  const OptionalAmount &getPrecision() const {
454    return Precision;
455  }
456
457  bool consumesDataArgument() const {
458    return getConversionSpecifier().consumesDataArgument();
459  }
460
461  /// \brief Returns the builtin type that a data argument
462  /// paired with this format specifier should have.  This method
463  /// will return null if the format specifier does not have
464  /// a matching data argument or the matching argument matches
465  /// more than one type.
466  ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
467
468  const OptionalFlag &hasThousandsGrouping() const {
469      return HasThousandsGrouping;
470  }
471  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
472  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
473  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
474  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
475  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
476  bool usesPositionalArg() const { return UsesPositionalArg; }
477
478  /// Changes the specifier and length according to a QualType, retaining any
479  /// flags or options. Returns true on success, or false when a conversion
480  /// was not successful.
481  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
482               bool IsObjCLiteral);
483
484  void toString(raw_ostream &os) const;
485
486  // Validation methods - to check if any element results in undefined behavior
487  bool hasValidPlusPrefix() const;
488  bool hasValidAlternativeForm() const;
489  bool hasValidLeadingZeros() const;
490  bool hasValidSpacePrefix() const;
491  bool hasValidLeftJustified() const;
492  bool hasValidThousandsGroupingPrefix() const;
493
494  bool hasValidPrecision() const;
495  bool hasValidFieldWidth() const;
496};
497}  // end analyze_printf namespace
498
499//===----------------------------------------------------------------------===//
500/// Pieces specific to fscanf format strings.
501
502namespace analyze_scanf {
503
504class ScanfConversionSpecifier :
505    public analyze_format_string::ConversionSpecifier  {
506public:
507  ScanfConversionSpecifier()
508    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
509
510  ScanfConversionSpecifier(const char *pos, Kind k)
511    : ConversionSpecifier(false, pos, k) {}
512
513  void setEndScanList(const char *pos) { EndScanList = pos; }
514
515  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
516    return !CS->isPrintfKind();
517  }
518};
519
520using analyze_format_string::ArgTypeResult;
521using analyze_format_string::LengthModifier;
522using analyze_format_string::OptionalAmount;
523using analyze_format_string::OptionalFlag;
524
525class ScanfArgTypeResult : public ArgTypeResult {
526public:
527  enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy };
528private:
529  Kind K;
530  ArgTypeResult A;
531  const char *Name;
532  QualType getRepresentativeType(ASTContext &C) const;
533public:
534  ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
535  ScanfArgTypeResult(ArgTypeResult a, const char *n = 0)
536      : K(PtrToArgTypeResultTy), A(a), Name(n) {
537    assert(A.isValid());
538  }
539
540  static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); }
541
542  bool isValid() const { return K != InvalidTy; }
543
544  bool matchesType(ASTContext& C, QualType argTy) const;
545
546  std::string getRepresentativeTypeName(ASTContext& C) const;
547};
548
549class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
550  OptionalFlag SuppressAssignment; // '*'
551public:
552  ScanfSpecifier() :
553    FormatSpecifier(/* isPrintf = */ false),
554    SuppressAssignment("*") {}
555
556  void setSuppressAssignment(const char *position) {
557    SuppressAssignment = true;
558    SuppressAssignment.setPosition(position);
559  }
560
561  const OptionalFlag &getSuppressAssignment() const {
562    return SuppressAssignment;
563  }
564
565  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
566    CS = cs;
567  }
568
569  const ScanfConversionSpecifier &getConversionSpecifier() const {
570    return cast<ScanfConversionSpecifier>(CS);
571  }
572
573  bool consumesDataArgument() const {
574    return CS.consumesDataArgument() && !SuppressAssignment;
575  }
576
577  ScanfArgTypeResult getArgType(ASTContext &Ctx) const;
578
579  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
580
581  void toString(raw_ostream &os) const;
582
583  static ScanfSpecifier Parse(const char *beg, const char *end);
584};
585
586} // end analyze_scanf namespace
587
588//===----------------------------------------------------------------------===//
589// Parsing and processing of format strings (both fprintf and fscanf).
590
591namespace analyze_format_string {
592
/// Tells HandleInvalidPosition which part of a specifier a position belongs
/// to.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
594
595class FormatStringHandler {
596public:
597  FormatStringHandler() {}
598  virtual ~FormatStringHandler();
599
600  virtual void HandleNullChar(const char *nullCharacter) {}
601
602  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
603
604  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
605                                     PositionContext p) {}
606
607  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
608
609  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
610                                         unsigned specifierLen) {}
611
612  // Printf-specific handlers.
613
614  virtual bool HandleInvalidPrintfConversionSpecifier(
615                                      const analyze_printf::PrintfSpecifier &FS,
616                                      const char *startSpecifier,
617                                      unsigned specifierLen) {
618    return true;
619  }
620
621  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
622                                     const char *startSpecifier,
623                                     unsigned specifierLen) {
624    return true;
625  }
626
627    // Scanf-specific handlers.
628
629  virtual bool HandleInvalidScanfConversionSpecifier(
630                                        const analyze_scanf::ScanfSpecifier &FS,
631                                        const char *startSpecifier,
632                                        unsigned specifierLen) {
633    return true;
634  }
635
636  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
637                                    const char *startSpecifier,
638                                    unsigned specifierLen) {
639    return true;
640  }
641
642  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
643};
644
645bool ParsePrintfString(FormatStringHandler &H,
646                       const char *beg, const char *end, const LangOptions &LO);
647
648bool ParseScanfString(FormatStringHandler &H,
649                      const char *beg, const char *end, const LangOptions &LO);
650
651} // end analyze_format_string namespace
652} // end clang namespace
653#endif
654