FormatString.h revision 58e1e54476d610d6c33ef483f216ed8a1282d35c
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
30/// Class representing optional flags with location and representation
31/// information.
/// A boolean flag that additionally carries its textual representation and,
/// once recorded, the location at which it was seen.
class OptionalFlag {
public:
  /// Creates a cleared flag whose textual form is \p Representation.
  /// The position starts out null so that getPosition() can reliably assert
  /// that setPosition() has been called first.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  Const so it can be used
  /// through const references to the flag.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag occurs; \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string supplied at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation; // Not owned; stored as given.
  const char *position;       // Null until setPosition() is called.
  bool flag;
};
60
61/// Represents the length modifier in a format string in scanf/printf.
/// Describes the length modifier of a printf/scanf conversion: which kind it
/// is and where it starts in the format string.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds an absent modifier (kind None, null position).
  LengthModifier() : Position(0), kind(None) {}

  /// Builds a modifier of kind \p k located at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the stored start position (null for a default-built modifier).
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier spans: 0 for None,
  /// 2 for the two-letter forms ('hh', 'll'), and 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
110
111class ConversionSpecifier {
112public:
113  enum Kind {
114    InvalidSpecifier = 0,
115      // C99 conversion specifiers.
116    cArg,
117    dArg,
118    iArg,
119    IntArgBeg = cArg, IntArgEnd = iArg,
120
121    oArg,
122    uArg,
123    xArg,
124    XArg,
125    UIntArgBeg = oArg, UIntArgEnd = XArg,
126
127    fArg,
128    FArg,
129    eArg,
130    EArg,
131    gArg,
132    GArg,
133    aArg,
134    AArg,
135    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
136
137    sArg,
138    pArg,
139    nArg,
140    PercentArg,
141    CArg,
142    SArg,
143
144    // ** Printf-specific **
145
146    // Objective-C specific specifiers.
147    ObjCObjArg,  // '@'
148    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
149
150    // GlibC specific specifiers.
151    PrintErrno,   // 'm'
152
153    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
154
155    // ** Scanf-specific **
156    ScanListArg, // '['
157    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
158  };
159
160  ConversionSpecifier(bool isPrintf)
161    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
162
163  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
164    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
165
166  const char *getStart() const {
167    return Position;
168  }
169
170  StringRef getCharacters() const {
171    return StringRef(getStart(), getLength());
172  }
173
174  bool consumesDataArgument() const {
175    switch (kind) {
176      case PrintErrno:
177        assert(IsPrintf);
178        return false;
179      case PercentArg:
180        return false;
181      default:
182        return true;
183    }
184  }
185
186  Kind getKind() const { return kind; }
187  void setKind(Kind k) { kind = k; }
188  unsigned getLength() const {
189    return EndScanList ? EndScanList - Position : 1;
190  }
191
192  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
193  const char *toString() const;
194
195  bool isPrintfKind() const { return IsPrintf; }
196
197protected:
198  bool IsPrintf;
199  const char *Position;
200  const char *EndScanList;
201  Kind kind;
202};
203
204class ArgType {
205public:
206  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
207              AnyCharTy, CStrTy, WCStrTy, WIntTy };
208private:
209  const Kind K;
210  QualType T;
211  const char *Name;
212  bool Ptr;
213public:
214  ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
215  ArgType(QualType t, const char *n = 0)
216      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
217  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
218
219  static ArgType Invalid() { return ArgType(InvalidTy); }
220  bool isValid() const { return K != InvalidTy; }
221
222  /// Create an ArgType which corresponds to the type pointer to A.
223  static ArgType PtrTo(const ArgType& A) {
224    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
225    ArgType Res = A;
226    Res.Ptr = true;
227    return Res;
228  }
229
230  bool matchesType(ASTContext &C, QualType argTy) const;
231
232  QualType getRepresentativeType(ASTContext &C) const;
233
234  std::string getRepresentativeTypeName(ASTContext &C) const;
235};
236
237class OptionalAmount {
238public:
239  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
240
241  OptionalAmount(HowSpecified howSpecified,
242                 unsigned amount,
243                 const char *amountStart,
244                 unsigned amountLength,
245                 bool usesPositionalArg)
246  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
247  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
248
249  OptionalAmount(bool valid = true)
250  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
251  UsesPositionalArg(0), UsesDotPrefix(0) {}
252
253  bool isInvalid() const {
254    return hs == Invalid;
255  }
256
257  HowSpecified getHowSpecified() const { return hs; }
258  void setHowSpecified(HowSpecified h) { hs = h; }
259
260  bool hasDataArgument() const { return hs == Arg; }
261
262  unsigned getArgIndex() const {
263    assert(hasDataArgument());
264    return amt;
265  }
266
267  unsigned getConstantAmount() const {
268    assert(hs == Constant);
269    return amt;
270  }
271
272  const char *getStart() const {
273      // We include the . character if it is given.
274    return start - UsesDotPrefix;
275  }
276
277  unsigned getConstantLength() const {
278    assert(hs == Constant);
279    return length + UsesDotPrefix;
280  }
281
282  ArgType getArgType(ASTContext &Ctx) const;
283
284  void toString(raw_ostream &os) const;
285
286  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
287  unsigned getPositionalArgIndex() const {
288    assert(hasDataArgument());
289    return amt + 1;
290  }
291
292  bool usesDotPrefix() const { return UsesDotPrefix; }
293  void setUsesDotPrefix() { UsesDotPrefix = true; }
294
295private:
296  const char *start;
297  unsigned length;
298  HowSpecified hs;
299  unsigned amt;
300  bool UsesPositionalArg : 1;
301  bool UsesDotPrefix;
302};
303
304
305class FormatSpecifier {
306protected:
307  LengthModifier LM;
308  OptionalAmount FieldWidth;
309  ConversionSpecifier CS;
310  /// Positional arguments, an IEEE extension:
311  ///  IEEE Std 1003.1, 2004 Edition
312  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
313  bool UsesPositionalArg;
314  unsigned argIndex;
315public:
316  FormatSpecifier(bool isPrintf)
317    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
318
319  void setLengthModifier(LengthModifier lm) {
320    LM = lm;
321  }
322
323  void setUsesPositionalArg() { UsesPositionalArg = true; }
324
325  void setArgIndex(unsigned i) {
326    argIndex = i;
327  }
328
329  unsigned getArgIndex() const {
330    return argIndex;
331  }
332
333  unsigned getPositionalArgIndex() const {
334    return argIndex + 1;
335  }
336
337  const LengthModifier &getLengthModifier() const {
338    return LM;
339  }
340
341  const OptionalAmount &getFieldWidth() const {
342    return FieldWidth;
343  }
344
345  void setFieldWidth(const OptionalAmount &Amt) {
346    FieldWidth = Amt;
347  }
348
349  bool usesPositionalArg() const { return UsesPositionalArg; }
350
351  bool hasValidLengthModifier() const;
352
353  bool hasStandardLengthModifier() const;
354
355  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
356
357  bool hasStandardLengthConversionCombination() const;
358
359  /// For a TypedefType QT, if it is a named integer type such as size_t,
360  /// assign the appropriate value to LM and return true.
361  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
362};
363
364} // end analyze_format_string namespace
365
366//===----------------------------------------------------------------------===//
367/// Pieces specific to fprintf format strings.
368
369namespace analyze_printf {
370
371class PrintfConversionSpecifier :
372  public analyze_format_string::ConversionSpecifier  {
373public:
374  PrintfConversionSpecifier()
375    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
376
377  PrintfConversionSpecifier(const char *pos, Kind k)
378    : ConversionSpecifier(true, pos, k) {}
379
380  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
381  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
382  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
383                                    kind <= DoubleArgEnd; }
384  unsigned getLength() const {
385      // Conversion specifiers currently only are represented by
386      // single characters, but we be flexible.
387    return 1;
388  }
389
390  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
391    return CS->isPrintfKind();
392  }
393};
394
395using analyze_format_string::ArgType;
396using analyze_format_string::LengthModifier;
397using analyze_format_string::OptionalAmount;
398using analyze_format_string::OptionalFlag;
399
400class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
401  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
402  OptionalFlag IsLeftJustified; // '-'
403  OptionalFlag HasPlusPrefix; // '+'
404  OptionalFlag HasSpacePrefix; // ' '
405  OptionalFlag HasAlternativeForm; // '#'
406  OptionalFlag HasLeadingZeroes; // '0'
407  OptionalAmount Precision;
408public:
409  PrintfSpecifier() :
410    FormatSpecifier(/* isPrintf = */ true),
411    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
412    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
413
414  static PrintfSpecifier Parse(const char *beg, const char *end);
415
416    // Methods for incrementally constructing the PrintfSpecifier.
417  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
418    CS = cs;
419  }
420  void setHasThousandsGrouping(const char *position) {
421    HasThousandsGrouping = true;
422    HasThousandsGrouping.setPosition(position);
423  }
424  void setIsLeftJustified(const char *position) {
425    IsLeftJustified = true;
426    IsLeftJustified.setPosition(position);
427  }
428  void setHasPlusPrefix(const char *position) {
429    HasPlusPrefix = true;
430    HasPlusPrefix.setPosition(position);
431  }
432  void setHasSpacePrefix(const char *position) {
433    HasSpacePrefix = true;
434    HasSpacePrefix.setPosition(position);
435  }
436  void setHasAlternativeForm(const char *position) {
437    HasAlternativeForm = true;
438    HasAlternativeForm.setPosition(position);
439  }
440  void setHasLeadingZeros(const char *position) {
441    HasLeadingZeroes = true;
442    HasLeadingZeroes.setPosition(position);
443  }
444  void setUsesPositionalArg() { UsesPositionalArg = true; }
445
446    // Methods for querying the format specifier.
447
448  const PrintfConversionSpecifier &getConversionSpecifier() const {
449    return cast<PrintfConversionSpecifier>(CS);
450  }
451
452  void setPrecision(const OptionalAmount &Amt) {
453    Precision = Amt;
454    Precision.setUsesDotPrefix();
455  }
456
457  const OptionalAmount &getPrecision() const {
458    return Precision;
459  }
460
461  bool consumesDataArgument() const {
462    return getConversionSpecifier().consumesDataArgument();
463  }
464
465  /// \brief Returns the builtin type that a data argument
466  /// paired with this format specifier should have.  This method
467  /// will return null if the format specifier does not have
468  /// a matching data argument or the matching argument matches
469  /// more than one type.
470  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
471
472  const OptionalFlag &hasThousandsGrouping() const {
473      return HasThousandsGrouping;
474  }
475  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
476  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
477  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
478  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
479  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
480  bool usesPositionalArg() const { return UsesPositionalArg; }
481
482  /// Changes the specifier and length according to a QualType, retaining any
483  /// flags or options. Returns true on success, or false when a conversion
484  /// was not successful.
485  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
486               bool IsObjCLiteral);
487
488  void toString(raw_ostream &os) const;
489
490  // Validation methods - to check if any element results in undefined behavior
491  bool hasValidPlusPrefix() const;
492  bool hasValidAlternativeForm() const;
493  bool hasValidLeadingZeros() const;
494  bool hasValidSpacePrefix() const;
495  bool hasValidLeftJustified() const;
496  bool hasValidThousandsGroupingPrefix() const;
497
498  bool hasValidPrecision() const;
499  bool hasValidFieldWidth() const;
500};
501}  // end analyze_printf namespace
502
503//===----------------------------------------------------------------------===//
504/// Pieces specific to fscanf format strings.
505
506namespace analyze_scanf {
507
508class ScanfConversionSpecifier :
509    public analyze_format_string::ConversionSpecifier  {
510public:
511  ScanfConversionSpecifier()
512    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
513
514  ScanfConversionSpecifier(const char *pos, Kind k)
515    : ConversionSpecifier(false, pos, k) {}
516
517  void setEndScanList(const char *pos) { EndScanList = pos; }
518
519  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
520    return !CS->isPrintfKind();
521  }
522};
523
524using analyze_format_string::ArgType;
525using analyze_format_string::LengthModifier;
526using analyze_format_string::OptionalAmount;
527using analyze_format_string::OptionalFlag;
528
529class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
530  OptionalFlag SuppressAssignment; // '*'
531public:
532  ScanfSpecifier() :
533    FormatSpecifier(/* isPrintf = */ false),
534    SuppressAssignment("*") {}
535
536  void setSuppressAssignment(const char *position) {
537    SuppressAssignment = true;
538    SuppressAssignment.setPosition(position);
539  }
540
541  const OptionalFlag &getSuppressAssignment() const {
542    return SuppressAssignment;
543  }
544
545  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
546    CS = cs;
547  }
548
549  const ScanfConversionSpecifier &getConversionSpecifier() const {
550    return cast<ScanfConversionSpecifier>(CS);
551  }
552
553  bool consumesDataArgument() const {
554    return CS.consumesDataArgument() && !SuppressAssignment;
555  }
556
557  ArgType getArgType(ASTContext &Ctx) const;
558
559  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
560
561  void toString(raw_ostream &os) const;
562
563  static ScanfSpecifier Parse(const char *beg, const char *end);
564};
565
566} // end analyze_scanf namespace
567
568//===----------------------------------------------------------------------===//
569// Parsing and processing of format strings (both fprintf and fscanf).
570
571namespace analyze_format_string {
572
/// Distinguishes the two position contexts passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
574
575class FormatStringHandler {
576public:
577  FormatStringHandler() {}
578  virtual ~FormatStringHandler();
579
580  virtual void HandleNullChar(const char *nullCharacter) {}
581
582  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
583
584  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
585                                     PositionContext p) {}
586
587  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
588
589  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
590                                         unsigned specifierLen) {}
591
592  // Printf-specific handlers.
593
594  virtual bool HandleInvalidPrintfConversionSpecifier(
595                                      const analyze_printf::PrintfSpecifier &FS,
596                                      const char *startSpecifier,
597                                      unsigned specifierLen) {
598    return true;
599  }
600
601  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
602                                     const char *startSpecifier,
603                                     unsigned specifierLen) {
604    return true;
605  }
606
607    // Scanf-specific handlers.
608
609  virtual bool HandleInvalidScanfConversionSpecifier(
610                                        const analyze_scanf::ScanfSpecifier &FS,
611                                        const char *startSpecifier,
612                                        unsigned specifierLen) {
613    return true;
614  }
615
616  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
617                                    const char *startSpecifier,
618                                    unsigned specifierLen) {
619    return true;
620  }
621
622  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
623};
624
625bool ParsePrintfString(FormatStringHandler &H,
626                       const char *beg, const char *end, const LangOptions &LO);
627
628bool ParseScanfString(FormatStringHandler &H,
629                      const char *beg, const char *end, const LangOptions &LO);
630
631} // end analyze_format_string namespace
632} // end clang namespace
633#endif
634