FormatString.h revision f3749f4168c5cee59627a681ca4ca6e4116d0761
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// An optional format-string flag.
///
/// Stores whether the flag is set, the location at which it was recorded,
/// and the textual representation supplied at construction time.
class OptionalFlag {
public:
  /// Creates a cleared flag with no recorded position.
  ///
  /// \param Representation text handed back unchanged by toString().
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true when the flag is currently set. Const so that it can be
  /// called through a const reference to the flag.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag was seen; \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position. The pointer starts out null, so the
  /// assertion fires if setPosition() was never called.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
60
/// Length modifier attached to a conversion in a scanf/printf format string.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds an empty modifier: kind None and a null start pointer.
  LengthModifier() : Position(0), kind(None) {}

  /// Builds a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Pointer stored at construction (null for the default constructor).
  const char *getStart() const { return Position; }

  /// Length in characters of the modifier: 0 for None, 2 for the
  /// two-character kinds (AsChar and AsLongLong), 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
110
111class ConversionSpecifier {
112public:
113  enum Kind {
114    InvalidSpecifier = 0,
115      // C99 conversion specifiers.
116    cArg,
117    dArg,
118    iArg,
119    IntArgBeg = cArg, IntArgEnd = iArg,
120
121    oArg,
122    uArg,
123    xArg,
124    XArg,
125    UIntArgBeg = oArg, UIntArgEnd = XArg,
126
127    fArg,
128    FArg,
129    eArg,
130    EArg,
131    gArg,
132    GArg,
133    aArg,
134    AArg,
135    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
136
137    sArg,
138    pArg,
139    nArg,
140    PercentArg,
141    CArg,
142    SArg,
143
144    // ** Printf-specific **
145
146    // Objective-C specific specifiers.
147    ObjCObjArg,  // '@'
148    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
149
150    // GlibC specific specifiers.
151    PrintErrno,   // 'm'
152
153    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
154
155    // ** Scanf-specific **
156    ScanListArg, // '['
157    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
158  };
159
160  ConversionSpecifier(bool isPrintf)
161    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
162
163  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
164    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
165
166  const char *getStart() const {
167    return Position;
168  }
169
170  StringRef getCharacters() const {
171    return StringRef(getStart(), getLength());
172  }
173
174  bool consumesDataArgument() const {
175    switch (kind) {
176      case PrintErrno:
177        assert(IsPrintf);
178        return false;
179      case PercentArg:
180        return false;
181      default:
182        return true;
183    }
184  }
185
186  Kind getKind() const { return kind; }
187  void setKind(Kind k) { kind = k; }
188  unsigned getLength() const {
189    return EndScanList ? EndScanList - Position : 1;
190  }
191
192  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
193  const char *toString() const;
194
195  bool isPrintfKind() const { return IsPrintf; }
196
197protected:
198  bool IsPrintf;
199  const char *Position;
200  const char *EndScanList;
201  Kind kind;
202};
203
204class ArgType {
205public:
206  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
207              AnyCharTy, CStrTy, WCStrTy, WIntTy };
208private:
209  const Kind K;
210  QualType T;
211  const char *Name;
212public:
213  ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n) {}
214  ArgType(QualType t, const char *n = 0) : K(SpecificTy), T(t), Name(n)  {}
215  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
216
217  static ArgType Invalid() { return ArgType(InvalidTy); }
218
219  bool isValid() const { return K != InvalidTy; }
220
221  bool matchesType(ASTContext &C, QualType argTy) const;
222
223  QualType getRepresentativeType(ASTContext &C) const;
224
225  std::string getRepresentativeTypeName(ASTContext &C) const;
226};
227
228class OptionalAmount {
229public:
230  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
231
232  OptionalAmount(HowSpecified howSpecified,
233                 unsigned amount,
234                 const char *amountStart,
235                 unsigned amountLength,
236                 bool usesPositionalArg)
237  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
238  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
239
240  OptionalAmount(bool valid = true)
241  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
242  UsesPositionalArg(0), UsesDotPrefix(0) {}
243
244  bool isInvalid() const {
245    return hs == Invalid;
246  }
247
248  HowSpecified getHowSpecified() const { return hs; }
249  void setHowSpecified(HowSpecified h) { hs = h; }
250
251  bool hasDataArgument() const { return hs == Arg; }
252
253  unsigned getArgIndex() const {
254    assert(hasDataArgument());
255    return amt;
256  }
257
258  unsigned getConstantAmount() const {
259    assert(hs == Constant);
260    return amt;
261  }
262
263  const char *getStart() const {
264      // We include the . character if it is given.
265    return start - UsesDotPrefix;
266  }
267
268  unsigned getConstantLength() const {
269    assert(hs == Constant);
270    return length + UsesDotPrefix;
271  }
272
273  ArgType getArgType(ASTContext &Ctx) const;
274
275  void toString(raw_ostream &os) const;
276
277  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
278  unsigned getPositionalArgIndex() const {
279    assert(hasDataArgument());
280    return amt + 1;
281  }
282
283  bool usesDotPrefix() const { return UsesDotPrefix; }
284  void setUsesDotPrefix() { UsesDotPrefix = true; }
285
286private:
287  const char *start;
288  unsigned length;
289  HowSpecified hs;
290  unsigned amt;
291  bool UsesPositionalArg : 1;
292  bool UsesDotPrefix;
293};
294
295
296class FormatSpecifier {
297protected:
298  LengthModifier LM;
299  OptionalAmount FieldWidth;
300  ConversionSpecifier CS;
301  /// Positional arguments, an IEEE extension:
302  ///  IEEE Std 1003.1, 2004 Edition
303  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
304  bool UsesPositionalArg;
305  unsigned argIndex;
306public:
307  FormatSpecifier(bool isPrintf)
308    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
309
310  void setLengthModifier(LengthModifier lm) {
311    LM = lm;
312  }
313
314  void setUsesPositionalArg() { UsesPositionalArg = true; }
315
316  void setArgIndex(unsigned i) {
317    argIndex = i;
318  }
319
320  unsigned getArgIndex() const {
321    return argIndex;
322  }
323
324  unsigned getPositionalArgIndex() const {
325    return argIndex + 1;
326  }
327
328  const LengthModifier &getLengthModifier() const {
329    return LM;
330  }
331
332  const OptionalAmount &getFieldWidth() const {
333    return FieldWidth;
334  }
335
336  void setFieldWidth(const OptionalAmount &Amt) {
337    FieldWidth = Amt;
338  }
339
340  bool usesPositionalArg() const { return UsesPositionalArg; }
341
342  bool hasValidLengthModifier() const;
343
344  bool hasStandardLengthModifier() const;
345
346  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
347
348  bool hasStandardLengthConversionCombination() const;
349
350  /// For a TypedefType QT, if it is a named integer type such as size_t,
351  /// assign the appropriate value to LM and return true.
352  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
353};
354
355} // end analyze_format_string namespace
356
357//===----------------------------------------------------------------------===//
358/// Pieces specific to fprintf format strings.
359
360namespace analyze_printf {
361
362class PrintfConversionSpecifier :
363  public analyze_format_string::ConversionSpecifier  {
364public:
365  PrintfConversionSpecifier()
366    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
367
368  PrintfConversionSpecifier(const char *pos, Kind k)
369    : ConversionSpecifier(true, pos, k) {}
370
371  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
372  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
373  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
374                                    kind <= DoubleArgEnd; }
375  unsigned getLength() const {
376      // Conversion specifiers currently only are represented by
377      // single characters, but we be flexible.
378    return 1;
379  }
380
381  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
382    return CS->isPrintfKind();
383  }
384};
385
386using analyze_format_string::ArgType;
387using analyze_format_string::LengthModifier;
388using analyze_format_string::OptionalAmount;
389using analyze_format_string::OptionalFlag;
390
391class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
392  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
393  OptionalFlag IsLeftJustified; // '-'
394  OptionalFlag HasPlusPrefix; // '+'
395  OptionalFlag HasSpacePrefix; // ' '
396  OptionalFlag HasAlternativeForm; // '#'
397  OptionalFlag HasLeadingZeroes; // '0'
398  OptionalAmount Precision;
399public:
400  PrintfSpecifier() :
401    FormatSpecifier(/* isPrintf = */ true),
402    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
403    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
404
405  static PrintfSpecifier Parse(const char *beg, const char *end);
406
407    // Methods for incrementally constructing the PrintfSpecifier.
408  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
409    CS = cs;
410  }
411  void setHasThousandsGrouping(const char *position) {
412    HasThousandsGrouping = true;
413    HasThousandsGrouping.setPosition(position);
414  }
415  void setIsLeftJustified(const char *position) {
416    IsLeftJustified = true;
417    IsLeftJustified.setPosition(position);
418  }
419  void setHasPlusPrefix(const char *position) {
420    HasPlusPrefix = true;
421    HasPlusPrefix.setPosition(position);
422  }
423  void setHasSpacePrefix(const char *position) {
424    HasSpacePrefix = true;
425    HasSpacePrefix.setPosition(position);
426  }
427  void setHasAlternativeForm(const char *position) {
428    HasAlternativeForm = true;
429    HasAlternativeForm.setPosition(position);
430  }
431  void setHasLeadingZeros(const char *position) {
432    HasLeadingZeroes = true;
433    HasLeadingZeroes.setPosition(position);
434  }
435  void setUsesPositionalArg() { UsesPositionalArg = true; }
436
437    // Methods for querying the format specifier.
438
439  const PrintfConversionSpecifier &getConversionSpecifier() const {
440    return cast<PrintfConversionSpecifier>(CS);
441  }
442
443  void setPrecision(const OptionalAmount &Amt) {
444    Precision = Amt;
445    Precision.setUsesDotPrefix();
446  }
447
448  const OptionalAmount &getPrecision() const {
449    return Precision;
450  }
451
452  bool consumesDataArgument() const {
453    return getConversionSpecifier().consumesDataArgument();
454  }
455
456  /// \brief Returns the builtin type that a data argument
457  /// paired with this format specifier should have.  This method
458  /// will return null if the format specifier does not have
459  /// a matching data argument or the matching argument matches
460  /// more than one type.
461  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
462
463  const OptionalFlag &hasThousandsGrouping() const {
464      return HasThousandsGrouping;
465  }
466  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
467  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
468  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
469  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
470  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
471  bool usesPositionalArg() const { return UsesPositionalArg; }
472
473  /// Changes the specifier and length according to a QualType, retaining any
474  /// flags or options. Returns true on success, or false when a conversion
475  /// was not successful.
476  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
477               bool IsObjCLiteral);
478
479  void toString(raw_ostream &os) const;
480
481  // Validation methods - to check if any element results in undefined behavior
482  bool hasValidPlusPrefix() const;
483  bool hasValidAlternativeForm() const;
484  bool hasValidLeadingZeros() const;
485  bool hasValidSpacePrefix() const;
486  bool hasValidLeftJustified() const;
487  bool hasValidThousandsGroupingPrefix() const;
488
489  bool hasValidPrecision() const;
490  bool hasValidFieldWidth() const;
491};
492}  // end analyze_printf namespace
493
494//===----------------------------------------------------------------------===//
495/// Pieces specific to fscanf format strings.
496
497namespace analyze_scanf {
498
499class ScanfConversionSpecifier :
500    public analyze_format_string::ConversionSpecifier  {
501public:
502  ScanfConversionSpecifier()
503    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
504
505  ScanfConversionSpecifier(const char *pos, Kind k)
506    : ConversionSpecifier(false, pos, k) {}
507
508  void setEndScanList(const char *pos) { EndScanList = pos; }
509
510  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
511    return !CS->isPrintfKind();
512  }
513};
514
515using analyze_format_string::ArgType;
516using analyze_format_string::LengthModifier;
517using analyze_format_string::OptionalAmount;
518using analyze_format_string::OptionalFlag;
519
520class ScanfArgType : public ArgType {
521public:
522  enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeTy };
523private:
524  Kind K;
525  ArgType A;
526  const char *Name;
527  QualType getRepresentativeType(ASTContext &C) const;
528public:
529  ScanfArgType(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
530  ScanfArgType(ArgType a, const char *n = 0)
531      : K(PtrToArgTypeTy), A(a), Name(n) {
532    assert(A.isValid());
533  }
534
535  static ScanfArgType Invalid() { return ScanfArgType(InvalidTy); }
536
537  bool isValid() const { return K != InvalidTy; }
538
539  bool matchesType(ASTContext& C, QualType argTy) const;
540
541  std::string getRepresentativeTypeName(ASTContext& C) const;
542};
543
544class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
545  OptionalFlag SuppressAssignment; // '*'
546public:
547  ScanfSpecifier() :
548    FormatSpecifier(/* isPrintf = */ false),
549    SuppressAssignment("*") {}
550
551  void setSuppressAssignment(const char *position) {
552    SuppressAssignment = true;
553    SuppressAssignment.setPosition(position);
554  }
555
556  const OptionalFlag &getSuppressAssignment() const {
557    return SuppressAssignment;
558  }
559
560  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
561    CS = cs;
562  }
563
564  const ScanfConversionSpecifier &getConversionSpecifier() const {
565    return cast<ScanfConversionSpecifier>(CS);
566  }
567
568  bool consumesDataArgument() const {
569    return CS.consumesDataArgument() && !SuppressAssignment;
570  }
571
572  ScanfArgType getArgType(ASTContext &Ctx) const;
573
574  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
575
576  void toString(raw_ostream &os) const;
577
578  static ScanfSpecifier Parse(const char *beg, const char *end);
579};
580
581} // end analyze_scanf namespace
582
583//===----------------------------------------------------------------------===//
584// Parsing and processing of format strings (both fprintf and fscanf).
585
586namespace analyze_format_string {
587
/// Identifies which part of a specifier a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
589
590class FormatStringHandler {
591public:
592  FormatStringHandler() {}
593  virtual ~FormatStringHandler();
594
595  virtual void HandleNullChar(const char *nullCharacter) {}
596
597  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
598
599  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
600                                     PositionContext p) {}
601
602  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
603
604  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
605                                         unsigned specifierLen) {}
606
607  // Printf-specific handlers.
608
609  virtual bool HandleInvalidPrintfConversionSpecifier(
610                                      const analyze_printf::PrintfSpecifier &FS,
611                                      const char *startSpecifier,
612                                      unsigned specifierLen) {
613    return true;
614  }
615
616  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
617                                     const char *startSpecifier,
618                                     unsigned specifierLen) {
619    return true;
620  }
621
622    // Scanf-specific handlers.
623
624  virtual bool HandleInvalidScanfConversionSpecifier(
625                                        const analyze_scanf::ScanfSpecifier &FS,
626                                        const char *startSpecifier,
627                                        unsigned specifierLen) {
628    return true;
629  }
630
631  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
632                                    const char *startSpecifier,
633                                    unsigned specifierLen) {
634    return true;
635  }
636
637  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
638};
639
640bool ParsePrintfString(FormatStringHandler &H,
641                       const char *beg, const char *end, const LangOptions &LO);
642
643bool ParseScanfString(FormatStringHandler &H,
644                      const char *beg, const char *end, const LangOptions &LO);
645
646} // end analyze_format_string namespace
647} // end clang namespace
648#endif
649