FormatString.h revision a76879eb4c75dbd9ec671558f0b8b79a28d4d747
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// Class representing an optional flag of a format specifier.
///
/// Besides the on/off state it keeps the textual representation of the flag
/// (as handed to the constructor) and, once setPosition() has been called,
/// the position at which the flag was found.
class OptionalFlag {
public:
  /// Creates a cleared flag with no recorded position.
  /// \param Representation the string later returned by toString(); the
  /// pointer is stored as-is, the characters are not copied.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag appears.  \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position.  The position is null until
  /// setPosition() has been called, in which case this asserts.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position; // Null until setPosition() is called.
  bool flag;
};
60
/// Describes the length modifier of a scanf/printf conversion: which
/// modifier it is and where it starts in the format string.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an empty modifier (kind None, null start).
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Pointer to the first character of the modifier.
  const char *getStart() const { return Position; }

  /// Number of characters the modifier occupies: 0 for None, 2 for the
  /// two-letter modifiers ('hh', 'll') and 1 for everything else.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
110
111class ConversionSpecifier {
112public:
113  enum Kind {
114    InvalidSpecifier = 0,
115      // C99 conversion specifiers.
116    cArg,
117    dArg,
118    iArg,
119    IntArgBeg = cArg, IntArgEnd = iArg,
120
121    oArg,
122    uArg,
123    xArg,
124    XArg,
125    UIntArgBeg = oArg, UIntArgEnd = XArg,
126
127    fArg,
128    FArg,
129    eArg,
130    EArg,
131    gArg,
132    GArg,
133    aArg,
134    AArg,
135    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
136
137    sArg,
138    pArg,
139    nArg,
140    PercentArg,
141    CArg,
142    SArg,
143
144    // ** Printf-specific **
145
146    // Objective-C specific specifiers.
147    ObjCObjArg,  // '@'
148    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
149
150    // GlibC specific specifiers.
151    PrintErrno,   // 'm'
152
153    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
154
155    // ** Scanf-specific **
156    ScanListArg, // '['
157    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
158  };
159
160  ConversionSpecifier(bool isPrintf)
161    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
162
163  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
164    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
165
166  const char *getStart() const {
167    return Position;
168  }
169
170  StringRef getCharacters() const {
171    return StringRef(getStart(), getLength());
172  }
173
174  bool consumesDataArgument() const {
175    switch (kind) {
176      case PrintErrno:
177        assert(IsPrintf);
178      case PercentArg:
179        return false;
180      default:
181        return true;
182    }
183  }
184
185  Kind getKind() const { return kind; }
186  void setKind(Kind k) { kind = k; }
187  unsigned getLength() const {
188    return EndScanList ? EndScanList - Position : 1;
189  }
190
191  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
192  const char *toString() const;
193
194  bool isPrintfKind() const { return IsPrintf; }
195
196protected:
197  bool IsPrintf;
198  const char *Position;
199  const char *EndScanList;
200  Kind kind;
201};
202
203class ArgTypeResult {
204public:
205  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
206              AnyCharTy, CStrTy, WCStrTy, WIntTy };
207private:
208  const Kind K;
209  QualType T;
210  const char *Name;
211  ArgTypeResult(bool) : K(InvalidTy), Name(0) {}
212public:
213  ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {}
214  ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {}
215  ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {}
216  ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n)  {}
217  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
218
219  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
220
221  bool isValid() const { return K != InvalidTy; }
222
223  const QualType *getSpecificType() const {
224    return K == SpecificTy ? &T : 0;
225  }
226
227  bool matchesType(ASTContext &C, QualType argTy) const;
228
229  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
230
231  QualType getRepresentativeType(ASTContext &C) const;
232
233  std::string getRepresentativeTypeName(ASTContext &C) const;
234};
235
236class OptionalAmount {
237public:
238  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
239
240  OptionalAmount(HowSpecified howSpecified,
241                 unsigned amount,
242                 const char *amountStart,
243                 unsigned amountLength,
244                 bool usesPositionalArg)
245  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
246  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
247
248  OptionalAmount(bool valid = true)
249  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
250  UsesPositionalArg(0), UsesDotPrefix(0) {}
251
252  bool isInvalid() const {
253    return hs == Invalid;
254  }
255
256  HowSpecified getHowSpecified() const { return hs; }
257  void setHowSpecified(HowSpecified h) { hs = h; }
258
259  bool hasDataArgument() const { return hs == Arg; }
260
261  unsigned getArgIndex() const {
262    assert(hasDataArgument());
263    return amt;
264  }
265
266  unsigned getConstantAmount() const {
267    assert(hs == Constant);
268    return amt;
269  }
270
271  const char *getStart() const {
272      // We include the . character if it is given.
273    return start - UsesDotPrefix;
274  }
275
276  unsigned getConstantLength() const {
277    assert(hs == Constant);
278    return length + UsesDotPrefix;
279  }
280
281  ArgTypeResult getArgType(ASTContext &Ctx) const;
282
283  void toString(raw_ostream &os) const;
284
285  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
286  unsigned getPositionalArgIndex() const {
287    assert(hasDataArgument());
288    return amt + 1;
289  }
290
291  bool usesDotPrefix() const { return UsesDotPrefix; }
292  void setUsesDotPrefix() { UsesDotPrefix = true; }
293
294private:
295  const char *start;
296  unsigned length;
297  HowSpecified hs;
298  unsigned amt;
299  bool UsesPositionalArg : 1;
300  bool UsesDotPrefix;
301};
302
303
304class FormatSpecifier {
305protected:
306  LengthModifier LM;
307  OptionalAmount FieldWidth;
308  ConversionSpecifier CS;
309    /// Positional arguments, an IEEE extension:
310    ///  IEEE Std 1003.1, 2004 Edition
311    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
312  bool UsesPositionalArg;
313  unsigned argIndex;
314public:
315  FormatSpecifier(bool isPrintf)
316    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
317
318  void setLengthModifier(LengthModifier lm) {
319    LM = lm;
320  }
321
322  void setUsesPositionalArg() { UsesPositionalArg = true; }
323
324  void setArgIndex(unsigned i) {
325    argIndex = i;
326  }
327
328  unsigned getArgIndex() const {
329    return argIndex;
330  }
331
332  unsigned getPositionalArgIndex() const {
333    return argIndex + 1;
334  }
335
336  const LengthModifier &getLengthModifier() const {
337    return LM;
338  }
339
340  const OptionalAmount &getFieldWidth() const {
341    return FieldWidth;
342  }
343
344  void setFieldWidth(const OptionalAmount &Amt) {
345    FieldWidth = Amt;
346  }
347
348  bool usesPositionalArg() const { return UsesPositionalArg; }
349
350  bool hasValidLengthModifier() const;
351};
352
353} // end analyze_format_string namespace
354
355//===----------------------------------------------------------------------===//
356/// Pieces specific to fprintf format strings.
357
358namespace analyze_printf {
359
360class PrintfConversionSpecifier :
361  public analyze_format_string::ConversionSpecifier  {
362public:
363  PrintfConversionSpecifier()
364    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
365
366  PrintfConversionSpecifier(const char *pos, Kind k)
367    : ConversionSpecifier(true, pos, k) {}
368
369  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
370  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
371  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
372                                    kind <= DoubleArgEnd; }
373  unsigned getLength() const {
374      // Conversion specifiers currently only are represented by
375      // single characters, but we be flexible.
376    return 1;
377  }
378
379  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
380    return CS->isPrintfKind();
381  }
382};
383
384using analyze_format_string::ArgTypeResult;
385using analyze_format_string::LengthModifier;
386using analyze_format_string::OptionalAmount;
387using analyze_format_string::OptionalFlag;
388
389class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
390  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
391  OptionalFlag IsLeftJustified; // '-'
392  OptionalFlag HasPlusPrefix; // '+'
393  OptionalFlag HasSpacePrefix; // ' '
394  OptionalFlag HasAlternativeForm; // '#'
395  OptionalFlag HasLeadingZeroes; // '0'
396  OptionalAmount Precision;
397public:
398  PrintfSpecifier() :
399    FormatSpecifier(/* isPrintf = */ true),
400    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
401    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
402
403  static PrintfSpecifier Parse(const char *beg, const char *end);
404
405    // Methods for incrementally constructing the PrintfSpecifier.
406  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
407    CS = cs;
408  }
409  void setHasThousandsGrouping(const char *position) {
410    HasThousandsGrouping = true;
411    HasThousandsGrouping.setPosition(position);
412  }
413  void setIsLeftJustified(const char *position) {
414    IsLeftJustified = true;
415    IsLeftJustified.setPosition(position);
416  }
417  void setHasPlusPrefix(const char *position) {
418    HasPlusPrefix = true;
419    HasPlusPrefix.setPosition(position);
420  }
421  void setHasSpacePrefix(const char *position) {
422    HasSpacePrefix = true;
423    HasSpacePrefix.setPosition(position);
424  }
425  void setHasAlternativeForm(const char *position) {
426    HasAlternativeForm = true;
427    HasAlternativeForm.setPosition(position);
428  }
429  void setHasLeadingZeros(const char *position) {
430    HasLeadingZeroes = true;
431    HasLeadingZeroes.setPosition(position);
432  }
433  void setUsesPositionalArg() { UsesPositionalArg = true; }
434
435    // Methods for querying the format specifier.
436
437  const PrintfConversionSpecifier &getConversionSpecifier() const {
438    return cast<PrintfConversionSpecifier>(CS);
439  }
440
441  void setPrecision(const OptionalAmount &Amt) {
442    Precision = Amt;
443    Precision.setUsesDotPrefix();
444  }
445
446  const OptionalAmount &getPrecision() const {
447    return Precision;
448  }
449
450  bool consumesDataArgument() const {
451    return getConversionSpecifier().consumesDataArgument();
452  }
453
454  /// \brief Returns the builtin type that a data argument
455  /// paired with this format specifier should have.  This method
456  /// will return null if the format specifier does not have
457  /// a matching data argument or the matching argument matches
458  /// more than one type.
459  ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
460
461  const OptionalFlag &hasThousandsGrouping() const {
462      return HasThousandsGrouping;
463  }
464  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
465  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
466  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
467  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
468  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
469  bool usesPositionalArg() const { return UsesPositionalArg; }
470
471  /// Changes the specifier and length according to a QualType, retaining any
472  /// flags or options. Returns true on success, or false when a conversion
473  /// was not successful.
474  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
475               bool IsObjCLiteral);
476
477  void toString(raw_ostream &os) const;
478
479  // Validation methods - to check if any element results in undefined behavior
480  bool hasValidPlusPrefix() const;
481  bool hasValidAlternativeForm() const;
482  bool hasValidLeadingZeros() const;
483  bool hasValidSpacePrefix() const;
484  bool hasValidLeftJustified() const;
485  bool hasValidThousandsGroupingPrefix() const;
486
487  bool hasValidPrecision() const;
488  bool hasValidFieldWidth() const;
489};
490}  // end analyze_printf namespace
491
492//===----------------------------------------------------------------------===//
493/// Pieces specific to fscanf format strings.
494
495namespace analyze_scanf {
496
497class ScanfConversionSpecifier :
498    public analyze_format_string::ConversionSpecifier  {
499public:
500  ScanfConversionSpecifier()
501    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
502
503  ScanfConversionSpecifier(const char *pos, Kind k)
504    : ConversionSpecifier(false, pos, k) {}
505
506  void setEndScanList(const char *pos) { EndScanList = pos; }
507
508  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
509    return !CS->isPrintfKind();
510  }
511};
512
513using analyze_format_string::ArgTypeResult;
514using analyze_format_string::LengthModifier;
515using analyze_format_string::OptionalAmount;
516using analyze_format_string::OptionalFlag;
517
518class ScanfArgTypeResult : public ArgTypeResult {
519public:
520  enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy };
521private:
522  Kind K;
523  ArgTypeResult A;
524  const char *Name;
525  QualType getRepresentativeType(ASTContext &C) const;
526public:
527  ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
528  ScanfArgTypeResult(ArgTypeResult a, const char *n = 0)
529      : K(PtrToArgTypeResultTy), A(a), Name(n) {
530    assert(A.isValid());
531  }
532
533  static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); }
534
535  bool isValid() const { return K != InvalidTy; }
536
537  bool matchesType(ASTContext& C, QualType argTy) const;
538
539  std::string getRepresentativeTypeName(ASTContext& C) const;
540};
541
542class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
543  OptionalFlag SuppressAssignment; // '*'
544public:
545  ScanfSpecifier() :
546    FormatSpecifier(/* isPrintf = */ false),
547    SuppressAssignment("*") {}
548
549  void setSuppressAssignment(const char *position) {
550    SuppressAssignment = true;
551    SuppressAssignment.setPosition(position);
552  }
553
554  const OptionalFlag &getSuppressAssignment() const {
555    return SuppressAssignment;
556  }
557
558  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
559    CS = cs;
560  }
561
562  const ScanfConversionSpecifier &getConversionSpecifier() const {
563    return cast<ScanfConversionSpecifier>(CS);
564  }
565
566  bool consumesDataArgument() const {
567    return CS.consumesDataArgument() && !SuppressAssignment;
568  }
569
570  ScanfArgTypeResult getArgType(ASTContext &Ctx) const;
571
572  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
573
574  void toString(raw_ostream &os) const;
575
576  static ScanfSpecifier Parse(const char *beg, const char *end);
577};
578
579} // end analyze_scanf namespace
580
581//===----------------------------------------------------------------------===//
582// Parsing and processing of format strings (both fprintf and fscanf).
583
584namespace analyze_format_string {
585
/// Identifies which part of a specifier a position refers to when it is
/// passed to FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
587
588class FormatStringHandler {
589public:
590  FormatStringHandler() {}
591  virtual ~FormatStringHandler();
592
593  virtual void HandleNullChar(const char *nullCharacter) {}
594
595  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
596                                     PositionContext p) {}
597
598  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
599
600  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
601                                         unsigned specifierLen) {}
602
603  // Printf-specific handlers.
604
605  virtual bool HandleInvalidPrintfConversionSpecifier(
606                                      const analyze_printf::PrintfSpecifier &FS,
607                                      const char *startSpecifier,
608                                      unsigned specifierLen) {
609    return true;
610  }
611
612  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
613                                     const char *startSpecifier,
614                                     unsigned specifierLen) {
615    return true;
616  }
617
618    // Scanf-specific handlers.
619
620  virtual bool HandleInvalidScanfConversionSpecifier(
621                                        const analyze_scanf::ScanfSpecifier &FS,
622                                        const char *startSpecifier,
623                                        unsigned specifierLen) {
624    return true;
625  }
626
627  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
628                                    const char *startSpecifier,
629                                    unsigned specifierLen) {
630    return true;
631  }
632
633  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
634};
635
636bool ParsePrintfString(FormatStringHandler &H,
637                       const char *beg, const char *end, const LangOptions &LO);
638
639bool ParseScanfString(FormatStringHandler &H,
640                      const char *beg, const char *end, const LangOptions &LO);
641
642} // end analyze_format_string namespace
643} // end clang namespace
644#endif
645