FormatString.h revision 47ad6ce1afad6b70927347dfa15e0f1dc76bf5bb
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class TargetInfo;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
32/// Class representing optional flags with location and representation
33/// information.
34class OptionalFlag {
35public:
36  OptionalFlag(const char *Representation)
37      : representation(Representation), flag(false) {}
38  bool isSet() { return flag; }
39  void set() { flag = true; }
40  void clear() { flag = false; }
41  void setPosition(const char *position) {
42    assert(position);
43    this->position = position;
44  }
45  const char *getPosition() const {
46    assert(position);
47    return position;
48  }
49  const char *toString() const { return representation; }
50
51  // Overloaded operators for bool like qualities
52  LLVM_EXPLICIT operator bool() const { return flag; }
53  OptionalFlag& operator=(const bool &rhs) {
54    flag = rhs;
55    return *this;  // Return a reference to myself.
56  }
57private:
58  const char *representation;
59  const char *position;
60  bool flag;
61};
62
/// Represents the length modifier in a format string in scanf/printf:
/// which modifier it is, and where its text starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsInt32,      // 'I32' (MSVCRT, like __int32)
    AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
    AsInt64,      // 'I64' (MSVCRT, like __int64)
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an empty modifier: kind None, null start position.
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k whose text starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position given at construction (may be null).
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier occupies: 0 for None,
  /// 2 for 'hh' and 'll', 3 for 'I32' and 'I64', and 1 for everything else.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    if (kind == AsInt32 || kind == AsInt64)
      return 3;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
118
119class ConversionSpecifier {
120public:
121  enum Kind {
122    InvalidSpecifier = 0,
123      // C99 conversion specifiers.
124    cArg,
125    dArg,
126    DArg, // Apple extension
127    iArg,
128    IntArgBeg = dArg, IntArgEnd = iArg,
129
130    oArg,
131    OArg, // Apple extension
132    uArg,
133    UArg, // Apple extension
134    xArg,
135    XArg,
136    UIntArgBeg = oArg, UIntArgEnd = XArg,
137
138    fArg,
139    FArg,
140    eArg,
141    EArg,
142    gArg,
143    GArg,
144    aArg,
145    AArg,
146    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
147
148    sArg,
149    pArg,
150    nArg,
151    PercentArg,
152    CArg,
153    SArg,
154
155    // ** Printf-specific **
156
157    // Objective-C specific specifiers.
158    ObjCObjArg,  // '@'
159    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
160
161    // GlibC specific specifiers.
162    PrintErrno,   // 'm'
163
164    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
165
166    // ** Scanf-specific **
167    ScanListArg, // '['
168    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
169  };
170
171  ConversionSpecifier(bool isPrintf = true)
172    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
173
174  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
175    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
176
177  const char *getStart() const {
178    return Position;
179  }
180
181  StringRef getCharacters() const {
182    return StringRef(getStart(), getLength());
183  }
184
185  bool consumesDataArgument() const {
186    switch (kind) {
187      case PrintErrno:
188        assert(IsPrintf);
189        return false;
190      case PercentArg:
191        return false;
192      default:
193        return true;
194    }
195  }
196
197  Kind getKind() const { return kind; }
198  void setKind(Kind k) { kind = k; }
199  unsigned getLength() const {
200    return EndScanList ? EndScanList - Position : 1;
201  }
202
203  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
204  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
205  bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
206  const char *toString() const;
207
208  bool isPrintfKind() const { return IsPrintf; }
209
210  Optional<ConversionSpecifier> getStandardSpecifier() const;
211
212protected:
213  bool IsPrintf;
214  const char *Position;
215  const char *EndScanList;
216  Kind kind;
217};
218
219class ArgType {
220public:
221  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
222              AnyCharTy, CStrTy, WCStrTy, WIntTy };
223private:
224  const Kind K;
225  QualType T;
226  const char *Name;
227  bool Ptr;
228public:
229  ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
230  ArgType(QualType t, const char *n = 0)
231      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
232  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
233
234  static ArgType Invalid() { return ArgType(InvalidTy); }
235  bool isValid() const { return K != InvalidTy; }
236
237  /// Create an ArgType which corresponds to the type pointer to A.
238  static ArgType PtrTo(const ArgType& A) {
239    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
240    ArgType Res = A;
241    Res.Ptr = true;
242    return Res;
243  }
244
245  bool matchesType(ASTContext &C, QualType argTy) const;
246
247  QualType getRepresentativeType(ASTContext &C) const;
248
249  std::string getRepresentativeTypeName(ASTContext &C) const;
250};
251
252class OptionalAmount {
253public:
254  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
255
256  OptionalAmount(HowSpecified howSpecified,
257                 unsigned amount,
258                 const char *amountStart,
259                 unsigned amountLength,
260                 bool usesPositionalArg)
261  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
262  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
263
264  OptionalAmount(bool valid = true)
265  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
266  UsesPositionalArg(0), UsesDotPrefix(0) {}
267
268  bool isInvalid() const {
269    return hs == Invalid;
270  }
271
272  HowSpecified getHowSpecified() const { return hs; }
273  void setHowSpecified(HowSpecified h) { hs = h; }
274
275  bool hasDataArgument() const { return hs == Arg; }
276
277  unsigned getArgIndex() const {
278    assert(hasDataArgument());
279    return amt;
280  }
281
282  unsigned getConstantAmount() const {
283    assert(hs == Constant);
284    return amt;
285  }
286
287  const char *getStart() const {
288      // We include the . character if it is given.
289    return start - UsesDotPrefix;
290  }
291
292  unsigned getConstantLength() const {
293    assert(hs == Constant);
294    return length + UsesDotPrefix;
295  }
296
297  ArgType getArgType(ASTContext &Ctx) const;
298
299  void toString(raw_ostream &os) const;
300
301  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
302  unsigned getPositionalArgIndex() const {
303    assert(hasDataArgument());
304    return amt + 1;
305  }
306
307  bool usesDotPrefix() const { return UsesDotPrefix; }
308  void setUsesDotPrefix() { UsesDotPrefix = true; }
309
310private:
311  const char *start;
312  unsigned length;
313  HowSpecified hs;
314  unsigned amt;
315  bool UsesPositionalArg : 1;
316  bool UsesDotPrefix;
317};
318
319
320class FormatSpecifier {
321protected:
322  LengthModifier LM;
323  OptionalAmount FieldWidth;
324  ConversionSpecifier CS;
325  /// Positional arguments, an IEEE extension:
326  ///  IEEE Std 1003.1, 2004 Edition
327  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
328  bool UsesPositionalArg;
329  unsigned argIndex;
330public:
331  FormatSpecifier(bool isPrintf)
332    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
333
334  void setLengthModifier(LengthModifier lm) {
335    LM = lm;
336  }
337
338  void setUsesPositionalArg() { UsesPositionalArg = true; }
339
340  void setArgIndex(unsigned i) {
341    argIndex = i;
342  }
343
344  unsigned getArgIndex() const {
345    return argIndex;
346  }
347
348  unsigned getPositionalArgIndex() const {
349    return argIndex + 1;
350  }
351
352  const LengthModifier &getLengthModifier() const {
353    return LM;
354  }
355
356  const OptionalAmount &getFieldWidth() const {
357    return FieldWidth;
358  }
359
360  void setFieldWidth(const OptionalAmount &Amt) {
361    FieldWidth = Amt;
362  }
363
364  bool usesPositionalArg() const { return UsesPositionalArg; }
365
366  bool hasValidLengthModifier(const TargetInfo &Target) const;
367
368  bool hasStandardLengthModifier() const;
369
370  Optional<LengthModifier> getCorrectedLengthModifier() const;
371
372  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
373
374  bool hasStandardLengthConversionCombination() const;
375
376  /// For a TypedefType QT, if it is a named integer type such as size_t,
377  /// assign the appropriate value to LM and return true.
378  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
379};
380
381} // end analyze_format_string namespace
382
383//===----------------------------------------------------------------------===//
384/// Pieces specific to fprintf format strings.
385
386namespace analyze_printf {
387
388class PrintfConversionSpecifier :
389  public analyze_format_string::ConversionSpecifier  {
390public:
391  PrintfConversionSpecifier()
392    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
393
394  PrintfConversionSpecifier(const char *pos, Kind k)
395    : ConversionSpecifier(true, pos, k) {}
396
397  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
398  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
399                                    kind <= DoubleArgEnd; }
400  unsigned getLength() const {
401      // Conversion specifiers currently only are represented by
402      // single characters, but we be flexible.
403    return 1;
404  }
405
406  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
407    return CS->isPrintfKind();
408  }
409};
410
411using analyze_format_string::ArgType;
412using analyze_format_string::LengthModifier;
413using analyze_format_string::OptionalAmount;
414using analyze_format_string::OptionalFlag;
415
416class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
417  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
418  OptionalFlag IsLeftJustified; // '-'
419  OptionalFlag HasPlusPrefix; // '+'
420  OptionalFlag HasSpacePrefix; // ' '
421  OptionalFlag HasAlternativeForm; // '#'
422  OptionalFlag HasLeadingZeroes; // '0'
423  OptionalAmount Precision;
424public:
425  PrintfSpecifier() :
426    FormatSpecifier(/* isPrintf = */ true),
427    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
428    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
429
430  static PrintfSpecifier Parse(const char *beg, const char *end);
431
432    // Methods for incrementally constructing the PrintfSpecifier.
433  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
434    CS = cs;
435  }
436  void setHasThousandsGrouping(const char *position) {
437    HasThousandsGrouping = true;
438    HasThousandsGrouping.setPosition(position);
439  }
440  void setIsLeftJustified(const char *position) {
441    IsLeftJustified = true;
442    IsLeftJustified.setPosition(position);
443  }
444  void setHasPlusPrefix(const char *position) {
445    HasPlusPrefix = true;
446    HasPlusPrefix.setPosition(position);
447  }
448  void setHasSpacePrefix(const char *position) {
449    HasSpacePrefix = true;
450    HasSpacePrefix.setPosition(position);
451  }
452  void setHasAlternativeForm(const char *position) {
453    HasAlternativeForm = true;
454    HasAlternativeForm.setPosition(position);
455  }
456  void setHasLeadingZeros(const char *position) {
457    HasLeadingZeroes = true;
458    HasLeadingZeroes.setPosition(position);
459  }
460  void setUsesPositionalArg() { UsesPositionalArg = true; }
461
462    // Methods for querying the format specifier.
463
464  const PrintfConversionSpecifier &getConversionSpecifier() const {
465    return cast<PrintfConversionSpecifier>(CS);
466  }
467
468  void setPrecision(const OptionalAmount &Amt) {
469    Precision = Amt;
470    Precision.setUsesDotPrefix();
471  }
472
473  const OptionalAmount &getPrecision() const {
474    return Precision;
475  }
476
477  bool consumesDataArgument() const {
478    return getConversionSpecifier().consumesDataArgument();
479  }
480
481  /// \brief Returns the builtin type that a data argument
482  /// paired with this format specifier should have.  This method
483  /// will return null if the format specifier does not have
484  /// a matching data argument or the matching argument matches
485  /// more than one type.
486  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
487
488  const OptionalFlag &hasThousandsGrouping() const {
489      return HasThousandsGrouping;
490  }
491  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
492  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
493  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
494  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
495  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
496  bool usesPositionalArg() const { return UsesPositionalArg; }
497
498  /// Changes the specifier and length according to a QualType, retaining any
499  /// flags or options. Returns true on success, or false when a conversion
500  /// was not successful.
501  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
502               bool IsObjCLiteral);
503
504  void toString(raw_ostream &os) const;
505
506  // Validation methods - to check if any element results in undefined behavior
507  bool hasValidPlusPrefix() const;
508  bool hasValidAlternativeForm() const;
509  bool hasValidLeadingZeros() const;
510  bool hasValidSpacePrefix() const;
511  bool hasValidLeftJustified() const;
512  bool hasValidThousandsGroupingPrefix() const;
513
514  bool hasValidPrecision() const;
515  bool hasValidFieldWidth() const;
516};
517}  // end analyze_printf namespace
518
519//===----------------------------------------------------------------------===//
520/// Pieces specific to fscanf format strings.
521
522namespace analyze_scanf {
523
524class ScanfConversionSpecifier :
525    public analyze_format_string::ConversionSpecifier  {
526public:
527  ScanfConversionSpecifier()
528    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
529
530  ScanfConversionSpecifier(const char *pos, Kind k)
531    : ConversionSpecifier(false, pos, k) {}
532
533  void setEndScanList(const char *pos) { EndScanList = pos; }
534
535  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
536    return !CS->isPrintfKind();
537  }
538};
539
540using analyze_format_string::ArgType;
541using analyze_format_string::LengthModifier;
542using analyze_format_string::OptionalAmount;
543using analyze_format_string::OptionalFlag;
544
545class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
546  OptionalFlag SuppressAssignment; // '*'
547public:
548  ScanfSpecifier() :
549    FormatSpecifier(/* isPrintf = */ false),
550    SuppressAssignment("*") {}
551
552  void setSuppressAssignment(const char *position) {
553    SuppressAssignment = true;
554    SuppressAssignment.setPosition(position);
555  }
556
557  const OptionalFlag &getSuppressAssignment() const {
558    return SuppressAssignment;
559  }
560
561  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
562    CS = cs;
563  }
564
565  const ScanfConversionSpecifier &getConversionSpecifier() const {
566    return cast<ScanfConversionSpecifier>(CS);
567  }
568
569  bool consumesDataArgument() const {
570    return CS.consumesDataArgument() && !SuppressAssignment;
571  }
572
573  ArgType getArgType(ASTContext &Ctx) const;
574
575  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
576
577  void toString(raw_ostream &os) const;
578
579  static ScanfSpecifier Parse(const char *beg, const char *end);
580};
581
582} // end analyze_scanf namespace
583
584//===----------------------------------------------------------------------===//
585// Parsing and processing of format strings (both fprintf and fscanf).
586
587namespace analyze_format_string {
588
/// Tag passed to FormatStringHandler::HandleInvalidPosition alongside the
/// offending position.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
590
591class FormatStringHandler {
592public:
593  FormatStringHandler() {}
594  virtual ~FormatStringHandler();
595
596  virtual void HandleNullChar(const char *nullCharacter) {}
597
598  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
599
600  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
601                                     PositionContext p) {}
602
603  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
604
605  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
606                                         unsigned specifierLen) {}
607
608  // Printf-specific handlers.
609
610  virtual bool HandleInvalidPrintfConversionSpecifier(
611                                      const analyze_printf::PrintfSpecifier &FS,
612                                      const char *startSpecifier,
613                                      unsigned specifierLen) {
614    return true;
615  }
616
617  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
618                                     const char *startSpecifier,
619                                     unsigned specifierLen) {
620    return true;
621  }
622
623    // Scanf-specific handlers.
624
625  virtual bool HandleInvalidScanfConversionSpecifier(
626                                        const analyze_scanf::ScanfSpecifier &FS,
627                                        const char *startSpecifier,
628                                        unsigned specifierLen) {
629    return true;
630  }
631
632  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
633                                    const char *startSpecifier,
634                                    unsigned specifierLen) {
635    return true;
636  }
637
638  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
639};
640
/// Entry point for processing a printf-style format string given as the
/// character range [beg, end), with callbacks delivered through \p H.
/// Declared here only; the definition is elsewhere.
/// NOTE(review): the meaning of the bool result is not visible in this
/// header -- confirm against the definition.
bool ParsePrintfString(FormatStringHandler &H,
                       const char *beg, const char *end, const LangOptions &LO,
                       const TargetInfo &Target);
644
/// Entry point for processing a scanf-style format string given as the
/// character range [beg, end), with callbacks delivered through \p H.
/// Declared here only; the definition is elsewhere.
/// NOTE(review): the meaning of the bool result is not visible in this
/// header -- confirm against the definition.
bool ParseScanfString(FormatStringHandler &H,
                      const char *beg, const char *end, const LangOptions &LO,
                      const TargetInfo &Target);
648
649} // end analyze_format_string namespace
650} // end clang namespace
651#endif
652