FormatString.h revision bbb6bb4952b77e57b842b4d3096848123ae690e7
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file defines APIs for analyzing the format strings of printf, fscanf,
// and friends.
//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class TargetInfo;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
/// Class representing optional flags with location and representation
/// information.
///
/// A flag carries three pieces of state: whether it is set, the textual
/// representation handed to the constructor (returned by toString()), and
/// the position recorded through setPosition().  A freshly constructed flag
/// is cleared and has no position.
class OptionalFlag {
public:
  /// \param Representation the spelling returned by toString().  Only the
  /// pointer is stored; the characters are not copied.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  Const so that it can be
  /// called through a const reference, like operator bool().
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records the position of the flag.  \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position.  Asserts if setPosition() has not been
  /// called; the member is null-initialized so the check is reliable.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation given at construction time.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
62
/// Represents the length modifier in a format string in scanf/printf.
///
/// Stores the kind of modifier together with a pointer to where it starts.
/// A default-constructed modifier has kind None and a null start.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  LengthModifier() : Position(0), kind(None) {}
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Pointer to the first character of the modifier (null if never set).
  const char *getStart() const { return Position; }

  /// Number of characters the modifier occupies: 0 for None, 2 for the
  /// doubled spellings ('hh' and 'll'), and 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    const bool IsDoubled = (kind == AsLongLong || kind == AsChar);
    return IsDoubled ? 2 : 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
112
113class ConversionSpecifier {
114public:
115  enum Kind {
116    InvalidSpecifier = 0,
117      // C99 conversion specifiers.
118    cArg,
119    dArg,
120    iArg,
121    IntArgBeg = cArg, IntArgEnd = iArg,
122
123    oArg,
124    uArg,
125    xArg,
126    XArg,
127    UIntArgBeg = oArg, UIntArgEnd = XArg,
128
129    fArg,
130    FArg,
131    eArg,
132    EArg,
133    gArg,
134    GArg,
135    aArg,
136    AArg,
137    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
138
139    sArg,
140    pArg,
141    nArg,
142    PercentArg,
143    CArg,
144    SArg,
145
146    // ** Printf-specific **
147
148    // Objective-C specific specifiers.
149    ObjCObjArg,  // '@'
150    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
151
152    // GlibC specific specifiers.
153    PrintErrno,   // 'm'
154
155    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
156
157    // ** Scanf-specific **
158    ScanListArg, // '['
159    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
160  };
161
162  ConversionSpecifier(bool isPrintf)
163    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
164
165  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
166    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
167
168  const char *getStart() const {
169    return Position;
170  }
171
172  StringRef getCharacters() const {
173    return StringRef(getStart(), getLength());
174  }
175
176  bool consumesDataArgument() const {
177    switch (kind) {
178      case PrintErrno:
179        assert(IsPrintf);
180        return false;
181      case PercentArg:
182        return false;
183      default:
184        return true;
185    }
186  }
187
188  Kind getKind() const { return kind; }
189  void setKind(Kind k) { kind = k; }
190  unsigned getLength() const {
191    return EndScanList ? EndScanList - Position : 1;
192  }
193
194  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
195  const char *toString() const;
196
197  bool isPrintfKind() const { return IsPrintf; }
198
199protected:
200  bool IsPrintf;
201  const char *Position;
202  const char *EndScanList;
203  Kind kind;
204};
205
206class ArgType {
207public:
208  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
209              AnyCharTy, CStrTy, WCStrTy, WIntTy };
210private:
211  const Kind K;
212  QualType T;
213  const char *Name;
214  bool Ptr;
215public:
216  ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
217  ArgType(QualType t, const char *n = 0)
218      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
219  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
220
221  static ArgType Invalid() { return ArgType(InvalidTy); }
222  bool isValid() const { return K != InvalidTy; }
223
224  /// Create an ArgType which corresponds to the type pointer to A.
225  static ArgType PtrTo(const ArgType& A) {
226    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
227    ArgType Res = A;
228    Res.Ptr = true;
229    return Res;
230  }
231
232  bool matchesType(ASTContext &C, QualType argTy) const;
233
234  QualType getRepresentativeType(ASTContext &C) const;
235
236  std::string getRepresentativeTypeName(ASTContext &C) const;
237};
238
239class OptionalAmount {
240public:
241  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
242
243  OptionalAmount(HowSpecified howSpecified,
244                 unsigned amount,
245                 const char *amountStart,
246                 unsigned amountLength,
247                 bool usesPositionalArg)
248  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
249  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
250
251  OptionalAmount(bool valid = true)
252  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
253  UsesPositionalArg(0), UsesDotPrefix(0) {}
254
255  bool isInvalid() const {
256    return hs == Invalid;
257  }
258
259  HowSpecified getHowSpecified() const { return hs; }
260  void setHowSpecified(HowSpecified h) { hs = h; }
261
262  bool hasDataArgument() const { return hs == Arg; }
263
264  unsigned getArgIndex() const {
265    assert(hasDataArgument());
266    return amt;
267  }
268
269  unsigned getConstantAmount() const {
270    assert(hs == Constant);
271    return amt;
272  }
273
274  const char *getStart() const {
275      // We include the . character if it is given.
276    return start - UsesDotPrefix;
277  }
278
279  unsigned getConstantLength() const {
280    assert(hs == Constant);
281    return length + UsesDotPrefix;
282  }
283
284  ArgType getArgType(ASTContext &Ctx) const;
285
286  void toString(raw_ostream &os) const;
287
288  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
289  unsigned getPositionalArgIndex() const {
290    assert(hasDataArgument());
291    return amt + 1;
292  }
293
294  bool usesDotPrefix() const { return UsesDotPrefix; }
295  void setUsesDotPrefix() { UsesDotPrefix = true; }
296
297private:
298  const char *start;
299  unsigned length;
300  HowSpecified hs;
301  unsigned amt;
302  bool UsesPositionalArg : 1;
303  bool UsesDotPrefix;
304};
305
306
307class FormatSpecifier {
308protected:
309  LengthModifier LM;
310  OptionalAmount FieldWidth;
311  ConversionSpecifier CS;
312  /// Positional arguments, an IEEE extension:
313  ///  IEEE Std 1003.1, 2004 Edition
314  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
315  bool UsesPositionalArg;
316  unsigned argIndex;
317public:
318  FormatSpecifier(bool isPrintf)
319    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
320
321  void setLengthModifier(LengthModifier lm) {
322    LM = lm;
323  }
324
325  void setUsesPositionalArg() { UsesPositionalArg = true; }
326
327  void setArgIndex(unsigned i) {
328    argIndex = i;
329  }
330
331  unsigned getArgIndex() const {
332    return argIndex;
333  }
334
335  unsigned getPositionalArgIndex() const {
336    return argIndex + 1;
337  }
338
339  const LengthModifier &getLengthModifier() const {
340    return LM;
341  }
342
343  const OptionalAmount &getFieldWidth() const {
344    return FieldWidth;
345  }
346
347  void setFieldWidth(const OptionalAmount &Amt) {
348    FieldWidth = Amt;
349  }
350
351  bool usesPositionalArg() const { return UsesPositionalArg; }
352
353  bool hasValidLengthModifier(const TargetInfo &Target) const;
354
355  bool hasStandardLengthModifier() const;
356
357  llvm::Optional<LengthModifier> getCorrectedLengthModifier() const;
358
359  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
360
361  bool hasStandardLengthConversionCombination() const;
362
363  /// For a TypedefType QT, if it is a named integer type such as size_t,
364  /// assign the appropriate value to LM and return true.
365  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
366};
367
368} // end analyze_format_string namespace
369
370//===----------------------------------------------------------------------===//
371/// Pieces specific to fprintf format strings.
372
373namespace analyze_printf {
374
375class PrintfConversionSpecifier :
376  public analyze_format_string::ConversionSpecifier  {
377public:
378  PrintfConversionSpecifier()
379    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
380
381  PrintfConversionSpecifier(const char *pos, Kind k)
382    : ConversionSpecifier(true, pos, k) {}
383
384  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
385  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
386  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
387                                    kind <= DoubleArgEnd; }
388  unsigned getLength() const {
389      // Conversion specifiers currently only are represented by
390      // single characters, but we be flexible.
391    return 1;
392  }
393
394  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
395    return CS->isPrintfKind();
396  }
397};
398
399using analyze_format_string::ArgType;
400using analyze_format_string::LengthModifier;
401using analyze_format_string::OptionalAmount;
402using analyze_format_string::OptionalFlag;
403
404class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
405  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
406  OptionalFlag IsLeftJustified; // '-'
407  OptionalFlag HasPlusPrefix; // '+'
408  OptionalFlag HasSpacePrefix; // ' '
409  OptionalFlag HasAlternativeForm; // '#'
410  OptionalFlag HasLeadingZeroes; // '0'
411  OptionalAmount Precision;
412public:
413  PrintfSpecifier() :
414    FormatSpecifier(/* isPrintf = */ true),
415    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
416    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
417
418  static PrintfSpecifier Parse(const char *beg, const char *end);
419
420    // Methods for incrementally constructing the PrintfSpecifier.
421  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
422    CS = cs;
423  }
424  void setHasThousandsGrouping(const char *position) {
425    HasThousandsGrouping = true;
426    HasThousandsGrouping.setPosition(position);
427  }
428  void setIsLeftJustified(const char *position) {
429    IsLeftJustified = true;
430    IsLeftJustified.setPosition(position);
431  }
432  void setHasPlusPrefix(const char *position) {
433    HasPlusPrefix = true;
434    HasPlusPrefix.setPosition(position);
435  }
436  void setHasSpacePrefix(const char *position) {
437    HasSpacePrefix = true;
438    HasSpacePrefix.setPosition(position);
439  }
440  void setHasAlternativeForm(const char *position) {
441    HasAlternativeForm = true;
442    HasAlternativeForm.setPosition(position);
443  }
444  void setHasLeadingZeros(const char *position) {
445    HasLeadingZeroes = true;
446    HasLeadingZeroes.setPosition(position);
447  }
448  void setUsesPositionalArg() { UsesPositionalArg = true; }
449
450    // Methods for querying the format specifier.
451
452  const PrintfConversionSpecifier &getConversionSpecifier() const {
453    return cast<PrintfConversionSpecifier>(CS);
454  }
455
456  void setPrecision(const OptionalAmount &Amt) {
457    Precision = Amt;
458    Precision.setUsesDotPrefix();
459  }
460
461  const OptionalAmount &getPrecision() const {
462    return Precision;
463  }
464
465  bool consumesDataArgument() const {
466    return getConversionSpecifier().consumesDataArgument();
467  }
468
469  /// \brief Returns the builtin type that a data argument
470  /// paired with this format specifier should have.  This method
471  /// will return null if the format specifier does not have
472  /// a matching data argument or the matching argument matches
473  /// more than one type.
474  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
475
476  const OptionalFlag &hasThousandsGrouping() const {
477      return HasThousandsGrouping;
478  }
479  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
480  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
481  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
482  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
483  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
484  bool usesPositionalArg() const { return UsesPositionalArg; }
485
486  /// Changes the specifier and length according to a QualType, retaining any
487  /// flags or options. Returns true on success, or false when a conversion
488  /// was not successful.
489  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
490               bool IsObjCLiteral);
491
492  void toString(raw_ostream &os) const;
493
494  // Validation methods - to check if any element results in undefined behavior
495  bool hasValidPlusPrefix() const;
496  bool hasValidAlternativeForm() const;
497  bool hasValidLeadingZeros() const;
498  bool hasValidSpacePrefix() const;
499  bool hasValidLeftJustified() const;
500  bool hasValidThousandsGroupingPrefix() const;
501
502  bool hasValidPrecision() const;
503  bool hasValidFieldWidth() const;
504};
505}  // end analyze_printf namespace
506
507//===----------------------------------------------------------------------===//
508/// Pieces specific to fscanf format strings.
509
510namespace analyze_scanf {
511
512class ScanfConversionSpecifier :
513    public analyze_format_string::ConversionSpecifier  {
514public:
515  ScanfConversionSpecifier()
516    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
517
518  ScanfConversionSpecifier(const char *pos, Kind k)
519    : ConversionSpecifier(false, pos, k) {}
520
521  void setEndScanList(const char *pos) { EndScanList = pos; }
522
523  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
524    return !CS->isPrintfKind();
525  }
526};
527
528using analyze_format_string::ArgType;
529using analyze_format_string::LengthModifier;
530using analyze_format_string::OptionalAmount;
531using analyze_format_string::OptionalFlag;
532
533class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
534  OptionalFlag SuppressAssignment; // '*'
535public:
536  ScanfSpecifier() :
537    FormatSpecifier(/* isPrintf = */ false),
538    SuppressAssignment("*") {}
539
540  void setSuppressAssignment(const char *position) {
541    SuppressAssignment = true;
542    SuppressAssignment.setPosition(position);
543  }
544
545  const OptionalFlag &getSuppressAssignment() const {
546    return SuppressAssignment;
547  }
548
549  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
550    CS = cs;
551  }
552
553  const ScanfConversionSpecifier &getConversionSpecifier() const {
554    return cast<ScanfConversionSpecifier>(CS);
555  }
556
557  bool consumesDataArgument() const {
558    return CS.consumesDataArgument() && !SuppressAssignment;
559  }
560
561  ArgType getArgType(ASTContext &Ctx) const;
562
563  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
564
565  void toString(raw_ostream &os) const;
566
567  static ScanfSpecifier Parse(const char *beg, const char *end);
568};
569
570} // end analyze_scanf namespace
571
572//===----------------------------------------------------------------------===//
573// Parsing and processing of format strings (both fprintf and fscanf).
574
575namespace analyze_format_string {
576
/// Tells a handler whether a reported position refers to the field width
/// or to the precision.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
578
579class FormatStringHandler {
580public:
581  FormatStringHandler() {}
582  virtual ~FormatStringHandler();
583
584  virtual void HandleNullChar(const char *nullCharacter) {}
585
586  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
587
588  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
589                                     PositionContext p) {}
590
591  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
592
593  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
594                                         unsigned specifierLen) {}
595
596  // Printf-specific handlers.
597
598  virtual bool HandleInvalidPrintfConversionSpecifier(
599                                      const analyze_printf::PrintfSpecifier &FS,
600                                      const char *startSpecifier,
601                                      unsigned specifierLen) {
602    return true;
603  }
604
605  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
606                                     const char *startSpecifier,
607                                     unsigned specifierLen) {
608    return true;
609  }
610
611    // Scanf-specific handlers.
612
613  virtual bool HandleInvalidScanfConversionSpecifier(
614                                        const analyze_scanf::ScanfSpecifier &FS,
615                                        const char *startSpecifier,
616                                        unsigned specifierLen) {
617    return true;
618  }
619
620  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
621                                    const char *startSpecifier,
622                                    unsigned specifierLen) {
623    return true;
624  }
625
626  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
627};
628
629bool ParsePrintfString(FormatStringHandler &H,
630                       const char *beg, const char *end, const LangOptions &LO);
631
632bool ParseScanfString(FormatStringHandler &H,
633                      const char *beg, const char *end, const LangOptions &LO);
634
635} // end analyze_format_string namespace
636} // end clang namespace
637#endif
638