1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class TargetInfo;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
32/// Class representing optional flags with location and representation
33/// information.
class OptionalFlag {
public:
  /// Creates a cleared flag with no recorded position.  \p Representation is
  /// the text later returned by toString(); only the pointer is stored.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(nullptr), flag(false) {}

  /// Returns true if the flag is currently set.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records the location associated with this flag.  \p position must be
  /// non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the location recorded by setPosition().  Asserts if no position
  /// has been recorded yet (position starts out null).
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string supplied at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  LLVM_EXPLICIT operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position; // Null until setPosition() is called.
  bool flag;
};
62
63/// Represents the length modifier in a format string in scanf/printf.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsInt32,      // 'I32' (MSVCRT, like __int32)
    AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
    AsInt64,      // 'I64' (MSVCRT, like __int64)
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds an absent modifier: kind None and no start position.
  LengthModifier() : Position(nullptr), kind(None) {}

  /// Builds a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Start of the modifier text (null for a default-constructed modifier).
  const char *getStart() const { return Position; }

  /// Number of characters the modifier occupies: 0 for None, 2 for
  /// 'hh' and 'll', 3 for 'I32' and 'I64', and 1 for everything else.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    if (kind == AsInt32 || kind == AsInt64)
      return 3;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
118
119class ConversionSpecifier {
120public:
121  enum Kind {
122    InvalidSpecifier = 0,
123      // C99 conversion specifiers.
124    cArg,
125    dArg,
126    DArg, // Apple extension
127    iArg,
128    IntArgBeg = dArg, IntArgEnd = iArg,
129
130    oArg,
131    OArg, // Apple extension
132    uArg,
133    UArg, // Apple extension
134    xArg,
135    XArg,
136    UIntArgBeg = oArg, UIntArgEnd = XArg,
137
138    fArg,
139    FArg,
140    eArg,
141    EArg,
142    gArg,
143    GArg,
144    aArg,
145    AArg,
146    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
147
148    sArg,
149    pArg,
150    nArg,
151    PercentArg,
152    CArg,
153    SArg,
154
155    // ** Printf-specific **
156
157    // Objective-C specific specifiers.
158    ObjCObjArg,  // '@'
159    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
160
161    // GlibC specific specifiers.
162    PrintErrno,   // 'm'
163
164    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
165
166    // ** Scanf-specific **
167    ScanListArg, // '['
168    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
169  };
170
171  ConversionSpecifier(bool isPrintf = true)
172    : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
173      kind(InvalidSpecifier) {}
174
175  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
176    : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
177
178  const char *getStart() const {
179    return Position;
180  }
181
182  StringRef getCharacters() const {
183    return StringRef(getStart(), getLength());
184  }
185
186  bool consumesDataArgument() const {
187    switch (kind) {
188      case PrintErrno:
189        assert(IsPrintf);
190        return false;
191      case PercentArg:
192        return false;
193      default:
194        return true;
195    }
196  }
197
198  Kind getKind() const { return kind; }
199  void setKind(Kind k) { kind = k; }
200  unsigned getLength() const {
201    return EndScanList ? EndScanList - Position : 1;
202  }
203
204  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
205  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
206  bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
207  const char *toString() const;
208
209  bool isPrintfKind() const { return IsPrintf; }
210
211  Optional<ConversionSpecifier> getStandardSpecifier() const;
212
213protected:
214  bool IsPrintf;
215  const char *Position;
216  const char *EndScanList;
217  Kind kind;
218};
219
220class ArgType {
221public:
222  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
223              AnyCharTy, CStrTy, WCStrTy, WIntTy };
224private:
225  const Kind K;
226  QualType T;
227  const char *Name;
228  bool Ptr;
229public:
230  ArgType(Kind k = UnknownTy, const char *n = nullptr)
231      : K(k), Name(n), Ptr(false) {}
232  ArgType(QualType t, const char *n = nullptr)
233      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
234  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
235
236  static ArgType Invalid() { return ArgType(InvalidTy); }
237  bool isValid() const { return K != InvalidTy; }
238
239  /// Create an ArgType which corresponds to the type pointer to A.
240  static ArgType PtrTo(const ArgType& A) {
241    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
242    ArgType Res = A;
243    Res.Ptr = true;
244    return Res;
245  }
246
247  bool matchesType(ASTContext &C, QualType argTy) const;
248
249  QualType getRepresentativeType(ASTContext &C) const;
250
251  std::string getRepresentativeTypeName(ASTContext &C) const;
252};
253
254class OptionalAmount {
255public:
256  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
257
258  OptionalAmount(HowSpecified howSpecified,
259                 unsigned amount,
260                 const char *amountStart,
261                 unsigned amountLength,
262                 bool usesPositionalArg)
263  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
264  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
265
266  OptionalAmount(bool valid = true)
267  : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
268  UsesPositionalArg(0), UsesDotPrefix(0) {}
269
270  bool isInvalid() const {
271    return hs == Invalid;
272  }
273
274  HowSpecified getHowSpecified() const { return hs; }
275  void setHowSpecified(HowSpecified h) { hs = h; }
276
277  bool hasDataArgument() const { return hs == Arg; }
278
279  unsigned getArgIndex() const {
280    assert(hasDataArgument());
281    return amt;
282  }
283
284  unsigned getConstantAmount() const {
285    assert(hs == Constant);
286    return amt;
287  }
288
289  const char *getStart() const {
290      // We include the . character if it is given.
291    return start - UsesDotPrefix;
292  }
293
294  unsigned getConstantLength() const {
295    assert(hs == Constant);
296    return length + UsesDotPrefix;
297  }
298
299  ArgType getArgType(ASTContext &Ctx) const;
300
301  void toString(raw_ostream &os) const;
302
303  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
304  unsigned getPositionalArgIndex() const {
305    assert(hasDataArgument());
306    return amt + 1;
307  }
308
309  bool usesDotPrefix() const { return UsesDotPrefix; }
310  void setUsesDotPrefix() { UsesDotPrefix = true; }
311
312private:
313  const char *start;
314  unsigned length;
315  HowSpecified hs;
316  unsigned amt;
317  bool UsesPositionalArg : 1;
318  bool UsesDotPrefix;
319};
320
321
322class FormatSpecifier {
323protected:
324  LengthModifier LM;
325  OptionalAmount FieldWidth;
326  ConversionSpecifier CS;
327  /// Positional arguments, an IEEE extension:
328  ///  IEEE Std 1003.1, 2004 Edition
329  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
330  bool UsesPositionalArg;
331  unsigned argIndex;
332public:
333  FormatSpecifier(bool isPrintf)
334    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
335
336  void setLengthModifier(LengthModifier lm) {
337    LM = lm;
338  }
339
340  void setUsesPositionalArg() { UsesPositionalArg = true; }
341
342  void setArgIndex(unsigned i) {
343    argIndex = i;
344  }
345
346  unsigned getArgIndex() const {
347    return argIndex;
348  }
349
350  unsigned getPositionalArgIndex() const {
351    return argIndex + 1;
352  }
353
354  const LengthModifier &getLengthModifier() const {
355    return LM;
356  }
357
358  const OptionalAmount &getFieldWidth() const {
359    return FieldWidth;
360  }
361
362  void setFieldWidth(const OptionalAmount &Amt) {
363    FieldWidth = Amt;
364  }
365
366  bool usesPositionalArg() const { return UsesPositionalArg; }
367
368  bool hasValidLengthModifier(const TargetInfo &Target) const;
369
370  bool hasStandardLengthModifier() const;
371
372  Optional<LengthModifier> getCorrectedLengthModifier() const;
373
374  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
375
376  bool hasStandardLengthConversionCombination() const;
377
378  /// For a TypedefType QT, if it is a named integer type such as size_t,
379  /// assign the appropriate value to LM and return true.
380  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
381};
382
383} // end analyze_format_string namespace
384
385//===----------------------------------------------------------------------===//
386/// Pieces specific to fprintf format strings.
387
388namespace analyze_printf {
389
390class PrintfConversionSpecifier :
391  public analyze_format_string::ConversionSpecifier  {
392public:
393  PrintfConversionSpecifier()
394    : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
395
396  PrintfConversionSpecifier(const char *pos, Kind k)
397    : ConversionSpecifier(true, pos, k) {}
398
399  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
400  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
401                                    kind <= DoubleArgEnd; }
402  unsigned getLength() const {
403      // Conversion specifiers currently only are represented by
404      // single characters, but we be flexible.
405    return 1;
406  }
407
408  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
409    return CS->isPrintfKind();
410  }
411};
412
413using analyze_format_string::ArgType;
414using analyze_format_string::LengthModifier;
415using analyze_format_string::OptionalAmount;
416using analyze_format_string::OptionalFlag;
417
418class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
419  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
420  OptionalFlag IsLeftJustified; // '-'
421  OptionalFlag HasPlusPrefix; // '+'
422  OptionalFlag HasSpacePrefix; // ' '
423  OptionalFlag HasAlternativeForm; // '#'
424  OptionalFlag HasLeadingZeroes; // '0'
425  OptionalAmount Precision;
426public:
427  PrintfSpecifier() :
428    FormatSpecifier(/* isPrintf = */ true),
429    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
430    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
431
432  static PrintfSpecifier Parse(const char *beg, const char *end);
433
434    // Methods for incrementally constructing the PrintfSpecifier.
435  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
436    CS = cs;
437  }
438  void setHasThousandsGrouping(const char *position) {
439    HasThousandsGrouping = true;
440    HasThousandsGrouping.setPosition(position);
441  }
442  void setIsLeftJustified(const char *position) {
443    IsLeftJustified = true;
444    IsLeftJustified.setPosition(position);
445  }
446  void setHasPlusPrefix(const char *position) {
447    HasPlusPrefix = true;
448    HasPlusPrefix.setPosition(position);
449  }
450  void setHasSpacePrefix(const char *position) {
451    HasSpacePrefix = true;
452    HasSpacePrefix.setPosition(position);
453  }
454  void setHasAlternativeForm(const char *position) {
455    HasAlternativeForm = true;
456    HasAlternativeForm.setPosition(position);
457  }
458  void setHasLeadingZeros(const char *position) {
459    HasLeadingZeroes = true;
460    HasLeadingZeroes.setPosition(position);
461  }
462  void setUsesPositionalArg() { UsesPositionalArg = true; }
463
464    // Methods for querying the format specifier.
465
466  const PrintfConversionSpecifier &getConversionSpecifier() const {
467    return cast<PrintfConversionSpecifier>(CS);
468  }
469
470  void setPrecision(const OptionalAmount &Amt) {
471    Precision = Amt;
472    Precision.setUsesDotPrefix();
473  }
474
475  const OptionalAmount &getPrecision() const {
476    return Precision;
477  }
478
479  bool consumesDataArgument() const {
480    return getConversionSpecifier().consumesDataArgument();
481  }
482
483  /// \brief Returns the builtin type that a data argument
484  /// paired with this format specifier should have.  This method
485  /// will return null if the format specifier does not have
486  /// a matching data argument or the matching argument matches
487  /// more than one type.
488  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
489
490  const OptionalFlag &hasThousandsGrouping() const {
491      return HasThousandsGrouping;
492  }
493  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
494  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
495  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
496  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
497  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
498  bool usesPositionalArg() const { return UsesPositionalArg; }
499
500  /// Changes the specifier and length according to a QualType, retaining any
501  /// flags or options. Returns true on success, or false when a conversion
502  /// was not successful.
503  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
504               bool IsObjCLiteral);
505
506  void toString(raw_ostream &os) const;
507
508  // Validation methods - to check if any element results in undefined behavior
509  bool hasValidPlusPrefix() const;
510  bool hasValidAlternativeForm() const;
511  bool hasValidLeadingZeros() const;
512  bool hasValidSpacePrefix() const;
513  bool hasValidLeftJustified() const;
514  bool hasValidThousandsGroupingPrefix() const;
515
516  bool hasValidPrecision() const;
517  bool hasValidFieldWidth() const;
518};
519}  // end analyze_printf namespace
520
521//===----------------------------------------------------------------------===//
522/// Pieces specific to fscanf format strings.
523
524namespace analyze_scanf {
525
526class ScanfConversionSpecifier :
527    public analyze_format_string::ConversionSpecifier  {
528public:
529  ScanfConversionSpecifier()
530    : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
531
532  ScanfConversionSpecifier(const char *pos, Kind k)
533    : ConversionSpecifier(false, pos, k) {}
534
535  void setEndScanList(const char *pos) { EndScanList = pos; }
536
537  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
538    return !CS->isPrintfKind();
539  }
540};
541
542using analyze_format_string::ArgType;
543using analyze_format_string::LengthModifier;
544using analyze_format_string::OptionalAmount;
545using analyze_format_string::OptionalFlag;
546
547class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
548  OptionalFlag SuppressAssignment; // '*'
549public:
550  ScanfSpecifier() :
551    FormatSpecifier(/* isPrintf = */ false),
552    SuppressAssignment("*") {}
553
554  void setSuppressAssignment(const char *position) {
555    SuppressAssignment = true;
556    SuppressAssignment.setPosition(position);
557  }
558
559  const OptionalFlag &getSuppressAssignment() const {
560    return SuppressAssignment;
561  }
562
563  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
564    CS = cs;
565  }
566
567  const ScanfConversionSpecifier &getConversionSpecifier() const {
568    return cast<ScanfConversionSpecifier>(CS);
569  }
570
571  bool consumesDataArgument() const {
572    return CS.consumesDataArgument() && !SuppressAssignment;
573  }
574
575  ArgType getArgType(ASTContext &Ctx) const;
576
577  bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
578               ASTContext &Ctx);
579
580  void toString(raw_ostream &os) const;
581
582  static ScanfSpecifier Parse(const char *beg, const char *end);
583};
584
585} // end analyze_scanf namespace
586
587//===----------------------------------------------------------------------===//
588// Parsing and processing of format strings (both fprintf and fscanf).
589
590namespace analyze_format_string {
591
/// Identifies which amount a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
593
594class FormatStringHandler {
595public:
596  FormatStringHandler() {}
597  virtual ~FormatStringHandler();
598
599  virtual void HandleNullChar(const char *nullCharacter) {}
600
601  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
602
603  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
604                                     PositionContext p) {}
605
606  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
607
608  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
609                                         unsigned specifierLen) {}
610
611  // Printf-specific handlers.
612
613  virtual bool HandleInvalidPrintfConversionSpecifier(
614                                      const analyze_printf::PrintfSpecifier &FS,
615                                      const char *startSpecifier,
616                                      unsigned specifierLen) {
617    return true;
618  }
619
620  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
621                                     const char *startSpecifier,
622                                     unsigned specifierLen) {
623    return true;
624  }
625
626    // Scanf-specific handlers.
627
628  virtual bool HandleInvalidScanfConversionSpecifier(
629                                        const analyze_scanf::ScanfSpecifier &FS,
630                                        const char *startSpecifier,
631                                        unsigned specifierLen) {
632    return true;
633  }
634
635  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
636                                    const char *startSpecifier,
637                                    unsigned specifierLen) {
638    return true;
639  }
640
641  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
642};
643
644bool ParsePrintfString(FormatStringHandler &H,
645                       const char *beg, const char *end, const LangOptions &LO,
646                       const TargetInfo &Target);
647
648bool ParseScanfString(FormatStringHandler &H,
649                      const char *beg, const char *end, const LangOptions &LO,
650                      const TargetInfo &Target);
651
652} // end analyze_format_string namespace
653} // end clang namespace
654#endif
655