1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class TargetInfo;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
/// Class representing optional flags with location and representation
/// information.
///
/// An OptionalFlag stores three things: the textual representation handed to
/// the constructor, a boolean "is set" state, and a position pointer recorded
/// through setPosition().  Neither pointer is copied or owned by this class.
class OptionalFlag {
public:
  /// Creates a cleared flag with no recorded position.
  ///
  /// \param Representation the string later returned by toString().
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns whether the flag is currently set.  This is const so that it can
  /// be called through the const references by which flags are handed out.
  bool isSet() const { return flag; }

  /// Marks the flag as set.
  void set() { flag = true; }

  /// Marks the flag as not set.  The recorded position is left untouched.
  void clear() { flag = false; }

  /// Records where the flag was found.  \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the position recorded by setPosition().  Because the position
  /// starts out null, calling this before setPosition() trips the assertion
  /// instead of reading an indeterminate pointer.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string given at construction time.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  LLVM_EXPLICIT operator bool() const { return flag; }

  /// Assigning a bool sets or clears the flag; the position is not changed.
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
62
/// Represents the length modifier in a format string in scanf/printf.
///
/// A modifier is described by its kind together with a pointer to where it
/// starts; the pointer is stored as given and not owned.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds a modifier of kind None with a null start position.
  LengthModifier() : Position(0), kind(None) {}

  /// Builds a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position supplied at construction.
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier occupies: zero for None,
  /// two for the doubled spellings (AsChar, AsLongLong), one otherwise.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
112
113class ConversionSpecifier {
114public:
115  enum Kind {
116    InvalidSpecifier = 0,
117      // C99 conversion specifiers.
118    cArg,
119    dArg,
120    DArg, // Apple extension
121    iArg,
122    IntArgBeg = dArg, IntArgEnd = iArg,
123
124    oArg,
125    OArg, // Apple extension
126    uArg,
127    UArg, // Apple extension
128    xArg,
129    XArg,
130    UIntArgBeg = oArg, UIntArgEnd = XArg,
131
132    fArg,
133    FArg,
134    eArg,
135    EArg,
136    gArg,
137    GArg,
138    aArg,
139    AArg,
140    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
141
142    sArg,
143    pArg,
144    nArg,
145    PercentArg,
146    CArg,
147    SArg,
148
149    // ** Printf-specific **
150
151    // Objective-C specific specifiers.
152    ObjCObjArg,  // '@'
153    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
154
155    // GlibC specific specifiers.
156    PrintErrno,   // 'm'
157
158    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
159
160    // ** Scanf-specific **
161    ScanListArg, // '['
162    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
163  };
164
165  ConversionSpecifier(bool isPrintf = true)
166    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
167
168  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
169    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
170
171  const char *getStart() const {
172    return Position;
173  }
174
175  StringRef getCharacters() const {
176    return StringRef(getStart(), getLength());
177  }
178
179  bool consumesDataArgument() const {
180    switch (kind) {
181      case PrintErrno:
182        assert(IsPrintf);
183        return false;
184      case PercentArg:
185        return false;
186      default:
187        return true;
188    }
189  }
190
191  Kind getKind() const { return kind; }
192  void setKind(Kind k) { kind = k; }
193  unsigned getLength() const {
194    return EndScanList ? EndScanList - Position : 1;
195  }
196
197  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
198  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
199  bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
200  const char *toString() const;
201
202  bool isPrintfKind() const { return IsPrintf; }
203
204  Optional<ConversionSpecifier> getStandardSpecifier() const;
205
206protected:
207  bool IsPrintf;
208  const char *Position;
209  const char *EndScanList;
210  Kind kind;
211};
212
213class ArgType {
214public:
215  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
216              AnyCharTy, CStrTy, WCStrTy, WIntTy };
217private:
218  const Kind K;
219  QualType T;
220  const char *Name;
221  bool Ptr;
222public:
223  ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
224  ArgType(QualType t, const char *n = 0)
225      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
226  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
227
228  static ArgType Invalid() { return ArgType(InvalidTy); }
229  bool isValid() const { return K != InvalidTy; }
230
231  /// Create an ArgType which corresponds to the type pointer to A.
232  static ArgType PtrTo(const ArgType& A) {
233    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
234    ArgType Res = A;
235    Res.Ptr = true;
236    return Res;
237  }
238
239  bool matchesType(ASTContext &C, QualType argTy) const;
240
241  QualType getRepresentativeType(ASTContext &C) const;
242
243  std::string getRepresentativeTypeName(ASTContext &C) const;
244};
245
246class OptionalAmount {
247public:
248  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
249
250  OptionalAmount(HowSpecified howSpecified,
251                 unsigned amount,
252                 const char *amountStart,
253                 unsigned amountLength,
254                 bool usesPositionalArg)
255  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
256  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
257
258  OptionalAmount(bool valid = true)
259  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
260  UsesPositionalArg(0), UsesDotPrefix(0) {}
261
262  bool isInvalid() const {
263    return hs == Invalid;
264  }
265
266  HowSpecified getHowSpecified() const { return hs; }
267  void setHowSpecified(HowSpecified h) { hs = h; }
268
269  bool hasDataArgument() const { return hs == Arg; }
270
271  unsigned getArgIndex() const {
272    assert(hasDataArgument());
273    return amt;
274  }
275
276  unsigned getConstantAmount() const {
277    assert(hs == Constant);
278    return amt;
279  }
280
281  const char *getStart() const {
282      // We include the . character if it is given.
283    return start - UsesDotPrefix;
284  }
285
286  unsigned getConstantLength() const {
287    assert(hs == Constant);
288    return length + UsesDotPrefix;
289  }
290
291  ArgType getArgType(ASTContext &Ctx) const;
292
293  void toString(raw_ostream &os) const;
294
295  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
296  unsigned getPositionalArgIndex() const {
297    assert(hasDataArgument());
298    return amt + 1;
299  }
300
301  bool usesDotPrefix() const { return UsesDotPrefix; }
302  void setUsesDotPrefix() { UsesDotPrefix = true; }
303
304private:
305  const char *start;
306  unsigned length;
307  HowSpecified hs;
308  unsigned amt;
309  bool UsesPositionalArg : 1;
310  bool UsesDotPrefix;
311};
312
313
314class FormatSpecifier {
315protected:
316  LengthModifier LM;
317  OptionalAmount FieldWidth;
318  ConversionSpecifier CS;
319  /// Positional arguments, an IEEE extension:
320  ///  IEEE Std 1003.1, 2004 Edition
321  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
322  bool UsesPositionalArg;
323  unsigned argIndex;
324public:
325  FormatSpecifier(bool isPrintf)
326    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
327
328  void setLengthModifier(LengthModifier lm) {
329    LM = lm;
330  }
331
332  void setUsesPositionalArg() { UsesPositionalArg = true; }
333
334  void setArgIndex(unsigned i) {
335    argIndex = i;
336  }
337
338  unsigned getArgIndex() const {
339    return argIndex;
340  }
341
342  unsigned getPositionalArgIndex() const {
343    return argIndex + 1;
344  }
345
346  const LengthModifier &getLengthModifier() const {
347    return LM;
348  }
349
350  const OptionalAmount &getFieldWidth() const {
351    return FieldWidth;
352  }
353
354  void setFieldWidth(const OptionalAmount &Amt) {
355    FieldWidth = Amt;
356  }
357
358  bool usesPositionalArg() const { return UsesPositionalArg; }
359
360  bool hasValidLengthModifier(const TargetInfo &Target) const;
361
362  bool hasStandardLengthModifier() const;
363
364  Optional<LengthModifier> getCorrectedLengthModifier() const;
365
366  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
367
368  bool hasStandardLengthConversionCombination() const;
369
370  /// For a TypedefType QT, if it is a named integer type such as size_t,
371  /// assign the appropriate value to LM and return true.
372  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
373};
374
375} // end analyze_format_string namespace
376
377//===----------------------------------------------------------------------===//
378/// Pieces specific to fprintf format strings.
379
380namespace analyze_printf {
381
382class PrintfConversionSpecifier :
383  public analyze_format_string::ConversionSpecifier  {
384public:
385  PrintfConversionSpecifier()
386    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
387
388  PrintfConversionSpecifier(const char *pos, Kind k)
389    : ConversionSpecifier(true, pos, k) {}
390
391  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
392  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
393                                    kind <= DoubleArgEnd; }
394  unsigned getLength() const {
395      // Conversion specifiers currently only are represented by
396      // single characters, but we be flexible.
397    return 1;
398  }
399
400  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
401    return CS->isPrintfKind();
402  }
403};
404
405using analyze_format_string::ArgType;
406using analyze_format_string::LengthModifier;
407using analyze_format_string::OptionalAmount;
408using analyze_format_string::OptionalFlag;
409
410class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
411  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
412  OptionalFlag IsLeftJustified; // '-'
413  OptionalFlag HasPlusPrefix; // '+'
414  OptionalFlag HasSpacePrefix; // ' '
415  OptionalFlag HasAlternativeForm; // '#'
416  OptionalFlag HasLeadingZeroes; // '0'
417  OptionalAmount Precision;
418public:
419  PrintfSpecifier() :
420    FormatSpecifier(/* isPrintf = */ true),
421    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
422    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
423
424  static PrintfSpecifier Parse(const char *beg, const char *end);
425
426    // Methods for incrementally constructing the PrintfSpecifier.
427  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
428    CS = cs;
429  }
430  void setHasThousandsGrouping(const char *position) {
431    HasThousandsGrouping = true;
432    HasThousandsGrouping.setPosition(position);
433  }
434  void setIsLeftJustified(const char *position) {
435    IsLeftJustified = true;
436    IsLeftJustified.setPosition(position);
437  }
438  void setHasPlusPrefix(const char *position) {
439    HasPlusPrefix = true;
440    HasPlusPrefix.setPosition(position);
441  }
442  void setHasSpacePrefix(const char *position) {
443    HasSpacePrefix = true;
444    HasSpacePrefix.setPosition(position);
445  }
446  void setHasAlternativeForm(const char *position) {
447    HasAlternativeForm = true;
448    HasAlternativeForm.setPosition(position);
449  }
450  void setHasLeadingZeros(const char *position) {
451    HasLeadingZeroes = true;
452    HasLeadingZeroes.setPosition(position);
453  }
454  void setUsesPositionalArg() { UsesPositionalArg = true; }
455
456    // Methods for querying the format specifier.
457
458  const PrintfConversionSpecifier &getConversionSpecifier() const {
459    return cast<PrintfConversionSpecifier>(CS);
460  }
461
462  void setPrecision(const OptionalAmount &Amt) {
463    Precision = Amt;
464    Precision.setUsesDotPrefix();
465  }
466
467  const OptionalAmount &getPrecision() const {
468    return Precision;
469  }
470
471  bool consumesDataArgument() const {
472    return getConversionSpecifier().consumesDataArgument();
473  }
474
475  /// \brief Returns the builtin type that a data argument
476  /// paired with this format specifier should have.  This method
477  /// will return null if the format specifier does not have
478  /// a matching data argument or the matching argument matches
479  /// more than one type.
480  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
481
482  const OptionalFlag &hasThousandsGrouping() const {
483      return HasThousandsGrouping;
484  }
485  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
486  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
487  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
488  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
489  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
490  bool usesPositionalArg() const { return UsesPositionalArg; }
491
492  /// Changes the specifier and length according to a QualType, retaining any
493  /// flags or options. Returns true on success, or false when a conversion
494  /// was not successful.
495  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
496               bool IsObjCLiteral);
497
498  void toString(raw_ostream &os) const;
499
500  // Validation methods - to check if any element results in undefined behavior
501  bool hasValidPlusPrefix() const;
502  bool hasValidAlternativeForm() const;
503  bool hasValidLeadingZeros() const;
504  bool hasValidSpacePrefix() const;
505  bool hasValidLeftJustified() const;
506  bool hasValidThousandsGroupingPrefix() const;
507
508  bool hasValidPrecision() const;
509  bool hasValidFieldWidth() const;
510};
511}  // end analyze_printf namespace
512
513//===----------------------------------------------------------------------===//
514/// Pieces specific to fscanf format strings.
515
516namespace analyze_scanf {
517
518class ScanfConversionSpecifier :
519    public analyze_format_string::ConversionSpecifier  {
520public:
521  ScanfConversionSpecifier()
522    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
523
524  ScanfConversionSpecifier(const char *pos, Kind k)
525    : ConversionSpecifier(false, pos, k) {}
526
527  void setEndScanList(const char *pos) { EndScanList = pos; }
528
529  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
530    return !CS->isPrintfKind();
531  }
532};
533
534using analyze_format_string::ArgType;
535using analyze_format_string::LengthModifier;
536using analyze_format_string::OptionalAmount;
537using analyze_format_string::OptionalFlag;
538
539class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
540  OptionalFlag SuppressAssignment; // '*'
541public:
542  ScanfSpecifier() :
543    FormatSpecifier(/* isPrintf = */ false),
544    SuppressAssignment("*") {}
545
546  void setSuppressAssignment(const char *position) {
547    SuppressAssignment = true;
548    SuppressAssignment.setPosition(position);
549  }
550
551  const OptionalFlag &getSuppressAssignment() const {
552    return SuppressAssignment;
553  }
554
555  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
556    CS = cs;
557  }
558
559  const ScanfConversionSpecifier &getConversionSpecifier() const {
560    return cast<ScanfConversionSpecifier>(CS);
561  }
562
563  bool consumesDataArgument() const {
564    return CS.consumesDataArgument() && !SuppressAssignment;
565  }
566
567  ArgType getArgType(ASTContext &Ctx) const;
568
569  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
570
571  void toString(raw_ostream &os) const;
572
573  static ScanfSpecifier Parse(const char *beg, const char *end);
574};
575
576} // end analyze_scanf namespace
577
578//===----------------------------------------------------------------------===//
579// Parsing and processing of format strings (both fprintf and fscanf).
580
581namespace analyze_format_string {
582
/// Context value handed to FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
584
585class FormatStringHandler {
586public:
587  FormatStringHandler() {}
588  virtual ~FormatStringHandler();
589
590  virtual void HandleNullChar(const char *nullCharacter) {}
591
592  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
593
594  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
595                                     PositionContext p) {}
596
597  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
598
599  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
600                                         unsigned specifierLen) {}
601
602  // Printf-specific handlers.
603
604  virtual bool HandleInvalidPrintfConversionSpecifier(
605                                      const analyze_printf::PrintfSpecifier &FS,
606                                      const char *startSpecifier,
607                                      unsigned specifierLen) {
608    return true;
609  }
610
611  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
612                                     const char *startSpecifier,
613                                     unsigned specifierLen) {
614    return true;
615  }
616
617    // Scanf-specific handlers.
618
619  virtual bool HandleInvalidScanfConversionSpecifier(
620                                        const analyze_scanf::ScanfSpecifier &FS,
621                                        const char *startSpecifier,
622                                        unsigned specifierLen) {
623    return true;
624  }
625
626  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
627                                    const char *startSpecifier,
628                                    unsigned specifierLen) {
629    return true;
630  }
631
632  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
633};
634
/// Entry point for processing a printf-style format string; events are
/// reported through the handler \p H.  Defined out of line.
/// NOTE(review): presumably the string is delimited by \p beg and \p end, and
/// the meaning of the bool result is set by the implementation -- confirm
/// both against the definition.
bool ParsePrintfString(FormatStringHandler &H,
                       const char *beg, const char *end, const LangOptions &LO,
                       const TargetInfo &Target);

/// Entry point for processing a scanf-style format string; takes the same
/// parameters as ParsePrintfString.  Defined out of line.
/// NOTE(review): the meaning of the bool result is set by the implementation
/// -- confirm against the definition.
bool ParseScanfString(FormatStringHandler &H,
                      const char *beg, const char *end, const LangOptions &LO,
                      const TargetInfo &Target);
642
643} // end analyze_format_string namespace
644} // end clang namespace
645#endif
646