1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
20#define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class TargetInfo;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
32/// Class representing optional flags with location and representation
33/// information.
/// An optional format-string flag: an on/off state together with the flag's
/// textual representation and, once recorded, the location where it appeared.
class OptionalFlag {
public:
  /// Creates an unset flag with no recorded position.
  ///
  /// \param Representation text handed back verbatim by toString(). Only the
  ///        pointer is stored; the characters are not copied.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(nullptr), flag(false) {}

  /// Returns true if the flag is currently set.
  bool isSet() const { return flag; }

  /// Marks the flag as set without touching the recorded position.
  void set() { flag = true; }

  /// Marks the flag as unset. A previously recorded position is kept.
  void clear() { flag = false; }

  /// Records where the flag was found and marks it as set.
  ///
  /// \param position location of the flag; must be non-null (asserted).
  void setPosition(const char *position) {
    assert(position);
    flag = true;
    this->position = position;
  }

  /// Returns the location recorded by setPosition().
  ///
  /// Asserts that a position has been recorded. The member starts out as
  /// nullptr, so this check is well defined even on a fresh object.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string supplied at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  explicit operator bool() const { return flag; }

  /// Sets or clears the flag from a plain bool; the position is untouched.
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;
  }

private:
  const char *representation;
  const char *position; // nullptr until setPosition() is called.
  bool flag;
};
63
64/// Represents the length modifier in a format string in scanf/printf.
/// Describes the length modifier of a scanf/printf conversion: which modifier
/// it is and where its text begins.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsInt32,      // 'I32' (MSVCRT, like __int32)
    AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
    AsInt64,      // 'I64' (MSVCRT, like __int64)
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z)
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds an empty modifier: kind None and no start position.
  LengthModifier() : Position(nullptr), kind(None) {}

  /// Builds a modifier of kind \p k whose text starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position given at construction (may be null).
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier occupies: 0 for None,
  /// 2 for 'hh' and 'll', 3 for 'I32' and 'I64', and 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    if (kind == AsInt32 || kind == AsInt64)
      return 3;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
120
121class ConversionSpecifier {
122public:
123  enum Kind {
124    InvalidSpecifier = 0,
125      // C99 conversion specifiers.
126    cArg,
127    dArg,
128    DArg, // Apple extension
129    iArg,
130    IntArgBeg = dArg, IntArgEnd = iArg,
131
132    oArg,
133    OArg, // Apple extension
134    uArg,
135    UArg, // Apple extension
136    xArg,
137    XArg,
138    UIntArgBeg = oArg, UIntArgEnd = XArg,
139
140    fArg,
141    FArg,
142    eArg,
143    EArg,
144    gArg,
145    GArg,
146    aArg,
147    AArg,
148    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
149
150    sArg,
151    pArg,
152    nArg,
153    PercentArg,
154    CArg,
155    SArg,
156
157    // ** Printf-specific **
158
159    ZArg, // MS extension
160
161    // Objective-C specific specifiers.
162    ObjCObjArg,  // '@'
163    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
164
165    // FreeBSD kernel specific specifiers.
166    FreeBSDbArg,
167    FreeBSDDArg,
168    FreeBSDrArg,
169    FreeBSDyArg,
170
171    // GlibC specific specifiers.
172    PrintErrno,   // 'm'
173
174    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
175
176    // ** Scanf-specific **
177    ScanListArg, // '['
178    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
179  };
180
181  ConversionSpecifier(bool isPrintf = true)
182    : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
183      kind(InvalidSpecifier) {}
184
185  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
186    : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
187
188  const char *getStart() const {
189    return Position;
190  }
191
192  StringRef getCharacters() const {
193    return StringRef(getStart(), getLength());
194  }
195
196  bool consumesDataArgument() const {
197    switch (kind) {
198      case PrintErrno:
199        assert(IsPrintf);
200        return false;
201      case PercentArg:
202        return false;
203      default:
204        return true;
205    }
206  }
207
208  Kind getKind() const { return kind; }
209  void setKind(Kind k) { kind = k; }
210  unsigned getLength() const {
211    return EndScanList ? EndScanList - Position : 1;
212  }
213  void setEndScanList(const char *pos) { EndScanList = pos; }
214
215  bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
216    kind == FreeBSDrArg || kind == FreeBSDyArg; }
217  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
218  bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
219  bool isDoubleArg() const {
220    return kind >= DoubleArgBeg && kind <= DoubleArgEnd;
221  }
222
223  const char *toString() const;
224
225  bool isPrintfKind() const { return IsPrintf; }
226
227  Optional<ConversionSpecifier> getStandardSpecifier() const;
228
229protected:
230  bool IsPrintf;
231  const char *Position;
232  const char *EndScanList;
233  Kind kind;
234};
235
236class ArgType {
237public:
238  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
239              AnyCharTy, CStrTy, WCStrTy, WIntTy };
240
241  enum MatchKind { NoMatch = 0, Match = 1, NoMatchPedantic };
242
243private:
244  const Kind K;
245  QualType T;
246  const char *Name;
247  bool Ptr;
248public:
249  ArgType(Kind k = UnknownTy, const char *n = nullptr)
250      : K(k), Name(n), Ptr(false) {}
251  ArgType(QualType t, const char *n = nullptr)
252      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
253  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
254
255  static ArgType Invalid() { return ArgType(InvalidTy); }
256  bool isValid() const { return K != InvalidTy; }
257
258  /// Create an ArgType which corresponds to the type pointer to A.
259  static ArgType PtrTo(const ArgType& A) {
260    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
261    ArgType Res = A;
262    Res.Ptr = true;
263    return Res;
264  }
265
266  MatchKind matchesType(ASTContext &C, QualType argTy) const;
267
268  QualType getRepresentativeType(ASTContext &C) const;
269
270  std::string getRepresentativeTypeName(ASTContext &C) const;
271};
272
273class OptionalAmount {
274public:
275  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
276
277  OptionalAmount(HowSpecified howSpecified,
278                 unsigned amount,
279                 const char *amountStart,
280                 unsigned amountLength,
281                 bool usesPositionalArg)
282  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
283  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
284
285  OptionalAmount(bool valid = true)
286  : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
287  UsesPositionalArg(0), UsesDotPrefix(0) {}
288
289  bool isInvalid() const {
290    return hs == Invalid;
291  }
292
293  HowSpecified getHowSpecified() const { return hs; }
294  void setHowSpecified(HowSpecified h) { hs = h; }
295
296  bool hasDataArgument() const { return hs == Arg; }
297
298  unsigned getArgIndex() const {
299    assert(hasDataArgument());
300    return amt;
301  }
302
303  unsigned getConstantAmount() const {
304    assert(hs == Constant);
305    return amt;
306  }
307
308  const char *getStart() const {
309      // We include the . character if it is given.
310    return start - UsesDotPrefix;
311  }
312
313  unsigned getConstantLength() const {
314    assert(hs == Constant);
315    return length + UsesDotPrefix;
316  }
317
318  ArgType getArgType(ASTContext &Ctx) const;
319
320  void toString(raw_ostream &os) const;
321
322  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
323  unsigned getPositionalArgIndex() const {
324    assert(hasDataArgument());
325    return amt + 1;
326  }
327
328  bool usesDotPrefix() const { return UsesDotPrefix; }
329  void setUsesDotPrefix() { UsesDotPrefix = true; }
330
331private:
332  const char *start;
333  unsigned length;
334  HowSpecified hs;
335  unsigned amt;
336  bool UsesPositionalArg : 1;
337  bool UsesDotPrefix;
338};
339
340
341class FormatSpecifier {
342protected:
343  LengthModifier LM;
344  OptionalAmount FieldWidth;
345  ConversionSpecifier CS;
346  /// Positional arguments, an IEEE extension:
347  ///  IEEE Std 1003.1, 2004 Edition
348  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
349  bool UsesPositionalArg;
350  unsigned argIndex;
351public:
352  FormatSpecifier(bool isPrintf)
353    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
354
355  void setLengthModifier(LengthModifier lm) {
356    LM = lm;
357  }
358
359  void setUsesPositionalArg() { UsesPositionalArg = true; }
360
361  void setArgIndex(unsigned i) {
362    argIndex = i;
363  }
364
365  unsigned getArgIndex() const {
366    return argIndex;
367  }
368
369  unsigned getPositionalArgIndex() const {
370    return argIndex + 1;
371  }
372
373  const LengthModifier &getLengthModifier() const {
374    return LM;
375  }
376
377  const OptionalAmount &getFieldWidth() const {
378    return FieldWidth;
379  }
380
381  void setFieldWidth(const OptionalAmount &Amt) {
382    FieldWidth = Amt;
383  }
384
385  bool usesPositionalArg() const { return UsesPositionalArg; }
386
387  bool hasValidLengthModifier(const TargetInfo &Target) const;
388
389  bool hasStandardLengthModifier() const;
390
391  Optional<LengthModifier> getCorrectedLengthModifier() const;
392
393  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
394
395  bool hasStandardLengthConversionCombination() const;
396
397  /// For a TypedefType QT, if it is a named integer type such as size_t,
398  /// assign the appropriate value to LM and return true.
399  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
400};
401
402} // end analyze_format_string namespace
403
404//===----------------------------------------------------------------------===//
405/// Pieces specific to fprintf format strings.
406
407namespace analyze_printf {
408
409class PrintfConversionSpecifier :
410  public analyze_format_string::ConversionSpecifier  {
411public:
412  PrintfConversionSpecifier()
413    : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
414
415  PrintfConversionSpecifier(const char *pos, Kind k)
416    : ConversionSpecifier(true, pos, k) {}
417
418  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
419  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
420                                    kind <= DoubleArgEnd; }
421
422  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
423    return CS->isPrintfKind();
424  }
425};
426
427using analyze_format_string::ArgType;
428using analyze_format_string::LengthModifier;
429using analyze_format_string::OptionalAmount;
430using analyze_format_string::OptionalFlag;
431
432class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
433  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
434  OptionalFlag IsLeftJustified; // '-'
435  OptionalFlag HasPlusPrefix; // '+'
436  OptionalFlag HasSpacePrefix; // ' '
437  OptionalFlag HasAlternativeForm; // '#'
438  OptionalFlag HasLeadingZeroes; // '0'
439  OptionalFlag HasObjCTechnicalTerm; // '[tt]'
440  OptionalAmount Precision;
441public:
442  PrintfSpecifier() :
443    FormatSpecifier(/* isPrintf = */ true),
444    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
445    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0"),
446    HasObjCTechnicalTerm("tt") {}
447
448  static PrintfSpecifier Parse(const char *beg, const char *end);
449
450    // Methods for incrementally constructing the PrintfSpecifier.
451  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
452    CS = cs;
453  }
454  void setHasThousandsGrouping(const char *position) {
455    HasThousandsGrouping.setPosition(position);
456  }
457  void setIsLeftJustified(const char *position) {
458    IsLeftJustified.setPosition(position);
459  }
460  void setHasPlusPrefix(const char *position) {
461    HasPlusPrefix.setPosition(position);
462  }
463  void setHasSpacePrefix(const char *position) {
464    HasSpacePrefix.setPosition(position);
465  }
466  void setHasAlternativeForm(const char *position) {
467    HasAlternativeForm.setPosition(position);
468  }
469  void setHasLeadingZeros(const char *position) {
470    HasLeadingZeroes.setPosition(position);
471  }
472  void setHasObjCTechnicalTerm(const char *position) {
473    HasObjCTechnicalTerm.setPosition(position);
474  }
475  void setUsesPositionalArg() { UsesPositionalArg = true; }
476
477    // Methods for querying the format specifier.
478
479  const PrintfConversionSpecifier &getConversionSpecifier() const {
480    return cast<PrintfConversionSpecifier>(CS);
481  }
482
483  void setPrecision(const OptionalAmount &Amt) {
484    Precision = Amt;
485    Precision.setUsesDotPrefix();
486  }
487
488  const OptionalAmount &getPrecision() const {
489    return Precision;
490  }
491
492  bool consumesDataArgument() const {
493    return getConversionSpecifier().consumesDataArgument();
494  }
495
496  /// \brief Returns the builtin type that a data argument
497  /// paired with this format specifier should have.  This method
498  /// will return null if the format specifier does not have
499  /// a matching data argument or the matching argument matches
500  /// more than one type.
501  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
502
503  const OptionalFlag &hasThousandsGrouping() const {
504      return HasThousandsGrouping;
505  }
506  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
507  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
508  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
509  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
510  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
511  const OptionalFlag &hasObjCTechnicalTerm() const { return HasObjCTechnicalTerm; }
512  bool usesPositionalArg() const { return UsesPositionalArg; }
513
514  /// Changes the specifier and length according to a QualType, retaining any
515  /// flags or options. Returns true on success, or false when a conversion
516  /// was not successful.
517  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
518               bool IsObjCLiteral);
519
520  void toString(raw_ostream &os) const;
521
522  // Validation methods - to check if any element results in undefined behavior
523  bool hasValidPlusPrefix() const;
524  bool hasValidAlternativeForm() const;
525  bool hasValidLeadingZeros() const;
526  bool hasValidSpacePrefix() const;
527  bool hasValidLeftJustified() const;
528  bool hasValidThousandsGroupingPrefix() const;
529
530  bool hasValidPrecision() const;
531  bool hasValidFieldWidth() const;
532};
533}  // end analyze_printf namespace
534
535//===----------------------------------------------------------------------===//
536/// Pieces specific to fscanf format strings.
537
538namespace analyze_scanf {
539
540class ScanfConversionSpecifier :
541    public analyze_format_string::ConversionSpecifier  {
542public:
543  ScanfConversionSpecifier()
544    : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
545
546  ScanfConversionSpecifier(const char *pos, Kind k)
547    : ConversionSpecifier(false, pos, k) {}
548
549  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
550    return !CS->isPrintfKind();
551  }
552};
553
554using analyze_format_string::ArgType;
555using analyze_format_string::LengthModifier;
556using analyze_format_string::OptionalAmount;
557using analyze_format_string::OptionalFlag;
558
559class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
560  OptionalFlag SuppressAssignment; // '*'
561public:
562  ScanfSpecifier() :
563    FormatSpecifier(/* isPrintf = */ false),
564    SuppressAssignment("*") {}
565
566  void setSuppressAssignment(const char *position) {
567    SuppressAssignment.setPosition(position);
568  }
569
570  const OptionalFlag &getSuppressAssignment() const {
571    return SuppressAssignment;
572  }
573
574  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
575    CS = cs;
576  }
577
578  const ScanfConversionSpecifier &getConversionSpecifier() const {
579    return cast<ScanfConversionSpecifier>(CS);
580  }
581
582  bool consumesDataArgument() const {
583    return CS.consumesDataArgument() && !SuppressAssignment;
584  }
585
586  ArgType getArgType(ASTContext &Ctx) const;
587
588  bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
589               ASTContext &Ctx);
590
591  void toString(raw_ostream &os) const;
592
593  static ScanfSpecifier Parse(const char *beg, const char *end);
594};
595
596} // end analyze_scanf namespace
597
598//===----------------------------------------------------------------------===//
599// Parsing and processing of format strings (both fprintf and fscanf).
600
601namespace analyze_format_string {
602
/// Identifies which part of a specifier a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
604
605class FormatStringHandler {
606public:
607  FormatStringHandler() {}
608  virtual ~FormatStringHandler();
609
610  virtual void HandleNullChar(const char *nullCharacter) {}
611
612  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
613
614  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
615                                     PositionContext p) {}
616
617  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
618
619  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
620                                         unsigned specifierLen) {}
621
622  virtual void HandleEmptyObjCModifierFlag(const char *startFlags,
623                                           unsigned flagsLen) {}
624
625  virtual void HandleInvalidObjCModifierFlag(const char *startFlag,
626                                             unsigned flagLen) {}
627
628  virtual void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
629                                            const char *flagsEnd,
630                                            const char *conversionPosition) {}
631  // Printf-specific handlers.
632
633  virtual bool HandleInvalidPrintfConversionSpecifier(
634                                      const analyze_printf::PrintfSpecifier &FS,
635                                      const char *startSpecifier,
636                                      unsigned specifierLen) {
637    return true;
638  }
639
640  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
641                                     const char *startSpecifier,
642                                     unsigned specifierLen) {
643    return true;
644  }
645
646    // Scanf-specific handlers.
647
648  virtual bool HandleInvalidScanfConversionSpecifier(
649                                        const analyze_scanf::ScanfSpecifier &FS,
650                                        const char *startSpecifier,
651                                        unsigned specifierLen) {
652    return true;
653  }
654
655  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
656                                    const char *startSpecifier,
657                                    unsigned specifierLen) {
658    return true;
659  }
660
661  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
662};
663
664bool ParsePrintfString(FormatStringHandler &H,
665                       const char *beg, const char *end, const LangOptions &LO,
666                       const TargetInfo &Target, bool isFreeBSDKPrintf);
667
668bool ParseFormatStringHasSArg(const char *beg, const char *end,
669                              const LangOptions &LO, const TargetInfo &Target);
670
671bool ParseScanfString(FormatStringHandler &H,
672                      const char *beg, const char *end, const LangOptions &LO,
673                      const TargetInfo &Target);
674
675} // end analyze_format_string namespace
676} // end clang namespace
677#endif
678