1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
20#define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class TargetInfo;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
/// Class representing optional flags with location and representation
/// information.
///
/// A flag starts out unset and without a position. setPosition() both records
/// where the flag was seen and marks it as set; set()/clear()/operator= only
/// toggle the boolean state and leave the recorded position untouched.
class OptionalFlag {
public:
  /// Creates an unset flag whose textual form is \p Representation.
  /// The position is initialized to null so that getPosition() can reliably
  /// assert when no position has been recorded.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(nullptr), flag(false) {}

  /// Returns true if the flag is currently set.
  bool isSet() const { return flag; }
  /// Marks the flag as set without recording a position.
  void set() { flag = true; }
  /// Marks the flag as unset; a previously recorded position is kept.
  void clear() { flag = false; }

  /// Records where the flag occurs and marks it as set.
  /// \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    flag = true;
    this->position = position;
  }

  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the textual representation supplied at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  explicit operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
63
/// Represents the length modifier in a format string in scanf/printf.
///
/// Stores the kind of modifier together with a pointer to where it starts;
/// the number of characters it occupies is derived from the kind.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsInt32,      // 'I32' (MSVCRT, like __int32)
    AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
    AsInt64,      // 'I64' (MSVCRT, like __int64)
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z)
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an empty modifier: kind None and no position.
  LengthModifier() : Position(nullptr), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Pointer to the first character of the modifier (null if default-built).
  const char *getStart() const { return Position; }

  /// Number of characters the modifier occupies: 0 for None, 2 for the
  /// two-letter forms, 3 for 'I32'/'I64', and 1 for every other kind.
  unsigned getLength() const {
    switch (kind) {
    case None:
      return 0;
    case AsChar:
    case AsLongLong:
      return 2;
    case AsInt32:
    case AsInt64:
      return 3;
    default:
      break;
    }
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
120
121class ConversionSpecifier {
122public:
123  enum Kind {
124    InvalidSpecifier = 0,
125    // C99 conversion specifiers.
126    cArg,
127    dArg,
128    DArg, // Apple extension
129    iArg,
130    IntArgBeg = dArg,
131    IntArgEnd = iArg,
132
133    oArg,
134    OArg, // Apple extension
135    uArg,
136    UArg, // Apple extension
137    xArg,
138    XArg,
139    UIntArgBeg = oArg,
140    UIntArgEnd = XArg,
141
142    fArg,
143    FArg,
144    eArg,
145    EArg,
146    gArg,
147    GArg,
148    aArg,
149    AArg,
150    DoubleArgBeg = fArg,
151    DoubleArgEnd = AArg,
152
153    sArg,
154    pArg,
155    nArg,
156    PercentArg,
157    CArg,
158    SArg,
159
160    // Apple extension: P specifies to os_log that the data being pointed to is
161    // to be copied by os_log. The precision indicates the number of bytes to
162    // copy.
163    PArg,
164
165    // ** Printf-specific **
166
167    ZArg, // MS extension
168
169    // Objective-C specific specifiers.
170    ObjCObjArg, // '@'
171    ObjCBeg = ObjCObjArg,
172    ObjCEnd = ObjCObjArg,
173
174    // FreeBSD kernel specific specifiers.
175    FreeBSDbArg,
176    FreeBSDDArg,
177    FreeBSDrArg,
178    FreeBSDyArg,
179
180    // GlibC specific specifiers.
181    PrintErrno, // 'm'
182
183    PrintfConvBeg = ObjCObjArg,
184    PrintfConvEnd = PrintErrno,
185
186    // ** Scanf-specific **
187    ScanListArg, // '['
188    ScanfConvBeg = ScanListArg,
189    ScanfConvEnd = ScanListArg
190  };
191
192  ConversionSpecifier(bool isPrintf = true)
193    : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
194      kind(InvalidSpecifier) {}
195
196  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
197    : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
198
199  const char *getStart() const {
200    return Position;
201  }
202
203  StringRef getCharacters() const {
204    return StringRef(getStart(), getLength());
205  }
206
207  bool consumesDataArgument() const {
208    switch (kind) {
209      case PrintErrno:
210        assert(IsPrintf);
211        return false;
212      case PercentArg:
213        return false;
214      case InvalidSpecifier:
215        return false;
216      default:
217        return true;
218    }
219  }
220
221  Kind getKind() const { return kind; }
222  void setKind(Kind k) { kind = k; }
223  unsigned getLength() const {
224    return EndScanList ? EndScanList - Position : 1;
225  }
226  void setEndScanList(const char *pos) { EndScanList = pos; }
227
228  bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
229    kind == FreeBSDrArg || kind == FreeBSDyArg; }
230  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
231  bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
232  bool isDoubleArg() const {
233    return kind >= DoubleArgBeg && kind <= DoubleArgEnd;
234  }
235
236  const char *toString() const;
237
238  bool isPrintfKind() const { return IsPrintf; }
239
240  Optional<ConversionSpecifier> getStandardSpecifier() const;
241
242protected:
243  bool IsPrintf;
244  const char *Position;
245  const char *EndScanList;
246  Kind kind;
247};
248
249class ArgType {
250public:
251  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
252              AnyCharTy, CStrTy, WCStrTy, WIntTy };
253
254  enum MatchKind { NoMatch = 0, Match = 1, NoMatchPedantic };
255
256private:
257  const Kind K;
258  QualType T;
259  const char *Name;
260  bool Ptr;
261public:
262  ArgType(Kind k = UnknownTy, const char *n = nullptr)
263      : K(k), Name(n), Ptr(false) {}
264  ArgType(QualType t, const char *n = nullptr)
265      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
266  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
267
268  static ArgType Invalid() { return ArgType(InvalidTy); }
269  bool isValid() const { return K != InvalidTy; }
270
271  /// Create an ArgType which corresponds to the type pointer to A.
272  static ArgType PtrTo(const ArgType& A) {
273    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
274    ArgType Res = A;
275    Res.Ptr = true;
276    return Res;
277  }
278
279  MatchKind matchesType(ASTContext &C, QualType argTy) const;
280
281  QualType getRepresentativeType(ASTContext &C) const;
282
283  std::string getRepresentativeTypeName(ASTContext &C) const;
284};
285
286class OptionalAmount {
287public:
288  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
289
290  OptionalAmount(HowSpecified howSpecified,
291                 unsigned amount,
292                 const char *amountStart,
293                 unsigned amountLength,
294                 bool usesPositionalArg)
295  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
296  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
297
298  OptionalAmount(bool valid = true)
299  : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
300  UsesPositionalArg(0), UsesDotPrefix(0) {}
301
302  bool isInvalid() const {
303    return hs == Invalid;
304  }
305
306  HowSpecified getHowSpecified() const { return hs; }
307  void setHowSpecified(HowSpecified h) { hs = h; }
308
309  bool hasDataArgument() const { return hs == Arg; }
310
311  unsigned getArgIndex() const {
312    assert(hasDataArgument());
313    return amt;
314  }
315
316  unsigned getConstantAmount() const {
317    assert(hs == Constant);
318    return amt;
319  }
320
321  const char *getStart() const {
322      // We include the . character if it is given.
323    return start - UsesDotPrefix;
324  }
325
326  unsigned getConstantLength() const {
327    assert(hs == Constant);
328    return length + UsesDotPrefix;
329  }
330
331  ArgType getArgType(ASTContext &Ctx) const;
332
333  void toString(raw_ostream &os) const;
334
335  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
336  unsigned getPositionalArgIndex() const {
337    assert(hasDataArgument());
338    return amt + 1;
339  }
340
341  bool usesDotPrefix() const { return UsesDotPrefix; }
342  void setUsesDotPrefix() { UsesDotPrefix = true; }
343
344private:
345  const char *start;
346  unsigned length;
347  HowSpecified hs;
348  unsigned amt;
349  bool UsesPositionalArg : 1;
350  bool UsesDotPrefix;
351};
352
353
354class FormatSpecifier {
355protected:
356  LengthModifier LM;
357  OptionalAmount FieldWidth;
358  ConversionSpecifier CS;
359  /// Positional arguments, an IEEE extension:
360  ///  IEEE Std 1003.1, 2004 Edition
361  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
362  bool UsesPositionalArg;
363  unsigned argIndex;
364public:
365  FormatSpecifier(bool isPrintf)
366    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
367
368  void setLengthModifier(LengthModifier lm) {
369    LM = lm;
370  }
371
372  void setUsesPositionalArg() { UsesPositionalArg = true; }
373
374  void setArgIndex(unsigned i) {
375    argIndex = i;
376  }
377
378  unsigned getArgIndex() const {
379    return argIndex;
380  }
381
382  unsigned getPositionalArgIndex() const {
383    return argIndex + 1;
384  }
385
386  const LengthModifier &getLengthModifier() const {
387    return LM;
388  }
389
390  const OptionalAmount &getFieldWidth() const {
391    return FieldWidth;
392  }
393
394  void setFieldWidth(const OptionalAmount &Amt) {
395    FieldWidth = Amt;
396  }
397
398  bool usesPositionalArg() const { return UsesPositionalArg; }
399
400  bool hasValidLengthModifier(const TargetInfo &Target) const;
401
402  bool hasStandardLengthModifier() const;
403
404  Optional<LengthModifier> getCorrectedLengthModifier() const;
405
406  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
407
408  bool hasStandardLengthConversionCombination() const;
409
410  /// For a TypedefType QT, if it is a named integer type such as size_t,
411  /// assign the appropriate value to LM and return true.
412  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
413};
414
415} // end analyze_format_string namespace
416
417//===----------------------------------------------------------------------===//
418/// Pieces specific to fprintf format strings.
419
420namespace analyze_printf {
421
422class PrintfConversionSpecifier :
423  public analyze_format_string::ConversionSpecifier  {
424public:
425  PrintfConversionSpecifier()
426    : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
427
428  PrintfConversionSpecifier(const char *pos, Kind k)
429    : ConversionSpecifier(true, pos, k) {}
430
431  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
432  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
433                                    kind <= DoubleArgEnd; }
434
435  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
436    return CS->isPrintfKind();
437  }
438};
439
440using analyze_format_string::ArgType;
441using analyze_format_string::LengthModifier;
442using analyze_format_string::OptionalAmount;
443using analyze_format_string::OptionalFlag;
444
445class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
446  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
447  OptionalFlag IsLeftJustified; // '-'
448  OptionalFlag HasPlusPrefix; // '+'
449  OptionalFlag HasSpacePrefix; // ' '
450  OptionalFlag HasAlternativeForm; // '#'
451  OptionalFlag HasLeadingZeroes; // '0'
452  OptionalFlag HasObjCTechnicalTerm; // '[tt]'
453  OptionalFlag IsPrivate;            // '{private}'
454  OptionalFlag IsPublic;             // '{public}'
455  OptionalAmount Precision;
456public:
457  PrintfSpecifier()
458      : FormatSpecifier(/* isPrintf = */ true), HasThousandsGrouping("'"),
459        IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
460        HasAlternativeForm("#"), HasLeadingZeroes("0"),
461        HasObjCTechnicalTerm("tt"), IsPrivate("private"), IsPublic("public") {}
462
463  static PrintfSpecifier Parse(const char *beg, const char *end);
464
465    // Methods for incrementally constructing the PrintfSpecifier.
466  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
467    CS = cs;
468  }
469  void setHasThousandsGrouping(const char *position) {
470    HasThousandsGrouping.setPosition(position);
471  }
472  void setIsLeftJustified(const char *position) {
473    IsLeftJustified.setPosition(position);
474  }
475  void setHasPlusPrefix(const char *position) {
476    HasPlusPrefix.setPosition(position);
477  }
478  void setHasSpacePrefix(const char *position) {
479    HasSpacePrefix.setPosition(position);
480  }
481  void setHasAlternativeForm(const char *position) {
482    HasAlternativeForm.setPosition(position);
483  }
484  void setHasLeadingZeros(const char *position) {
485    HasLeadingZeroes.setPosition(position);
486  }
487  void setHasObjCTechnicalTerm(const char *position) {
488    HasObjCTechnicalTerm.setPosition(position);
489  }
490  void setIsPrivate(const char *position) { IsPrivate.setPosition(position); }
491  void setIsPublic(const char *position) { IsPublic.setPosition(position); }
492  void setUsesPositionalArg() { UsesPositionalArg = true; }
493
494    // Methods for querying the format specifier.
495
496  const PrintfConversionSpecifier &getConversionSpecifier() const {
497    return cast<PrintfConversionSpecifier>(CS);
498  }
499
500  void setPrecision(const OptionalAmount &Amt) {
501    Precision = Amt;
502    Precision.setUsesDotPrefix();
503  }
504
505  const OptionalAmount &getPrecision() const {
506    return Precision;
507  }
508
509  bool consumesDataArgument() const {
510    return getConversionSpecifier().consumesDataArgument();
511  }
512
513  /// \brief Returns the builtin type that a data argument
514  /// paired with this format specifier should have.  This method
515  /// will return null if the format specifier does not have
516  /// a matching data argument or the matching argument matches
517  /// more than one type.
518  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
519
520  const OptionalFlag &hasThousandsGrouping() const {
521      return HasThousandsGrouping;
522  }
523  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
524  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
525  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
526  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
527  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
528  const OptionalFlag &hasObjCTechnicalTerm() const { return HasObjCTechnicalTerm; }
529  const OptionalFlag &isPrivate() const { return IsPrivate; }
530  const OptionalFlag &isPublic() const { return IsPublic; }
531  bool usesPositionalArg() const { return UsesPositionalArg; }
532
533  /// Changes the specifier and length according to a QualType, retaining any
534  /// flags or options. Returns true on success, or false when a conversion
535  /// was not successful.
536  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
537               bool IsObjCLiteral);
538
539  void toString(raw_ostream &os) const;
540
541  // Validation methods - to check if any element results in undefined behavior
542  bool hasValidPlusPrefix() const;
543  bool hasValidAlternativeForm() const;
544  bool hasValidLeadingZeros() const;
545  bool hasValidSpacePrefix() const;
546  bool hasValidLeftJustified() const;
547  bool hasValidThousandsGroupingPrefix() const;
548
549  bool hasValidPrecision() const;
550  bool hasValidFieldWidth() const;
551};
552}  // end analyze_printf namespace
553
554//===----------------------------------------------------------------------===//
555/// Pieces specific to fscanf format strings.
556
557namespace analyze_scanf {
558
559class ScanfConversionSpecifier :
560    public analyze_format_string::ConversionSpecifier  {
561public:
562  ScanfConversionSpecifier()
563    : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
564
565  ScanfConversionSpecifier(const char *pos, Kind k)
566    : ConversionSpecifier(false, pos, k) {}
567
568  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
569    return !CS->isPrintfKind();
570  }
571};
572
573using analyze_format_string::ArgType;
574using analyze_format_string::LengthModifier;
575using analyze_format_string::OptionalAmount;
576using analyze_format_string::OptionalFlag;
577
578class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
579  OptionalFlag SuppressAssignment; // '*'
580public:
581  ScanfSpecifier() :
582    FormatSpecifier(/* isPrintf = */ false),
583    SuppressAssignment("*") {}
584
585  void setSuppressAssignment(const char *position) {
586    SuppressAssignment.setPosition(position);
587  }
588
589  const OptionalFlag &getSuppressAssignment() const {
590    return SuppressAssignment;
591  }
592
593  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
594    CS = cs;
595  }
596
597  const ScanfConversionSpecifier &getConversionSpecifier() const {
598    return cast<ScanfConversionSpecifier>(CS);
599  }
600
601  bool consumesDataArgument() const {
602    return CS.consumesDataArgument() && !SuppressAssignment;
603  }
604
605  ArgType getArgType(ASTContext &Ctx) const;
606
607  bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
608               ASTContext &Ctx);
609
610  void toString(raw_ostream &os) const;
611
612  static ScanfSpecifier Parse(const char *beg, const char *end);
613};
614
615} // end analyze_scanf namespace
616
617//===----------------------------------------------------------------------===//
618// Parsing and processing of format strings (both fprintf and fscanf).
619
620namespace analyze_format_string {
621
/// Identifies which amount of a specifier a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
623
624class FormatStringHandler {
625public:
626  FormatStringHandler() {}
627  virtual ~FormatStringHandler();
628
629  virtual void HandleNullChar(const char *nullCharacter) {}
630
631  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
632
633  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
634                                     PositionContext p) {}
635
636  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
637
638  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
639                                         unsigned specifierLen) {}
640
641  virtual void HandleEmptyObjCModifierFlag(const char *startFlags,
642                                           unsigned flagsLen) {}
643
644  virtual void HandleInvalidObjCModifierFlag(const char *startFlag,
645                                             unsigned flagLen) {}
646
647  virtual void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
648                                            const char *flagsEnd,
649                                            const char *conversionPosition) {}
650  // Printf-specific handlers.
651
652  virtual bool HandleInvalidPrintfConversionSpecifier(
653                                      const analyze_printf::PrintfSpecifier &FS,
654                                      const char *startSpecifier,
655                                      unsigned specifierLen) {
656    return true;
657  }
658
659  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
660                                     const char *startSpecifier,
661                                     unsigned specifierLen) {
662    return true;
663  }
664
665    // Scanf-specific handlers.
666
667  virtual bool HandleInvalidScanfConversionSpecifier(
668                                        const analyze_scanf::ScanfSpecifier &FS,
669                                        const char *startSpecifier,
670                                        unsigned specifierLen) {
671    return true;
672  }
673
674  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
675                                    const char *startSpecifier,
676                                    unsigned specifierLen) {
677    return true;
678  }
679
680  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
681};
682
/// Entry point for analyzing the printf-style format string in [beg, end);
/// takes the FormatStringHandler \p H as its callback object.
/// \p isFreeBSDKPrintf presumably enables the FreeBSD kernel printf
/// specifiers - confirm against the implementation.
/// NOTE(review): the meaning of the bool result is not visible in this
/// header - confirm against the implementation.
bool ParsePrintfString(FormatStringHandler &H,
                       const char *beg, const char *end, const LangOptions &LO,
                       const TargetInfo &Target, bool isFreeBSDKPrintf);

/// Examines the format string in [beg, end) without a handler.
/// Judging by the name, it reports whether the string contains an 's'
/// conversion - confirm against the implementation.
bool ParseFormatStringHasSArg(const char *beg, const char *end,
                              const LangOptions &LO, const TargetInfo &Target);

/// Entry point for analyzing the scanf-style format string in [beg, end);
/// takes the FormatStringHandler \p H as its callback object.
/// NOTE(review): the meaning of the bool result is not visible in this
/// header - confirm against the implementation.
bool ParseScanfString(FormatStringHandler &H,
                      const char *beg, const char *end, const LangOptions &LO,
                      const TargetInfo &Target);
693
694} // end analyze_format_string namespace
695} // end clang namespace
696#endif
697