FormatString.h revision 8be066e6733364cd34f25c4f7b7344f72aa23369
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class TargetInfo;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
/// Class representing optional flags with location and representation
/// information.
///
/// A flag couples three things: whether it is set, the fixed text that
/// represents it (returned by toString()), and the position that was last
/// recorded for it with setPosition().
class OptionalFlag {
public:
  /// Creates a cleared flag whose textual form is \p Representation.
  /// The position starts out null, so getPosition() asserts (instead of
  /// reading an indeterminate pointer) until setPosition() has been called.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set. Const so that it can be
  /// called through a const reference to the flag.
  bool isSet() const { return flag; }
  /// Marks the flag as set.
  void set() { flag = true; }
  /// Marks the flag as not set.
  void clear() { flag = false; }

  /// Records \p position for this flag; it must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }
  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }
  /// Returns the textual representation given at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Allow chained assignment.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
62
/// Represents the length modifier in a format string in scanf/printf.
///
/// Stores which modifier was used (Kind) and the pointer that was supplied
/// as its start.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an absent modifier: kind None and a null start pointer.
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k starting at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start pointer given at construction.
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier occupies: 0 for None,
  /// 2 for the two-character modifiers ('hh' and 'll'), 1 for all others.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsLongLong || kind == AsChar)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Declared here; the definition is not part of this header.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
112
113class ConversionSpecifier {
114public:
115  enum Kind {
116    InvalidSpecifier = 0,
117      // C99 conversion specifiers.
118    cArg,
119    dArg,
120    iArg,
121    IntArgBeg = dArg, IntArgEnd = iArg,
122
123    oArg,
124    uArg,
125    xArg,
126    XArg,
127    UIntArgBeg = oArg, UIntArgEnd = XArg,
128
129    fArg,
130    FArg,
131    eArg,
132    EArg,
133    gArg,
134    GArg,
135    aArg,
136    AArg,
137    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
138
139    sArg,
140    pArg,
141    nArg,
142    PercentArg,
143    CArg,
144    SArg,
145
146    // ** Printf-specific **
147
148    // Objective-C specific specifiers.
149    ObjCObjArg,  // '@'
150    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
151
152    // GlibC specific specifiers.
153    PrintErrno,   // 'm'
154
155    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
156
157    // ** Scanf-specific **
158    ScanListArg, // '['
159    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
160  };
161
162  ConversionSpecifier(bool isPrintf)
163    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
164
165  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
166    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
167
168  const char *getStart() const {
169    return Position;
170  }
171
172  StringRef getCharacters() const {
173    return StringRef(getStart(), getLength());
174  }
175
176  bool consumesDataArgument() const {
177    switch (kind) {
178      case PrintErrno:
179        assert(IsPrintf);
180        return false;
181      case PercentArg:
182        return false;
183      default:
184        return true;
185    }
186  }
187
188  Kind getKind() const { return kind; }
189  void setKind(Kind k) { kind = k; }
190  unsigned getLength() const {
191    return EndScanList ? EndScanList - Position : 1;
192  }
193
194  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
195  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
196  bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
197  const char *toString() const;
198
199  bool isPrintfKind() const { return IsPrintf; }
200
201protected:
202  bool IsPrintf;
203  const char *Position;
204  const char *EndScanList;
205  Kind kind;
206};
207
208class ArgType {
209public:
210  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
211              AnyCharTy, CStrTy, WCStrTy, WIntTy };
212private:
213  const Kind K;
214  QualType T;
215  const char *Name;
216  bool Ptr;
217public:
218  ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
219  ArgType(QualType t, const char *n = 0)
220      : K(SpecificTy), T(t), Name(n), Ptr(false) {}
221  ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
222
223  static ArgType Invalid() { return ArgType(InvalidTy); }
224  bool isValid() const { return K != InvalidTy; }
225
226  /// Create an ArgType which corresponds to the type pointer to A.
227  static ArgType PtrTo(const ArgType& A) {
228    assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
229    ArgType Res = A;
230    Res.Ptr = true;
231    return Res;
232  }
233
234  bool matchesType(ASTContext &C, QualType argTy) const;
235
236  QualType getRepresentativeType(ASTContext &C) const;
237
238  std::string getRepresentativeTypeName(ASTContext &C) const;
239};
240
241class OptionalAmount {
242public:
243  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
244
245  OptionalAmount(HowSpecified howSpecified,
246                 unsigned amount,
247                 const char *amountStart,
248                 unsigned amountLength,
249                 bool usesPositionalArg)
250  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
251  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
252
253  OptionalAmount(bool valid = true)
254  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
255  UsesPositionalArg(0), UsesDotPrefix(0) {}
256
257  bool isInvalid() const {
258    return hs == Invalid;
259  }
260
261  HowSpecified getHowSpecified() const { return hs; }
262  void setHowSpecified(HowSpecified h) { hs = h; }
263
264  bool hasDataArgument() const { return hs == Arg; }
265
266  unsigned getArgIndex() const {
267    assert(hasDataArgument());
268    return amt;
269  }
270
271  unsigned getConstantAmount() const {
272    assert(hs == Constant);
273    return amt;
274  }
275
276  const char *getStart() const {
277      // We include the . character if it is given.
278    return start - UsesDotPrefix;
279  }
280
281  unsigned getConstantLength() const {
282    assert(hs == Constant);
283    return length + UsesDotPrefix;
284  }
285
286  ArgType getArgType(ASTContext &Ctx) const;
287
288  void toString(raw_ostream &os) const;
289
290  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
291  unsigned getPositionalArgIndex() const {
292    assert(hasDataArgument());
293    return amt + 1;
294  }
295
296  bool usesDotPrefix() const { return UsesDotPrefix; }
297  void setUsesDotPrefix() { UsesDotPrefix = true; }
298
299private:
300  const char *start;
301  unsigned length;
302  HowSpecified hs;
303  unsigned amt;
304  bool UsesPositionalArg : 1;
305  bool UsesDotPrefix;
306};
307
308
309class FormatSpecifier {
310protected:
311  LengthModifier LM;
312  OptionalAmount FieldWidth;
313  ConversionSpecifier CS;
314  /// Positional arguments, an IEEE extension:
315  ///  IEEE Std 1003.1, 2004 Edition
316  ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
317  bool UsesPositionalArg;
318  unsigned argIndex;
319public:
320  FormatSpecifier(bool isPrintf)
321    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
322
323  void setLengthModifier(LengthModifier lm) {
324    LM = lm;
325  }
326
327  void setUsesPositionalArg() { UsesPositionalArg = true; }
328
329  void setArgIndex(unsigned i) {
330    argIndex = i;
331  }
332
333  unsigned getArgIndex() const {
334    return argIndex;
335  }
336
337  unsigned getPositionalArgIndex() const {
338    return argIndex + 1;
339  }
340
341  const LengthModifier &getLengthModifier() const {
342    return LM;
343  }
344
345  const OptionalAmount &getFieldWidth() const {
346    return FieldWidth;
347  }
348
349  void setFieldWidth(const OptionalAmount &Amt) {
350    FieldWidth = Amt;
351  }
352
353  bool usesPositionalArg() const { return UsesPositionalArg; }
354
355  bool hasValidLengthModifier(const TargetInfo &Target) const;
356
357  bool hasStandardLengthModifier() const;
358
359  llvm::Optional<LengthModifier> getCorrectedLengthModifier() const;
360
361  bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
362
363  bool hasStandardLengthConversionCombination() const;
364
365  /// For a TypedefType QT, if it is a named integer type such as size_t,
366  /// assign the appropriate value to LM and return true.
367  static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
368};
369
370} // end analyze_format_string namespace
371
372//===----------------------------------------------------------------------===//
373/// Pieces specific to fprintf format strings.
374
375namespace analyze_printf {
376
377class PrintfConversionSpecifier :
378  public analyze_format_string::ConversionSpecifier  {
379public:
380  PrintfConversionSpecifier()
381    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
382
383  PrintfConversionSpecifier(const char *pos, Kind k)
384    : ConversionSpecifier(true, pos, k) {}
385
386  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
387  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
388                                    kind <= DoubleArgEnd; }
389  unsigned getLength() const {
390      // Conversion specifiers currently only are represented by
391      // single characters, but we be flexible.
392    return 1;
393  }
394
395  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
396    return CS->isPrintfKind();
397  }
398};
399
400using analyze_format_string::ArgType;
401using analyze_format_string::LengthModifier;
402using analyze_format_string::OptionalAmount;
403using analyze_format_string::OptionalFlag;
404
405class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
406  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
407  OptionalFlag IsLeftJustified; // '-'
408  OptionalFlag HasPlusPrefix; // '+'
409  OptionalFlag HasSpacePrefix; // ' '
410  OptionalFlag HasAlternativeForm; // '#'
411  OptionalFlag HasLeadingZeroes; // '0'
412  OptionalAmount Precision;
413public:
414  PrintfSpecifier() :
415    FormatSpecifier(/* isPrintf = */ true),
416    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
417    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
418
419  static PrintfSpecifier Parse(const char *beg, const char *end);
420
421    // Methods for incrementally constructing the PrintfSpecifier.
422  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
423    CS = cs;
424  }
425  void setHasThousandsGrouping(const char *position) {
426    HasThousandsGrouping = true;
427    HasThousandsGrouping.setPosition(position);
428  }
429  void setIsLeftJustified(const char *position) {
430    IsLeftJustified = true;
431    IsLeftJustified.setPosition(position);
432  }
433  void setHasPlusPrefix(const char *position) {
434    HasPlusPrefix = true;
435    HasPlusPrefix.setPosition(position);
436  }
437  void setHasSpacePrefix(const char *position) {
438    HasSpacePrefix = true;
439    HasSpacePrefix.setPosition(position);
440  }
441  void setHasAlternativeForm(const char *position) {
442    HasAlternativeForm = true;
443    HasAlternativeForm.setPosition(position);
444  }
445  void setHasLeadingZeros(const char *position) {
446    HasLeadingZeroes = true;
447    HasLeadingZeroes.setPosition(position);
448  }
449  void setUsesPositionalArg() { UsesPositionalArg = true; }
450
451    // Methods for querying the format specifier.
452
453  const PrintfConversionSpecifier &getConversionSpecifier() const {
454    return cast<PrintfConversionSpecifier>(CS);
455  }
456
457  void setPrecision(const OptionalAmount &Amt) {
458    Precision = Amt;
459    Precision.setUsesDotPrefix();
460  }
461
462  const OptionalAmount &getPrecision() const {
463    return Precision;
464  }
465
466  bool consumesDataArgument() const {
467    return getConversionSpecifier().consumesDataArgument();
468  }
469
470  /// \brief Returns the builtin type that a data argument
471  /// paired with this format specifier should have.  This method
472  /// will return null if the format specifier does not have
473  /// a matching data argument or the matching argument matches
474  /// more than one type.
475  ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
476
477  const OptionalFlag &hasThousandsGrouping() const {
478      return HasThousandsGrouping;
479  }
480  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
481  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
482  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
483  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
484  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
485  bool usesPositionalArg() const { return UsesPositionalArg; }
486
487  /// Changes the specifier and length according to a QualType, retaining any
488  /// flags or options. Returns true on success, or false when a conversion
489  /// was not successful.
490  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
491               bool IsObjCLiteral);
492
493  void toString(raw_ostream &os) const;
494
495  // Validation methods - to check if any element results in undefined behavior
496  bool hasValidPlusPrefix() const;
497  bool hasValidAlternativeForm() const;
498  bool hasValidLeadingZeros() const;
499  bool hasValidSpacePrefix() const;
500  bool hasValidLeftJustified() const;
501  bool hasValidThousandsGroupingPrefix() const;
502
503  bool hasValidPrecision() const;
504  bool hasValidFieldWidth() const;
505};
506}  // end analyze_printf namespace
507
508//===----------------------------------------------------------------------===//
509/// Pieces specific to fscanf format strings.
510
511namespace analyze_scanf {
512
513class ScanfConversionSpecifier :
514    public analyze_format_string::ConversionSpecifier  {
515public:
516  ScanfConversionSpecifier()
517    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
518
519  ScanfConversionSpecifier(const char *pos, Kind k)
520    : ConversionSpecifier(false, pos, k) {}
521
522  void setEndScanList(const char *pos) { EndScanList = pos; }
523
524  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
525    return !CS->isPrintfKind();
526  }
527};
528
529using analyze_format_string::ArgType;
530using analyze_format_string::LengthModifier;
531using analyze_format_string::OptionalAmount;
532using analyze_format_string::OptionalFlag;
533
534class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
535  OptionalFlag SuppressAssignment; // '*'
536public:
537  ScanfSpecifier() :
538    FormatSpecifier(/* isPrintf = */ false),
539    SuppressAssignment("*") {}
540
541  void setSuppressAssignment(const char *position) {
542    SuppressAssignment = true;
543    SuppressAssignment.setPosition(position);
544  }
545
546  const OptionalFlag &getSuppressAssignment() const {
547    return SuppressAssignment;
548  }
549
550  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
551    CS = cs;
552  }
553
554  const ScanfConversionSpecifier &getConversionSpecifier() const {
555    return cast<ScanfConversionSpecifier>(CS);
556  }
557
558  bool consumesDataArgument() const {
559    return CS.consumesDataArgument() && !SuppressAssignment;
560  }
561
562  ArgType getArgType(ASTContext &Ctx) const;
563
564  bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
565
566  void toString(raw_ostream &os) const;
567
568  static ScanfSpecifier Parse(const char *beg, const char *end);
569};
570
571} // end analyze_scanf namespace
572
573//===----------------------------------------------------------------------===//
574// Parsing and processing of format strings (both fprintf and fscanf).
575
576namespace analyze_format_string {
577
/// Tag passed to FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
579
580class FormatStringHandler {
581public:
582  FormatStringHandler() {}
583  virtual ~FormatStringHandler();
584
585  virtual void HandleNullChar(const char *nullCharacter) {}
586
587  virtual void HandlePosition(const char *startPos, unsigned posLen) {}
588
589  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
590                                     PositionContext p) {}
591
592  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
593
594  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
595                                         unsigned specifierLen) {}
596
597  // Printf-specific handlers.
598
599  virtual bool HandleInvalidPrintfConversionSpecifier(
600                                      const analyze_printf::PrintfSpecifier &FS,
601                                      const char *startSpecifier,
602                                      unsigned specifierLen) {
603    return true;
604  }
605
606  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
607                                     const char *startSpecifier,
608                                     unsigned specifierLen) {
609    return true;
610  }
611
612    // Scanf-specific handlers.
613
614  virtual bool HandleInvalidScanfConversionSpecifier(
615                                        const analyze_scanf::ScanfSpecifier &FS,
616                                        const char *startSpecifier,
617                                        unsigned specifierLen) {
618    return true;
619  }
620
621  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
622                                    const char *startSpecifier,
623                                    unsigned specifierLen) {
624    return true;
625  }
626
627  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
628};
629
630bool ParsePrintfString(FormatStringHandler &H,
631                       const char *beg, const char *end, const LangOptions &LO);
632
633bool ParseScanfString(FormatStringHandler &H,
634                      const char *beg, const char *end, const LangOptions &LO);
635
636} // end analyze_format_string namespace
637} // end clang namespace
638#endif
639