FormatString.h revision 96827eb52405a71c65c200949f3e644368e86454
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// Class representing optional flags with location and representation
/// information.
///
/// A flag starts out unset with no recorded position.  The textual
/// representation passed at construction is stored by pointer (not copied)
/// and is handed back unchanged by toString().
class OptionalFlag {
public:
  /// Creates an unset flag whose textual form is \p Representation.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.
  bool isSet() const { return flag; }
  /// Marks the flag as set.
  void set() { flag = true; }
  /// Marks the flag as not set.  The recorded position is left untouched.
  void clear() { flag = false; }

  /// Records where the flag was found.  \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position.  Asserts that a position has been
  /// recorded; the member is initialized to null in the constructor so this
  /// check never reads an indeterminate pointer.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string supplied at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Allow chained assignment.
  }
private:
  const char *representation;
  const char *position;  // Null until setPosition() is called.
  bool flag;
};
60
/// Represents the length modifier in a format string in scanf/printf.
///
/// Stores the modifier's kind together with a pointer to where it starts in
/// the format string.  A default-constructed modifier has kind None and a
/// null start pointer.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll', 'q' (BSD, deprecated)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  LengthModifier() : Position(0), kind(None) {}
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Pointer to the first character of the modifier (null if none).
  const char *getStart() const { return Position; }

  /// Number of characters this class attributes to the modifier: 0 for
  /// None, 2 for AsChar and AsLongLong, and 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Textual form of the modifier; defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
107
108class ConversionSpecifier {
109public:
110  enum Kind {
111    InvalidSpecifier = 0,
112      // C99 conversion specifiers.
113    cArg,
114    dArg,
115    iArg,
116    IntArgBeg = cArg, IntArgEnd = iArg,
117
118    oArg,
119    uArg,
120    xArg,
121    XArg,
122    UIntArgBeg = oArg, UIntArgEnd = XArg,
123
124    fArg,
125    FArg,
126    eArg,
127    EArg,
128    gArg,
129    GArg,
130    aArg,
131    AArg,
132    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
133
134    sArg,
135    pArg,
136    nArg,
137    PercentArg,
138    CArg,
139    SArg,
140
141    // ** Printf-specific **
142
143    // Objective-C specific specifiers.
144    ObjCObjArg,  // '@'
145    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
146
147    // GlibC specific specifiers.
148    PrintErrno,   // 'm'
149
150    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
151
152    // ** Scanf-specific **
153    ScanListArg, // '['
154    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
155  };
156
157  ConversionSpecifier(bool isPrintf)
158    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
159
160  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
161    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
162
163  const char *getStart() const {
164    return Position;
165  }
166
167  llvm::StringRef getCharacters() const {
168    return llvm::StringRef(getStart(), getLength());
169  }
170
171  bool consumesDataArgument() const {
172    switch (kind) {
173      case PrintErrno:
174        assert(IsPrintf);
175      case PercentArg:
176        return false;
177      default:
178        return true;
179    }
180  }
181
182  Kind getKind() const { return kind; }
183  void setKind(Kind k) { kind = k; }
184  unsigned getLength() const {
185    return EndScanList ? EndScanList - Position : 1;
186  }
187
188  const char *toString() const;
189
190  bool isPrintfKind() const { return IsPrintf; }
191
192protected:
193  bool IsPrintf;
194  const char *Position;
195  const char *EndScanList;
196  Kind kind;
197};
198
199class ArgTypeResult {
200public:
201  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
202    CStrTy, WCStrTy };
203private:
204  const Kind K;
205  QualType T;
206  ArgTypeResult(bool) : K(InvalidTy) {}
207public:
208  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
209  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
210  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
211
212  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
213
214  bool isValid() const { return K != InvalidTy; }
215
216  const QualType *getSpecificType() const {
217    return K == SpecificTy ? &T : 0;
218  }
219
220  bool matchesType(ASTContext &C, QualType argTy) const;
221
222  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
223
224  QualType getRepresentativeType(ASTContext &C) const;
225};
226
227class OptionalAmount {
228public:
229  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
230
231  OptionalAmount(HowSpecified howSpecified,
232                 unsigned amount,
233                 const char *amountStart,
234                 unsigned amountLength,
235                 bool usesPositionalArg)
236  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
237  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
238
239  OptionalAmount(bool valid = true)
240  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
241  UsesPositionalArg(0), UsesDotPrefix(0) {}
242
243  bool isInvalid() const {
244    return hs == Invalid;
245  }
246
247  HowSpecified getHowSpecified() const { return hs; }
248  void setHowSpecified(HowSpecified h) { hs = h; }
249
250  bool hasDataArgument() const { return hs == Arg; }
251
252  unsigned getArgIndex() const {
253    assert(hasDataArgument());
254    return amt;
255  }
256
257  unsigned getConstantAmount() const {
258    assert(hs == Constant);
259    return amt;
260  }
261
262  const char *getStart() const {
263      // We include the . character if it is given.
264    return start - UsesDotPrefix;
265  }
266
267  unsigned getConstantLength() const {
268    assert(hs == Constant);
269    return length + UsesDotPrefix;
270  }
271
272  ArgTypeResult getArgType(ASTContext &Ctx) const;
273
274  void toString(llvm::raw_ostream &os) const;
275
276  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
277  unsigned getPositionalArgIndex() const {
278    assert(hasDataArgument());
279    return amt + 1;
280  }
281
282  bool usesDotPrefix() const { return UsesDotPrefix; }
283  void setUsesDotPrefix() { UsesDotPrefix = true; }
284
285private:
286  const char *start;
287  unsigned length;
288  HowSpecified hs;
289  unsigned amt;
290  bool UsesPositionalArg : 1;
291  bool UsesDotPrefix;
292};
293
294
295class FormatSpecifier {
296protected:
297  LengthModifier LM;
298  OptionalAmount FieldWidth;
299  ConversionSpecifier CS;
300    /// Positional arguments, an IEEE extension:
301    ///  IEEE Std 1003.1, 2004 Edition
302    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
303  bool UsesPositionalArg;
304  unsigned argIndex;
305public:
306  FormatSpecifier(bool isPrintf)
307    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
308
309  void setLengthModifier(LengthModifier lm) {
310    LM = lm;
311  }
312
313  void setUsesPositionalArg() { UsesPositionalArg = true; }
314
315  void setArgIndex(unsigned i) {
316    argIndex = i;
317  }
318
319  unsigned getArgIndex() const {
320    return argIndex;
321  }
322
323  unsigned getPositionalArgIndex() const {
324    return argIndex + 1;
325  }
326
327  const LengthModifier &getLengthModifier() const {
328    return LM;
329  }
330
331  const OptionalAmount &getFieldWidth() const {
332    return FieldWidth;
333  }
334
335  void setFieldWidth(const OptionalAmount &Amt) {
336    FieldWidth = Amt;
337  }
338
339  bool usesPositionalArg() const { return UsesPositionalArg; }
340
341  bool hasValidLengthModifier() const;
342};
343
344} // end analyze_format_string namespace
345
346//===----------------------------------------------------------------------===//
347/// Pieces specific to fprintf format strings.
348
349namespace analyze_printf {
350
351class PrintfConversionSpecifier :
352  public analyze_format_string::ConversionSpecifier  {
353public:
354  PrintfConversionSpecifier()
355    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
356
357  PrintfConversionSpecifier(const char *pos, Kind k)
358    : ConversionSpecifier(true, pos, k) {}
359
360  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
361  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
362  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
363  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
364                                    kind <= DoubleArgBeg; }
365  unsigned getLength() const {
366      // Conversion specifiers currently only are represented by
367      // single characters, but we be flexible.
368    return 1;
369  }
370
371  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
372    return CS->isPrintfKind();
373  }
374};
375
376using analyze_format_string::ArgTypeResult;
377using analyze_format_string::LengthModifier;
378using analyze_format_string::OptionalAmount;
379using analyze_format_string::OptionalFlag;
380
381class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
382  OptionalFlag IsLeftJustified; // '-'
383  OptionalFlag HasPlusPrefix; // '+'
384  OptionalFlag HasSpacePrefix; // ' '
385  OptionalFlag HasAlternativeForm; // '#'
386  OptionalFlag HasLeadingZeroes; // '0'
387  OptionalAmount Precision;
388public:
389  PrintfSpecifier() :
390    FormatSpecifier(/* isPrintf = */ true),
391    IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
392    HasAlternativeForm("#"), HasLeadingZeroes("0") {}
393
394  static PrintfSpecifier Parse(const char *beg, const char *end);
395
396    // Methods for incrementally constructing the PrintfSpecifier.
397  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
398    CS = cs;
399  }
400  void setIsLeftJustified(const char *position) {
401    IsLeftJustified = true;
402    IsLeftJustified.setPosition(position);
403  }
404  void setHasPlusPrefix(const char *position) {
405    HasPlusPrefix = true;
406    HasPlusPrefix.setPosition(position);
407  }
408  void setHasSpacePrefix(const char *position) {
409    HasSpacePrefix = true;
410    HasSpacePrefix.setPosition(position);
411  }
412  void setHasAlternativeForm(const char *position) {
413    HasAlternativeForm = true;
414    HasAlternativeForm.setPosition(position);
415  }
416  void setHasLeadingZeros(const char *position) {
417    HasLeadingZeroes = true;
418    HasLeadingZeroes.setPosition(position);
419  }
420  void setUsesPositionalArg() { UsesPositionalArg = true; }
421
422    // Methods for querying the format specifier.
423
424  const PrintfConversionSpecifier &getConversionSpecifier() const {
425    return cast<PrintfConversionSpecifier>(CS);
426  }
427
428  void setPrecision(const OptionalAmount &Amt) {
429    Precision = Amt;
430    Precision.setUsesDotPrefix();
431  }
432
433  const OptionalAmount &getPrecision() const {
434    return Precision;
435  }
436
437  bool consumesDataArgument() const {
438    return getConversionSpecifier().consumesDataArgument();
439  }
440
441  /// \brief Returns the builtin type that a data argument
442  /// paired with this format specifier should have.  This method
443  /// will return null if the format specifier does not have
444  /// a matching data argument or the matching argument matches
445  /// more than one type.
446  ArgTypeResult getArgType(ASTContext &Ctx) const;
447
448  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
449  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
450  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
451  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
452  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
453  bool usesPositionalArg() const { return UsesPositionalArg; }
454
455    /// Changes the specifier and length according to a QualType, retaining any
456    /// flags or options. Returns true on success, or false when a conversion
457    /// was not successful.
458  bool fixType(QualType QT);
459
460  void toString(llvm::raw_ostream &os) const;
461
462    // Validation methods - to check if any element results in undefined behavior
463  bool hasValidPlusPrefix() const;
464  bool hasValidAlternativeForm() const;
465  bool hasValidLeadingZeros() const;
466  bool hasValidSpacePrefix() const;
467  bool hasValidLeftJustified() const;
468
469  bool hasValidPrecision() const;
470  bool hasValidFieldWidth() const;
471};
472}  // end analyze_printf namespace
473
474//===----------------------------------------------------------------------===//
475/// Pieces specific to fscanf format strings.
476
477namespace analyze_scanf {
478
479class ScanfConversionSpecifier :
480    public analyze_format_string::ConversionSpecifier  {
481public:
482  ScanfConversionSpecifier()
483    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
484
485  ScanfConversionSpecifier(const char *pos, Kind k)
486    : ConversionSpecifier(false, pos, k) {}
487
488  void setEndScanList(const char *pos) { EndScanList = pos; }
489
490  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
491    return !CS->isPrintfKind();
492  }
493};
494
495using analyze_format_string::LengthModifier;
496using analyze_format_string::OptionalAmount;
497using analyze_format_string::OptionalFlag;
498
499class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
500  OptionalFlag SuppressAssignment; // '*'
501public:
502  ScanfSpecifier() :
503    FormatSpecifier(/* isPrintf = */ false),
504    SuppressAssignment("*") {}
505
506  void setSuppressAssignment(const char *position) {
507    SuppressAssignment = true;
508    SuppressAssignment.setPosition(position);
509  }
510
511  const OptionalFlag &getSuppressAssignment() const {
512    return SuppressAssignment;
513  }
514
515  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
516    CS = cs;
517  }
518
519  const ScanfConversionSpecifier &getConversionSpecifier() const {
520    return cast<ScanfConversionSpecifier>(CS);
521  }
522
523  bool consumesDataArgument() const {
524    return CS.consumesDataArgument() && !SuppressAssignment;
525  }
526
527  static ScanfSpecifier Parse(const char *beg, const char *end);
528};
529
530} // end analyze_scanf namespace
531
532//===----------------------------------------------------------------------===//
533// Parsing and processing of format strings (both fprintf and fscanf).
534
535namespace analyze_format_string {
536
/// Identifies which part of a specifier a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
538
539class FormatStringHandler {
540public:
541  FormatStringHandler() {}
542  virtual ~FormatStringHandler();
543
544  virtual void HandleNullChar(const char *nullCharacter) {}
545
546  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
547                                     PositionContext p) {}
548
549  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
550
551  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
552                                         unsigned specifierLen) {}
553
554  // Printf-specific handlers.
555
556  virtual bool HandleInvalidPrintfConversionSpecifier(
557                                      const analyze_printf::PrintfSpecifier &FS,
558                                      const char *startSpecifier,
559                                      unsigned specifierLen) {
560    return true;
561  }
562
563  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
564                                     const char *startSpecifier,
565                                     unsigned specifierLen) {
566    return true;
567  }
568
569    // Scanf-specific handlers.
570
571  virtual bool HandleInvalidScanfConversionSpecifier(
572                                        const analyze_scanf::ScanfSpecifier &FS,
573                                        const char *startSpecifier,
574                                        unsigned specifierLen) {
575    return true;
576  }
577
578  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
579                                    const char *startSpecifier,
580                                    unsigned specifierLen) {
581    return true;
582  }
583
584  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
585};
586
/// Entry point for processing a printf-style format string with handler
/// \p H; defined out of line.
/// NOTE(review): presumably \p beg and \p end delimit the format string and
/// \p H receives the callbacks - the meaning of the bool result is not
/// visible in this header; confirm against the definition.
bool ParsePrintfString(FormatStringHandler &H,
                       const char *beg, const char *end);

/// Entry point for processing a scanf-style format string with handler
/// \p H; defined out of line.
/// NOTE(review): same caveats as ParsePrintfString - confirm the range
/// convention and the meaning of the bool result against the definition.
bool ParseScanfString(FormatStringHandler &H,
                       const char *beg, const char *end);
592
593} // end analyze_format_string namespace
594} // end clang namespace
595#endif
596