FormatString.h revision 2da5036adaef57395270ef2dd82358fc513d8616
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// This file defines APIs for analyzing the format strings of printf, fscanf,
// and friends.
//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// Represents an optional flag of a format specifier together with its
/// textual representation and the location at which it was recorded.
///
/// A flag starts out cleared and without a position.  The position is stored
/// as a raw pointer supplied by the caller; this class neither copies nor
/// owns the pointed-to characters.
class OptionalFlag {
public:
  /// Creates a cleared flag.
  ///
  /// \param Representation  string handed back by toString(); only the
  ///                        pointer is stored.
  OptionalFlag(const char *Representation)
      // The position is explicitly nulled so that the assertion in
      // getPosition() checks a well-defined value instead of reading an
      // indeterminate pointer when setPosition() was never called.
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  Declared const so that it
  /// can be used through const references to the flag.
  bool isSet() const { return flag; }

  /// Marks the flag as set.
  void set() { flag = true; }

  /// Marks the flag as not set.  The recorded position is left untouched.
  void clear() { flag = false; }

  /// Records where the flag was seen.  \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position.  Asserts that a position has been
  /// recorded via setPosition().
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators that let the flag be used like a bool.

  /// Yields the same value as isSet().
  operator bool() const { return flag; }

  /// Sets or clears the flag from a bool; the position is not modified.
  OptionalFlag &operator=(const bool &rhs) {
    flag = rhs;
    return *this;
  }

private:
  const char *representation; // Spelling of the flag; not owned.
  const char *position;       // Where the flag was seen; null until set.
  bool flag;                  // Whether the flag is set.
};
60
/// Describes the length modifier of a scanf/printf conversion: which kind it
/// is and where its first character is located.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,             // 'hh'
    AsShort,            // 'h'
    AsLong,             // 'l'
    AsLongLong,         // 'll', 'q' (BSD, deprecated)
    AsIntMax,           // 'j'
    AsSizeT,            // 'z'
    AsPtrDiff,          // 't'
    AsLongDouble,       // 'L'
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates a modifier of kind None with a null start position.
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k starting at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position given at construction (null by default).
  const char *getStart() const { return Position; }

  /// Returns the number of characters attributed to the modifier:
  /// 0 for None, 2 for AsChar and AsLongLong, and 1 for every other kind.
  ///
  /// NOTE(review): per the enum comment AsLongLong also stands for the
  /// one-character 'q', for which 2 is still reported here -- confirm how
  /// callers use this length.
  unsigned getLength() const {
    if (kind == None)
      return 0;

    const bool isTwoCharacters = (kind == AsChar || kind == AsLongLong);
    return isTwoCharacters ? 2 : 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position; // First character of the modifier; not owned.
  Kind kind;
};
107
108class ArgTypeResult {
109public:
110  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
111    CStrTy, WCStrTy };
112private:
113  const Kind K;
114  QualType T;
115  ArgTypeResult(bool) : K(InvalidTy) {}
116public:
117  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
118  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
119  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
120
121  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
122
123  bool isValid() const { return K != InvalidTy; }
124
125  const QualType *getSpecificType() const {
126    return K == SpecificTy ? &T : 0;
127  }
128
129  bool matchesType(ASTContext &C, QualType argTy) const;
130
131  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
132
133  QualType getRepresentativeType(ASTContext &C) const;
134};
135
136class OptionalAmount {
137public:
138  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
139
140  OptionalAmount(HowSpecified howSpecified,
141                 unsigned amount,
142                 const char *amountStart,
143                 unsigned amountLength,
144                 bool usesPositionalArg)
145  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
146  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
147
148  OptionalAmount(bool valid = true)
149  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
150  UsesPositionalArg(0), UsesDotPrefix(0) {}
151
152  bool isInvalid() const {
153    return hs == Invalid;
154  }
155
156  HowSpecified getHowSpecified() const { return hs; }
157  void setHowSpecified(HowSpecified h) { hs = h; }
158
159  bool hasDataArgument() const { return hs == Arg; }
160
161  unsigned getArgIndex() const {
162    assert(hasDataArgument());
163    return amt;
164  }
165
166  unsigned getConstantAmount() const {
167    assert(hs == Constant);
168    return amt;
169  }
170
171  const char *getStart() const {
172      // We include the . character if it is given.
173    return start - UsesDotPrefix;
174  }
175
176  unsigned getConstantLength() const {
177    assert(hs == Constant);
178    return length + UsesDotPrefix;
179  }
180
181  ArgTypeResult getArgType(ASTContext &Ctx) const;
182
183  void toString(llvm::raw_ostream &os) const;
184
185  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
186  unsigned getPositionalArgIndex() const {
187    assert(hasDataArgument());
188    return amt + 1;
189  }
190
191  bool usesDotPrefix() const { return UsesDotPrefix; }
192  void setUsesDotPrefix() { UsesDotPrefix = true; }
193
194private:
195  const char *start;
196  unsigned length;
197  HowSpecified hs;
198  unsigned amt;
199  bool UsesPositionalArg : 1;
200  bool UsesDotPrefix;
201};
202
203
204class FormatSpecifier {
205protected:
206  LengthModifier LM;
207  OptionalAmount FieldWidth;
208    /// Positional arguments, an IEEE extension:
209    ///  IEEE Std 1003.1, 2004 Edition
210    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
211  bool UsesPositionalArg;
212  unsigned argIndex;
213public:
214  FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {}
215
216  void setLengthModifier(LengthModifier lm) {
217    LM = lm;
218  }
219
220  void setUsesPositionalArg() { UsesPositionalArg = true; }
221
222  void setArgIndex(unsigned i) {
223    argIndex = i;
224  }
225
226  unsigned getArgIndex() const {
227    return argIndex;
228  }
229
230  unsigned getPositionalArgIndex() const {
231    return argIndex + 1;
232  }
233
234  const LengthModifier &getLengthModifier() const {
235    return LM;
236  }
237
238  const OptionalAmount &getFieldWidth() const {
239    return FieldWidth;
240  }
241
242  void setFieldWidth(const OptionalAmount &Amt) {
243    FieldWidth = Amt;
244  }
245
246  bool usesPositionalArg() const { return UsesPositionalArg; }
247};
248
249} // end analyze_format_string namespace
250
251//===----------------------------------------------------------------------===//
252/// Pieces specific to fprintf format strings.
253
254namespace analyze_printf {
255
256class ConversionSpecifier {
257public:
258  enum Kind {
259    InvalidSpecifier = 0,
260    // C99 conversion specifiers.
261    cArg,
262    dArg,
263    iArg,
264    IntArgBeg = cArg, IntArgEnd = iArg,
265
266    oArg,
267    uArg,
268    xArg,
269    XArg,
270    UIntArgBeg = oArg, UIntArgEnd = XArg,
271
272    fArg,
273    FArg,
274    eArg,
275    EArg,
276    gArg,
277    GArg,
278    aArg,
279    AArg,
280    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
281
282    sArg,
283    pArg,
284    nArg,
285    PercentArg,
286    CArg,
287    SArg,
288
289    // ** Printf-specific **
290
291    // Objective-C specific specifiers.
292    ObjCObjArg,  // '@'
293    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
294
295    // GlibC specific specifiers.
296    PrintErrno,   // 'm'
297
298    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno
299  };
300
301  ConversionSpecifier()
302  : Position(0), kind(InvalidSpecifier) {}
303
304  ConversionSpecifier(const char *pos, Kind k)
305  : Position(pos), kind(k) {}
306
307  const char *getStart() const {
308    return Position;
309  }
310
311  llvm::StringRef getCharacters() const {
312    return llvm::StringRef(getStart(), getLength());
313  }
314
315  bool consumesDataArgument() const {
316    switch (kind) {
317      case PercentArg:
318      case PrintErrno:
319        return false;
320      default:
321        return true;
322    }
323  }
324
325  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
326  bool isIntArg() const { return kind >= dArg && kind <= iArg; }
327  bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
328  bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
329  Kind getKind() const { return kind; }
330  void setKind(Kind k) { kind = k; }
331  unsigned getLength() const {
332      // Conversion specifiers currently only are represented by
333      // single characters, but we be flexible.
334    return 1;
335  }
336  const char *toString() const;
337
338private:
339  const char *Position;
340  Kind kind;
341};
342
343using analyze_format_string::ArgTypeResult;
344using analyze_format_string::LengthModifier;
345using analyze_format_string::OptionalAmount;
346using analyze_format_string::OptionalFlag;
347
348class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
349  OptionalFlag IsLeftJustified; // '-'
350  OptionalFlag HasPlusPrefix; // '+'
351  OptionalFlag HasSpacePrefix; // ' '
352  OptionalFlag HasAlternativeForm; // '#'
353  OptionalFlag HasLeadingZeroes; // '0'
354  ConversionSpecifier CS;
355  OptionalAmount Precision;
356public:
357  PrintfSpecifier() :
358  IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
359  HasAlternativeForm("#"), HasLeadingZeroes("0") {}
360
361  static PrintfSpecifier Parse(const char *beg, const char *end);
362
363    // Methods for incrementally constructing the PrintfSpecifier.
364  void setConversionSpecifier(const ConversionSpecifier &cs) {
365    CS = cs;
366  }
367  void setIsLeftJustified(const char *position) {
368    IsLeftJustified = true;
369    IsLeftJustified.setPosition(position);
370  }
371  void setHasPlusPrefix(const char *position) {
372    HasPlusPrefix = true;
373    HasPlusPrefix.setPosition(position);
374  }
375  void setHasSpacePrefix(const char *position) {
376    HasSpacePrefix = true;
377    HasSpacePrefix.setPosition(position);
378  }
379  void setHasAlternativeForm(const char *position) {
380    HasAlternativeForm = true;
381    HasAlternativeForm.setPosition(position);
382  }
383  void setHasLeadingZeros(const char *position) {
384    HasLeadingZeroes = true;
385    HasLeadingZeroes.setPosition(position);
386  }
387  void setUsesPositionalArg() { UsesPositionalArg = true; }
388
389    // Methods for querying the format specifier.
390
391  const ConversionSpecifier &getConversionSpecifier() const {
392    return CS;
393  }
394
395  void setPrecision(const OptionalAmount &Amt) {
396    Precision = Amt;
397    Precision.setUsesDotPrefix();
398  }
399
400  const OptionalAmount &getPrecision() const {
401    return Precision;
402  }
403
404  bool consumesDataArgument() const {
405    return CS.consumesDataArgument();
406  }
407
408  /// \brief Returns the builtin type that a data argument
409  /// paired with this format specifier should have.  This method
410  /// will return null if the format specifier does not have
411  /// a matching data argument or the matching argument matches
412  /// more than one type.
413  ArgTypeResult getArgType(ASTContext &Ctx) const;
414
415  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
416  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
417  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
418  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
419  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
420  bool usesPositionalArg() const { return UsesPositionalArg; }
421
422    /// Changes the specifier and length according to a QualType, retaining any
423    /// flags or options. Returns true on success, or false when a conversion
424    /// was not successful.
425  bool fixType(QualType QT);
426
427  void toString(llvm::raw_ostream &os) const;
428
429    // Validation methods - to check if any element results in undefined behavior
430  bool hasValidPlusPrefix() const;
431  bool hasValidAlternativeForm() const;
432  bool hasValidLeadingZeros() const;
433  bool hasValidSpacePrefix() const;
434  bool hasValidLeftJustified() const;
435
436  bool hasValidLengthModifier() const;
437  bool hasValidPrecision() const;
438  bool hasValidFieldWidth() const;
439};
440}  // end analyze_printf namespace
441
442//===----------------------------------------------------------------------===//
443/// Pieces specific to fscanf format strings.
444
445namespace analyze_scanf {
446
447class ConversionSpecifier {
448public:
449  enum Kind {
450    InvalidSpecifier = 0,
451      // C99 conversion specifiers.
452    dArg, // 'd'
453    iArg, // 'i',
454    oArg, // 'o',
455    uArg, // 'u',
456    xArg, // 'x',
457    XArg, // 'X',
458    fArg, // 'f',
459    FArg, // 'F',
460    eArg, // 'e',
461    EArg, // 'E',
462    gArg, // 'g',
463    GArg, // 'G',
464    aArg, // 'a',
465    AArg, // 'A',
466    sArg, // 's', // match sequence of non-write-space characters
467    pArg,        // 'p'
468    cArg,              // 'c', differs from printf, writes array of characters
469    nArg,  // 'n', differs from printf, writes back args consumed
470    PercentArg,        // '%'
471    ScanListArg,       // '[' followed by scan list
472      // IEEE Std 1003.1 extensions.
473    CArg, // 'C', same as writing 'lc'
474    SArg, // 'S', same as writing 'ls'
475      // Specifier ranges.
476    IntArgBeg = dArg,
477    IntArgEnd = iArg,
478    UIntArgBeg = oArg,
479    UIntArgEnd = XArg,
480    DoubleArgBeg = fArg,
481    DoubleArgEnd = AArg
482  };
483
484  ConversionSpecifier()
485  : Position(0), EndScanList(0), kind(InvalidSpecifier) {}
486
487  ConversionSpecifier(const char *pos, Kind k)
488  : Position(pos), EndScanList(0), kind(k) {}
489
490  const char *getStart() const {
491    return Position;
492  }
493
494  void setEndScanList(const char *pos) { EndScanList = pos; }
495
496  llvm::StringRef getCharacters() const {
497    return llvm::StringRef(getStart(), getLength());
498  }
499
500  bool consumesDataArgument() const {
501    return kind != PercentArg;
502  }
503
504  bool isIntArg() const { return kind >= dArg && kind <= iArg; }
505  bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
506  bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
507  Kind getKind() const { return kind; }
508  void setKind(Kind k) { kind = k; }
509
510  unsigned getLength() const {
511    return EndScanList ? EndScanList - Position : 1;
512  }
513
514  const char *toString() const;
515
516private:
517  const char *Position;
518  const char *EndScanList;
519  Kind kind;
520};
521
522using analyze_format_string::LengthModifier;
523using analyze_format_string::OptionalAmount;
524using analyze_format_string::OptionalFlag;
525
526class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
527  OptionalFlag SuppressAssignment; // '*'
528  ConversionSpecifier CS;
529public:
530  ScanfSpecifier() : SuppressAssignment("*") {}
531
532  void setSuppressAssignment(const char *position) {
533    SuppressAssignment = true;
534    SuppressAssignment.setPosition(position);
535  }
536
537  const OptionalFlag &getSuppressAssignment() const {
538    return SuppressAssignment;
539  }
540
541  void setConversionSpecifier(const ConversionSpecifier &cs) {
542    CS = cs;
543  }
544
545  const ConversionSpecifier &getConversionSpecifier() const {
546    return CS;
547  }
548
549  bool consumesDataArgument() const {
550    return CS.consumesDataArgument() && !SuppressAssignment;
551  }
552
553  static ScanfSpecifier Parse(const char *beg, const char *end);
554
555};
556
557} // end analyze_scanf namespace
558
559//===----------------------------------------------------------------------===//
560// Parsing and processing of format strings (both fprintf and fscanf).
561
562namespace analyze_format_string {
563
/// Identifies which part of a specifier a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
565
566class FormatStringHandler {
567public:
568  FormatStringHandler() {}
569  virtual ~FormatStringHandler();
570
571  virtual void HandleNullChar(const char *nullCharacter) {}
572
573  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
574                                     PositionContext p) {}
575
576  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
577
578  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
579                                         unsigned specifierLen) {}
580
581  // Printf-specific handlers.
582
583  virtual bool HandleInvalidPrintfConversionSpecifier(
584                                      const analyze_printf::PrintfSpecifier &FS,
585                                      const char *startSpecifier,
586                                      unsigned specifierLen) {
587    return true;
588  }
589
590  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
591                                     const char *startSpecifier,
592                                     unsigned specifierLen) {
593    return true;
594  }
595
596    // Scanf-specific handlers.
597
598  virtual bool HandleInvalidScanfConversionSpecifier(
599                                        const analyze_scanf::ScanfSpecifier &FS,
600                                        const char *startSpecifier,
601                                        unsigned specifierLen) {
602    return true;
603  }
604
605  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
606                                    const char *startSpecifier,
607                                    unsigned specifierLen) {
608    return true;
609  }
610
611  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
612};
613
614bool ParsePrintfString(FormatStringHandler &H,
615                       const char *beg, const char *end);
616
617bool ParseScanfString(FormatStringHandler &H,
618                       const char *beg, const char *end);
619
620} // end analyze_format_string namespace
621} // end clang namespace
622#endif
623