FormatString.h revision de183a48dd8fcff5e0343e84c8a6b563088447ce
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// Represents an optional flag of a format specifier together with its
/// textual representation and, once recorded, the location in the format
/// string where the flag was written.
class OptionalFlag {
public:
  /// Creates an unset flag whose textual form is \p Representation.
  /// The position starts out null; it must be recorded with setPosition()
  /// before getPosition() may be called.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag has been set.  Const so that it can be used
  /// through const references to a flag.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag appears.  \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position.  Asserts that a position was recorded;
  /// because the member is null-initialized, that check is well defined even
  /// when setPosition() was never called.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the textual representation passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
60
/// Describes the length modifier of a scanf/printf conversion: which kind it
/// is and where in the format string it starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll', 'q' (BSD, deprecated)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an absent modifier (kind None, null start position).
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position given at construction (null by default).
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier is assumed to occupy:
  /// 0 for None, 2 for AsChar and AsLongLong, and 1 for everything else.
  /// NOTE(review): the enum comment lists the one-character 'q' under
  /// AsLongLong, for which 2 is reported here as well -- confirm with the
  /// parser whether that is intended.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Textual form of the modifier; defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
107
108class ArgTypeResult {
109public:
110  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
111    CStrTy, WCStrTy };
112private:
113  const Kind K;
114  QualType T;
115  ArgTypeResult(bool) : K(InvalidTy) {}
116public:
117  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
118  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
119  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
120
121  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
122
123  bool isValid() const { return K != InvalidTy; }
124
125  const QualType *getSpecificType() const {
126    return K == SpecificTy ? &T : 0;
127  }
128
129  bool matchesType(ASTContext &C, QualType argTy) const;
130
131  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
132
133  QualType getRepresentativeType(ASTContext &C) const;
134};
135
136class OptionalAmount {
137public:
138  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
139
140  OptionalAmount(HowSpecified howSpecified,
141                 unsigned amount,
142                 const char *amountStart,
143                 unsigned amountLength,
144                 bool usesPositionalArg)
145  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
146  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
147
148  OptionalAmount(bool valid = true)
149  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
150  UsesPositionalArg(0), UsesDotPrefix(0) {}
151
152  bool isInvalid() const {
153    return hs == Invalid;
154  }
155
156  HowSpecified getHowSpecified() const { return hs; }
157  void setHowSpecified(HowSpecified h) { hs = h; }
158
159  bool hasDataArgument() const { return hs == Arg; }
160
161  unsigned getArgIndex() const {
162    assert(hasDataArgument());
163    return amt;
164  }
165
166  unsigned getConstantAmount() const {
167    assert(hs == Constant);
168    return amt;
169  }
170
171  const char *getStart() const {
172      // We include the . character if it is given.
173    return start - UsesDotPrefix;
174  }
175
176  unsigned getConstantLength() const {
177    assert(hs == Constant);
178    return length + UsesDotPrefix;
179  }
180
181  ArgTypeResult getArgType(ASTContext &Ctx) const;
182
183  void toString(llvm::raw_ostream &os) const;
184
185  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
186  unsigned getPositionalArgIndex() const {
187    assert(hasDataArgument());
188    return amt + 1;
189  }
190
191  bool usesDotPrefix() const { return UsesDotPrefix; }
192  void setUsesDotPrefix() { UsesDotPrefix = true; }
193
194private:
195  const char *start;
196  unsigned length;
197  HowSpecified hs;
198  unsigned amt;
199  bool UsesPositionalArg : 1;
200  bool UsesDotPrefix;
201};
202
203
204class FormatSpecifier {
205protected:
206  LengthModifier LM;
207  OptionalAmount FieldWidth;
208    /// Positional arguments, an IEEE extension:
209    ///  IEEE Std 1003.1, 2004 Edition
210    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
211  bool UsesPositionalArg;
212  unsigned argIndex;
213public:
214  FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {}
215
216  void setLengthModifier(LengthModifier lm) {
217    LM = lm;
218  }
219
220  void setUsesPositionalArg() { UsesPositionalArg = true; }
221
222  void setArgIndex(unsigned i) {
223    argIndex = i;
224  }
225
226  unsigned getArgIndex() const {
227    return argIndex;
228  }
229
230  unsigned getPositionalArgIndex() const {
231    return argIndex + 1;
232  }
233
234  const LengthModifier &getLengthModifier() const {
235    return LM;
236  }
237
238  const OptionalAmount &getFieldWidth() const {
239    return FieldWidth;
240  }
241
242  void setFieldWidth(const OptionalAmount &Amt) {
243    FieldWidth = Amt;
244  }
245
246  bool usesPositionalArg() const { return UsesPositionalArg; }
247};
248
249} // end analyze_format_string namespace
250
251//===----------------------------------------------------------------------===//
252/// Pieces specific to fprintf format strings.
253
254namespace analyze_printf {
255
256class ConversionSpecifier {
257public:
258  enum Kind {
259    InvalidSpecifier = 0,
260      // C99 conversion specifiers.
261    dArg, // 'd'
262    cArg, // 'c'
263    iArg, // 'i',
264    oArg, // 'o',
265    uArg, // 'u',
266    xArg, // 'x',
267    XArg, // 'X',
268    fArg, // 'f',
269    FArg, // 'F',
270    eArg, // 'e',
271    EArg, // 'E',
272    gArg, // 'g',
273    GArg, // 'G',
274    aArg, // 'a',
275    AArg, // 'A',
276    sArg, // 's'
277    pArg, // 'p'
278    nArg, // 'n'
279    PercentArg,    // '%'
280      // MacOS X unicode extensions.
281    CArg, // 'C'
282    SArg, // 'S'
283      // Objective-C specific specifiers.
284    ObjCObjArg,    // '@'
285      // GlibC specific specifiers.
286    PrintErrno,    // 'm'
287      // Specifier ranges.
288    IntArgBeg = dArg,
289    IntArgEnd = iArg,
290    UIntArgBeg = oArg,
291    UIntArgEnd = XArg,
292    DoubleArgBeg = fArg,
293    DoubleArgEnd = AArg,
294    C99Beg = IntArgBeg,
295    C99End = DoubleArgEnd,
296    ObjCBeg = ObjCObjArg,
297    ObjCEnd = ObjCObjArg
298  };
299
300  ConversionSpecifier()
301  : Position(0), kind(InvalidSpecifier) {}
302
303  ConversionSpecifier(const char *pos, Kind k)
304  : Position(pos), kind(k) {}
305
306  const char *getStart() const {
307    return Position;
308  }
309
310  llvm::StringRef getCharacters() const {
311    return llvm::StringRef(getStart(), getLength());
312  }
313
314  bool consumesDataArgument() const {
315    switch (kind) {
316      case PercentArg:
317      case PrintErrno:
318        return false;
319      default:
320        return true;
321    }
322  }
323
324  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
325  bool isIntArg() const { return kind >= dArg && kind <= iArg; }
326  bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
327  bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
328  Kind getKind() const { return kind; }
329  void setKind(Kind k) { kind = k; }
330  unsigned getLength() const {
331      // Conversion specifiers currently only are represented by
332      // single characters, but we be flexible.
333    return 1;
334  }
335  const char *toString() const;
336
337private:
338  const char *Position;
339  Kind kind;
340};
341
342using analyze_format_string::ArgTypeResult;
343using analyze_format_string::LengthModifier;
344using analyze_format_string::OptionalAmount;
345using analyze_format_string::OptionalFlag;
346
347class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
348  OptionalFlag IsLeftJustified; // '-'
349  OptionalFlag HasPlusPrefix; // '+'
350  OptionalFlag HasSpacePrefix; // ' '
351  OptionalFlag HasAlternativeForm; // '#'
352  OptionalFlag HasLeadingZeroes; // '0'
353  ConversionSpecifier CS;
354  OptionalAmount Precision;
355public:
356  PrintfSpecifier() :
357  IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
358  HasAlternativeForm("#"), HasLeadingZeroes("0") {}
359
360  static PrintfSpecifier Parse(const char *beg, const char *end);
361
362    // Methods for incrementally constructing the PrintfSpecifier.
363  void setConversionSpecifier(const ConversionSpecifier &cs) {
364    CS = cs;
365  }
366  void setIsLeftJustified(const char *position) {
367    IsLeftJustified = true;
368    IsLeftJustified.setPosition(position);
369  }
370  void setHasPlusPrefix(const char *position) {
371    HasPlusPrefix = true;
372    HasPlusPrefix.setPosition(position);
373  }
374  void setHasSpacePrefix(const char *position) {
375    HasSpacePrefix = true;
376    HasSpacePrefix.setPosition(position);
377  }
378  void setHasAlternativeForm(const char *position) {
379    HasAlternativeForm = true;
380    HasAlternativeForm.setPosition(position);
381  }
382  void setHasLeadingZeros(const char *position) {
383    HasLeadingZeroes = true;
384    HasLeadingZeroes.setPosition(position);
385  }
386  void setUsesPositionalArg() { UsesPositionalArg = true; }
387
388    // Methods for querying the format specifier.
389
390  const ConversionSpecifier &getConversionSpecifier() const {
391    return CS;
392  }
393
394  void setPrecision(const OptionalAmount &Amt) {
395    Precision = Amt;
396    Precision.setUsesDotPrefix();
397  }
398
399  const OptionalAmount &getPrecision() const {
400    return Precision;
401  }
402
403  bool consumesDataArgument() const {
404    return CS.consumesDataArgument();
405  }
406
407  /// \brief Returns the builtin type that a data argument
408  /// paired with this format specifier should have.  This method
409  /// will return null if the format specifier does not have
410  /// a matching data argument or the matching argument matches
411  /// more than one type.
412  ArgTypeResult getArgType(ASTContext &Ctx) const;
413
414  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
415  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
416  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
417  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
418  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
419  bool usesPositionalArg() const { return UsesPositionalArg; }
420
421    /// Changes the specifier and length according to a QualType, retaining any
422    /// flags or options. Returns true on success, or false when a conversion
423    /// was not successful.
424  bool fixType(QualType QT);
425
426  void toString(llvm::raw_ostream &os) const;
427
428    // Validation methods - to check if any element results in undefined behavior
429  bool hasValidPlusPrefix() const;
430  bool hasValidAlternativeForm() const;
431  bool hasValidLeadingZeros() const;
432  bool hasValidSpacePrefix() const;
433  bool hasValidLeftJustified() const;
434
435  bool hasValidLengthModifier() const;
436  bool hasValidPrecision() const;
437  bool hasValidFieldWidth() const;
438};
439}  // end analyze_printf namespace
440
441//===----------------------------------------------------------------------===//
442/// Pieces specific to fscanf format strings.
443
444namespace analyze_scanf {
445
446class ConversionSpecifier {
447public:
448  enum Kind {
449    InvalidSpecifier = 0,
450      // C99 conversion specifiers.
451    dArg, // 'd'
452    iArg, // 'i',
453    oArg, // 'o',
454    uArg, // 'u',
455    xArg, // 'x',
456    XArg, // 'X',
457    fArg, // 'f',
458    FArg, // 'F',
459    eArg, // 'e',
460    EArg, // 'E',
461    gArg, // 'g',
462    GArg, // 'G',
463    aArg, // 'a',
464    AArg, // 'A',
465    sArg, // 's', // match sequence of non-write-space characters
466    pArg,        // 'p'
467    cArg,              // 'c', differs from printf, writes array of characters
468    nArg,  // 'n', differs from printf, writes back args consumed
469    PercentArg,        // '%'
470    ScanListArg,       // '[' followed by scan list
471      // IEEE Std 1003.1 extensions.
472    CArg, // 'C', same as writing 'lc'
473    SArg, // 'S', same as writing 'ls'
474      // Specifier ranges.
475    IntArgBeg = dArg,
476    IntArgEnd = iArg,
477    UIntArgBeg = oArg,
478    UIntArgEnd = XArg,
479    DoubleArgBeg = fArg,
480    DoubleArgEnd = AArg
481  };
482
483  ConversionSpecifier()
484  : Position(0), EndScanList(0), kind(InvalidSpecifier) {}
485
486  ConversionSpecifier(const char *pos, Kind k)
487  : Position(pos), EndScanList(0), kind(k) {}
488
489  const char *getStart() const {
490    return Position;
491  }
492
493  void setEndScanList(const char *pos) { EndScanList = pos; }
494
495  llvm::StringRef getCharacters() const {
496    return llvm::StringRef(getStart(), getLength());
497  }
498
499  bool consumesDataArgument() const {
500    return kind != PercentArg;
501  }
502
503  bool isIntArg() const { return kind >= dArg && kind <= iArg; }
504  bool isUIntArg() const { return kind >= oArg && kind <= XArg; }
505  bool isDoubleArg() const { return kind >= fArg && kind <= AArg; }
506  Kind getKind() const { return kind; }
507  void setKind(Kind k) { kind = k; }
508
509  unsigned getLength() const {
510    return EndScanList ? EndScanList - Position : 1;
511  }
512
513  const char *toString() const;
514
515private:
516  const char *Position;
517  const char *EndScanList;
518  Kind kind;
519};
520
521using analyze_format_string::LengthModifier;
522using analyze_format_string::OptionalAmount;
523using analyze_format_string::OptionalFlag;
524
525class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
526  OptionalFlag SuppressAssignment; // '*'
527  ConversionSpecifier CS;
528public:
529  ScanfSpecifier() : SuppressAssignment("*") {}
530
531  void setSuppressAssignment(const char *position) {
532    SuppressAssignment = true;
533    SuppressAssignment.setPosition(position);
534  }
535
536  const OptionalFlag &getSuppressAssignment() const {
537    return SuppressAssignment;
538  }
539
540  void setConversionSpecifier(const ConversionSpecifier &cs) {
541    CS = cs;
542  }
543
544  const ConversionSpecifier &getConversionSpecifier() const {
545    return CS;
546  }
547
548  bool consumesDataArgument() const {
549    return CS.consumesDataArgument() && !SuppressAssignment;
550  }
551
552  static ScanfSpecifier Parse(const char *beg, const char *end);
553
554};
555
556} // end analyze_scanf namespace
557
558//===----------------------------------------------------------------------===//
559// Parsing and processing of format strings (both fprintf and fscanf).
560
561namespace analyze_format_string {
562
/// Identifies which part of a specifier a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
564
565class FormatStringHandler {
566public:
567  FormatStringHandler() {}
568  virtual ~FormatStringHandler();
569
570  virtual void HandleNullChar(const char *nullCharacter) {}
571
572  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
573                                     PositionContext p) {}
574
575  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
576
577  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
578                                         unsigned specifierLen) {}
579
580  // Printf-specific handlers.
581
582  virtual bool HandleInvalidPrintfConversionSpecifier(
583                                      const analyze_printf::PrintfSpecifier &FS,
584                                      const char *startSpecifier,
585                                      unsigned specifierLen) {
586    return true;
587  }
588
589  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
590                                     const char *startSpecifier,
591                                     unsigned specifierLen) {
592    return true;
593  }
594
595    // Scanf-specific handlers.
596
597  virtual bool HandleInvalidScanfConversionSpecifier(
598                                        const analyze_scanf::ScanfSpecifier &FS,
599                                        const char *startSpecifier,
600                                        unsigned specifierLen) {
601    return true;
602  }
603
604  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
605                                    const char *startSpecifier,
606                                    unsigned specifierLen) {
607    return true;
608  }
609
610  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
611};
612
613bool ParsePrintfString(FormatStringHandler &H,
614                       const char *beg, const char *end);
615
616bool ParseScanfString(FormatStringHandler &H,
617                       const char *beg, const char *end);
618
619} // end analyze_format_string namespace
620} // end clang namespace
621#endif
622