FormatString.h revision ba243b59a1074e0962f6abfa3bb9aa984eac1245
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26//===----------------------------------------------------------------------===//
27/// Common components of both fprintf and fscanf format strings.
28namespace analyze_format_string {
29
/// Represents an optional flag of a format specifier: whether it is set,
/// where in the format string it was seen, and its textual representation.
class OptionalFlag {
public:
  /// Creates a cleared flag whose textual form is \p Representation.
  /// The position starts out null until setPosition() records one, so that
  /// getPosition() asserts reliably instead of reading an indeterminate
  /// pointer.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set. Const so that it can be
  /// queried through a const reference.
  bool isSet() const { return flag; }

  /// Marks the flag as set.
  void set() { flag = true; }

  /// Marks the flag as not set. The recorded position is left untouched.
  void clear() { flag = false; }

  /// Records the location of the flag. \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded location; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the textual representation given at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool-like qualities.
  operator bool() const { return flag; }
  OptionalFlag &operator=(const bool &rhs) {
    flag = rhs;
    return *this;
  }

private:
  const char *representation;
  const char *position;
  bool flag;
};
60
/// The length modifier of a printf/scanf directive, together with the
/// location at which it starts in the format string.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,              // 'hh'
    AsShort,             // 'h'
    AsLong,              // 'l'
    AsLongLong,          // 'll', 'q' (BSD, deprecated)
    AsIntMax,            // 'j'
    AsSizeT,             // 'z'
    AsPtrDiff,           // 't'
    AsLongDouble,        // 'L'
    AsWideChar = AsLong  // for '%ls', only makes sense for printf
  };

  /// Creates an absent modifier (kind None, null start).
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start location passed at construction (null by default).
  const char *getStart() const { return Position; }

  /// Returns the number of characters the modifier occupies: 0 for None,
  /// 2 for AsChar and AsLongLong, and 1 for every other kind.
  /// NOTE(review): AsLongLong also stands for the single character 'q' but
  /// is still reported as length 2 here -- confirm callers tolerate that.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Textual form of the modifier; defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
107
108class ConversionSpecifier {
109public:
110  enum Kind {
111    InvalidSpecifier = 0,
112      // C99 conversion specifiers.
113    cArg,
114    dArg,
115    iArg,
116    IntArgBeg = cArg, IntArgEnd = iArg,
117
118    oArg,
119    uArg,
120    xArg,
121    XArg,
122    UIntArgBeg = oArg, UIntArgEnd = XArg,
123
124    fArg,
125    FArg,
126    eArg,
127    EArg,
128    gArg,
129    GArg,
130    aArg,
131    AArg,
132    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
133
134    sArg,
135    pArg,
136    nArg,
137    PercentArg,
138    CArg,
139    SArg,
140
141    // ** Printf-specific **
142
143    // Objective-C specific specifiers.
144    ObjCObjArg,  // '@'
145    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
146
147    // GlibC specific specifiers.
148    PrintErrno,   // 'm'
149
150    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
151
152    // ** Scanf-specific **
153    ScanListArg, // '['
154    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
155  };
156
157  ConversionSpecifier(bool isPrintf)
158    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
159
160  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
161    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
162
163  const char *getStart() const {
164    return Position;
165  }
166
167  StringRef getCharacters() const {
168    return StringRef(getStart(), getLength());
169  }
170
171  bool consumesDataArgument() const {
172    switch (kind) {
173      case PrintErrno:
174        assert(IsPrintf);
175      case PercentArg:
176        return false;
177      default:
178        return true;
179    }
180  }
181
182  Kind getKind() const { return kind; }
183  void setKind(Kind k) { kind = k; }
184  unsigned getLength() const {
185    return EndScanList ? EndScanList - Position : 1;
186  }
187
188  const char *toString() const;
189
190  bool isPrintfKind() const { return IsPrintf; }
191
192protected:
193  bool IsPrintf;
194  const char *Position;
195  const char *EndScanList;
196  Kind kind;
197};
198
199class ArgTypeResult {
200public:
201  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
202              AnyCharTy, CStrTy, WCStrTy, WIntTy };
203private:
204  const Kind K;
205  QualType T;
206  ArgTypeResult(bool) : K(InvalidTy) {}
207public:
208  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
209  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
210  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
211
212  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
213
214  bool isValid() const { return K != InvalidTy; }
215
216  const QualType *getSpecificType() const {
217    return K == SpecificTy ? &T : 0;
218  }
219
220  bool matchesType(ASTContext &C, QualType argTy) const;
221
222  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
223
224  QualType getRepresentativeType(ASTContext &C) const;
225};
226
227class OptionalAmount {
228public:
229  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
230
231  OptionalAmount(HowSpecified howSpecified,
232                 unsigned amount,
233                 const char *amountStart,
234                 unsigned amountLength,
235                 bool usesPositionalArg)
236  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
237  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
238
239  OptionalAmount(bool valid = true)
240  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
241  UsesPositionalArg(0), UsesDotPrefix(0) {}
242
243  bool isInvalid() const {
244    return hs == Invalid;
245  }
246
247  HowSpecified getHowSpecified() const { return hs; }
248  void setHowSpecified(HowSpecified h) { hs = h; }
249
250  bool hasDataArgument() const { return hs == Arg; }
251
252  unsigned getArgIndex() const {
253    assert(hasDataArgument());
254    return amt;
255  }
256
257  unsigned getConstantAmount() const {
258    assert(hs == Constant);
259    return amt;
260  }
261
262  const char *getStart() const {
263      // We include the . character if it is given.
264    return start - UsesDotPrefix;
265  }
266
267  unsigned getConstantLength() const {
268    assert(hs == Constant);
269    return length + UsesDotPrefix;
270  }
271
272  ArgTypeResult getArgType(ASTContext &Ctx) const;
273
274  void toString(raw_ostream &os) const;
275
276  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
277  unsigned getPositionalArgIndex() const {
278    assert(hasDataArgument());
279    return amt + 1;
280  }
281
282  bool usesDotPrefix() const { return UsesDotPrefix; }
283  void setUsesDotPrefix() { UsesDotPrefix = true; }
284
285private:
286  const char *start;
287  unsigned length;
288  HowSpecified hs;
289  unsigned amt;
290  bool UsesPositionalArg : 1;
291  bool UsesDotPrefix;
292};
293
294
295class FormatSpecifier {
296protected:
297  LengthModifier LM;
298  OptionalAmount FieldWidth;
299  ConversionSpecifier CS;
300    /// Positional arguments, an IEEE extension:
301    ///  IEEE Std 1003.1, 2004 Edition
302    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
303  bool UsesPositionalArg;
304  unsigned argIndex;
305public:
306  FormatSpecifier(bool isPrintf)
307    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
308
309  void setLengthModifier(LengthModifier lm) {
310    LM = lm;
311  }
312
313  void setUsesPositionalArg() { UsesPositionalArg = true; }
314
315  void setArgIndex(unsigned i) {
316    argIndex = i;
317  }
318
319  unsigned getArgIndex() const {
320    return argIndex;
321  }
322
323  unsigned getPositionalArgIndex() const {
324    return argIndex + 1;
325  }
326
327  const LengthModifier &getLengthModifier() const {
328    return LM;
329  }
330
331  const OptionalAmount &getFieldWidth() const {
332    return FieldWidth;
333  }
334
335  void setFieldWidth(const OptionalAmount &Amt) {
336    FieldWidth = Amt;
337  }
338
339  bool usesPositionalArg() const { return UsesPositionalArg; }
340
341  bool hasValidLengthModifier() const;
342};
343
344} // end analyze_format_string namespace
345
346//===----------------------------------------------------------------------===//
347/// Pieces specific to fprintf format strings.
348
349namespace analyze_printf {
350
351class PrintfConversionSpecifier :
352  public analyze_format_string::ConversionSpecifier  {
353public:
354  PrintfConversionSpecifier()
355    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
356
357  PrintfConversionSpecifier(const char *pos, Kind k)
358    : ConversionSpecifier(true, pos, k) {}
359
360  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
361  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
362  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
363  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
364                                    kind <= DoubleArgBeg; }
365  unsigned getLength() const {
366      // Conversion specifiers currently only are represented by
367      // single characters, but we be flexible.
368    return 1;
369  }
370
371  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
372    return CS->isPrintfKind();
373  }
374};
375
376using analyze_format_string::ArgTypeResult;
377using analyze_format_string::LengthModifier;
378using analyze_format_string::OptionalAmount;
379using analyze_format_string::OptionalFlag;
380
381class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
382  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
383  OptionalFlag IsLeftJustified; // '-'
384  OptionalFlag HasPlusPrefix; // '+'
385  OptionalFlag HasSpacePrefix; // ' '
386  OptionalFlag HasAlternativeForm; // '#'
387  OptionalFlag HasLeadingZeroes; // '0'
388  OptionalAmount Precision;
389public:
390  PrintfSpecifier() :
391    FormatSpecifier(/* isPrintf = */ true),
392    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
393    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
394
395  static PrintfSpecifier Parse(const char *beg, const char *end);
396
397    // Methods for incrementally constructing the PrintfSpecifier.
398  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
399    CS = cs;
400  }
401  void setHasThousandsGrouping(const char *position) {
402    HasThousandsGrouping = true;
403    HasThousandsGrouping.setPosition(position);
404  }
405  void setIsLeftJustified(const char *position) {
406    IsLeftJustified = true;
407    IsLeftJustified.setPosition(position);
408  }
409  void setHasPlusPrefix(const char *position) {
410    HasPlusPrefix = true;
411    HasPlusPrefix.setPosition(position);
412  }
413  void setHasSpacePrefix(const char *position) {
414    HasSpacePrefix = true;
415    HasSpacePrefix.setPosition(position);
416  }
417  void setHasAlternativeForm(const char *position) {
418    HasAlternativeForm = true;
419    HasAlternativeForm.setPosition(position);
420  }
421  void setHasLeadingZeros(const char *position) {
422    HasLeadingZeroes = true;
423    HasLeadingZeroes.setPosition(position);
424  }
425  void setUsesPositionalArg() { UsesPositionalArg = true; }
426
427    // Methods for querying the format specifier.
428
429  const PrintfConversionSpecifier &getConversionSpecifier() const {
430    return cast<PrintfConversionSpecifier>(CS);
431  }
432
433  void setPrecision(const OptionalAmount &Amt) {
434    Precision = Amt;
435    Precision.setUsesDotPrefix();
436  }
437
438  const OptionalAmount &getPrecision() const {
439    return Precision;
440  }
441
442  bool consumesDataArgument() const {
443    return getConversionSpecifier().consumesDataArgument();
444  }
445
446  /// \brief Returns the builtin type that a data argument
447  /// paired with this format specifier should have.  This method
448  /// will return null if the format specifier does not have
449  /// a matching data argument or the matching argument matches
450  /// more than one type.
451  ArgTypeResult getArgType(ASTContext &Ctx) const;
452
453  const OptionalFlag &hasThousandsGrouping() const {
454      return HasThousandsGrouping;
455  }
456  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
457  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
458  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
459  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
460  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
461  bool usesPositionalArg() const { return UsesPositionalArg; }
462
463    /// Changes the specifier and length according to a QualType, retaining any
464    /// flags or options. Returns true on success, or false when a conversion
465    /// was not successful.
466  bool fixType(QualType QT, const LangOptions &LangOpt);
467
468  void toString(raw_ostream &os) const;
469
470  // Validation methods - to check if any element results in undefined behavior
471  bool hasValidPlusPrefix() const;
472  bool hasValidAlternativeForm() const;
473  bool hasValidLeadingZeros() const;
474  bool hasValidSpacePrefix() const;
475  bool hasValidLeftJustified() const;
476  bool hasValidThousandsGroupingPrefix() const;
477
478  bool hasValidPrecision() const;
479  bool hasValidFieldWidth() const;
480};
481}  // end analyze_printf namespace
482
483//===----------------------------------------------------------------------===//
484/// Pieces specific to fscanf format strings.
485
486namespace analyze_scanf {
487
488class ScanfConversionSpecifier :
489    public analyze_format_string::ConversionSpecifier  {
490public:
491  ScanfConversionSpecifier()
492    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
493
494  ScanfConversionSpecifier(const char *pos, Kind k)
495    : ConversionSpecifier(false, pos, k) {}
496
497  void setEndScanList(const char *pos) { EndScanList = pos; }
498
499  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
500    return !CS->isPrintfKind();
501  }
502};
503
504using analyze_format_string::LengthModifier;
505using analyze_format_string::OptionalAmount;
506using analyze_format_string::OptionalFlag;
507
508class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
509  OptionalFlag SuppressAssignment; // '*'
510public:
511  ScanfSpecifier() :
512    FormatSpecifier(/* isPrintf = */ false),
513    SuppressAssignment("*") {}
514
515  void setSuppressAssignment(const char *position) {
516    SuppressAssignment = true;
517    SuppressAssignment.setPosition(position);
518  }
519
520  const OptionalFlag &getSuppressAssignment() const {
521    return SuppressAssignment;
522  }
523
524  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
525    CS = cs;
526  }
527
528  const ScanfConversionSpecifier &getConversionSpecifier() const {
529    return cast<ScanfConversionSpecifier>(CS);
530  }
531
532  bool consumesDataArgument() const {
533    return CS.consumesDataArgument() && !SuppressAssignment;
534  }
535
536  static ScanfSpecifier Parse(const char *beg, const char *end);
537};
538
539} // end analyze_scanf namespace
540
541//===----------------------------------------------------------------------===//
542// Parsing and processing of format strings (both fprintf and fscanf).
543
544namespace analyze_format_string {
545
/// Distinguishes the two places a position can belong to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
547
548class FormatStringHandler {
549public:
550  FormatStringHandler() {}
551  virtual ~FormatStringHandler();
552
553  virtual void HandleNullChar(const char *nullCharacter) {}
554
555  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
556                                     PositionContext p) {}
557
558  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
559
560  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
561                                         unsigned specifierLen) {}
562
563  // Printf-specific handlers.
564
565  virtual bool HandleInvalidPrintfConversionSpecifier(
566                                      const analyze_printf::PrintfSpecifier &FS,
567                                      const char *startSpecifier,
568                                      unsigned specifierLen) {
569    return true;
570  }
571
572  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
573                                     const char *startSpecifier,
574                                     unsigned specifierLen) {
575    return true;
576  }
577
578    // Scanf-specific handlers.
579
580  virtual bool HandleInvalidScanfConversionSpecifier(
581                                        const analyze_scanf::ScanfSpecifier &FS,
582                                        const char *startSpecifier,
583                                        unsigned specifierLen) {
584    return true;
585  }
586
587  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
588                                    const char *startSpecifier,
589                                    unsigned specifierLen) {
590    return true;
591  }
592
593  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
594};
595
596bool ParsePrintfString(FormatStringHandler &H,
597                       const char *beg, const char *end);
598
599bool ParseScanfString(FormatStringHandler &H,
600                       const char *beg, const char *end);
601
602} // end analyze_format_string namespace
603} // end clang namespace
604#endif
605