FormatString.h revision 5fdc1b993dcb01e8a994fdacfc4eb089832c82e3
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines APIs for analyzing the format strings of printf, fscanf,
11// and friends.
12//
// The structure of format strings for fprintf is described in C99 7.19.6.1.
14//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16//
17//===----------------------------------------------------------------------===//
18
19#ifndef LLVM_CLANG_FORMAT_H
20#define LLVM_CLANG_FORMAT_H
21
22#include "clang/AST/CanonicalType.h"
23
24namespace clang {
25
26class Sema;
27
28//===----------------------------------------------------------------------===//
29/// Common components of both fprintf and fscanf format strings.
30namespace analyze_format_string {
31
/// Class representing optional flags with location and representation
/// information.
///
/// A flag starts out cleared with no recorded position.  The position only
/// becomes available once setPosition() has been called; getPosition()
/// asserts that this has happened.
class OptionalFlag {
public:
  /// \param Representation the textual spelling of the flag.  The pointer is
  /// stored as-is (not copied) and handed back by toString().
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag is currently set.  Const so that it can be
  /// queried through a const reference to the flag.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records the location associated with the flag.  \p position must be
  /// non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the location recorded by setPosition(); asserts if none has
  /// been recorded yet.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the representation string supplied at construction.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation;
  const char *position;  // Null until setPosition() is called.
  bool flag;
};
62
/// Describes the length modifier of a scanf/printf conversion together with
/// the location in the format string where the modifier begins.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll', 'q' (BSD, deprecated)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an empty modifier: kind None and a null start position.
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start position given at construction (null by default).
  const char *getStart() const { return Position; }

  /// Returns the number of characters attributed to the modifier: 0 for
  /// None, 2 for the 'hh' and 'll' kinds, and 1 for every other kind.
  ///
  /// NOTE(review): the Kind comments say 'q' is also represented as
  /// AsLongLong, yet 'q' is a single character while this reports 2 --
  /// confirm that callers account for that.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  /// Defined out of line.
  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
109
110class ConversionSpecifier {
111public:
112  enum Kind {
113    InvalidSpecifier = 0,
114      // C99 conversion specifiers.
115    cArg,
116    dArg,
117    iArg,
118    IntArgBeg = cArg, IntArgEnd = iArg,
119
120    oArg,
121    uArg,
122    xArg,
123    XArg,
124    UIntArgBeg = oArg, UIntArgEnd = XArg,
125
126    fArg,
127    FArg,
128    eArg,
129    EArg,
130    gArg,
131    GArg,
132    aArg,
133    AArg,
134    DoubleArgBeg = fArg, DoubleArgEnd = AArg,
135
136    sArg,
137    pArg,
138    nArg,
139    PercentArg,
140    CArg,
141    SArg,
142
143    // ** Printf-specific **
144
145    // Objective-C specific specifiers.
146    ObjCObjArg,  // '@'
147    ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
148
149    // GlibC specific specifiers.
150    PrintErrno,   // 'm'
151
152    PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
153
154    // ** Scanf-specific **
155    ScanListArg, // '['
156    ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
157  };
158
159  ConversionSpecifier(bool isPrintf)
160    : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
161
162  ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
163    : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
164
165  const char *getStart() const {
166    return Position;
167  }
168
169  StringRef getCharacters() const {
170    return StringRef(getStart(), getLength());
171  }
172
173  bool consumesDataArgument() const {
174    switch (kind) {
175      case PrintErrno:
176        assert(IsPrintf);
177      case PercentArg:
178        return false;
179      default:
180        return true;
181    }
182  }
183
184  Kind getKind() const { return kind; }
185  void setKind(Kind k) { kind = k; }
186  unsigned getLength() const {
187    return EndScanList ? EndScanList - Position : 1;
188  }
189
190  const char *toString() const;
191
192  bool isPrintfKind() const { return IsPrintf; }
193
194protected:
195  bool IsPrintf;
196  const char *Position;
197  const char *EndScanList;
198  Kind kind;
199};
200
201class ArgTypeResult {
202public:
203  enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
204              AnyCharTy, CStrTy, WCStrTy, WIntTy };
205private:
206  const Kind K;
207  QualType T;
208  ArgTypeResult(bool) : K(InvalidTy) {}
209public:
210  ArgTypeResult(Kind k = UnknownTy) : K(k) {}
211  ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
212  ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
213
214  static ArgTypeResult Invalid() { return ArgTypeResult(true); }
215
216  bool isValid() const { return K != InvalidTy; }
217
218  const QualType *getSpecificType() const {
219    return K == SpecificTy ? &T : 0;
220  }
221
222  bool matchesType(ASTContext &C, QualType argTy) const;
223
224  bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
225
226  QualType getRepresentativeType(ASTContext &C) const;
227};
228
229class OptionalAmount {
230public:
231  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
232
233  OptionalAmount(HowSpecified howSpecified,
234                 unsigned amount,
235                 const char *amountStart,
236                 unsigned amountLength,
237                 bool usesPositionalArg)
238  : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
239  UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
240
241  OptionalAmount(bool valid = true)
242  : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
243  UsesPositionalArg(0), UsesDotPrefix(0) {}
244
245  bool isInvalid() const {
246    return hs == Invalid;
247  }
248
249  HowSpecified getHowSpecified() const { return hs; }
250  void setHowSpecified(HowSpecified h) { hs = h; }
251
252  bool hasDataArgument() const { return hs == Arg; }
253
254  unsigned getArgIndex() const {
255    assert(hasDataArgument());
256    return amt;
257  }
258
259  unsigned getConstantAmount() const {
260    assert(hs == Constant);
261    return amt;
262  }
263
264  const char *getStart() const {
265      // We include the . character if it is given.
266    return start - UsesDotPrefix;
267  }
268
269  unsigned getConstantLength() const {
270    assert(hs == Constant);
271    return length + UsesDotPrefix;
272  }
273
274  ArgTypeResult getArgType(ASTContext &Ctx) const;
275
276  void toString(raw_ostream &os) const;
277
278  bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
279  unsigned getPositionalArgIndex() const {
280    assert(hasDataArgument());
281    return amt + 1;
282  }
283
284  bool usesDotPrefix() const { return UsesDotPrefix; }
285  void setUsesDotPrefix() { UsesDotPrefix = true; }
286
287private:
288  const char *start;
289  unsigned length;
290  HowSpecified hs;
291  unsigned amt;
292  bool UsesPositionalArg : 1;
293  bool UsesDotPrefix;
294};
295
296
297class FormatSpecifier {
298protected:
299  LengthModifier LM;
300  OptionalAmount FieldWidth;
301  ConversionSpecifier CS;
302    /// Positional arguments, an IEEE extension:
303    ///  IEEE Std 1003.1, 2004 Edition
304    ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
305  bool UsesPositionalArg;
306  unsigned argIndex;
307public:
308  FormatSpecifier(bool isPrintf)
309    : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
310
311  void setLengthModifier(LengthModifier lm) {
312    LM = lm;
313  }
314
315  void setUsesPositionalArg() { UsesPositionalArg = true; }
316
317  void setArgIndex(unsigned i) {
318    argIndex = i;
319  }
320
321  unsigned getArgIndex() const {
322    return argIndex;
323  }
324
325  unsigned getPositionalArgIndex() const {
326    return argIndex + 1;
327  }
328
329  const LengthModifier &getLengthModifier() const {
330    return LM;
331  }
332
333  const OptionalAmount &getFieldWidth() const {
334    return FieldWidth;
335  }
336
337  void setFieldWidth(const OptionalAmount &Amt) {
338    FieldWidth = Amt;
339  }
340
341  bool usesPositionalArg() const { return UsesPositionalArg; }
342
343  bool hasValidLengthModifier() const;
344};
345
346} // end analyze_format_string namespace
347
348//===----------------------------------------------------------------------===//
349/// Pieces specific to fprintf format strings.
350
351namespace analyze_printf {
352
353class PrintfConversionSpecifier :
354  public analyze_format_string::ConversionSpecifier  {
355public:
356  PrintfConversionSpecifier()
357    : ConversionSpecifier(true, 0, InvalidSpecifier) {}
358
359  PrintfConversionSpecifier(const char *pos, Kind k)
360    : ConversionSpecifier(true, pos, k) {}
361
362  bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
363  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
364  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
365  bool isDoubleArg() const { return kind >= DoubleArgBeg &&
366                                    kind <= DoubleArgBeg; }
367  unsigned getLength() const {
368      // Conversion specifiers currently only are represented by
369      // single characters, but we be flexible.
370    return 1;
371  }
372
373  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
374    return CS->isPrintfKind();
375  }
376};
377
378using analyze_format_string::ArgTypeResult;
379using analyze_format_string::LengthModifier;
380using analyze_format_string::OptionalAmount;
381using analyze_format_string::OptionalFlag;
382
383class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
384  OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
385  OptionalFlag IsLeftJustified; // '-'
386  OptionalFlag HasPlusPrefix; // '+'
387  OptionalFlag HasSpacePrefix; // ' '
388  OptionalFlag HasAlternativeForm; // '#'
389  OptionalFlag HasLeadingZeroes; // '0'
390  OptionalAmount Precision;
391public:
392  PrintfSpecifier() :
393    FormatSpecifier(/* isPrintf = */ true),
394    HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
395    HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
396
397  static PrintfSpecifier Parse(const char *beg, const char *end);
398
399    // Methods for incrementally constructing the PrintfSpecifier.
400  void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
401    CS = cs;
402  }
403  void setHasThousandsGrouping(const char *position) {
404    HasThousandsGrouping = true;
405    HasThousandsGrouping.setPosition(position);
406  }
407  void setIsLeftJustified(const char *position) {
408    IsLeftJustified = true;
409    IsLeftJustified.setPosition(position);
410  }
411  void setHasPlusPrefix(const char *position) {
412    HasPlusPrefix = true;
413    HasPlusPrefix.setPosition(position);
414  }
415  void setHasSpacePrefix(const char *position) {
416    HasSpacePrefix = true;
417    HasSpacePrefix.setPosition(position);
418  }
419  void setHasAlternativeForm(const char *position) {
420    HasAlternativeForm = true;
421    HasAlternativeForm.setPosition(position);
422  }
423  void setHasLeadingZeros(const char *position) {
424    HasLeadingZeroes = true;
425    HasLeadingZeroes.setPosition(position);
426  }
427  void setUsesPositionalArg() { UsesPositionalArg = true; }
428
429    // Methods for querying the format specifier.
430
431  const PrintfConversionSpecifier &getConversionSpecifier() const {
432    return cast<PrintfConversionSpecifier>(CS);
433  }
434
435  void setPrecision(const OptionalAmount &Amt) {
436    Precision = Amt;
437    Precision.setUsesDotPrefix();
438  }
439
440  const OptionalAmount &getPrecision() const {
441    return Precision;
442  }
443
444  bool consumesDataArgument() const {
445    return getConversionSpecifier().consumesDataArgument();
446  }
447
448  /// \brief Returns the builtin type that a data argument
449  /// paired with this format specifier should have.  This method
450  /// will return null if the format specifier does not have
451  /// a matching data argument or the matching argument matches
452  /// more than one type.
453  ArgTypeResult getArgType(Sema &S) const;
454
455  const OptionalFlag &hasThousandsGrouping() const {
456      return HasThousandsGrouping;
457  }
458  const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
459  const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
460  const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
461  const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
462  const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
463  bool usesPositionalArg() const { return UsesPositionalArg; }
464
465    /// Changes the specifier and length according to a QualType, retaining any
466    /// flags or options. Returns true on success, or false when a conversion
467    /// was not successful.
468  bool fixType(QualType QT, const LangOptions &LangOpt);
469
470  void toString(raw_ostream &os) const;
471
472  // Validation methods - to check if any element results in undefined behavior
473  bool hasValidPlusPrefix() const;
474  bool hasValidAlternativeForm() const;
475  bool hasValidLeadingZeros() const;
476  bool hasValidSpacePrefix() const;
477  bool hasValidLeftJustified() const;
478  bool hasValidThousandsGroupingPrefix() const;
479
480  bool hasValidPrecision() const;
481  bool hasValidFieldWidth() const;
482};
483}  // end analyze_printf namespace
484
485//===----------------------------------------------------------------------===//
486/// Pieces specific to fscanf format strings.
487
488namespace analyze_scanf {
489
490class ScanfConversionSpecifier :
491    public analyze_format_string::ConversionSpecifier  {
492public:
493  ScanfConversionSpecifier()
494    : ConversionSpecifier(false, 0, InvalidSpecifier) {}
495
496  ScanfConversionSpecifier(const char *pos, Kind k)
497    : ConversionSpecifier(false, pos, k) {}
498
499  void setEndScanList(const char *pos) { EndScanList = pos; }
500
501  static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
502    return !CS->isPrintfKind();
503  }
504};
505
506using analyze_format_string::LengthModifier;
507using analyze_format_string::OptionalAmount;
508using analyze_format_string::OptionalFlag;
509
510class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
511  OptionalFlag SuppressAssignment; // '*'
512public:
513  ScanfSpecifier() :
514    FormatSpecifier(/* isPrintf = */ false),
515    SuppressAssignment("*") {}
516
517  void setSuppressAssignment(const char *position) {
518    SuppressAssignment = true;
519    SuppressAssignment.setPosition(position);
520  }
521
522  const OptionalFlag &getSuppressAssignment() const {
523    return SuppressAssignment;
524  }
525
526  void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
527    CS = cs;
528  }
529
530  const ScanfConversionSpecifier &getConversionSpecifier() const {
531    return cast<ScanfConversionSpecifier>(CS);
532  }
533
534  bool consumesDataArgument() const {
535    return CS.consumesDataArgument() && !SuppressAssignment;
536  }
537
538  static ScanfSpecifier Parse(const char *beg, const char *end);
539};
540
541} // end analyze_scanf namespace
542
543//===----------------------------------------------------------------------===//
544// Parsing and processing of format strings (both fprintf and fscanf).
545
546namespace analyze_format_string {
547
/// Passed to FormatStringHandler::HandleInvalidPosition.  Judging by the
/// enumerator names it tells the handler whether the offending position
/// belongs to a field width or to a precision -- confirm against the parser.
enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
549
550class FormatStringHandler {
551public:
552  FormatStringHandler() {}
553  virtual ~FormatStringHandler();
554
555  virtual void HandleNullChar(const char *nullCharacter) {}
556
557  virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
558                                     PositionContext p) {}
559
560  virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
561
562  virtual void HandleIncompleteSpecifier(const char *startSpecifier,
563                                         unsigned specifierLen) {}
564
565  // Printf-specific handlers.
566
567  virtual bool HandleInvalidPrintfConversionSpecifier(
568                                      const analyze_printf::PrintfSpecifier &FS,
569                                      const char *startSpecifier,
570                                      unsigned specifierLen) {
571    return true;
572  }
573
574  virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
575                                     const char *startSpecifier,
576                                     unsigned specifierLen) {
577    return true;
578  }
579
580    // Scanf-specific handlers.
581
582  virtual bool HandleInvalidScanfConversionSpecifier(
583                                        const analyze_scanf::ScanfSpecifier &FS,
584                                        const char *startSpecifier,
585                                        unsigned specifierLen) {
586    return true;
587  }
588
589  virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
590                                    const char *startSpecifier,
591                                    unsigned specifierLen) {
592    return true;
593  }
594
595  virtual void HandleIncompleteScanList(const char *start, const char *end) {}
596};
597
/// Parses the printf-style format string delimited by \p beg and \p end,
/// reporting to the handler \p H.  Defined out of line.
/// NOTE(review): the meaning of the bool result is not visible in this
/// header -- confirm against the definition.
bool ParsePrintfString(FormatStringHandler &H,
                       const char *beg, const char *end);

/// Parses the scanf-style format string delimited by \p beg and \p end,
/// reporting to the handler \p H.  Defined out of line.
/// NOTE(review): the meaning of the bool result is not visible in this
/// header -- confirm against the definition.
bool ParseScanfString(FormatStringHandler &H,
                       const char *beg, const char *end);
603
604} // end analyze_format_string namespace
605} // end clang namespace
606#endif
607