FormatString.h revision f3749f4168c5cee59627a681ca4ca6e4116d0761
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll' 70 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 71 AsIntMax, // 'j' 72 AsSizeT, // 'z' 73 AsPtrDiff, // 't' 74 AsLongDouble, // 'L' 75 AsAllocate, // for '%as', GNU extension to C90 scanf 76 AsMAllocate, // for '%ms', GNU extension to scanf 77 AsWideChar = AsLong // for '%ls', only makes sense for printf 78 }; 79 80 LengthModifier() 81 : Position(0), kind(None) {} 82 LengthModifier(const char *pos, Kind k) 83 : Position(pos), kind(k) {} 84 85 const char *getStart() const { 86 return Position; 87 } 88 89 unsigned getLength() const { 90 switch (kind) { 91 default: 92 return 1; 93 case AsLongLong: 94 case AsChar: 95 return 2; 96 case None: 97 return 0; 98 } 99 } 100 101 Kind getKind() const { return kind; } 102 void setKind(Kind k) { kind = k; } 103 104 const char *toString() const; 105 106private: 107 const char *Position; 108 Kind kind; 109}; 110 111class ConversionSpecifier { 112public: 113 enum Kind { 114 InvalidSpecifier = 0, 115 // C99 conversion specifiers. 116 cArg, 117 dArg, 118 iArg, 119 IntArgBeg = cArg, IntArgEnd = iArg, 120 121 oArg, 122 uArg, 123 xArg, 124 XArg, 125 UIntArgBeg = oArg, UIntArgEnd = XArg, 126 127 fArg, 128 FArg, 129 eArg, 130 EArg, 131 gArg, 132 GArg, 133 aArg, 134 AArg, 135 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 136 137 sArg, 138 pArg, 139 nArg, 140 PercentArg, 141 CArg, 142 SArg, 143 144 // ** Printf-specific ** 145 146 // Objective-C specific specifiers. 147 ObjCObjArg, // '@' 148 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 149 150 // GlibC specific specifiers. 151 PrintErrno, // 'm' 152 153 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 154 155 // ** Scanf-specific ** 156 ScanListArg, // '[' 157 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 158 }; 159 160 ConversionSpecifier(bool isPrintf) 161 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 162 163 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 164 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 165 166 const char *getStart() const { 167 return Position; 168 } 169 170 StringRef getCharacters() const { 171 return StringRef(getStart(), getLength()); 172 } 173 174 bool consumesDataArgument() const { 175 switch (kind) { 176 case PrintErrno: 177 assert(IsPrintf); 178 return false; 179 case PercentArg: 180 return false; 181 default: 182 return true; 183 } 184 } 185 186 Kind getKind() const { return kind; } 187 void setKind(Kind k) { kind = k; } 188 unsigned getLength() const { 189 return EndScanList ? EndScanList - Position : 1; 190 } 191 192 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 193 const char *toString() const; 194 195 bool isPrintfKind() const { return IsPrintf; } 196 197protected: 198 bool IsPrintf; 199 const char *Position; 200 const char *EndScanList; 201 Kind kind; 202}; 203 204class ArgType { 205public: 206 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 207 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 208private: 209 const Kind K; 210 QualType T; 211 const char *Name; 212public: 213 ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n) {} 214 ArgType(QualType t, const char *n = 0) : K(SpecificTy), T(t), Name(n) {} 215 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0) {} 216 217 static ArgType Invalid() { return ArgType(InvalidTy); } 218 219 bool isValid() const { return K != InvalidTy; } 220 221 bool matchesType(ASTContext &C, QualType argTy) const; 222 223 QualType getRepresentativeType(ASTContext &C) const; 224 225 std::string getRepresentativeTypeName(ASTContext &C) const; 226}; 227 228class OptionalAmount { 229public: 230 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 231 232 OptionalAmount(HowSpecified howSpecified, 233 unsigned amount, 234 const char *amountStart, 235 unsigned amountLength, 236 bool usesPositionalArg) 237 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 238 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 239 240 OptionalAmount(bool valid = true) 241 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 242 UsesPositionalArg(0), UsesDotPrefix(0) {} 243 244 bool isInvalid() const { 245 return hs == Invalid; 246 } 247 248 HowSpecified getHowSpecified() const { return hs; } 249 void setHowSpecified(HowSpecified h) { hs = h; } 250 251 bool hasDataArgument() const { return hs == Arg; } 252 253 unsigned getArgIndex() const { 254 assert(hasDataArgument()); 255 return amt; 256 } 257 258 unsigned getConstantAmount() const { 259 assert(hs == Constant); 260 return amt; 261 } 262 263 const char *getStart() const { 264 // We include the . character if it is given. 265 return start - UsesDotPrefix; 266 } 267 268 unsigned getConstantLength() const { 269 assert(hs == Constant); 270 return length + UsesDotPrefix; 271 } 272 273 ArgType getArgType(ASTContext &Ctx) const; 274 275 void toString(raw_ostream &os) const; 276 277 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 278 unsigned getPositionalArgIndex() const { 279 assert(hasDataArgument()); 280 return amt + 1; 281 } 282 283 bool usesDotPrefix() const { return UsesDotPrefix; } 284 void setUsesDotPrefix() { UsesDotPrefix = true; } 285 286private: 287 const char *start; 288 unsigned length; 289 HowSpecified hs; 290 unsigned amt; 291 bool UsesPositionalArg : 1; 292 bool UsesDotPrefix; 293}; 294 295 296class FormatSpecifier { 297protected: 298 LengthModifier LM; 299 OptionalAmount FieldWidth; 300 ConversionSpecifier CS; 301 /// Positional arguments, an IEEE extension: 302 /// IEEE Std 1003.1, 2004 Edition 303 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 304 bool UsesPositionalArg; 305 unsigned argIndex; 306public: 307 FormatSpecifier(bool isPrintf) 308 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 309 310 void setLengthModifier(LengthModifier lm) { 311 LM = lm; 312 } 313 314 void setUsesPositionalArg() { UsesPositionalArg = true; } 315 316 void setArgIndex(unsigned i) { 317 argIndex = i; 318 } 319 320 unsigned getArgIndex() const { 321 return argIndex; 322 } 323 324 unsigned getPositionalArgIndex() const { 325 return argIndex + 1; 326 } 327 328 const LengthModifier &getLengthModifier() const { 329 return LM; 330 } 331 332 const OptionalAmount &getFieldWidth() const { 333 return FieldWidth; 334 } 335 336 void setFieldWidth(const OptionalAmount &Amt) { 337 FieldWidth = Amt; 338 } 339 340 bool usesPositionalArg() const { return UsesPositionalArg; } 341 342 bool hasValidLengthModifier() const; 343 344 bool hasStandardLengthModifier() const; 345 346 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 347 348 bool hasStandardLengthConversionCombination() const; 349 350 /// For a TypedefType QT, if it is a named integer type such as size_t, 351 /// assign the appropriate value to LM and return true. 352 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 353}; 354 355} // end analyze_format_string namespace 356 357//===----------------------------------------------------------------------===// 358/// Pieces specific to fprintf format strings. 359 360namespace analyze_printf { 361 362class PrintfConversionSpecifier : 363 public analyze_format_string::ConversionSpecifier { 364public: 365 PrintfConversionSpecifier() 366 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 367 368 PrintfConversionSpecifier(const char *pos, Kind k) 369 : ConversionSpecifier(true, pos, k) {} 370 371 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 372 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 373 bool isDoubleArg() const { return kind >= DoubleArgBeg && 374 kind <= DoubleArgEnd; } 375 unsigned getLength() const { 376 // Conversion specifiers currently only are represented by 377 // single characters, but we be flexible. 378 return 1; 379 } 380 381 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 382 return CS->isPrintfKind(); 383 } 384}; 385 386using analyze_format_string::ArgType; 387using analyze_format_string::LengthModifier; 388using analyze_format_string::OptionalAmount; 389using analyze_format_string::OptionalFlag; 390 391class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 392 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 393 OptionalFlag IsLeftJustified; // '-' 394 OptionalFlag HasPlusPrefix; // '+' 395 OptionalFlag HasSpacePrefix; // ' ' 396 OptionalFlag HasAlternativeForm; // '#' 397 OptionalFlag HasLeadingZeroes; // '0' 398 OptionalAmount Precision; 399public: 400 PrintfSpecifier() : 401 FormatSpecifier(/* isPrintf = */ true), 402 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 403 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 404 405 static PrintfSpecifier Parse(const char *beg, const char *end); 406 407 // Methods for incrementally constructing the PrintfSpecifier. 408 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 409 CS = cs; 410 } 411 void setHasThousandsGrouping(const char *position) { 412 HasThousandsGrouping = true; 413 HasThousandsGrouping.setPosition(position); 414 } 415 void setIsLeftJustified(const char *position) { 416 IsLeftJustified = true; 417 IsLeftJustified.setPosition(position); 418 } 419 void setHasPlusPrefix(const char *position) { 420 HasPlusPrefix = true; 421 HasPlusPrefix.setPosition(position); 422 } 423 void setHasSpacePrefix(const char *position) { 424 HasSpacePrefix = true; 425 HasSpacePrefix.setPosition(position); 426 } 427 void setHasAlternativeForm(const char *position) { 428 HasAlternativeForm = true; 429 HasAlternativeForm.setPosition(position); 430 } 431 void setHasLeadingZeros(const char *position) { 432 HasLeadingZeroes = true; 433 HasLeadingZeroes.setPosition(position); 434 } 435 void setUsesPositionalArg() { UsesPositionalArg = true; } 436 437 // Methods for querying the format specifier. 438 439 const PrintfConversionSpecifier &getConversionSpecifier() const { 440 return cast<PrintfConversionSpecifier>(CS); 441 } 442 443 void setPrecision(const OptionalAmount &Amt) { 444 Precision = Amt; 445 Precision.setUsesDotPrefix(); 446 } 447 448 const OptionalAmount &getPrecision() const { 449 return Precision; 450 } 451 452 bool consumesDataArgument() const { 453 return getConversionSpecifier().consumesDataArgument(); 454 } 455 456 /// \brief Returns the builtin type that a data argument 457 /// paired with this format specifier should have. This method 458 /// will return null if the format specifier does not have 459 /// a matching data argument or the matching argument matches 460 /// more than one type. 461 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 462 463 const OptionalFlag &hasThousandsGrouping() const { 464 return HasThousandsGrouping; 465 } 466 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 467 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 468 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 469 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 470 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 471 bool usesPositionalArg() const { return UsesPositionalArg; } 472 473 /// Changes the specifier and length according to a QualType, retaining any 474 /// flags or options. Returns true on success, or false when a conversion 475 /// was not successful. 476 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 477 bool IsObjCLiteral); 478 479 void toString(raw_ostream &os) const; 480 481 // Validation methods - to check if any element results in undefined behavior 482 bool hasValidPlusPrefix() const; 483 bool hasValidAlternativeForm() const; 484 bool hasValidLeadingZeros() const; 485 bool hasValidSpacePrefix() const; 486 bool hasValidLeftJustified() const; 487 bool hasValidThousandsGroupingPrefix() const; 488 489 bool hasValidPrecision() const; 490 bool hasValidFieldWidth() const; 491}; 492} // end analyze_printf namespace 493 494//===----------------------------------------------------------------------===// 495/// Pieces specific to fscanf format strings. 496 497namespace analyze_scanf { 498 499class ScanfConversionSpecifier : 500 public analyze_format_string::ConversionSpecifier { 501public: 502 ScanfConversionSpecifier() 503 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 504 505 ScanfConversionSpecifier(const char *pos, Kind k) 506 : ConversionSpecifier(false, pos, k) {} 507 508 void setEndScanList(const char *pos) { EndScanList = pos; } 509 510 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 511 return !CS->isPrintfKind(); 512 } 513}; 514 515using analyze_format_string::ArgType; 516using analyze_format_string::LengthModifier; 517using analyze_format_string::OptionalAmount; 518using analyze_format_string::OptionalFlag; 519 520class ScanfArgType : public ArgType { 521public: 522 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeTy }; 523private: 524 Kind K; 525 ArgType A; 526 const char *Name; 527 QualType getRepresentativeType(ASTContext &C) const; 528public: 529 ScanfArgType(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} 530 ScanfArgType(ArgType a, const char *n = 0) 531 : K(PtrToArgTypeTy), A(a), Name(n) { 532 assert(A.isValid()); 533 } 534 535 static ScanfArgType Invalid() { return ScanfArgType(InvalidTy); } 536 537 bool isValid() const { return K != InvalidTy; } 538 539 bool matchesType(ASTContext& C, QualType argTy) const; 540 541 std::string getRepresentativeTypeName(ASTContext& C) const; 542}; 543 544class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 545 OptionalFlag SuppressAssignment; // '*' 546public: 547 ScanfSpecifier() : 548 FormatSpecifier(/* isPrintf = */ false), 549 SuppressAssignment("*") {} 550 551 void setSuppressAssignment(const char *position) { 552 SuppressAssignment = true; 553 SuppressAssignment.setPosition(position); 554 } 555 556 const OptionalFlag &getSuppressAssignment() const { 557 return SuppressAssignment; 558 } 559 560 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 561 CS = cs; 562 } 563 564 const ScanfConversionSpecifier &getConversionSpecifier() const { 565 return cast<ScanfConversionSpecifier>(CS); 566 } 567 568 bool consumesDataArgument() const { 569 return CS.consumesDataArgument() && !SuppressAssignment; 570 } 571 572 ScanfArgType getArgType(ASTContext &Ctx) const; 573 574 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 575 576 void toString(raw_ostream &os) const; 577 578 static ScanfSpecifier Parse(const char *beg, const char *end); 579}; 580 581} // end analyze_scanf namespace 582 583//===----------------------------------------------------------------------===// 584// Parsing and processing of format strings (both fprintf and fscanf). 585 586namespace analyze_format_string { 587 588enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 589 590class FormatStringHandler { 591public: 592 FormatStringHandler() {} 593 virtual ~FormatStringHandler(); 594 595 virtual void HandleNullChar(const char *nullCharacter) {} 596 597 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 598 599 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 600 PositionContext p) {} 601 602 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 603 604 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 605 unsigned specifierLen) {} 606 607 // Printf-specific handlers. 608 609 virtual bool HandleInvalidPrintfConversionSpecifier( 610 const analyze_printf::PrintfSpecifier &FS, 611 const char *startSpecifier, 612 unsigned specifierLen) { 613 return true; 614 } 615 616 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 617 const char *startSpecifier, 618 unsigned specifierLen) { 619 return true; 620 } 621 622 // Scanf-specific handlers. 623 624 virtual bool HandleInvalidScanfConversionSpecifier( 625 const analyze_scanf::ScanfSpecifier &FS, 626 const char *startSpecifier, 627 unsigned specifierLen) { 628 return true; 629 } 630 631 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 632 const char *startSpecifier, 633 unsigned specifierLen) { 634 return true; 635 } 636 637 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 638}; 639 640bool ParsePrintfString(FormatStringHandler &H, 641 const char *beg, const char *end, const LangOptions &LO); 642 643bool ParseScanfString(FormatStringHandler &H, 644 const char *beg, const char *end, const LangOptions &LO); 645 646} // end analyze_format_string namespace 647} // end clang namespace 648#endif 649