FormatString.h revision 4684778993c667246039b4664acbce59dc99440c
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll' 70 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 71 AsIntMax, // 'j' 72 AsSizeT, // 'z' 73 AsPtrDiff, // 't' 74 AsLongDouble, // 'L' 75 AsAllocate, // for '%as', GNU extension to C90 scanf 76 AsMAllocate, // for '%ms', GNU extension to scanf 77 AsWideChar = AsLong // for '%ls', only makes sense for printf 78 }; 79 80 LengthModifier() 81 : Position(0), kind(None) {} 82 LengthModifier(const char *pos, Kind k) 83 : Position(pos), kind(k) {} 84 85 const char *getStart() const { 86 return Position; 87 } 88 89 unsigned getLength() const { 90 switch (kind) { 91 default: 92 return 1; 93 case AsLongLong: 94 case AsChar: 95 return 2; 96 case None: 97 return 0; 98 } 99 } 100 101 Kind getKind() const { return kind; } 102 void setKind(Kind k) { kind = k; } 103 104 const char *toString() const; 105 106private: 107 const char *Position; 108 Kind kind; 109}; 110 111class ConversionSpecifier { 112public: 113 enum Kind { 114 InvalidSpecifier = 0, 115 // C99 conversion specifiers. 116 cArg, 117 dArg, 118 iArg, 119 IntArgBeg = cArg, IntArgEnd = iArg, 120 121 oArg, 122 uArg, 123 xArg, 124 XArg, 125 UIntArgBeg = oArg, UIntArgEnd = XArg, 126 127 fArg, 128 FArg, 129 eArg, 130 EArg, 131 gArg, 132 GArg, 133 aArg, 134 AArg, 135 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 136 137 sArg, 138 pArg, 139 nArg, 140 PercentArg, 141 CArg, 142 SArg, 143 144 // ** Printf-specific ** 145 146 // Objective-C specific specifiers. 147 ObjCObjArg, // '@' 148 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 149 150 // GlibC specific specifiers. 151 PrintErrno, // 'm' 152 153 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 154 155 // ** Scanf-specific ** 156 ScanListArg, // '[' 157 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 158 }; 159 160 ConversionSpecifier(bool isPrintf) 161 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 162 163 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 164 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 165 166 const char *getStart() const { 167 return Position; 168 } 169 170 StringRef getCharacters() const { 171 return StringRef(getStart(), getLength()); 172 } 173 174 bool consumesDataArgument() const { 175 switch (kind) { 176 case PrintErrno: 177 assert(IsPrintf); 178 return false; 179 case PercentArg: 180 return false; 181 default: 182 return true; 183 } 184 } 185 186 Kind getKind() const { return kind; } 187 void setKind(Kind k) { kind = k; } 188 unsigned getLength() const { 189 return EndScanList ? EndScanList - Position : 1; 190 } 191 192 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 193 const char *toString() const; 194 195 bool isPrintfKind() const { return IsPrintf; } 196 197protected: 198 bool IsPrintf; 199 const char *Position; 200 const char *EndScanList; 201 Kind kind; 202}; 203 204class ArgTypeResult { 205public: 206 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 207 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 208private: 209 const Kind K; 210 QualType T; 211 const char *Name; 212 ArgTypeResult(bool) : K(InvalidTy), Name(0) {} 213public: 214 ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {} 215 ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {} 216 ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {} 217 ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n) {} 218 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {} 219 220 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 221 222 bool isValid() const { return K != InvalidTy; } 223 224 const QualType *getSpecificType() const { 225 return K == SpecificTy ? &T : 0; 226 } 227 228 bool matchesType(ASTContext &C, QualType argTy) const; 229 230 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 231 232 QualType getRepresentativeType(ASTContext &C) const; 233 234 std::string getRepresentativeTypeName(ASTContext &C) const; 235}; 236 237class OptionalAmount { 238public: 239 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 240 241 OptionalAmount(HowSpecified howSpecified, 242 unsigned amount, 243 const char *amountStart, 244 unsigned amountLength, 245 bool usesPositionalArg) 246 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 247 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 248 249 OptionalAmount(bool valid = true) 250 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 251 UsesPositionalArg(0), UsesDotPrefix(0) {} 252 253 bool isInvalid() const { 254 return hs == Invalid; 255 } 256 257 HowSpecified getHowSpecified() const { return hs; } 258 void setHowSpecified(HowSpecified h) { hs = h; } 259 260 bool hasDataArgument() const { return hs == Arg; } 261 262 unsigned getArgIndex() const { 263 assert(hasDataArgument()); 264 return amt; 265 } 266 267 unsigned getConstantAmount() const { 268 assert(hs == Constant); 269 return amt; 270 } 271 272 const char *getStart() const { 273 // We include the . character if it is given. 274 return start - UsesDotPrefix; 275 } 276 277 unsigned getConstantLength() const { 278 assert(hs == Constant); 279 return length + UsesDotPrefix; 280 } 281 282 ArgTypeResult getArgType(ASTContext &Ctx) const; 283 284 void toString(raw_ostream &os) const; 285 286 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 287 unsigned getPositionalArgIndex() const { 288 assert(hasDataArgument()); 289 return amt + 1; 290 } 291 292 bool usesDotPrefix() const { return UsesDotPrefix; } 293 void setUsesDotPrefix() { UsesDotPrefix = true; } 294 295private: 296 const char *start; 297 unsigned length; 298 HowSpecified hs; 299 unsigned amt; 300 bool UsesPositionalArg : 1; 301 bool UsesDotPrefix; 302}; 303 304 305class FormatSpecifier { 306protected: 307 LengthModifier LM; 308 OptionalAmount FieldWidth; 309 ConversionSpecifier CS; 310 /// Positional arguments, an IEEE extension: 311 /// IEEE Std 1003.1, 2004 Edition 312 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 313 bool UsesPositionalArg; 314 unsigned argIndex; 315public: 316 FormatSpecifier(bool isPrintf) 317 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 318 319 void setLengthModifier(LengthModifier lm) { 320 LM = lm; 321 } 322 323 void setUsesPositionalArg() { UsesPositionalArg = true; } 324 325 void setArgIndex(unsigned i) { 326 argIndex = i; 327 } 328 329 unsigned getArgIndex() const { 330 return argIndex; 331 } 332 333 unsigned getPositionalArgIndex() const { 334 return argIndex + 1; 335 } 336 337 const LengthModifier &getLengthModifier() const { 338 return LM; 339 } 340 341 const OptionalAmount &getFieldWidth() const { 342 return FieldWidth; 343 } 344 345 void setFieldWidth(const OptionalAmount &Amt) { 346 FieldWidth = Amt; 347 } 348 349 bool usesPositionalArg() const { return UsesPositionalArg; } 350 351 bool hasValidLengthModifier() const; 352 353 bool hasStandardLengthModifier() const; 354 355 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 356 357 bool hasStandardLengthConversionCombination() const; 358 359 /// For a TypedefType QT, if it is a named integer type such as size_t, 360 /// assign the appropriate value to LM and return true. 361 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 362}; 363 364} // end analyze_format_string namespace 365 366//===----------------------------------------------------------------------===// 367/// Pieces specific to fprintf format strings. 368 369namespace analyze_printf { 370 371class PrintfConversionSpecifier : 372 public analyze_format_string::ConversionSpecifier { 373public: 374 PrintfConversionSpecifier() 375 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 376 377 PrintfConversionSpecifier(const char *pos, Kind k) 378 : ConversionSpecifier(true, pos, k) {} 379 380 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 381 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 382 bool isDoubleArg() const { return kind >= DoubleArgBeg && 383 kind <= DoubleArgEnd; } 384 unsigned getLength() const { 385 // Conversion specifiers currently only are represented by 386 // single characters, but we be flexible. 387 return 1; 388 } 389 390 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 391 return CS->isPrintfKind(); 392 } 393}; 394 395using analyze_format_string::ArgTypeResult; 396using analyze_format_string::LengthModifier; 397using analyze_format_string::OptionalAmount; 398using analyze_format_string::OptionalFlag; 399 400class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 401 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 402 OptionalFlag IsLeftJustified; // '-' 403 OptionalFlag HasPlusPrefix; // '+' 404 OptionalFlag HasSpacePrefix; // ' ' 405 OptionalFlag HasAlternativeForm; // '#' 406 OptionalFlag HasLeadingZeroes; // '0' 407 OptionalAmount Precision; 408public: 409 PrintfSpecifier() : 410 FormatSpecifier(/* isPrintf = */ true), 411 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 412 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 413 414 static PrintfSpecifier Parse(const char *beg, const char *end); 415 416 // Methods for incrementally constructing the PrintfSpecifier. 417 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 418 CS = cs; 419 } 420 void setHasThousandsGrouping(const char *position) { 421 HasThousandsGrouping = true; 422 HasThousandsGrouping.setPosition(position); 423 } 424 void setIsLeftJustified(const char *position) { 425 IsLeftJustified = true; 426 IsLeftJustified.setPosition(position); 427 } 428 void setHasPlusPrefix(const char *position) { 429 HasPlusPrefix = true; 430 HasPlusPrefix.setPosition(position); 431 } 432 void setHasSpacePrefix(const char *position) { 433 HasSpacePrefix = true; 434 HasSpacePrefix.setPosition(position); 435 } 436 void setHasAlternativeForm(const char *position) { 437 HasAlternativeForm = true; 438 HasAlternativeForm.setPosition(position); 439 } 440 void setHasLeadingZeros(const char *position) { 441 HasLeadingZeroes = true; 442 HasLeadingZeroes.setPosition(position); 443 } 444 void setUsesPositionalArg() { UsesPositionalArg = true; } 445 446 // Methods for querying the format specifier. 447 448 const PrintfConversionSpecifier &getConversionSpecifier() const { 449 return cast<PrintfConversionSpecifier>(CS); 450 } 451 452 void setPrecision(const OptionalAmount &Amt) { 453 Precision = Amt; 454 Precision.setUsesDotPrefix(); 455 } 456 457 const OptionalAmount &getPrecision() const { 458 return Precision; 459 } 460 461 bool consumesDataArgument() const { 462 return getConversionSpecifier().consumesDataArgument(); 463 } 464 465 /// \brief Returns the builtin type that a data argument 466 /// paired with this format specifier should have. This method 467 /// will return null if the format specifier does not have 468 /// a matching data argument or the matching argument matches 469 /// more than one type. 470 ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 471 472 const OptionalFlag &hasThousandsGrouping() const { 473 return HasThousandsGrouping; 474 } 475 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 476 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 477 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 478 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 479 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 480 bool usesPositionalArg() const { return UsesPositionalArg; } 481 482 /// Changes the specifier and length according to a QualType, retaining any 483 /// flags or options. Returns true on success, or false when a conversion 484 /// was not successful. 485 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 486 bool IsObjCLiteral); 487 488 void toString(raw_ostream &os) const; 489 490 // Validation methods - to check if any element results in undefined behavior 491 bool hasValidPlusPrefix() const; 492 bool hasValidAlternativeForm() const; 493 bool hasValidLeadingZeros() const; 494 bool hasValidSpacePrefix() const; 495 bool hasValidLeftJustified() const; 496 bool hasValidThousandsGroupingPrefix() const; 497 498 bool hasValidPrecision() const; 499 bool hasValidFieldWidth() const; 500}; 501} // end analyze_printf namespace 502 503//===----------------------------------------------------------------------===// 504/// Pieces specific to fscanf format strings. 505 506namespace analyze_scanf { 507 508class ScanfConversionSpecifier : 509 public analyze_format_string::ConversionSpecifier { 510public: 511 ScanfConversionSpecifier() 512 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 513 514 ScanfConversionSpecifier(const char *pos, Kind k) 515 : ConversionSpecifier(false, pos, k) {} 516 517 void setEndScanList(const char *pos) { EndScanList = pos; } 518 519 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 520 return !CS->isPrintfKind(); 521 } 522}; 523 524using analyze_format_string::ArgTypeResult; 525using analyze_format_string::LengthModifier; 526using analyze_format_string::OptionalAmount; 527using analyze_format_string::OptionalFlag; 528 529class ScanfArgTypeResult : public ArgTypeResult { 530public: 531 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy }; 532private: 533 Kind K; 534 ArgTypeResult A; 535 const char *Name; 536 QualType getRepresentativeType(ASTContext &C) const; 537public: 538 ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} 539 ScanfArgTypeResult(ArgTypeResult a, const char *n = 0) 540 : K(PtrToArgTypeResultTy), A(a), Name(n) { 541 assert(A.isValid()); 542 } 543 544 static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); } 545 546 bool isValid() const { return K != InvalidTy; } 547 548 bool matchesType(ASTContext& C, QualType argTy) const; 549 550 std::string getRepresentativeTypeName(ASTContext& C) const; 551}; 552 553class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 554 OptionalFlag SuppressAssignment; // '*' 555public: 556 ScanfSpecifier() : 557 FormatSpecifier(/* isPrintf = */ false), 558 SuppressAssignment("*") {} 559 560 void setSuppressAssignment(const char *position) { 561 SuppressAssignment = true; 562 SuppressAssignment.setPosition(position); 563 } 564 565 const OptionalFlag &getSuppressAssignment() const { 566 return SuppressAssignment; 567 } 568 569 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 570 CS = cs; 571 } 572 573 const ScanfConversionSpecifier &getConversionSpecifier() const { 574 return cast<ScanfConversionSpecifier>(CS); 575 } 576 577 bool consumesDataArgument() const { 578 return CS.consumesDataArgument() && !SuppressAssignment; 579 } 580 581 ScanfArgTypeResult getArgType(ASTContext &Ctx) const; 582 583 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 584 585 void toString(raw_ostream &os) const; 586 587 static ScanfSpecifier Parse(const char *beg, const char *end); 588}; 589 590} // end analyze_scanf namespace 591 592//===----------------------------------------------------------------------===// 593// Parsing and processing of format strings (both fprintf and fscanf). 594 595namespace analyze_format_string { 596 597enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 598 599class FormatStringHandler { 600public: 601 FormatStringHandler() {} 602 virtual ~FormatStringHandler(); 603 604 virtual void HandleNullChar(const char *nullCharacter) {} 605 606 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 607 608 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 609 PositionContext p) {} 610 611 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 612 613 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 614 unsigned specifierLen) {} 615 616 // Printf-specific handlers. 617 618 virtual bool HandleInvalidPrintfConversionSpecifier( 619 const analyze_printf::PrintfSpecifier &FS, 620 const char *startSpecifier, 621 unsigned specifierLen) { 622 return true; 623 } 624 625 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 626 const char *startSpecifier, 627 unsigned specifierLen) { 628 return true; 629 } 630 631 // Scanf-specific handlers. 632 633 virtual bool HandleInvalidScanfConversionSpecifier( 634 const analyze_scanf::ScanfSpecifier &FS, 635 const char *startSpecifier, 636 unsigned specifierLen) { 637 return true; 638 } 639 640 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 641 const char *startSpecifier, 642 unsigned specifierLen) { 643 return true; 644 } 645 646 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 647}; 648 649bool ParsePrintfString(FormatStringHandler &H, 650 const char *beg, const char *end, const LangOptions &LO); 651 652bool ParseScanfString(FormatStringHandler &H, 653 const char *beg, const char *end, const LangOptions &LO); 654 655} // end analyze_format_string namespace 656} // end clang namespace 657#endif 658