// FormatString.h revision d39d23e610c2a7815515d60c5a538d65d05e8bdc
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 
62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll' 70 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 71 AsIntMax, // 'j' 72 AsSizeT, // 'z' 73 AsPtrDiff, // 't' 74 AsLongDouble, // 'L' 75 AsAllocate, // for '%as', GNU extension to C90 scanf 76 AsMAllocate, // for '%ms', GNU extension to scanf 77 AsWideChar = AsLong // for '%ls', only makes sense for printf 78 }; 79 80 LengthModifier() 81 : Position(0), kind(None) {} 82 LengthModifier(const char *pos, Kind k) 83 : Position(pos), kind(k) {} 84 85 const char *getStart() const { 86 return Position; 87 } 88 89 unsigned getLength() const { 90 switch (kind) { 91 default: 92 return 1; 93 case AsLongLong: 94 case AsChar: 95 return 2; 96 case None: 97 return 0; 98 } 99 } 100 101 Kind getKind() const { return kind; } 102 void setKind(Kind k) { kind = k; } 103 104 const char *toString() const; 105 106private: 107 const char *Position; 108 Kind kind; 109}; 110 111class ConversionSpecifier { 112public: 113 enum Kind { 114 InvalidSpecifier = 0, 115 // C99 conversion specifiers. 116 cArg, 117 dArg, 118 iArg, 119 IntArgBeg = cArg, IntArgEnd = iArg, 120 121 oArg, 122 uArg, 123 xArg, 124 XArg, 125 UIntArgBeg = oArg, UIntArgEnd = XArg, 126 127 fArg, 128 FArg, 129 eArg, 130 EArg, 131 gArg, 132 GArg, 133 aArg, 134 AArg, 135 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 136 137 sArg, 138 pArg, 139 nArg, 140 PercentArg, 141 CArg, 142 SArg, 143 144 // ** Printf-specific ** 145 146 // Objective-C specific specifiers. 147 ObjCObjArg, // '@' 148 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 149 150 // GlibC specific specifiers. 
151 PrintErrno, // 'm' 152 153 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 154 155 // ** Scanf-specific ** 156 ScanListArg, // '[' 157 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 158 }; 159 160 ConversionSpecifier(bool isPrintf) 161 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 162 163 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 164 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 165 166 const char *getStart() const { 167 return Position; 168 } 169 170 StringRef getCharacters() const { 171 return StringRef(getStart(), getLength()); 172 } 173 174 bool consumesDataArgument() const { 175 switch (kind) { 176 case PrintErrno: 177 assert(IsPrintf); 178 return false; 179 case PercentArg: 180 return false; 181 default: 182 return true; 183 } 184 } 185 186 Kind getKind() const { return kind; } 187 void setKind(Kind k) { kind = k; } 188 unsigned getLength() const { 189 return EndScanList ? EndScanList - Position : 1; 190 } 191 192 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 193 const char *toString() const; 194 195 bool isPrintfKind() const { return IsPrintf; } 196 197protected: 198 bool IsPrintf; 199 const char *Position; 200 const char *EndScanList; 201 Kind kind; 202}; 203 204class ArgTypeResult { 205public: 206 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 207 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 208private: 209 const Kind K; 210 QualType T; 211 const char *Name; 212 ArgTypeResult(bool) : K(InvalidTy), Name(0) {} 213public: 214 ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {} 215 ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {} 216 ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {} 217 ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n) {} 218 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {} 219 220 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 221 222 bool 
isValid() const { return K != InvalidTy; } 223 224 const QualType *getSpecificType() const { 225 return K == SpecificTy ? &T : 0; 226 } 227 228 bool matchesType(ASTContext &C, QualType argTy) const; 229 230 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 231 232 QualType getRepresentativeType(ASTContext &C) const; 233 234 std::string getRepresentativeTypeName(ASTContext &C) const; 235}; 236 237class OptionalAmount { 238public: 239 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 240 241 OptionalAmount(HowSpecified howSpecified, 242 unsigned amount, 243 const char *amountStart, 244 unsigned amountLength, 245 bool usesPositionalArg) 246 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 247 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 248 249 OptionalAmount(bool valid = true) 250 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 251 UsesPositionalArg(0), UsesDotPrefix(0) {} 252 253 bool isInvalid() const { 254 return hs == Invalid; 255 } 256 257 HowSpecified getHowSpecified() const { return hs; } 258 void setHowSpecified(HowSpecified h) { hs = h; } 259 260 bool hasDataArgument() const { return hs == Arg; } 261 262 unsigned getArgIndex() const { 263 assert(hasDataArgument()); 264 return amt; 265 } 266 267 unsigned getConstantAmount() const { 268 assert(hs == Constant); 269 return amt; 270 } 271 272 const char *getStart() const { 273 // We include the . character if it is given. 
274 return start - UsesDotPrefix; 275 } 276 277 unsigned getConstantLength() const { 278 assert(hs == Constant); 279 return length + UsesDotPrefix; 280 } 281 282 ArgTypeResult getArgType(ASTContext &Ctx) const; 283 284 void toString(raw_ostream &os) const; 285 286 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 287 unsigned getPositionalArgIndex() const { 288 assert(hasDataArgument()); 289 return amt + 1; 290 } 291 292 bool usesDotPrefix() const { return UsesDotPrefix; } 293 void setUsesDotPrefix() { UsesDotPrefix = true; } 294 295private: 296 const char *start; 297 unsigned length; 298 HowSpecified hs; 299 unsigned amt; 300 bool UsesPositionalArg : 1; 301 bool UsesDotPrefix; 302}; 303 304 305class FormatSpecifier { 306protected: 307 LengthModifier LM; 308 OptionalAmount FieldWidth; 309 ConversionSpecifier CS; 310 /// Positional arguments, an IEEE extension: 311 /// IEEE Std 1003.1, 2004 Edition 312 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 313 bool UsesPositionalArg; 314 unsigned argIndex; 315public: 316 FormatSpecifier(bool isPrintf) 317 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 318 319 void setLengthModifier(LengthModifier lm) { 320 LM = lm; 321 } 322 323 void setUsesPositionalArg() { UsesPositionalArg = true; } 324 325 void setArgIndex(unsigned i) { 326 argIndex = i; 327 } 328 329 unsigned getArgIndex() const { 330 return argIndex; 331 } 332 333 unsigned getPositionalArgIndex() const { 334 return argIndex + 1; 335 } 336 337 const LengthModifier &getLengthModifier() const { 338 return LM; 339 } 340 341 const OptionalAmount &getFieldWidth() const { 342 return FieldWidth; 343 } 344 345 void setFieldWidth(const OptionalAmount &Amt) { 346 FieldWidth = Amt; 347 } 348 349 bool usesPositionalArg() const { return UsesPositionalArg; } 350 351 bool hasValidLengthModifier() const; 352 353 bool hasStandardLengthModifier() const; 354 355 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 356 
357 bool hasStandardLengthConversionCombination() const; 358}; 359 360} // end analyze_format_string namespace 361 362//===----------------------------------------------------------------------===// 363/// Pieces specific to fprintf format strings. 364 365namespace analyze_printf { 366 367class PrintfConversionSpecifier : 368 public analyze_format_string::ConversionSpecifier { 369public: 370 PrintfConversionSpecifier() 371 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 372 373 PrintfConversionSpecifier(const char *pos, Kind k) 374 : ConversionSpecifier(true, pos, k) {} 375 376 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 377 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 378 bool isDoubleArg() const { return kind >= DoubleArgBeg && 379 kind <= DoubleArgEnd; } 380 unsigned getLength() const { 381 // Conversion specifiers currently only are represented by 382 // single characters, but we be flexible. 383 return 1; 384 } 385 386 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 387 return CS->isPrintfKind(); 388 } 389}; 390 391using analyze_format_string::ArgTypeResult; 392using analyze_format_string::LengthModifier; 393using analyze_format_string::OptionalAmount; 394using analyze_format_string::OptionalFlag; 395 396class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 397 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 
398 OptionalFlag IsLeftJustified; // '-' 399 OptionalFlag HasPlusPrefix; // '+' 400 OptionalFlag HasSpacePrefix; // ' ' 401 OptionalFlag HasAlternativeForm; // '#' 402 OptionalFlag HasLeadingZeroes; // '0' 403 OptionalAmount Precision; 404public: 405 PrintfSpecifier() : 406 FormatSpecifier(/* isPrintf = */ true), 407 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 408 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 409 410 static PrintfSpecifier Parse(const char *beg, const char *end); 411 412 // Methods for incrementally constructing the PrintfSpecifier. 413 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 414 CS = cs; 415 } 416 void setHasThousandsGrouping(const char *position) { 417 HasThousandsGrouping = true; 418 HasThousandsGrouping.setPosition(position); 419 } 420 void setIsLeftJustified(const char *position) { 421 IsLeftJustified = true; 422 IsLeftJustified.setPosition(position); 423 } 424 void setHasPlusPrefix(const char *position) { 425 HasPlusPrefix = true; 426 HasPlusPrefix.setPosition(position); 427 } 428 void setHasSpacePrefix(const char *position) { 429 HasSpacePrefix = true; 430 HasSpacePrefix.setPosition(position); 431 } 432 void setHasAlternativeForm(const char *position) { 433 HasAlternativeForm = true; 434 HasAlternativeForm.setPosition(position); 435 } 436 void setHasLeadingZeros(const char *position) { 437 HasLeadingZeroes = true; 438 HasLeadingZeroes.setPosition(position); 439 } 440 void setUsesPositionalArg() { UsesPositionalArg = true; } 441 442 // Methods for querying the format specifier. 
443 444 const PrintfConversionSpecifier &getConversionSpecifier() const { 445 return cast<PrintfConversionSpecifier>(CS); 446 } 447 448 void setPrecision(const OptionalAmount &Amt) { 449 Precision = Amt; 450 Precision.setUsesDotPrefix(); 451 } 452 453 const OptionalAmount &getPrecision() const { 454 return Precision; 455 } 456 457 bool consumesDataArgument() const { 458 return getConversionSpecifier().consumesDataArgument(); 459 } 460 461 /// \brief Returns the builtin type that a data argument 462 /// paired with this format specifier should have. This method 463 /// will return null if the format specifier does not have 464 /// a matching data argument or the matching argument matches 465 /// more than one type. 466 ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 467 468 const OptionalFlag &hasThousandsGrouping() const { 469 return HasThousandsGrouping; 470 } 471 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 472 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 473 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 474 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 475 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 476 bool usesPositionalArg() const { return UsesPositionalArg; } 477 478 /// Changes the specifier and length according to a QualType, retaining any 479 /// flags or options. Returns true on success, or false when a conversion 480 /// was not successful. 
481 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 482 bool IsObjCLiteral); 483 484 void toString(raw_ostream &os) const; 485 486 // Validation methods - to check if any element results in undefined behavior 487 bool hasValidPlusPrefix() const; 488 bool hasValidAlternativeForm() const; 489 bool hasValidLeadingZeros() const; 490 bool hasValidSpacePrefix() const; 491 bool hasValidLeftJustified() const; 492 bool hasValidThousandsGroupingPrefix() const; 493 494 bool hasValidPrecision() const; 495 bool hasValidFieldWidth() const; 496}; 497} // end analyze_printf namespace 498 499//===----------------------------------------------------------------------===// 500/// Pieces specific to fscanf format strings. 501 502namespace analyze_scanf { 503 504class ScanfConversionSpecifier : 505 public analyze_format_string::ConversionSpecifier { 506public: 507 ScanfConversionSpecifier() 508 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 509 510 ScanfConversionSpecifier(const char *pos, Kind k) 511 : ConversionSpecifier(false, pos, k) {} 512 513 void setEndScanList(const char *pos) { EndScanList = pos; } 514 515 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 516 return !CS->isPrintfKind(); 517 } 518}; 519 520using analyze_format_string::ArgTypeResult; 521using analyze_format_string::LengthModifier; 522using analyze_format_string::OptionalAmount; 523using analyze_format_string::OptionalFlag; 524 525class ScanfArgTypeResult : public ArgTypeResult { 526public: 527 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy }; 528private: 529 Kind K; 530 ArgTypeResult A; 531 const char *Name; 532 QualType getRepresentativeType(ASTContext &C) const; 533public: 534 ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} 535 ScanfArgTypeResult(ArgTypeResult a, const char *n = 0) 536 : K(PtrToArgTypeResultTy), A(a), Name(n) { 537 assert(A.isValid()); 538 } 539 540 static ScanfArgTypeResult Invalid() { 
return ScanfArgTypeResult(InvalidTy); } 541 542 bool isValid() const { return K != InvalidTy; } 543 544 bool matchesType(ASTContext& C, QualType argTy) const; 545 546 std::string getRepresentativeTypeName(ASTContext& C) const; 547}; 548 549class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 550 OptionalFlag SuppressAssignment; // '*' 551public: 552 ScanfSpecifier() : 553 FormatSpecifier(/* isPrintf = */ false), 554 SuppressAssignment("*") {} 555 556 void setSuppressAssignment(const char *position) { 557 SuppressAssignment = true; 558 SuppressAssignment.setPosition(position); 559 } 560 561 const OptionalFlag &getSuppressAssignment() const { 562 return SuppressAssignment; 563 } 564 565 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 566 CS = cs; 567 } 568 569 const ScanfConversionSpecifier &getConversionSpecifier() const { 570 return cast<ScanfConversionSpecifier>(CS); 571 } 572 573 bool consumesDataArgument() const { 574 return CS.consumesDataArgument() && !SuppressAssignment; 575 } 576 577 ScanfArgTypeResult getArgType(ASTContext &Ctx) const; 578 579 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 580 581 void toString(raw_ostream &os) const; 582 583 static ScanfSpecifier Parse(const char *beg, const char *end); 584}; 585 586} // end analyze_scanf namespace 587 588//===----------------------------------------------------------------------===// 589// Parsing and processing of format strings (both fprintf and fscanf). 
590 591namespace analyze_format_string { 592 593enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 594 595class FormatStringHandler { 596public: 597 FormatStringHandler() {} 598 virtual ~FormatStringHandler(); 599 600 virtual void HandleNullChar(const char *nullCharacter) {} 601 602 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 603 604 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 605 PositionContext p) {} 606 607 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 608 609 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 610 unsigned specifierLen) {} 611 612 // Printf-specific handlers. 613 614 virtual bool HandleInvalidPrintfConversionSpecifier( 615 const analyze_printf::PrintfSpecifier &FS, 616 const char *startSpecifier, 617 unsigned specifierLen) { 618 return true; 619 } 620 621 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 622 const char *startSpecifier, 623 unsigned specifierLen) { 624 return true; 625 } 626 627 // Scanf-specific handlers. 628 629 virtual bool HandleInvalidScanfConversionSpecifier( 630 const analyze_scanf::ScanfSpecifier &FS, 631 const char *startSpecifier, 632 unsigned specifierLen) { 633 return true; 634 } 635 636 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 637 const char *startSpecifier, 638 unsigned specifierLen) { 639 return true; 640 } 641 642 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 643}; 644 645bool ParsePrintfString(FormatStringHandler &H, 646 const char *beg, const char *end, const LangOptions &LO); 647 648bool ParseScanfString(FormatStringHandler &H, 649 const char *beg, const char *end, const LangOptions &LO); 650 651} // end analyze_format_string namespace 652} // end clang namespace 653#endif 654