// FormatString.h revision a76879eb4c75dbd9ec671558f0b8b79a28d4d747
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 
62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll' 70 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 71 AsIntMax, // 'j' 72 AsSizeT, // 'z' 73 AsPtrDiff, // 't' 74 AsLongDouble, // 'L' 75 AsAllocate, // for '%as', GNU extension to C90 scanf 76 AsMAllocate, // for '%ms', GNU extension to scanf 77 AsWideChar = AsLong // for '%ls', only makes sense for printf 78 }; 79 80 LengthModifier() 81 : Position(0), kind(None) {} 82 LengthModifier(const char *pos, Kind k) 83 : Position(pos), kind(k) {} 84 85 const char *getStart() const { 86 return Position; 87 } 88 89 unsigned getLength() const { 90 switch (kind) { 91 default: 92 return 1; 93 case AsLongLong: 94 case AsChar: 95 return 2; 96 case None: 97 return 0; 98 } 99 } 100 101 Kind getKind() const { return kind; } 102 void setKind(Kind k) { kind = k; } 103 104 const char *toString() const; 105 106private: 107 const char *Position; 108 Kind kind; 109}; 110 111class ConversionSpecifier { 112public: 113 enum Kind { 114 InvalidSpecifier = 0, 115 // C99 conversion specifiers. 116 cArg, 117 dArg, 118 iArg, 119 IntArgBeg = cArg, IntArgEnd = iArg, 120 121 oArg, 122 uArg, 123 xArg, 124 XArg, 125 UIntArgBeg = oArg, UIntArgEnd = XArg, 126 127 fArg, 128 FArg, 129 eArg, 130 EArg, 131 gArg, 132 GArg, 133 aArg, 134 AArg, 135 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 136 137 sArg, 138 pArg, 139 nArg, 140 PercentArg, 141 CArg, 142 SArg, 143 144 // ** Printf-specific ** 145 146 // Objective-C specific specifiers. 147 ObjCObjArg, // '@' 148 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 149 150 // GlibC specific specifiers. 
151 PrintErrno, // 'm' 152 153 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 154 155 // ** Scanf-specific ** 156 ScanListArg, // '[' 157 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 158 }; 159 160 ConversionSpecifier(bool isPrintf) 161 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 162 163 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 164 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 165 166 const char *getStart() const { 167 return Position; 168 } 169 170 StringRef getCharacters() const { 171 return StringRef(getStart(), getLength()); 172 } 173 174 bool consumesDataArgument() const { 175 switch (kind) { 176 case PrintErrno: 177 assert(IsPrintf); 178 case PercentArg: 179 return false; 180 default: 181 return true; 182 } 183 } 184 185 Kind getKind() const { return kind; } 186 void setKind(Kind k) { kind = k; } 187 unsigned getLength() const { 188 return EndScanList ? EndScanList - Position : 1; 189 } 190 191 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 192 const char *toString() const; 193 194 bool isPrintfKind() const { return IsPrintf; } 195 196protected: 197 bool IsPrintf; 198 const char *Position; 199 const char *EndScanList; 200 Kind kind; 201}; 202 203class ArgTypeResult { 204public: 205 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 206 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 207private: 208 const Kind K; 209 QualType T; 210 const char *Name; 211 ArgTypeResult(bool) : K(InvalidTy), Name(0) {} 212public: 213 ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {} 214 ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {} 215 ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {} 216 ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n) {} 217 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {} 218 219 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 220 221 bool isValid() const { 
return K != InvalidTy; } 222 223 const QualType *getSpecificType() const { 224 return K == SpecificTy ? &T : 0; 225 } 226 227 bool matchesType(ASTContext &C, QualType argTy) const; 228 229 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 230 231 QualType getRepresentativeType(ASTContext &C) const; 232 233 std::string getRepresentativeTypeName(ASTContext &C) const; 234}; 235 236class OptionalAmount { 237public: 238 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 239 240 OptionalAmount(HowSpecified howSpecified, 241 unsigned amount, 242 const char *amountStart, 243 unsigned amountLength, 244 bool usesPositionalArg) 245 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 246 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 247 248 OptionalAmount(bool valid = true) 249 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 250 UsesPositionalArg(0), UsesDotPrefix(0) {} 251 252 bool isInvalid() const { 253 return hs == Invalid; 254 } 255 256 HowSpecified getHowSpecified() const { return hs; } 257 void setHowSpecified(HowSpecified h) { hs = h; } 258 259 bool hasDataArgument() const { return hs == Arg; } 260 261 unsigned getArgIndex() const { 262 assert(hasDataArgument()); 263 return amt; 264 } 265 266 unsigned getConstantAmount() const { 267 assert(hs == Constant); 268 return amt; 269 } 270 271 const char *getStart() const { 272 // We include the . character if it is given. 
273 return start - UsesDotPrefix; 274 } 275 276 unsigned getConstantLength() const { 277 assert(hs == Constant); 278 return length + UsesDotPrefix; 279 } 280 281 ArgTypeResult getArgType(ASTContext &Ctx) const; 282 283 void toString(raw_ostream &os) const; 284 285 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 286 unsigned getPositionalArgIndex() const { 287 assert(hasDataArgument()); 288 return amt + 1; 289 } 290 291 bool usesDotPrefix() const { return UsesDotPrefix; } 292 void setUsesDotPrefix() { UsesDotPrefix = true; } 293 294private: 295 const char *start; 296 unsigned length; 297 HowSpecified hs; 298 unsigned amt; 299 bool UsesPositionalArg : 1; 300 bool UsesDotPrefix; 301}; 302 303 304class FormatSpecifier { 305protected: 306 LengthModifier LM; 307 OptionalAmount FieldWidth; 308 ConversionSpecifier CS; 309 /// Positional arguments, an IEEE extension: 310 /// IEEE Std 1003.1, 2004 Edition 311 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 312 bool UsesPositionalArg; 313 unsigned argIndex; 314public: 315 FormatSpecifier(bool isPrintf) 316 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 317 318 void setLengthModifier(LengthModifier lm) { 319 LM = lm; 320 } 321 322 void setUsesPositionalArg() { UsesPositionalArg = true; } 323 324 void setArgIndex(unsigned i) { 325 argIndex = i; 326 } 327 328 unsigned getArgIndex() const { 329 return argIndex; 330 } 331 332 unsigned getPositionalArgIndex() const { 333 return argIndex + 1; 334 } 335 336 const LengthModifier &getLengthModifier() const { 337 return LM; 338 } 339 340 const OptionalAmount &getFieldWidth() const { 341 return FieldWidth; 342 } 343 344 void setFieldWidth(const OptionalAmount &Amt) { 345 FieldWidth = Amt; 346 } 347 348 bool usesPositionalArg() const { return UsesPositionalArg; } 349 350 bool hasValidLengthModifier() const; 351}; 352 353} // end analyze_format_string namespace 354 
355//===----------------------------------------------------------------------===// 356/// Pieces specific to fprintf format strings. 357 358namespace analyze_printf { 359 360class PrintfConversionSpecifier : 361 public analyze_format_string::ConversionSpecifier { 362public: 363 PrintfConversionSpecifier() 364 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 365 366 PrintfConversionSpecifier(const char *pos, Kind k) 367 : ConversionSpecifier(true, pos, k) {} 368 369 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 370 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 371 bool isDoubleArg() const { return kind >= DoubleArgBeg && 372 kind <= DoubleArgEnd; } 373 unsigned getLength() const { 374 // Conversion specifiers currently only are represented by 375 // single characters, but we be flexible. 376 return 1; 377 } 378 379 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 380 return CS->isPrintfKind(); 381 } 382}; 383 384using analyze_format_string::ArgTypeResult; 385using analyze_format_string::LengthModifier; 386using analyze_format_string::OptionalAmount; 387using analyze_format_string::OptionalFlag; 388 389class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 390 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 391 OptionalFlag IsLeftJustified; // '-' 392 OptionalFlag HasPlusPrefix; // '+' 393 OptionalFlag HasSpacePrefix; // ' ' 394 OptionalFlag HasAlternativeForm; // '#' 395 OptionalFlag HasLeadingZeroes; // '0' 396 OptionalAmount Precision; 397public: 398 PrintfSpecifier() : 399 FormatSpecifier(/* isPrintf = */ true), 400 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 401 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 402 403 static PrintfSpecifier Parse(const char *beg, const char *end); 404 405 // Methods for incrementally constructing the PrintfSpecifier. 
406 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 407 CS = cs; 408 } 409 void setHasThousandsGrouping(const char *position) { 410 HasThousandsGrouping = true; 411 HasThousandsGrouping.setPosition(position); 412 } 413 void setIsLeftJustified(const char *position) { 414 IsLeftJustified = true; 415 IsLeftJustified.setPosition(position); 416 } 417 void setHasPlusPrefix(const char *position) { 418 HasPlusPrefix = true; 419 HasPlusPrefix.setPosition(position); 420 } 421 void setHasSpacePrefix(const char *position) { 422 HasSpacePrefix = true; 423 HasSpacePrefix.setPosition(position); 424 } 425 void setHasAlternativeForm(const char *position) { 426 HasAlternativeForm = true; 427 HasAlternativeForm.setPosition(position); 428 } 429 void setHasLeadingZeros(const char *position) { 430 HasLeadingZeroes = true; 431 HasLeadingZeroes.setPosition(position); 432 } 433 void setUsesPositionalArg() { UsesPositionalArg = true; } 434 435 // Methods for querying the format specifier. 436 437 const PrintfConversionSpecifier &getConversionSpecifier() const { 438 return cast<PrintfConversionSpecifier>(CS); 439 } 440 441 void setPrecision(const OptionalAmount &Amt) { 442 Precision = Amt; 443 Precision.setUsesDotPrefix(); 444 } 445 446 const OptionalAmount &getPrecision() const { 447 return Precision; 448 } 449 450 bool consumesDataArgument() const { 451 return getConversionSpecifier().consumesDataArgument(); 452 } 453 454 /// \brief Returns the builtin type that a data argument 455 /// paired with this format specifier should have. This method 456 /// will return null if the format specifier does not have 457 /// a matching data argument or the matching argument matches 458 /// more than one type. 
459 ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 460 461 const OptionalFlag &hasThousandsGrouping() const { 462 return HasThousandsGrouping; 463 } 464 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 465 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 466 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 467 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 468 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 469 bool usesPositionalArg() const { return UsesPositionalArg; } 470 471 /// Changes the specifier and length according to a QualType, retaining any 472 /// flags or options. Returns true on success, or false when a conversion 473 /// was not successful. 474 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 475 bool IsObjCLiteral); 476 477 void toString(raw_ostream &os) const; 478 479 // Validation methods - to check if any element results in undefined behavior 480 bool hasValidPlusPrefix() const; 481 bool hasValidAlternativeForm() const; 482 bool hasValidLeadingZeros() const; 483 bool hasValidSpacePrefix() const; 484 bool hasValidLeftJustified() const; 485 bool hasValidThousandsGroupingPrefix() const; 486 487 bool hasValidPrecision() const; 488 bool hasValidFieldWidth() const; 489}; 490} // end analyze_printf namespace 491 492//===----------------------------------------------------------------------===// 493/// Pieces specific to fscanf format strings. 
494 495namespace analyze_scanf { 496 497class ScanfConversionSpecifier : 498 public analyze_format_string::ConversionSpecifier { 499public: 500 ScanfConversionSpecifier() 501 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 502 503 ScanfConversionSpecifier(const char *pos, Kind k) 504 : ConversionSpecifier(false, pos, k) {} 505 506 void setEndScanList(const char *pos) { EndScanList = pos; } 507 508 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 509 return !CS->isPrintfKind(); 510 } 511}; 512 513using analyze_format_string::ArgTypeResult; 514using analyze_format_string::LengthModifier; 515using analyze_format_string::OptionalAmount; 516using analyze_format_string::OptionalFlag; 517 518class ScanfArgTypeResult : public ArgTypeResult { 519public: 520 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy }; 521private: 522 Kind K; 523 ArgTypeResult A; 524 const char *Name; 525 QualType getRepresentativeType(ASTContext &C) const; 526public: 527 ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} 528 ScanfArgTypeResult(ArgTypeResult a, const char *n = 0) 529 : K(PtrToArgTypeResultTy), A(a), Name(n) { 530 assert(A.isValid()); 531 } 532 533 static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); } 534 535 bool isValid() const { return K != InvalidTy; } 536 537 bool matchesType(ASTContext& C, QualType argTy) const; 538 539 std::string getRepresentativeTypeName(ASTContext& C) const; 540}; 541 542class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 543 OptionalFlag SuppressAssignment; // '*' 544public: 545 ScanfSpecifier() : 546 FormatSpecifier(/* isPrintf = */ false), 547 SuppressAssignment("*") {} 548 549 void setSuppressAssignment(const char *position) { 550 SuppressAssignment = true; 551 SuppressAssignment.setPosition(position); 552 } 553 554 const OptionalFlag &getSuppressAssignment() const { 555 return SuppressAssignment; 556 } 557 558 void 
setConversionSpecifier(const ScanfConversionSpecifier &cs) { 559 CS = cs; 560 } 561 562 const ScanfConversionSpecifier &getConversionSpecifier() const { 563 return cast<ScanfConversionSpecifier>(CS); 564 } 565 566 bool consumesDataArgument() const { 567 return CS.consumesDataArgument() && !SuppressAssignment; 568 } 569 570 ScanfArgTypeResult getArgType(ASTContext &Ctx) const; 571 572 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 573 574 void toString(raw_ostream &os) const; 575 576 static ScanfSpecifier Parse(const char *beg, const char *end); 577}; 578 579} // end analyze_scanf namespace 580 581//===----------------------------------------------------------------------===// 582// Parsing and processing of format strings (both fprintf and fscanf). 583 584namespace analyze_format_string { 585 586enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 587 588class FormatStringHandler { 589public: 590 FormatStringHandler() {} 591 virtual ~FormatStringHandler(); 592 593 virtual void HandleNullChar(const char *nullCharacter) {} 594 595 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 596 PositionContext p) {} 597 598 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 599 600 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 601 unsigned specifierLen) {} 602 603 // Printf-specific handlers. 604 605 virtual bool HandleInvalidPrintfConversionSpecifier( 606 const analyze_printf::PrintfSpecifier &FS, 607 const char *startSpecifier, 608 unsigned specifierLen) { 609 return true; 610 } 611 612 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 613 const char *startSpecifier, 614 unsigned specifierLen) { 615 return true; 616 } 617 618 // Scanf-specific handlers. 
619 620 virtual bool HandleInvalidScanfConversionSpecifier( 621 const analyze_scanf::ScanfSpecifier &FS, 622 const char *startSpecifier, 623 unsigned specifierLen) { 624 return true; 625 } 626 627 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 628 const char *startSpecifier, 629 unsigned specifierLen) { 630 return true; 631 } 632 633 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 634}; 635 636bool ParsePrintfString(FormatStringHandler &H, 637 const char *beg, const char *end, const LangOptions &LO); 638 639bool ParseScanfString(FormatStringHandler &H, 640 const char *beg, const char *end, const LangOptions &LO); 641 642} // end analyze_format_string namespace 643} // end clang namespace 644#endif 645