FormatString.h revision 6ecb950c65329f8d6ce9ad0514632df35a5ab61f
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll', 'q' (BSD, deprecated) 70 AsIntMax, // 'j' 71 AsSizeT, // 'z' 72 AsPtrDiff, // 't' 73 AsLongDouble, // 'L' 74 AsWideChar = AsLong // for '%ls', only makes sense for printf 75 }; 76 77 LengthModifier() 78 : Position(0), kind(None) {} 79 LengthModifier(const char *pos, Kind k) 80 : Position(pos), kind(k) {} 81 82 const char *getStart() const { 83 return Position; 84 } 85 86 unsigned getLength() const { 87 switch (kind) { 88 default: 89 return 1; 90 case AsLongLong: 91 case AsChar: 92 return 2; 93 case None: 94 return 0; 95 } 96 } 97 98 Kind getKind() const { return kind; } 99 void setKind(Kind k) { kind = k; } 100 101 const char *toString() const; 102 103private: 104 const char *Position; 105 Kind kind; 106}; 107 108class ConversionSpecifier { 109public: 110 enum Kind { 111 InvalidSpecifier = 0, 112 // C99 conversion specifiers. 113 cArg, 114 dArg, 115 iArg, 116 IntArgBeg = cArg, IntArgEnd = iArg, 117 118 oArg, 119 uArg, 120 xArg, 121 XArg, 122 UIntArgBeg = oArg, UIntArgEnd = XArg, 123 124 fArg, 125 FArg, 126 eArg, 127 EArg, 128 gArg, 129 GArg, 130 aArg, 131 AArg, 132 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 133 134 sArg, 135 pArg, 136 nArg, 137 PercentArg, 138 CArg, 139 SArg, 140 141 // ** Printf-specific ** 142 143 // Objective-C specific specifiers. 144 ObjCObjArg, // '@' 145 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 146 147 // GlibC specific specifiers. 148 PrintErrno, // 'm' 149 150 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno 151 }; 152 153 ConversionSpecifier(bool isPrintf) 154 : IsPrintf(isPrintf), Position(0), kind(InvalidSpecifier) {} 155 156 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 157 : IsPrintf(isPrintf), Position(pos), kind(k) {} 158 159 const char *getStart() const { 160 return Position; 161 } 162 163 llvm::StringRef getCharacters() const { 164 return llvm::StringRef(getStart(), getLength()); 165 } 166 167 Kind getKind() const { return kind; } 168 void setKind(Kind k) { kind = k; } 169 unsigned getLength() const { 170 // Conversion specifiers currently only are represented by 171 // single characters, but we be flexible. 172 return 1; 173 } 174 const char *toString() const; 175 176 bool isPrintfKind() const { return IsPrintf; } 177 178protected: 179 bool IsPrintf; 180 const char *Position; 181 Kind kind; 182}; 183 184class ArgTypeResult { 185public: 186 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 187 CStrTy, WCStrTy }; 188private: 189 const Kind K; 190 QualType T; 191 ArgTypeResult(bool) : K(InvalidTy) {} 192public: 193 ArgTypeResult(Kind k = UnknownTy) : K(k) {} 194 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {} 195 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {} 196 197 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 198 199 bool isValid() const { return K != InvalidTy; } 200 201 const QualType *getSpecificType() const { 202 return K == SpecificTy ? &T : 0; 203 } 204 205 bool matchesType(ASTContext &C, QualType argTy) const; 206 207 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 208 209 QualType getRepresentativeType(ASTContext &C) const; 210}; 211 212class OptionalAmount { 213public: 214 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 215 216 OptionalAmount(HowSpecified howSpecified, 217 unsigned amount, 218 const char *amountStart, 219 unsigned amountLength, 220 bool usesPositionalArg) 221 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 222 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 223 224 OptionalAmount(bool valid = true) 225 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 226 UsesPositionalArg(0), UsesDotPrefix(0) {} 227 228 bool isInvalid() const { 229 return hs == Invalid; 230 } 231 232 HowSpecified getHowSpecified() const { return hs; } 233 void setHowSpecified(HowSpecified h) { hs = h; } 234 235 bool hasDataArgument() const { return hs == Arg; } 236 237 unsigned getArgIndex() const { 238 assert(hasDataArgument()); 239 return amt; 240 } 241 242 unsigned getConstantAmount() const { 243 assert(hs == Constant); 244 return amt; 245 } 246 247 const char *getStart() const { 248 // We include the . character if it is given. 249 return start - UsesDotPrefix; 250 } 251 252 unsigned getConstantLength() const { 253 assert(hs == Constant); 254 return length + UsesDotPrefix; 255 } 256 257 ArgTypeResult getArgType(ASTContext &Ctx) const; 258 259 void toString(llvm::raw_ostream &os) const; 260 261 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 262 unsigned getPositionalArgIndex() const { 263 assert(hasDataArgument()); 264 return amt + 1; 265 } 266 267 bool usesDotPrefix() const { return UsesDotPrefix; } 268 void setUsesDotPrefix() { UsesDotPrefix = true; } 269 270private: 271 const char *start; 272 unsigned length; 273 HowSpecified hs; 274 unsigned amt; 275 bool UsesPositionalArg : 1; 276 bool UsesDotPrefix; 277}; 278 279 280class FormatSpecifier { 281protected: 282 LengthModifier LM; 283 OptionalAmount FieldWidth; 284 /// Positional arguments, an IEEE extension: 285 /// IEEE Std 1003.1, 2004 Edition 286 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 287 bool UsesPositionalArg; 288 unsigned argIndex; 289public: 290 FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {} 291 292 void setLengthModifier(LengthModifier lm) { 293 LM = lm; 294 } 295 296 void setUsesPositionalArg() { UsesPositionalArg = true; } 297 298 void setArgIndex(unsigned i) { 299 argIndex = i; 300 } 301 302 unsigned getArgIndex() const { 303 return argIndex; 304 } 305 306 unsigned getPositionalArgIndex() const { 307 return argIndex + 1; 308 } 309 310 const LengthModifier &getLengthModifier() const { 311 return LM; 312 } 313 314 const OptionalAmount &getFieldWidth() const { 315 return FieldWidth; 316 } 317 318 void setFieldWidth(const OptionalAmount &Amt) { 319 FieldWidth = Amt; 320 } 321 322 bool usesPositionalArg() const { return UsesPositionalArg; } 323}; 324 325} // end analyze_format_string namespace 326 327//===----------------------------------------------------------------------===// 328/// Pieces specific to fprintf format strings. 329 330namespace analyze_printf { 331 332class PrintfConversionSpecifier : 333 public analyze_format_string::ConversionSpecifier { 334public: 335 PrintfConversionSpecifier() 336 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 337 338 PrintfConversionSpecifier(const char *pos, Kind k) 339 : ConversionSpecifier(true, pos, k) {} 340 341 bool consumesDataArgument() const { 342 switch (kind) { 343 case PercentArg: 344 case PrintErrno: 345 return false; 346 default: 347 return true; 348 } 349 } 350 351 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 352 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 353 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 354 bool isDoubleArg() const { return kind >= DoubleArgBeg && 355 kind <= DoubleArgBeg; } 356 unsigned getLength() const { 357 // Conversion specifiers currently only are represented by 358 // single characters, but we be flexible. 359 return 1; 360 } 361 362 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 363 return CS->isPrintfKind(); 364 } 365}; 366 367using analyze_format_string::ArgTypeResult; 368using analyze_format_string::LengthModifier; 369using analyze_format_string::OptionalAmount; 370using analyze_format_string::OptionalFlag; 371 372class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 373 OptionalFlag IsLeftJustified; // '-' 374 OptionalFlag HasPlusPrefix; // '+' 375 OptionalFlag HasSpacePrefix; // ' ' 376 OptionalFlag HasAlternativeForm; // '#' 377 OptionalFlag HasLeadingZeroes; // '0' 378 analyze_format_string::ConversionSpecifier CS; 379 OptionalAmount Precision; 380public: 381 PrintfSpecifier() : 382 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "), 383 HasAlternativeForm("#"), HasLeadingZeroes("0"), CS(/* isPrintf = */ true) {} 384 385 static PrintfSpecifier Parse(const char *beg, const char *end); 386 387 // Methods for incrementally constructing the PrintfSpecifier. 388 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 389 CS = cs; 390 } 391 void setIsLeftJustified(const char *position) { 392 IsLeftJustified = true; 393 IsLeftJustified.setPosition(position); 394 } 395 void setHasPlusPrefix(const char *position) { 396 HasPlusPrefix = true; 397 HasPlusPrefix.setPosition(position); 398 } 399 void setHasSpacePrefix(const char *position) { 400 HasSpacePrefix = true; 401 HasSpacePrefix.setPosition(position); 402 } 403 void setHasAlternativeForm(const char *position) { 404 HasAlternativeForm = true; 405 HasAlternativeForm.setPosition(position); 406 } 407 void setHasLeadingZeros(const char *position) { 408 HasLeadingZeroes = true; 409 HasLeadingZeroes.setPosition(position); 410 } 411 void setUsesPositionalArg() { UsesPositionalArg = true; } 412 413 // Methods for querying the format specifier. 414 415 const PrintfConversionSpecifier &getConversionSpecifier() const { 416 return cast<PrintfConversionSpecifier>(CS); 417 } 418 419 void setPrecision(const OptionalAmount &Amt) { 420 Precision = Amt; 421 Precision.setUsesDotPrefix(); 422 } 423 424 const OptionalAmount &getPrecision() const { 425 return Precision; 426 } 427 428 bool consumesDataArgument() const { 429 return getConversionSpecifier().consumesDataArgument(); 430 } 431 432 /// \brief Returns the builtin type that a data argument 433 /// paired with this format specifier should have. This method 434 /// will return null if the format specifier does not have 435 /// a matching data argument or the matching argument matches 436 /// more than one type. 437 ArgTypeResult getArgType(ASTContext &Ctx) const; 438 439 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 440 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 441 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 442 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 443 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 444 bool usesPositionalArg() const { return UsesPositionalArg; } 445 446 /// Changes the specifier and length according to a QualType, retaining any 447 /// flags or options. Returns true on success, or false when a conversion 448 /// was not successful. 449 bool fixType(QualType QT); 450 451 void toString(llvm::raw_ostream &os) const; 452 453 // Validation methods - to check if any element results in undefined behavior 454 bool hasValidPlusPrefix() const; 455 bool hasValidAlternativeForm() const; 456 bool hasValidLeadingZeros() const; 457 bool hasValidSpacePrefix() const; 458 bool hasValidLeftJustified() const; 459 460 bool hasValidLengthModifier() const; 461 bool hasValidPrecision() const; 462 bool hasValidFieldWidth() const; 463}; 464} // end analyze_printf namespace 465 466//===----------------------------------------------------------------------===// 467/// Pieces specific to fscanf format strings. 468 469namespace analyze_scanf { 470 471class ScanfConversionSpecifier { 472public: 473 enum Kind { 474 InvalidSpecifier = 0, 475 // C99 conversion specifiers. 476 dArg, // 'd' 477 iArg, // 'i', 478 oArg, // 'o', 479 uArg, // 'u', 480 xArg, // 'x', 481 XArg, // 'X', 482 fArg, // 'f', 483 FArg, // 'F', 484 eArg, // 'e', 485 EArg, // 'E', 486 gArg, // 'g', 487 GArg, // 'G', 488 aArg, // 'a', 489 AArg, // 'A', 490 sArg, // 's', // match sequence of non-write-space characters 491 pArg, // 'p' 492 cArg, // 'c', differs from printf, writes array of characters 493 nArg, // 'n', differs from printf, writes back args consumed 494 PercentArg, // '%' 495 ScanListArg, // '[' followed by scan list 496 // IEEE Std 1003.1 extensions. 497 CArg, // 'C', same as writing 'lc' 498 SArg, // 'S', same as writing 'ls' 499 // Specifier ranges. 500 IntArgBeg = dArg, 501 IntArgEnd = iArg, 502 UIntArgBeg = oArg, 503 UIntArgEnd = XArg, 504 DoubleArgBeg = fArg, 505 DoubleArgEnd = AArg 506 }; 507 508 ScanfConversionSpecifier() 509 : Position(0), EndScanList(0), kind(InvalidSpecifier) {} 510 511 ScanfConversionSpecifier(const char *pos, Kind k) 512 : Position(pos), EndScanList(0), kind(k) {} 513 514 const char *getStart() const { 515 return Position; 516 } 517 518 void setEndScanList(const char *pos) { EndScanList = pos; } 519 520 llvm::StringRef getCharacters() const { 521 return llvm::StringRef(getStart(), getLength()); 522 } 523 524 bool consumesDataArgument() const { 525 return kind != PercentArg; 526 } 527 528 bool isIntArg() const { return kind >= dArg && kind <= iArg; } 529 bool isUIntArg() const { return kind >= oArg && kind <= XArg; } 530 bool isDoubleArg() const { return kind >= fArg && kind <= AArg; } 531 Kind getKind() const { return kind; } 532 void setKind(Kind k) { kind = k; } 533 534 unsigned getLength() const { 535 return EndScanList ? EndScanList - Position : 1; 536 } 537 538 const char *toString() const; 539 540private: 541 const char *Position; 542 const char *EndScanList; 543 Kind kind; 544}; 545 546using analyze_format_string::LengthModifier; 547using analyze_format_string::OptionalAmount; 548using analyze_format_string::OptionalFlag; 549 550class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 551 OptionalFlag SuppressAssignment; // '*' 552 ScanfConversionSpecifier CS; 553public: 554 ScanfSpecifier() : SuppressAssignment("*") {} 555 556 void setSuppressAssignment(const char *position) { 557 SuppressAssignment = true; 558 SuppressAssignment.setPosition(position); 559 } 560 561 const OptionalFlag &getSuppressAssignment() const { 562 return SuppressAssignment; 563 } 564 565 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 566 CS = cs; 567 } 568 569 const ScanfConversionSpecifier &getConversionSpecifier() const { 570 return CS; 571 } 572 573 bool consumesDataArgument() const { 574 return CS.consumesDataArgument() && !SuppressAssignment; 575 } 576 577 static ScanfSpecifier Parse(const char *beg, const char *end); 578 579}; 580 581} // end analyze_scanf namespace 582 583//===----------------------------------------------------------------------===// 584// Parsing and processing of format strings (both fprintf and fscanf). 585 586namespace analyze_format_string { 587 588enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 589 590class FormatStringHandler { 591public: 592 FormatStringHandler() {} 593 virtual ~FormatStringHandler(); 594 595 virtual void HandleNullChar(const char *nullCharacter) {} 596 597 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 598 PositionContext p) {} 599 600 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 601 602 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 603 unsigned specifierLen) {} 604 605 // Printf-specific handlers. 606 607 virtual bool HandleInvalidPrintfConversionSpecifier( 608 const analyze_printf::PrintfSpecifier &FS, 609 const char *startSpecifier, 610 unsigned specifierLen) { 611 return true; 612 } 613 614 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 615 const char *startSpecifier, 616 unsigned specifierLen) { 617 return true; 618 } 619 620 // Scanf-specific handlers. 621 622 virtual bool HandleInvalidScanfConversionSpecifier( 623 const analyze_scanf::ScanfSpecifier &FS, 624 const char *startSpecifier, 625 unsigned specifierLen) { 626 return true; 627 } 628 629 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 630 const char *startSpecifier, 631 unsigned specifierLen) { 632 return true; 633 } 634 635 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 636}; 637 638bool ParsePrintfString(FormatStringHandler &H, 639 const char *beg, const char *end); 640 641bool ParseScanfString(FormatStringHandler &H, 642 const char *beg, const char *end); 643 644} // end analyze_format_string namespace 645} // end clang namespace 646#endif 647