FormatString.h revision 37969b7e14d6a4dfd934ef6d3738cc90b832ec1d
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll', 'q' (BSD, deprecated) 70 AsIntMax, // 'j' 71 AsSizeT, // 'z' 72 AsPtrDiff, // 't' 73 AsLongDouble, // 'L' 74 AsAllocate, // for '%as', GNU extension to C90 scanf 75 AsMAllocate, // for '%ms', GNU extension to scanf 76 AsWideChar = AsLong // for '%ls', only makes sense for printf 77 }; 78 79 LengthModifier() 80 : Position(0), kind(None) {} 81 LengthModifier(const char *pos, Kind k) 82 : Position(pos), kind(k) {} 83 84 const char *getStart() const { 85 return Position; 86 } 87 88 unsigned getLength() const { 89 switch (kind) { 90 default: 91 return 1; 92 case AsLongLong: 93 case AsChar: 94 return 2; 95 case None: 96 return 0; 97 } 98 } 99 100 Kind getKind() const { return kind; } 101 void setKind(Kind k) { kind = k; } 102 103 const char *toString() const; 104 105private: 106 const char *Position; 107 Kind kind; 108}; 109 110class ConversionSpecifier { 111public: 112 enum Kind { 113 InvalidSpecifier = 0, 114 // C99 conversion specifiers. 115 cArg, 116 dArg, 117 iArg, 118 IntArgBeg = cArg, IntArgEnd = iArg, 119 120 oArg, 121 uArg, 122 xArg, 123 XArg, 124 UIntArgBeg = oArg, UIntArgEnd = XArg, 125 126 fArg, 127 FArg, 128 eArg, 129 EArg, 130 gArg, 131 GArg, 132 aArg, 133 AArg, 134 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 135 136 sArg, 137 pArg, 138 nArg, 139 PercentArg, 140 CArg, 141 SArg, 142 143 // ** Printf-specific ** 144 145 // Objective-C specific specifiers. 146 ObjCObjArg, // '@' 147 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 148 149 // GlibC specific specifiers. 150 PrintErrno, // 'm' 151 152 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 153 154 // ** Scanf-specific ** 155 ScanListArg, // '[' 156 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 157 }; 158 159 ConversionSpecifier(bool isPrintf) 160 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 161 162 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 163 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 164 165 const char *getStart() const { 166 return Position; 167 } 168 169 StringRef getCharacters() const { 170 return StringRef(getStart(), getLength()); 171 } 172 173 bool consumesDataArgument() const { 174 switch (kind) { 175 case PrintErrno: 176 assert(IsPrintf); 177 case PercentArg: 178 return false; 179 default: 180 return true; 181 } 182 } 183 184 Kind getKind() const { return kind; } 185 void setKind(Kind k) { kind = k; } 186 unsigned getLength() const { 187 return EndScanList ? EndScanList - Position : 1; 188 } 189 190 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 191 const char *toString() const; 192 193 bool isPrintfKind() const { return IsPrintf; } 194 195protected: 196 bool IsPrintf; 197 const char *Position; 198 const char *EndScanList; 199 Kind kind; 200}; 201 202class ArgTypeResult { 203public: 204 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 205 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 206private: 207 const Kind K; 208 QualType T; 209 const char *Name; 210 ArgTypeResult(bool) : K(InvalidTy), Name(0) {} 211public: 212 ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {} 213 ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {} 214 ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {} 215 ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n) {} 216 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {} 217 218 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 219 220 bool isValid() const { return K != InvalidTy; } 221 222 const QualType *getSpecificType() const { 223 return K == SpecificTy ? &T : 0; 224 } 225 226 bool matchesType(ASTContext &C, QualType argTy) const; 227 228 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 229 230 QualType getRepresentativeType(ASTContext &C) const; 231 232 std::string getRepresentativeTypeName(ASTContext &C) const; 233}; 234 235class OptionalAmount { 236public: 237 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 238 239 OptionalAmount(HowSpecified howSpecified, 240 unsigned amount, 241 const char *amountStart, 242 unsigned amountLength, 243 bool usesPositionalArg) 244 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 245 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 246 247 OptionalAmount(bool valid = true) 248 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 249 UsesPositionalArg(0), UsesDotPrefix(0) {} 250 251 bool isInvalid() const { 252 return hs == Invalid; 253 } 254 255 HowSpecified getHowSpecified() const { return hs; } 256 void setHowSpecified(HowSpecified h) { hs = h; } 257 258 bool hasDataArgument() const { return hs == Arg; } 259 260 unsigned getArgIndex() const { 261 assert(hasDataArgument()); 262 return amt; 263 } 264 265 unsigned getConstantAmount() const { 266 assert(hs == Constant); 267 return amt; 268 } 269 270 const char *getStart() const { 271 // We include the . character if it is given. 272 return start - UsesDotPrefix; 273 } 274 275 unsigned getConstantLength() const { 276 assert(hs == Constant); 277 return length + UsesDotPrefix; 278 } 279 280 ArgTypeResult getArgType(ASTContext &Ctx) const; 281 282 void toString(raw_ostream &os) const; 283 284 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 285 unsigned getPositionalArgIndex() const { 286 assert(hasDataArgument()); 287 return amt + 1; 288 } 289 290 bool usesDotPrefix() const { return UsesDotPrefix; } 291 void setUsesDotPrefix() { UsesDotPrefix = true; } 292 293private: 294 const char *start; 295 unsigned length; 296 HowSpecified hs; 297 unsigned amt; 298 bool UsesPositionalArg : 1; 299 bool UsesDotPrefix; 300}; 301 302 303class FormatSpecifier { 304protected: 305 LengthModifier LM; 306 OptionalAmount FieldWidth; 307 ConversionSpecifier CS; 308 /// Positional arguments, an IEEE extension: 309 /// IEEE Std 1003.1, 2004 Edition 310 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 311 bool UsesPositionalArg; 312 unsigned argIndex; 313public: 314 FormatSpecifier(bool isPrintf) 315 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 316 317 void setLengthModifier(LengthModifier lm) { 318 LM = lm; 319 } 320 321 void setUsesPositionalArg() { UsesPositionalArg = true; } 322 323 void setArgIndex(unsigned i) { 324 argIndex = i; 325 } 326 327 unsigned getArgIndex() const { 328 return argIndex; 329 } 330 331 unsigned getPositionalArgIndex() const { 332 return argIndex + 1; 333 } 334 335 const LengthModifier &getLengthModifier() const { 336 return LM; 337 } 338 339 const OptionalAmount &getFieldWidth() const { 340 return FieldWidth; 341 } 342 343 void setFieldWidth(const OptionalAmount &Amt) { 344 FieldWidth = Amt; 345 } 346 347 bool usesPositionalArg() const { return UsesPositionalArg; } 348 349 bool hasValidLengthModifier() const; 350}; 351 352} // end analyze_format_string namespace 353 354//===----------------------------------------------------------------------===// 355/// Pieces specific to fprintf format strings. 356 357namespace analyze_printf { 358 359class PrintfConversionSpecifier : 360 public analyze_format_string::ConversionSpecifier { 361public: 362 PrintfConversionSpecifier() 363 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 364 365 PrintfConversionSpecifier(const char *pos, Kind k) 366 : ConversionSpecifier(true, pos, k) {} 367 368 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 369 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 370 bool isDoubleArg() const { return kind >= DoubleArgBeg && 371 kind <= DoubleArgBeg; } 372 unsigned getLength() const { 373 // Conversion specifiers currently only are represented by 374 // single characters, but we be flexible. 375 return 1; 376 } 377 378 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 379 return CS->isPrintfKind(); 380 } 381}; 382 383using analyze_format_string::ArgTypeResult; 384using analyze_format_string::LengthModifier; 385using analyze_format_string::OptionalAmount; 386using analyze_format_string::OptionalFlag; 387 388class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 389 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 390 OptionalFlag IsLeftJustified; // '-' 391 OptionalFlag HasPlusPrefix; // '+' 392 OptionalFlag HasSpacePrefix; // ' ' 393 OptionalFlag HasAlternativeForm; // '#' 394 OptionalFlag HasLeadingZeroes; // '0' 395 OptionalAmount Precision; 396public: 397 PrintfSpecifier() : 398 FormatSpecifier(/* isPrintf = */ true), 399 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 400 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 401 402 static PrintfSpecifier Parse(const char *beg, const char *end); 403 404 // Methods for incrementally constructing the PrintfSpecifier. 405 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 406 CS = cs; 407 } 408 void setHasThousandsGrouping(const char *position) { 409 HasThousandsGrouping = true; 410 HasThousandsGrouping.setPosition(position); 411 } 412 void setIsLeftJustified(const char *position) { 413 IsLeftJustified = true; 414 IsLeftJustified.setPosition(position); 415 } 416 void setHasPlusPrefix(const char *position) { 417 HasPlusPrefix = true; 418 HasPlusPrefix.setPosition(position); 419 } 420 void setHasSpacePrefix(const char *position) { 421 HasSpacePrefix = true; 422 HasSpacePrefix.setPosition(position); 423 } 424 void setHasAlternativeForm(const char *position) { 425 HasAlternativeForm = true; 426 HasAlternativeForm.setPosition(position); 427 } 428 void setHasLeadingZeros(const char *position) { 429 HasLeadingZeroes = true; 430 HasLeadingZeroes.setPosition(position); 431 } 432 void setUsesPositionalArg() { UsesPositionalArg = true; } 433 434 // Methods for querying the format specifier. 435 436 const PrintfConversionSpecifier &getConversionSpecifier() const { 437 return cast<PrintfConversionSpecifier>(CS); 438 } 439 440 void setPrecision(const OptionalAmount &Amt) { 441 Precision = Amt; 442 Precision.setUsesDotPrefix(); 443 } 444 445 const OptionalAmount &getPrecision() const { 446 return Precision; 447 } 448 449 bool consumesDataArgument() const { 450 return getConversionSpecifier().consumesDataArgument(); 451 } 452 453 /// \brief Returns the builtin type that a data argument 454 /// paired with this format specifier should have. This method 455 /// will return null if the format specifier does not have 456 /// a matching data argument or the matching argument matches 457 /// more than one type. 458 ArgTypeResult getArgType(ASTContext &Ctx) const; 459 460 const OptionalFlag &hasThousandsGrouping() const { 461 return HasThousandsGrouping; 462 } 463 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 464 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 465 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 466 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 467 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 468 bool usesPositionalArg() const { return UsesPositionalArg; } 469 470 /// Changes the specifier and length according to a QualType, retaining any 471 /// flags or options. Returns true on success, or false when a conversion 472 /// was not successful. 473 bool fixType(QualType QT, const LangOptions &LangOpt); 474 475 void toString(raw_ostream &os) const; 476 477 // Validation methods - to check if any element results in undefined behavior 478 bool hasValidPlusPrefix() const; 479 bool hasValidAlternativeForm() const; 480 bool hasValidLeadingZeros() const; 481 bool hasValidSpacePrefix() const; 482 bool hasValidLeftJustified() const; 483 bool hasValidThousandsGroupingPrefix() const; 484 485 bool hasValidPrecision() const; 486 bool hasValidFieldWidth() const; 487}; 488} // end analyze_printf namespace 489 490//===----------------------------------------------------------------------===// 491/// Pieces specific to fscanf format strings. 492 493namespace analyze_scanf { 494 495class ScanfConversionSpecifier : 496 public analyze_format_string::ConversionSpecifier { 497public: 498 ScanfConversionSpecifier() 499 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 500 501 ScanfConversionSpecifier(const char *pos, Kind k) 502 : ConversionSpecifier(false, pos, k) {} 503 504 void setEndScanList(const char *pos) { EndScanList = pos; } 505 506 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 507 return !CS->isPrintfKind(); 508 } 509}; 510 511using analyze_format_string::ArgTypeResult; 512using analyze_format_string::LengthModifier; 513using analyze_format_string::OptionalAmount; 514using analyze_format_string::OptionalFlag; 515 516class ScanfArgTypeResult : public ArgTypeResult { 517public: 518 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy }; 519private: 520 Kind K; 521 ArgTypeResult A; 522 const char *Name; 523 QualType getRepresentativeType(ASTContext &C) const; 524public: 525 ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} 526 ScanfArgTypeResult(ArgTypeResult a, const char *n = 0) 527 : K(PtrToArgTypeResultTy), A(a), Name(n) { 528 assert(A.isValid()); 529 } 530 531 static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); } 532 533 bool isValid() const { return K != InvalidTy; } 534 535 bool matchesType(ASTContext& C, QualType argTy) const; 536 537 std::string getRepresentativeTypeName(ASTContext& C) const; 538}; 539 540class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 541 OptionalFlag SuppressAssignment; // '*' 542public: 543 ScanfSpecifier() : 544 FormatSpecifier(/* isPrintf = */ false), 545 SuppressAssignment("*") {} 546 547 void setSuppressAssignment(const char *position) { 548 SuppressAssignment = true; 549 SuppressAssignment.setPosition(position); 550 } 551 552 const OptionalFlag &getSuppressAssignment() const { 553 return SuppressAssignment; 554 } 555 556 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 557 CS = cs; 558 } 559 560 const ScanfConversionSpecifier &getConversionSpecifier() const { 561 return cast<ScanfConversionSpecifier>(CS); 562 } 563 564 bool consumesDataArgument() const { 565 return CS.consumesDataArgument() && !SuppressAssignment; 566 } 567 568 ScanfArgTypeResult getArgType(ASTContext &Ctx) const; 569 570 bool fixType(QualType QT, const LangOptions &LangOpt); 571 572 void toString(raw_ostream &os) const; 573 574 static ScanfSpecifier Parse(const char *beg, const char *end); 575}; 576 577} // end analyze_scanf namespace 578 579//===----------------------------------------------------------------------===// 580// Parsing and processing of format strings (both fprintf and fscanf). 581 582namespace analyze_format_string { 583 584enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 585 586class FormatStringHandler { 587public: 588 FormatStringHandler() {} 589 virtual ~FormatStringHandler(); 590 591 virtual void HandleNullChar(const char *nullCharacter) {} 592 593 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 594 PositionContext p) {} 595 596 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 597 598 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 599 unsigned specifierLen) {} 600 601 // Printf-specific handlers. 602 603 virtual bool HandleInvalidPrintfConversionSpecifier( 604 const analyze_printf::PrintfSpecifier &FS, 605 const char *startSpecifier, 606 unsigned specifierLen) { 607 return true; 608 } 609 610 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 611 const char *startSpecifier, 612 unsigned specifierLen) { 613 return true; 614 } 615 616 // Scanf-specific handlers. 617 618 virtual bool HandleInvalidScanfConversionSpecifier( 619 const analyze_scanf::ScanfSpecifier &FS, 620 const char *startSpecifier, 621 unsigned specifierLen) { 622 return true; 623 } 624 625 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 626 const char *startSpecifier, 627 unsigned specifierLen) { 628 return true; 629 } 630 631 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 632}; 633 634bool ParsePrintfString(FormatStringHandler &H, 635 const char *beg, const char *end, const LangOptions &LO); 636 637bool ParseScanfString(FormatStringHandler &H, 638 const char *beg, const char *end, const LangOptions &LO); 639 640} // end analyze_format_string namespace 641} // end clang namespace 642#endif 643