// FormatString.h revision be6126a2a784e1446460b8d15c2b26f880c871fc
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 
62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll', 'q' (BSD, deprecated) 70 AsIntMax, // 'j' 71 AsSizeT, // 'z' 72 AsPtrDiff, // 't' 73 AsLongDouble, // 'L' 74 AsAllocate, // for '%as', GNU extension to C90 scanf 75 AsMAllocate, // for '%ms', GNU extension to scanf 76 AsWideChar = AsLong // for '%ls', only makes sense for printf 77 }; 78 79 LengthModifier() 80 : Position(0), kind(None) {} 81 LengthModifier(const char *pos, Kind k) 82 : Position(pos), kind(k) {} 83 84 const char *getStart() const { 85 return Position; 86 } 87 88 unsigned getLength() const { 89 switch (kind) { 90 default: 91 return 1; 92 case AsLongLong: 93 case AsChar: 94 return 2; 95 case None: 96 return 0; 97 } 98 } 99 100 Kind getKind() const { return kind; } 101 void setKind(Kind k) { kind = k; } 102 103 const char *toString() const; 104 105private: 106 const char *Position; 107 Kind kind; 108}; 109 110class ConversionSpecifier { 111public: 112 enum Kind { 113 InvalidSpecifier = 0, 114 // C99 conversion specifiers. 115 cArg, 116 dArg, 117 iArg, 118 IntArgBeg = cArg, IntArgEnd = iArg, 119 120 oArg, 121 uArg, 122 xArg, 123 XArg, 124 UIntArgBeg = oArg, UIntArgEnd = XArg, 125 126 fArg, 127 FArg, 128 eArg, 129 EArg, 130 gArg, 131 GArg, 132 aArg, 133 AArg, 134 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 135 136 sArg, 137 pArg, 138 nArg, 139 PercentArg, 140 CArg, 141 SArg, 142 143 // ** Printf-specific ** 144 145 // Objective-C specific specifiers. 146 ObjCObjArg, // '@' 147 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 148 149 // GlibC specific specifiers. 
150 PrintErrno, // 'm' 151 152 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 153 154 // ** Scanf-specific ** 155 ScanListArg, // '[' 156 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 157 }; 158 159 ConversionSpecifier(bool isPrintf) 160 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 161 162 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 163 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 164 165 const char *getStart() const { 166 return Position; 167 } 168 169 StringRef getCharacters() const { 170 return StringRef(getStart(), getLength()); 171 } 172 173 bool consumesDataArgument() const { 174 switch (kind) { 175 case PrintErrno: 176 assert(IsPrintf); 177 case PercentArg: 178 return false; 179 default: 180 return true; 181 } 182 } 183 184 Kind getKind() const { return kind; } 185 void setKind(Kind k) { kind = k; } 186 unsigned getLength() const { 187 return EndScanList ? EndScanList - Position : 1; 188 } 189 190 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 191 const char *toString() const; 192 193 bool isPrintfKind() const { return IsPrintf; } 194 195protected: 196 bool IsPrintf; 197 const char *Position; 198 const char *EndScanList; 199 Kind kind; 200}; 201 202class ArgTypeResult { 203public: 204 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 205 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 206private: 207 const Kind K; 208 QualType T; 209 const char *Name; 210 ArgTypeResult(bool) : K(InvalidTy), Name(0) {} 211public: 212 ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {} 213 ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {} 214 ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {} 215 ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n) {} 216 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {} 217 218 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 219 220 bool isValid() const { 
return K != InvalidTy; } 221 222 const QualType *getSpecificType() const { 223 return K == SpecificTy ? &T : 0; 224 } 225 226 bool matchesType(ASTContext &C, QualType argTy) const; 227 228 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 229 230 QualType getRepresentativeType(ASTContext &C) const; 231 232 std::string getRepresentativeTypeName(ASTContext &C) const; 233}; 234 235class OptionalAmount { 236public: 237 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 238 239 OptionalAmount(HowSpecified howSpecified, 240 unsigned amount, 241 const char *amountStart, 242 unsigned amountLength, 243 bool usesPositionalArg) 244 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 245 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 246 247 OptionalAmount(bool valid = true) 248 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 249 UsesPositionalArg(0), UsesDotPrefix(0) {} 250 251 bool isInvalid() const { 252 return hs == Invalid; 253 } 254 255 HowSpecified getHowSpecified() const { return hs; } 256 void setHowSpecified(HowSpecified h) { hs = h; } 257 258 bool hasDataArgument() const { return hs == Arg; } 259 260 unsigned getArgIndex() const { 261 assert(hasDataArgument()); 262 return amt; 263 } 264 265 unsigned getConstantAmount() const { 266 assert(hs == Constant); 267 return amt; 268 } 269 270 const char *getStart() const { 271 // We include the . character if it is given. 
272 return start - UsesDotPrefix; 273 } 274 275 unsigned getConstantLength() const { 276 assert(hs == Constant); 277 return length + UsesDotPrefix; 278 } 279 280 ArgTypeResult getArgType(ASTContext &Ctx) const; 281 282 void toString(raw_ostream &os) const; 283 284 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 285 unsigned getPositionalArgIndex() const { 286 assert(hasDataArgument()); 287 return amt + 1; 288 } 289 290 bool usesDotPrefix() const { return UsesDotPrefix; } 291 void setUsesDotPrefix() { UsesDotPrefix = true; } 292 293private: 294 const char *start; 295 unsigned length; 296 HowSpecified hs; 297 unsigned amt; 298 bool UsesPositionalArg : 1; 299 bool UsesDotPrefix; 300}; 301 302 303class FormatSpecifier { 304protected: 305 LengthModifier LM; 306 OptionalAmount FieldWidth; 307 ConversionSpecifier CS; 308 /// Positional arguments, an IEEE extension: 309 /// IEEE Std 1003.1, 2004 Edition 310 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 311 bool UsesPositionalArg; 312 unsigned argIndex; 313public: 314 FormatSpecifier(bool isPrintf) 315 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 316 317 void setLengthModifier(LengthModifier lm) { 318 LM = lm; 319 } 320 321 void setUsesPositionalArg() { UsesPositionalArg = true; } 322 323 void setArgIndex(unsigned i) { 324 argIndex = i; 325 } 326 327 unsigned getArgIndex() const { 328 return argIndex; 329 } 330 331 unsigned getPositionalArgIndex() const { 332 return argIndex + 1; 333 } 334 335 const LengthModifier &getLengthModifier() const { 336 return LM; 337 } 338 339 const OptionalAmount &getFieldWidth() const { 340 return FieldWidth; 341 } 342 343 void setFieldWidth(const OptionalAmount &Amt) { 344 FieldWidth = Amt; 345 } 346 347 bool usesPositionalArg() const { return UsesPositionalArg; } 348 349 bool hasValidLengthModifier() const; 350}; 351 352} // end analyze_format_string namespace 353 
354//===----------------------------------------------------------------------===// 355/// Pieces specific to fprintf format strings. 356 357namespace analyze_printf { 358 359class PrintfConversionSpecifier : 360 public analyze_format_string::ConversionSpecifier { 361public: 362 PrintfConversionSpecifier() 363 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 364 365 PrintfConversionSpecifier(const char *pos, Kind k) 366 : ConversionSpecifier(true, pos, k) {} 367 368 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 369 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 370 bool isDoubleArg() const { return kind >= DoubleArgBeg && 371 kind <= DoubleArgEnd; } 372 unsigned getLength() const { 373 // Conversion specifiers currently only are represented by 374 // single characters, but we be flexible. 375 return 1; 376 } 377 378 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 379 return CS->isPrintfKind(); 380 } 381}; 382 383using analyze_format_string::ArgTypeResult; 384using analyze_format_string::LengthModifier; 385using analyze_format_string::OptionalAmount; 386using analyze_format_string::OptionalFlag; 387 388class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 389 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 390 OptionalFlag IsLeftJustified; // '-' 391 OptionalFlag HasPlusPrefix; // '+' 392 OptionalFlag HasSpacePrefix; // ' ' 393 OptionalFlag HasAlternativeForm; // '#' 394 OptionalFlag HasLeadingZeroes; // '0' 395 OptionalAmount Precision; 396public: 397 PrintfSpecifier() : 398 FormatSpecifier(/* isPrintf = */ true), 399 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 400 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 401 402 static PrintfSpecifier Parse(const char *beg, const char *end); 403 404 // Methods for incrementally constructing the PrintfSpecifier. 
405 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 406 CS = cs; 407 } 408 void setHasThousandsGrouping(const char *position) { 409 HasThousandsGrouping = true; 410 HasThousandsGrouping.setPosition(position); 411 } 412 void setIsLeftJustified(const char *position) { 413 IsLeftJustified = true; 414 IsLeftJustified.setPosition(position); 415 } 416 void setHasPlusPrefix(const char *position) { 417 HasPlusPrefix = true; 418 HasPlusPrefix.setPosition(position); 419 } 420 void setHasSpacePrefix(const char *position) { 421 HasSpacePrefix = true; 422 HasSpacePrefix.setPosition(position); 423 } 424 void setHasAlternativeForm(const char *position) { 425 HasAlternativeForm = true; 426 HasAlternativeForm.setPosition(position); 427 } 428 void setHasLeadingZeros(const char *position) { 429 HasLeadingZeroes = true; 430 HasLeadingZeroes.setPosition(position); 431 } 432 void setUsesPositionalArg() { UsesPositionalArg = true; } 433 434 // Methods for querying the format specifier. 435 436 const PrintfConversionSpecifier &getConversionSpecifier() const { 437 return cast<PrintfConversionSpecifier>(CS); 438 } 439 440 void setPrecision(const OptionalAmount &Amt) { 441 Precision = Amt; 442 Precision.setUsesDotPrefix(); 443 } 444 445 const OptionalAmount &getPrecision() const { 446 return Precision; 447 } 448 449 bool consumesDataArgument() const { 450 return getConversionSpecifier().consumesDataArgument(); 451 } 452 453 /// \brief Returns the builtin type that a data argument 454 /// paired with this format specifier should have. This method 455 /// will return null if the format specifier does not have 456 /// a matching data argument or the matching argument matches 457 /// more than one type. 
458 ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 459 460 const OptionalFlag &hasThousandsGrouping() const { 461 return HasThousandsGrouping; 462 } 463 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 464 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 465 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 466 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 467 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 468 bool usesPositionalArg() const { return UsesPositionalArg; } 469 470 /// Changes the specifier and length according to a QualType, retaining any 471 /// flags or options. Returns true on success, or false when a conversion 472 /// was not successful. 473 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 474 bool IsObjCLiteral); 475 476 void toString(raw_ostream &os) const; 477 478 // Validation methods - to check if any element results in undefined behavior 479 bool hasValidPlusPrefix() const; 480 bool hasValidAlternativeForm() const; 481 bool hasValidLeadingZeros() const; 482 bool hasValidSpacePrefix() const; 483 bool hasValidLeftJustified() const; 484 bool hasValidThousandsGroupingPrefix() const; 485 486 bool hasValidPrecision() const; 487 bool hasValidFieldWidth() const; 488}; 489} // end analyze_printf namespace 490 491//===----------------------------------------------------------------------===// 492/// Pieces specific to fscanf format strings. 
493 494namespace analyze_scanf { 495 496class ScanfConversionSpecifier : 497 public analyze_format_string::ConversionSpecifier { 498public: 499 ScanfConversionSpecifier() 500 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 501 502 ScanfConversionSpecifier(const char *pos, Kind k) 503 : ConversionSpecifier(false, pos, k) {} 504 505 void setEndScanList(const char *pos) { EndScanList = pos; } 506 507 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 508 return !CS->isPrintfKind(); 509 } 510}; 511 512using analyze_format_string::ArgTypeResult; 513using analyze_format_string::LengthModifier; 514using analyze_format_string::OptionalAmount; 515using analyze_format_string::OptionalFlag; 516 517class ScanfArgTypeResult : public ArgTypeResult { 518public: 519 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy }; 520private: 521 Kind K; 522 ArgTypeResult A; 523 const char *Name; 524 QualType getRepresentativeType(ASTContext &C) const; 525public: 526 ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {} 527 ScanfArgTypeResult(ArgTypeResult a, const char *n = 0) 528 : K(PtrToArgTypeResultTy), A(a), Name(n) { 529 assert(A.isValid()); 530 } 531 532 static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); } 533 534 bool isValid() const { return K != InvalidTy; } 535 536 bool matchesType(ASTContext& C, QualType argTy) const; 537 538 std::string getRepresentativeTypeName(ASTContext& C) const; 539}; 540 541class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 542 OptionalFlag SuppressAssignment; // '*' 543public: 544 ScanfSpecifier() : 545 FormatSpecifier(/* isPrintf = */ false), 546 SuppressAssignment("*") {} 547 548 void setSuppressAssignment(const char *position) { 549 SuppressAssignment = true; 550 SuppressAssignment.setPosition(position); 551 } 552 553 const OptionalFlag &getSuppressAssignment() const { 554 return SuppressAssignment; 555 } 556 557 void 
setConversionSpecifier(const ScanfConversionSpecifier &cs) { 558 CS = cs; 559 } 560 561 const ScanfConversionSpecifier &getConversionSpecifier() const { 562 return cast<ScanfConversionSpecifier>(CS); 563 } 564 565 bool consumesDataArgument() const { 566 return CS.consumesDataArgument() && !SuppressAssignment; 567 } 568 569 ScanfArgTypeResult getArgType(ASTContext &Ctx) const; 570 571 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 572 573 void toString(raw_ostream &os) const; 574 575 static ScanfSpecifier Parse(const char *beg, const char *end); 576}; 577 578} // end analyze_scanf namespace 579 580//===----------------------------------------------------------------------===// 581// Parsing and processing of format strings (both fprintf and fscanf). 582 583namespace analyze_format_string { 584 585enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 586 587class FormatStringHandler { 588public: 589 FormatStringHandler() {} 590 virtual ~FormatStringHandler(); 591 592 virtual void HandleNullChar(const char *nullCharacter) {} 593 594 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 595 PositionContext p) {} 596 597 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 598 599 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 600 unsigned specifierLen) {} 601 602 // Printf-specific handlers. 603 604 virtual bool HandleInvalidPrintfConversionSpecifier( 605 const analyze_printf::PrintfSpecifier &FS, 606 const char *startSpecifier, 607 unsigned specifierLen) { 608 return true; 609 } 610 611 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 612 const char *startSpecifier, 613 unsigned specifierLen) { 614 return true; 615 } 616 617 // Scanf-specific handlers. 
618 619 virtual bool HandleInvalidScanfConversionSpecifier( 620 const analyze_scanf::ScanfSpecifier &FS, 621 const char *startSpecifier, 622 unsigned specifierLen) { 623 return true; 624 } 625 626 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 627 const char *startSpecifier, 628 unsigned specifierLen) { 629 return true; 630 } 631 632 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 633}; 634 635bool ParsePrintfString(FormatStringHandler &H, 636 const char *beg, const char *end, const LangOptions &LO); 637 638bool ParseScanfString(FormatStringHandler &H, 639 const char *beg, const char *end, const LangOptions &LO); 640 641} // end analyze_format_string namespace 642} // end clang namespace 643#endif 644