FormatString.h revision 47ad6ce1afad6b70927347dfa15e0f1dc76bf5bb
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26class TargetInfo; 27 28//===----------------------------------------------------------------------===// 29/// Common components of both fprintf and fscanf format strings. 30namespace analyze_format_string { 31 32/// Class representing optional flags with location and representation 33/// information. 34class OptionalFlag { 35public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 this->position = position; 44 } 45 const char *getPosition() const { 46 assert(position); 47 return position; 48 } 49 const char *toString() const { return representation; } 50 51 // Overloaded operators for bool like qualities 52 LLVM_EXPLICIT operator bool() const { return flag; } 53 OptionalFlag& operator=(const bool &rhs) { 54 flag = rhs; 55 return *this; // Return a reference to myself. 56 } 57private: 58 const char *representation; 59 const char *position; 60 bool flag; 61}; 62 63/// Represents the length modifier in a format string in scanf/printf. 64class LengthModifier { 65public: 66 enum Kind { 67 None, 68 AsChar, // 'hh' 69 AsShort, // 'h' 70 AsLong, // 'l' 71 AsLongLong, // 'll' 72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 73 AsIntMax, // 'j' 74 AsSizeT, // 'z' 75 AsPtrDiff, // 't' 76 AsInt32, // 'I32' (MSVCRT, like __int32) 77 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL) 78 AsInt64, // 'I64' (MSVCRT, like __int64) 79 AsLongDouble, // 'L' 80 AsAllocate, // for '%as', GNU extension to C90 scanf 81 AsMAllocate, // for '%ms', GNU extension to scanf 82 AsWideChar = AsLong // for '%ls', only makes sense for printf 83 }; 84 85 LengthModifier() 86 : Position(0), kind(None) {} 87 LengthModifier(const char *pos, Kind k) 88 : Position(pos), kind(k) {} 89 90 const char *getStart() const { 91 return Position; 92 } 93 94 unsigned getLength() const { 95 switch (kind) { 96 default: 97 return 1; 98 case AsLongLong: 99 case AsChar: 100 return 2; 101 case AsInt32: 102 case AsInt64: 103 return 3; 104 case None: 105 return 0; 106 } 107 } 108 109 Kind getKind() const { return kind; } 110 void setKind(Kind k) { kind = k; } 111 112 const char *toString() const; 113 114private: 115 const char *Position; 116 Kind kind; 117}; 118 119class ConversionSpecifier { 120public: 121 enum Kind { 122 InvalidSpecifier = 0, 123 // C99 conversion specifiers. 124 cArg, 125 dArg, 126 DArg, // Apple extension 127 iArg, 128 IntArgBeg = dArg, IntArgEnd = iArg, 129 130 oArg, 131 OArg, // Apple extension 132 uArg, 133 UArg, // Apple extension 134 xArg, 135 XArg, 136 UIntArgBeg = oArg, UIntArgEnd = XArg, 137 138 fArg, 139 FArg, 140 eArg, 141 EArg, 142 gArg, 143 GArg, 144 aArg, 145 AArg, 146 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 147 148 sArg, 149 pArg, 150 nArg, 151 PercentArg, 152 CArg, 153 SArg, 154 155 // ** Printf-specific ** 156 157 // Objective-C specific specifiers. 158 ObjCObjArg, // '@' 159 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 160 161 // GlibC specific specifiers. 162 PrintErrno, // 'm' 163 164 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 165 166 // ** Scanf-specific ** 167 ScanListArg, // '[' 168 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 169 }; 170 171 ConversionSpecifier(bool isPrintf = true) 172 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 173 174 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 175 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 176 177 const char *getStart() const { 178 return Position; 179 } 180 181 StringRef getCharacters() const { 182 return StringRef(getStart(), getLength()); 183 } 184 185 bool consumesDataArgument() const { 186 switch (kind) { 187 case PrintErrno: 188 assert(IsPrintf); 189 return false; 190 case PercentArg: 191 return false; 192 default: 193 return true; 194 } 195 } 196 197 Kind getKind() const { return kind; } 198 void setKind(Kind k) { kind = k; } 199 unsigned getLength() const { 200 return EndScanList ? EndScanList - Position : 1; 201 } 202 203 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 204 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 205 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 206 const char *toString() const; 207 208 bool isPrintfKind() const { return IsPrintf; } 209 210 Optional<ConversionSpecifier> getStandardSpecifier() const; 211 212protected: 213 bool IsPrintf; 214 const char *Position; 215 const char *EndScanList; 216 Kind kind; 217}; 218 219class ArgType { 220public: 221 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 222 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 223private: 224 const Kind K; 225 QualType T; 226 const char *Name; 227 bool Ptr; 228public: 229 ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {} 230 ArgType(QualType t, const char *n = 0) 231 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 232 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {} 233 234 static ArgType Invalid() { return ArgType(InvalidTy); } 235 bool isValid() const { return K != InvalidTy; } 236 237 /// Create an ArgType which corresponds to the type pointer to A. 238 static ArgType PtrTo(const ArgType& A) { 239 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 240 ArgType Res = A; 241 Res.Ptr = true; 242 return Res; 243 } 244 245 bool matchesType(ASTContext &C, QualType argTy) const; 246 247 QualType getRepresentativeType(ASTContext &C) const; 248 249 std::string getRepresentativeTypeName(ASTContext &C) const; 250}; 251 252class OptionalAmount { 253public: 254 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 255 256 OptionalAmount(HowSpecified howSpecified, 257 unsigned amount, 258 const char *amountStart, 259 unsigned amountLength, 260 bool usesPositionalArg) 261 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 262 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 263 264 OptionalAmount(bool valid = true) 265 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 266 UsesPositionalArg(0), UsesDotPrefix(0) {} 267 268 bool isInvalid() const { 269 return hs == Invalid; 270 } 271 272 HowSpecified getHowSpecified() const { return hs; } 273 void setHowSpecified(HowSpecified h) { hs = h; } 274 275 bool hasDataArgument() const { return hs == Arg; } 276 277 unsigned getArgIndex() const { 278 assert(hasDataArgument()); 279 return amt; 280 } 281 282 unsigned getConstantAmount() const { 283 assert(hs == Constant); 284 return amt; 285 } 286 287 const char *getStart() const { 288 // We include the . character if it is given. 289 return start - UsesDotPrefix; 290 } 291 292 unsigned getConstantLength() const { 293 assert(hs == Constant); 294 return length + UsesDotPrefix; 295 } 296 297 ArgType getArgType(ASTContext &Ctx) const; 298 299 void toString(raw_ostream &os) const; 300 301 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 302 unsigned getPositionalArgIndex() const { 303 assert(hasDataArgument()); 304 return amt + 1; 305 } 306 307 bool usesDotPrefix() const { return UsesDotPrefix; } 308 void setUsesDotPrefix() { UsesDotPrefix = true; } 309 310private: 311 const char *start; 312 unsigned length; 313 HowSpecified hs; 314 unsigned amt; 315 bool UsesPositionalArg : 1; 316 bool UsesDotPrefix; 317}; 318 319 320class FormatSpecifier { 321protected: 322 LengthModifier LM; 323 OptionalAmount FieldWidth; 324 ConversionSpecifier CS; 325 /// Positional arguments, an IEEE extension: 326 /// IEEE Std 1003.1, 2004 Edition 327 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 328 bool UsesPositionalArg; 329 unsigned argIndex; 330public: 331 FormatSpecifier(bool isPrintf) 332 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 333 334 void setLengthModifier(LengthModifier lm) { 335 LM = lm; 336 } 337 338 void setUsesPositionalArg() { UsesPositionalArg = true; } 339 340 void setArgIndex(unsigned i) { 341 argIndex = i; 342 } 343 344 unsigned getArgIndex() const { 345 return argIndex; 346 } 347 348 unsigned getPositionalArgIndex() const { 349 return argIndex + 1; 350 } 351 352 const LengthModifier &getLengthModifier() const { 353 return LM; 354 } 355 356 const OptionalAmount &getFieldWidth() const { 357 return FieldWidth; 358 } 359 360 void setFieldWidth(const OptionalAmount &Amt) { 361 FieldWidth = Amt; 362 } 363 364 bool usesPositionalArg() const { return UsesPositionalArg; } 365 366 bool hasValidLengthModifier(const TargetInfo &Target) const; 367 368 bool hasStandardLengthModifier() const; 369 370 Optional<LengthModifier> getCorrectedLengthModifier() const; 371 372 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 373 374 bool hasStandardLengthConversionCombination() const; 375 376 /// For a TypedefType QT, if it is a named integer type such as size_t, 377 /// assign the appropriate value to LM and return true. 378 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 379}; 380 381} // end analyze_format_string namespace 382 383//===----------------------------------------------------------------------===// 384/// Pieces specific to fprintf format strings. 385 386namespace analyze_printf { 387 388class PrintfConversionSpecifier : 389 public analyze_format_string::ConversionSpecifier { 390public: 391 PrintfConversionSpecifier() 392 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 393 394 PrintfConversionSpecifier(const char *pos, Kind k) 395 : ConversionSpecifier(true, pos, k) {} 396 397 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 398 bool isDoubleArg() const { return kind >= DoubleArgBeg && 399 kind <= DoubleArgEnd; } 400 unsigned getLength() const { 401 // Conversion specifiers currently only are represented by 402 // single characters, but we be flexible. 403 return 1; 404 } 405 406 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 407 return CS->isPrintfKind(); 408 } 409}; 410 411using analyze_format_string::ArgType; 412using analyze_format_string::LengthModifier; 413using analyze_format_string::OptionalAmount; 414using analyze_format_string::OptionalFlag; 415 416class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 417 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 418 OptionalFlag IsLeftJustified; // '-' 419 OptionalFlag HasPlusPrefix; // '+' 420 OptionalFlag HasSpacePrefix; // ' ' 421 OptionalFlag HasAlternativeForm; // '#' 422 OptionalFlag HasLeadingZeroes; // '0' 423 OptionalAmount Precision; 424public: 425 PrintfSpecifier() : 426 FormatSpecifier(/* isPrintf = */ true), 427 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 428 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 429 430 static PrintfSpecifier Parse(const char *beg, const char *end); 431 432 // Methods for incrementally constructing the PrintfSpecifier. 433 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 434 CS = cs; 435 } 436 void setHasThousandsGrouping(const char *position) { 437 HasThousandsGrouping = true; 438 HasThousandsGrouping.setPosition(position); 439 } 440 void setIsLeftJustified(const char *position) { 441 IsLeftJustified = true; 442 IsLeftJustified.setPosition(position); 443 } 444 void setHasPlusPrefix(const char *position) { 445 HasPlusPrefix = true; 446 HasPlusPrefix.setPosition(position); 447 } 448 void setHasSpacePrefix(const char *position) { 449 HasSpacePrefix = true; 450 HasSpacePrefix.setPosition(position); 451 } 452 void setHasAlternativeForm(const char *position) { 453 HasAlternativeForm = true; 454 HasAlternativeForm.setPosition(position); 455 } 456 void setHasLeadingZeros(const char *position) { 457 HasLeadingZeroes = true; 458 HasLeadingZeroes.setPosition(position); 459 } 460 void setUsesPositionalArg() { UsesPositionalArg = true; } 461 462 // Methods for querying the format specifier. 463 464 const PrintfConversionSpecifier &getConversionSpecifier() const { 465 return cast<PrintfConversionSpecifier>(CS); 466 } 467 468 void setPrecision(const OptionalAmount &Amt) { 469 Precision = Amt; 470 Precision.setUsesDotPrefix(); 471 } 472 473 const OptionalAmount &getPrecision() const { 474 return Precision; 475 } 476 477 bool consumesDataArgument() const { 478 return getConversionSpecifier().consumesDataArgument(); 479 } 480 481 /// \brief Returns the builtin type that a data argument 482 /// paired with this format specifier should have. This method 483 /// will return null if the format specifier does not have 484 /// a matching data argument or the matching argument matches 485 /// more than one type. 486 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 487 488 const OptionalFlag &hasThousandsGrouping() const { 489 return HasThousandsGrouping; 490 } 491 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 492 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 493 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 494 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 495 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 496 bool usesPositionalArg() const { return UsesPositionalArg; } 497 498 /// Changes the specifier and length according to a QualType, retaining any 499 /// flags or options. Returns true on success, or false when a conversion 500 /// was not successful. 501 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 502 bool IsObjCLiteral); 503 504 void toString(raw_ostream &os) const; 505 506 // Validation methods - to check if any element results in undefined behavior 507 bool hasValidPlusPrefix() const; 508 bool hasValidAlternativeForm() const; 509 bool hasValidLeadingZeros() const; 510 bool hasValidSpacePrefix() const; 511 bool hasValidLeftJustified() const; 512 bool hasValidThousandsGroupingPrefix() const; 513 514 bool hasValidPrecision() const; 515 bool hasValidFieldWidth() const; 516}; 517} // end analyze_printf namespace 518 519//===----------------------------------------------------------------------===// 520/// Pieces specific to fscanf format strings. 521 522namespace analyze_scanf { 523 524class ScanfConversionSpecifier : 525 public analyze_format_string::ConversionSpecifier { 526public: 527 ScanfConversionSpecifier() 528 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 529 530 ScanfConversionSpecifier(const char *pos, Kind k) 531 : ConversionSpecifier(false, pos, k) {} 532 533 void setEndScanList(const char *pos) { EndScanList = pos; } 534 535 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 536 return !CS->isPrintfKind(); 537 } 538}; 539 540using analyze_format_string::ArgType; 541using analyze_format_string::LengthModifier; 542using analyze_format_string::OptionalAmount; 543using analyze_format_string::OptionalFlag; 544 545class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 546 OptionalFlag SuppressAssignment; // '*' 547public: 548 ScanfSpecifier() : 549 FormatSpecifier(/* isPrintf = */ false), 550 SuppressAssignment("*") {} 551 552 void setSuppressAssignment(const char *position) { 553 SuppressAssignment = true; 554 SuppressAssignment.setPosition(position); 555 } 556 557 const OptionalFlag &getSuppressAssignment() const { 558 return SuppressAssignment; 559 } 560 561 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 562 CS = cs; 563 } 564 565 const ScanfConversionSpecifier &getConversionSpecifier() const { 566 return cast<ScanfConversionSpecifier>(CS); 567 } 568 569 bool consumesDataArgument() const { 570 return CS.consumesDataArgument() && !SuppressAssignment; 571 } 572 573 ArgType getArgType(ASTContext &Ctx) const; 574 575 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 576 577 void toString(raw_ostream &os) const; 578 579 static ScanfSpecifier Parse(const char *beg, const char *end); 580}; 581 582} // end analyze_scanf namespace 583 584//===----------------------------------------------------------------------===// 585// Parsing and processing of format strings (both fprintf and fscanf). 586 587namespace analyze_format_string { 588 589enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 590 591class FormatStringHandler { 592public: 593 FormatStringHandler() {} 594 virtual ~FormatStringHandler(); 595 596 virtual void HandleNullChar(const char *nullCharacter) {} 597 598 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 599 600 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 601 PositionContext p) {} 602 603 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 604 605 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 606 unsigned specifierLen) {} 607 608 // Printf-specific handlers. 609 610 virtual bool HandleInvalidPrintfConversionSpecifier( 611 const analyze_printf::PrintfSpecifier &FS, 612 const char *startSpecifier, 613 unsigned specifierLen) { 614 return true; 615 } 616 617 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 618 const char *startSpecifier, 619 unsigned specifierLen) { 620 return true; 621 } 622 623 // Scanf-specific handlers. 624 625 virtual bool HandleInvalidScanfConversionSpecifier( 626 const analyze_scanf::ScanfSpecifier &FS, 627 const char *startSpecifier, 628 unsigned specifierLen) { 629 return true; 630 } 631 632 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 633 const char *startSpecifier, 634 unsigned specifierLen) { 635 return true; 636 } 637 638 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 639}; 640 641bool ParsePrintfString(FormatStringHandler &H, 642 const char *beg, const char *end, const LangOptions &LO, 643 const TargetInfo &Target); 644 645bool ParseScanfString(FormatStringHandler &H, 646 const char *beg, const char *end, const LangOptions &LO, 647 const TargetInfo &Target); 648 649} // end analyze_format_string namespace 650} // end clang namespace 651#endif 652