// FormatString.h revision bbb6bb4952b77e57b842b4d3096848123ae690e7
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26class TargetInfo; 27 28//===----------------------------------------------------------------------===// 29/// Common components of both fprintf and fscanf format strings. 30namespace analyze_format_string { 31 32/// Class representing optional flags with location and representation 33/// information. 34class OptionalFlag { 35public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 this->position = position; 44 } 45 const char *getPosition() const { 46 assert(position); 47 return position; 48 } 49 const char *toString() const { return representation; } 50 51 // Overloaded operators for bool like qualities 52 operator bool() const { return flag; } 53 OptionalFlag& operator=(const bool &rhs) { 54 flag = rhs; 55 return *this; // Return a reference to myself. 56 } 57private: 58 const char *representation; 59 const char *position; 60 bool flag; 61}; 62 63/// Represents the length modifier in a format string in scanf/printf. 
64class LengthModifier { 65public: 66 enum Kind { 67 None, 68 AsChar, // 'hh' 69 AsShort, // 'h' 70 AsLong, // 'l' 71 AsLongLong, // 'll' 72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 73 AsIntMax, // 'j' 74 AsSizeT, // 'z' 75 AsPtrDiff, // 't' 76 AsLongDouble, // 'L' 77 AsAllocate, // for '%as', GNU extension to C90 scanf 78 AsMAllocate, // for '%ms', GNU extension to scanf 79 AsWideChar = AsLong // for '%ls', only makes sense for printf 80 }; 81 82 LengthModifier() 83 : Position(0), kind(None) {} 84 LengthModifier(const char *pos, Kind k) 85 : Position(pos), kind(k) {} 86 87 const char *getStart() const { 88 return Position; 89 } 90 91 unsigned getLength() const { 92 switch (kind) { 93 default: 94 return 1; 95 case AsLongLong: 96 case AsChar: 97 return 2; 98 case None: 99 return 0; 100 } 101 } 102 103 Kind getKind() const { return kind; } 104 void setKind(Kind k) { kind = k; } 105 106 const char *toString() const; 107 108private: 109 const char *Position; 110 Kind kind; 111}; 112 113class ConversionSpecifier { 114public: 115 enum Kind { 116 InvalidSpecifier = 0, 117 // C99 conversion specifiers. 118 cArg, 119 dArg, 120 iArg, 121 IntArgBeg = cArg, IntArgEnd = iArg, 122 123 oArg, 124 uArg, 125 xArg, 126 XArg, 127 UIntArgBeg = oArg, UIntArgEnd = XArg, 128 129 fArg, 130 FArg, 131 eArg, 132 EArg, 133 gArg, 134 GArg, 135 aArg, 136 AArg, 137 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 138 139 sArg, 140 pArg, 141 nArg, 142 PercentArg, 143 CArg, 144 SArg, 145 146 // ** Printf-specific ** 147 148 // Objective-C specific specifiers. 149 ObjCObjArg, // '@' 150 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 151 152 // GlibC specific specifiers. 
153 PrintErrno, // 'm' 154 155 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 156 157 // ** Scanf-specific ** 158 ScanListArg, // '[' 159 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 160 }; 161 162 ConversionSpecifier(bool isPrintf) 163 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 164 165 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 166 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 167 168 const char *getStart() const { 169 return Position; 170 } 171 172 StringRef getCharacters() const { 173 return StringRef(getStart(), getLength()); 174 } 175 176 bool consumesDataArgument() const { 177 switch (kind) { 178 case PrintErrno: 179 assert(IsPrintf); 180 return false; 181 case PercentArg: 182 return false; 183 default: 184 return true; 185 } 186 } 187 188 Kind getKind() const { return kind; } 189 void setKind(Kind k) { kind = k; } 190 unsigned getLength() const { 191 return EndScanList ? EndScanList - Position : 1; 192 } 193 194 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 195 const char *toString() const; 196 197 bool isPrintfKind() const { return IsPrintf; } 198 199protected: 200 bool IsPrintf; 201 const char *Position; 202 const char *EndScanList; 203 Kind kind; 204}; 205 206class ArgType { 207public: 208 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 209 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 210private: 211 const Kind K; 212 QualType T; 213 const char *Name; 214 bool Ptr; 215public: 216 ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {} 217 ArgType(QualType t, const char *n = 0) 218 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 219 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {} 220 221 static ArgType Invalid() { return ArgType(InvalidTy); } 222 bool isValid() const { return K != InvalidTy; } 223 224 /// Create an ArgType which corresponds to the type pointer to A. 
225 static ArgType PtrTo(const ArgType& A) { 226 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 227 ArgType Res = A; 228 Res.Ptr = true; 229 return Res; 230 } 231 232 bool matchesType(ASTContext &C, QualType argTy) const; 233 234 QualType getRepresentativeType(ASTContext &C) const; 235 236 std::string getRepresentativeTypeName(ASTContext &C) const; 237}; 238 239class OptionalAmount { 240public: 241 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 242 243 OptionalAmount(HowSpecified howSpecified, 244 unsigned amount, 245 const char *amountStart, 246 unsigned amountLength, 247 bool usesPositionalArg) 248 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 249 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 250 251 OptionalAmount(bool valid = true) 252 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 253 UsesPositionalArg(0), UsesDotPrefix(0) {} 254 255 bool isInvalid() const { 256 return hs == Invalid; 257 } 258 259 HowSpecified getHowSpecified() const { return hs; } 260 void setHowSpecified(HowSpecified h) { hs = h; } 261 262 bool hasDataArgument() const { return hs == Arg; } 263 264 unsigned getArgIndex() const { 265 assert(hasDataArgument()); 266 return amt; 267 } 268 269 unsigned getConstantAmount() const { 270 assert(hs == Constant); 271 return amt; 272 } 273 274 const char *getStart() const { 275 // We include the . character if it is given. 
276 return start - UsesDotPrefix; 277 } 278 279 unsigned getConstantLength() const { 280 assert(hs == Constant); 281 return length + UsesDotPrefix; 282 } 283 284 ArgType getArgType(ASTContext &Ctx) const; 285 286 void toString(raw_ostream &os) const; 287 288 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 289 unsigned getPositionalArgIndex() const { 290 assert(hasDataArgument()); 291 return amt + 1; 292 } 293 294 bool usesDotPrefix() const { return UsesDotPrefix; } 295 void setUsesDotPrefix() { UsesDotPrefix = true; } 296 297private: 298 const char *start; 299 unsigned length; 300 HowSpecified hs; 301 unsigned amt; 302 bool UsesPositionalArg : 1; 303 bool UsesDotPrefix; 304}; 305 306 307class FormatSpecifier { 308protected: 309 LengthModifier LM; 310 OptionalAmount FieldWidth; 311 ConversionSpecifier CS; 312 /// Positional arguments, an IEEE extension: 313 /// IEEE Std 1003.1, 2004 Edition 314 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 315 bool UsesPositionalArg; 316 unsigned argIndex; 317public: 318 FormatSpecifier(bool isPrintf) 319 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 320 321 void setLengthModifier(LengthModifier lm) { 322 LM = lm; 323 } 324 325 void setUsesPositionalArg() { UsesPositionalArg = true; } 326 327 void setArgIndex(unsigned i) { 328 argIndex = i; 329 } 330 331 unsigned getArgIndex() const { 332 return argIndex; 333 } 334 335 unsigned getPositionalArgIndex() const { 336 return argIndex + 1; 337 } 338 339 const LengthModifier &getLengthModifier() const { 340 return LM; 341 } 342 343 const OptionalAmount &getFieldWidth() const { 344 return FieldWidth; 345 } 346 347 void setFieldWidth(const OptionalAmount &Amt) { 348 FieldWidth = Amt; 349 } 350 351 bool usesPositionalArg() const { return UsesPositionalArg; } 352 353 bool hasValidLengthModifier(const TargetInfo &Target) const; 354 355 bool hasStandardLengthModifier() const; 356 357 llvm::Optional<LengthModifier> 
getCorrectedLengthModifier() const; 358 359 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 360 361 bool hasStandardLengthConversionCombination() const; 362 363 /// For a TypedefType QT, if it is a named integer type such as size_t, 364 /// assign the appropriate value to LM and return true. 365 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 366}; 367 368} // end analyze_format_string namespace 369 370//===----------------------------------------------------------------------===// 371/// Pieces specific to fprintf format strings. 372 373namespace analyze_printf { 374 375class PrintfConversionSpecifier : 376 public analyze_format_string::ConversionSpecifier { 377public: 378 PrintfConversionSpecifier() 379 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 380 381 PrintfConversionSpecifier(const char *pos, Kind k) 382 : ConversionSpecifier(true, pos, k) {} 383 384 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 385 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 386 bool isDoubleArg() const { return kind >= DoubleArgBeg && 387 kind <= DoubleArgEnd; } 388 unsigned getLength() const { 389 // Conversion specifiers currently only are represented by 390 // single characters, but we be flexible. 391 return 1; 392 } 393 394 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 395 return CS->isPrintfKind(); 396 } 397}; 398 399using analyze_format_string::ArgType; 400using analyze_format_string::LengthModifier; 401using analyze_format_string::OptionalAmount; 402using analyze_format_string::OptionalFlag; 403 404class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 405 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 
406 OptionalFlag IsLeftJustified; // '-' 407 OptionalFlag HasPlusPrefix; // '+' 408 OptionalFlag HasSpacePrefix; // ' ' 409 OptionalFlag HasAlternativeForm; // '#' 410 OptionalFlag HasLeadingZeroes; // '0' 411 OptionalAmount Precision; 412public: 413 PrintfSpecifier() : 414 FormatSpecifier(/* isPrintf = */ true), 415 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 416 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 417 418 static PrintfSpecifier Parse(const char *beg, const char *end); 419 420 // Methods for incrementally constructing the PrintfSpecifier. 421 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 422 CS = cs; 423 } 424 void setHasThousandsGrouping(const char *position) { 425 HasThousandsGrouping = true; 426 HasThousandsGrouping.setPosition(position); 427 } 428 void setIsLeftJustified(const char *position) { 429 IsLeftJustified = true; 430 IsLeftJustified.setPosition(position); 431 } 432 void setHasPlusPrefix(const char *position) { 433 HasPlusPrefix = true; 434 HasPlusPrefix.setPosition(position); 435 } 436 void setHasSpacePrefix(const char *position) { 437 HasSpacePrefix = true; 438 HasSpacePrefix.setPosition(position); 439 } 440 void setHasAlternativeForm(const char *position) { 441 HasAlternativeForm = true; 442 HasAlternativeForm.setPosition(position); 443 } 444 void setHasLeadingZeros(const char *position) { 445 HasLeadingZeroes = true; 446 HasLeadingZeroes.setPosition(position); 447 } 448 void setUsesPositionalArg() { UsesPositionalArg = true; } 449 450 // Methods for querying the format specifier. 
451 452 const PrintfConversionSpecifier &getConversionSpecifier() const { 453 return cast<PrintfConversionSpecifier>(CS); 454 } 455 456 void setPrecision(const OptionalAmount &Amt) { 457 Precision = Amt; 458 Precision.setUsesDotPrefix(); 459 } 460 461 const OptionalAmount &getPrecision() const { 462 return Precision; 463 } 464 465 bool consumesDataArgument() const { 466 return getConversionSpecifier().consumesDataArgument(); 467 } 468 469 /// \brief Returns the builtin type that a data argument 470 /// paired with this format specifier should have. This method 471 /// will return null if the format specifier does not have 472 /// a matching data argument or the matching argument matches 473 /// more than one type. 474 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 475 476 const OptionalFlag &hasThousandsGrouping() const { 477 return HasThousandsGrouping; 478 } 479 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 480 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 481 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 482 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 483 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 484 bool usesPositionalArg() const { return UsesPositionalArg; } 485 486 /// Changes the specifier and length according to a QualType, retaining any 487 /// flags or options. Returns true on success, or false when a conversion 488 /// was not successful. 
489 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 490 bool IsObjCLiteral); 491 492 void toString(raw_ostream &os) const; 493 494 // Validation methods - to check if any element results in undefined behavior 495 bool hasValidPlusPrefix() const; 496 bool hasValidAlternativeForm() const; 497 bool hasValidLeadingZeros() const; 498 bool hasValidSpacePrefix() const; 499 bool hasValidLeftJustified() const; 500 bool hasValidThousandsGroupingPrefix() const; 501 502 bool hasValidPrecision() const; 503 bool hasValidFieldWidth() const; 504}; 505} // end analyze_printf namespace 506 507//===----------------------------------------------------------------------===// 508/// Pieces specific to fscanf format strings. 509 510namespace analyze_scanf { 511 512class ScanfConversionSpecifier : 513 public analyze_format_string::ConversionSpecifier { 514public: 515 ScanfConversionSpecifier() 516 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 517 518 ScanfConversionSpecifier(const char *pos, Kind k) 519 : ConversionSpecifier(false, pos, k) {} 520 521 void setEndScanList(const char *pos) { EndScanList = pos; } 522 523 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 524 return !CS->isPrintfKind(); 525 } 526}; 527 528using analyze_format_string::ArgType; 529using analyze_format_string::LengthModifier; 530using analyze_format_string::OptionalAmount; 531using analyze_format_string::OptionalFlag; 532 533class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 534 OptionalFlag SuppressAssignment; // '*' 535public: 536 ScanfSpecifier() : 537 FormatSpecifier(/* isPrintf = */ false), 538 SuppressAssignment("*") {} 539 540 void setSuppressAssignment(const char *position) { 541 SuppressAssignment = true; 542 SuppressAssignment.setPosition(position); 543 } 544 545 const OptionalFlag &getSuppressAssignment() const { 546 return SuppressAssignment; 547 } 548 549 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 550 CS 
= cs; 551 } 552 553 const ScanfConversionSpecifier &getConversionSpecifier() const { 554 return cast<ScanfConversionSpecifier>(CS); 555 } 556 557 bool consumesDataArgument() const { 558 return CS.consumesDataArgument() && !SuppressAssignment; 559 } 560 561 ArgType getArgType(ASTContext &Ctx) const; 562 563 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 564 565 void toString(raw_ostream &os) const; 566 567 static ScanfSpecifier Parse(const char *beg, const char *end); 568}; 569 570} // end analyze_scanf namespace 571 572//===----------------------------------------------------------------------===// 573// Parsing and processing of format strings (both fprintf and fscanf). 574 575namespace analyze_format_string { 576 577enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 578 579class FormatStringHandler { 580public: 581 FormatStringHandler() {} 582 virtual ~FormatStringHandler(); 583 584 virtual void HandleNullChar(const char *nullCharacter) {} 585 586 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 587 588 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 589 PositionContext p) {} 590 591 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 592 593 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 594 unsigned specifierLen) {} 595 596 // Printf-specific handlers. 597 598 virtual bool HandleInvalidPrintfConversionSpecifier( 599 const analyze_printf::PrintfSpecifier &FS, 600 const char *startSpecifier, 601 unsigned specifierLen) { 602 return true; 603 } 604 605 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 606 const char *startSpecifier, 607 unsigned specifierLen) { 608 return true; 609 } 610 611 // Scanf-specific handlers. 
612 613 virtual bool HandleInvalidScanfConversionSpecifier( 614 const analyze_scanf::ScanfSpecifier &FS, 615 const char *startSpecifier, 616 unsigned specifierLen) { 617 return true; 618 } 619 620 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 621 const char *startSpecifier, 622 unsigned specifierLen) { 623 return true; 624 } 625 626 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 627}; 628 629bool ParsePrintfString(FormatStringHandler &H, 630 const char *beg, const char *end, const LangOptions &LO); 631 632bool ParseScanfString(FormatStringHandler &H, 633 const char *beg, const char *end, const LangOptions &LO); 634 635} // end analyze_format_string namespace 636} // end clang namespace 637#endif 638