1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26class TargetInfo; 27 28//===----------------------------------------------------------------------===// 29/// Common components of both fprintf and fscanf format strings. 30namespace analyze_format_string { 31 32/// Class representing optional flags with location and representation 33/// information. 34class OptionalFlag { 35public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 this->position = position; 44 } 45 const char *getPosition() const { 46 assert(position); 47 return position; 48 } 49 const char *toString() const { return representation; } 50 51 // Overloaded operators for bool like qualities 52 operator bool() const { return flag; } 53 OptionalFlag& operator=(const bool &rhs) { 54 flag = rhs; 55 return *this; // Return a reference to myself. 56 } 57private: 58 const char *representation; 59 const char *position; 60 bool flag; 61}; 62 63/// Represents the length modifier in a format string in scanf/printf. 64class LengthModifier { 65public: 66 enum Kind { 67 None, 68 AsChar, // 'hh' 69 AsShort, // 'h' 70 AsLong, // 'l' 71 AsLongLong, // 'll' 72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 73 AsIntMax, // 'j' 74 AsSizeT, // 'z' 75 AsPtrDiff, // 't' 76 AsLongDouble, // 'L' 77 AsAllocate, // for '%as', GNU extension to C90 scanf 78 AsMAllocate, // for '%ms', GNU extension to scanf 79 AsWideChar = AsLong // for '%ls', only makes sense for printf 80 }; 81 82 LengthModifier() 83 : Position(0), kind(None) {} 84 LengthModifier(const char *pos, Kind k) 85 : Position(pos), kind(k) {} 86 87 const char *getStart() const { 88 return Position; 89 } 90 91 unsigned getLength() const { 92 switch (kind) { 93 default: 94 return 1; 95 case AsLongLong: 96 case AsChar: 97 return 2; 98 case None: 99 return 0; 100 } 101 } 102 103 Kind getKind() const { return kind; } 104 void setKind(Kind k) { kind = k; } 105 106 const char *toString() const; 107 108private: 109 const char *Position; 110 Kind kind; 111}; 112 113class ConversionSpecifier { 114public: 115 enum Kind { 116 InvalidSpecifier = 0, 117 // C99 conversion specifiers. 118 cArg, 119 dArg, 120 iArg, 121 IntArgBeg = dArg, IntArgEnd = iArg, 122 123 oArg, 124 uArg, 125 xArg, 126 XArg, 127 UIntArgBeg = oArg, UIntArgEnd = XArg, 128 129 fArg, 130 FArg, 131 eArg, 132 EArg, 133 gArg, 134 GArg, 135 aArg, 136 AArg, 137 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 138 139 sArg, 140 pArg, 141 nArg, 142 PercentArg, 143 CArg, 144 SArg, 145 146 // ** Printf-specific ** 147 148 // Objective-C specific specifiers. 149 ObjCObjArg, // '@' 150 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 151 152 // GlibC specific specifiers. 153 PrintErrno, // 'm' 154 155 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 156 157 // ** Scanf-specific ** 158 ScanListArg, // '[' 159 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 160 }; 161 162 ConversionSpecifier(bool isPrintf) 163 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 164 165 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 166 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 167 168 const char *getStart() const { 169 return Position; 170 } 171 172 StringRef getCharacters() const { 173 return StringRef(getStart(), getLength()); 174 } 175 176 bool consumesDataArgument() const { 177 switch (kind) { 178 case PrintErrno: 179 assert(IsPrintf); 180 return false; 181 case PercentArg: 182 return false; 183 default: 184 return true; 185 } 186 } 187 188 Kind getKind() const { return kind; } 189 void setKind(Kind k) { kind = k; } 190 unsigned getLength() const { 191 return EndScanList ? EndScanList - Position : 1; 192 } 193 194 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 195 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 196 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 197 const char *toString() const; 198 199 bool isPrintfKind() const { return IsPrintf; } 200 201protected: 202 bool IsPrintf; 203 const char *Position; 204 const char *EndScanList; 205 Kind kind; 206}; 207 208class ArgType { 209public: 210 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 211 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 212private: 213 const Kind K; 214 QualType T; 215 const char *Name; 216 bool Ptr; 217public: 218 ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {} 219 ArgType(QualType t, const char *n = 0) 220 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 221 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {} 222 223 static ArgType Invalid() { return ArgType(InvalidTy); } 224 bool isValid() const { return K != InvalidTy; } 225 226 /// Create an ArgType which corresponds to the type pointer to A. 227 static ArgType PtrTo(const ArgType& A) { 228 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 229 ArgType Res = A; 230 Res.Ptr = true; 231 return Res; 232 } 233 234 bool matchesType(ASTContext &C, QualType argTy) const; 235 236 QualType getRepresentativeType(ASTContext &C) const; 237 238 std::string getRepresentativeTypeName(ASTContext &C) const; 239}; 240 241class OptionalAmount { 242public: 243 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 244 245 OptionalAmount(HowSpecified howSpecified, 246 unsigned amount, 247 const char *amountStart, 248 unsigned amountLength, 249 bool usesPositionalArg) 250 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 251 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 252 253 OptionalAmount(bool valid = true) 254 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 255 UsesPositionalArg(0), UsesDotPrefix(0) {} 256 257 bool isInvalid() const { 258 return hs == Invalid; 259 } 260 261 HowSpecified getHowSpecified() const { return hs; } 262 void setHowSpecified(HowSpecified h) { hs = h; } 263 264 bool hasDataArgument() const { return hs == Arg; } 265 266 unsigned getArgIndex() const { 267 assert(hasDataArgument()); 268 return amt; 269 } 270 271 unsigned getConstantAmount() const { 272 assert(hs == Constant); 273 return amt; 274 } 275 276 const char *getStart() const { 277 // We include the . character if it is given. 278 return start - UsesDotPrefix; 279 } 280 281 unsigned getConstantLength() const { 282 assert(hs == Constant); 283 return length + UsesDotPrefix; 284 } 285 286 ArgType getArgType(ASTContext &Ctx) const; 287 288 void toString(raw_ostream &os) const; 289 290 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 291 unsigned getPositionalArgIndex() const { 292 assert(hasDataArgument()); 293 return amt + 1; 294 } 295 296 bool usesDotPrefix() const { return UsesDotPrefix; } 297 void setUsesDotPrefix() { UsesDotPrefix = true; } 298 299private: 300 const char *start; 301 unsigned length; 302 HowSpecified hs; 303 unsigned amt; 304 bool UsesPositionalArg : 1; 305 bool UsesDotPrefix; 306}; 307 308 309class FormatSpecifier { 310protected: 311 LengthModifier LM; 312 OptionalAmount FieldWidth; 313 ConversionSpecifier CS; 314 /// Positional arguments, an IEEE extension: 315 /// IEEE Std 1003.1, 2004 Edition 316 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 317 bool UsesPositionalArg; 318 unsigned argIndex; 319public: 320 FormatSpecifier(bool isPrintf) 321 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 322 323 void setLengthModifier(LengthModifier lm) { 324 LM = lm; 325 } 326 327 void setUsesPositionalArg() { UsesPositionalArg = true; } 328 329 void setArgIndex(unsigned i) { 330 argIndex = i; 331 } 332 333 unsigned getArgIndex() const { 334 return argIndex; 335 } 336 337 unsigned getPositionalArgIndex() const { 338 return argIndex + 1; 339 } 340 341 const LengthModifier &getLengthModifier() const { 342 return LM; 343 } 344 345 const OptionalAmount &getFieldWidth() const { 346 return FieldWidth; 347 } 348 349 void setFieldWidth(const OptionalAmount &Amt) { 350 FieldWidth = Amt; 351 } 352 353 bool usesPositionalArg() const { return UsesPositionalArg; } 354 355 bool hasValidLengthModifier(const TargetInfo &Target) const; 356 357 bool hasStandardLengthModifier() const; 358 359 llvm::Optional<LengthModifier> getCorrectedLengthModifier() const; 360 361 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 362 363 bool hasStandardLengthConversionCombination() const; 364 365 /// For a TypedefType QT, if it is a named integer type such as size_t, 366 /// assign the appropriate value to LM and return true. 367 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 368}; 369 370} // end analyze_format_string namespace 371 372//===----------------------------------------------------------------------===// 373/// Pieces specific to fprintf format strings. 374 375namespace analyze_printf { 376 377class PrintfConversionSpecifier : 378 public analyze_format_string::ConversionSpecifier { 379public: 380 PrintfConversionSpecifier() 381 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 382 383 PrintfConversionSpecifier(const char *pos, Kind k) 384 : ConversionSpecifier(true, pos, k) {} 385 386 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 387 bool isDoubleArg() const { return kind >= DoubleArgBeg && 388 kind <= DoubleArgEnd; } 389 unsigned getLength() const { 390 // Conversion specifiers currently only are represented by 391 // single characters, but we be flexible. 392 return 1; 393 } 394 395 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 396 return CS->isPrintfKind(); 397 } 398}; 399 400using analyze_format_string::ArgType; 401using analyze_format_string::LengthModifier; 402using analyze_format_string::OptionalAmount; 403using analyze_format_string::OptionalFlag; 404 405class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 406 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 407 OptionalFlag IsLeftJustified; // '-' 408 OptionalFlag HasPlusPrefix; // '+' 409 OptionalFlag HasSpacePrefix; // ' ' 410 OptionalFlag HasAlternativeForm; // '#' 411 OptionalFlag HasLeadingZeroes; // '0' 412 OptionalAmount Precision; 413public: 414 PrintfSpecifier() : 415 FormatSpecifier(/* isPrintf = */ true), 416 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 417 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 418 419 static PrintfSpecifier Parse(const char *beg, const char *end); 420 421 // Methods for incrementally constructing the PrintfSpecifier. 422 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 423 CS = cs; 424 } 425 void setHasThousandsGrouping(const char *position) { 426 HasThousandsGrouping = true; 427 HasThousandsGrouping.setPosition(position); 428 } 429 void setIsLeftJustified(const char *position) { 430 IsLeftJustified = true; 431 IsLeftJustified.setPosition(position); 432 } 433 void setHasPlusPrefix(const char *position) { 434 HasPlusPrefix = true; 435 HasPlusPrefix.setPosition(position); 436 } 437 void setHasSpacePrefix(const char *position) { 438 HasSpacePrefix = true; 439 HasSpacePrefix.setPosition(position); 440 } 441 void setHasAlternativeForm(const char *position) { 442 HasAlternativeForm = true; 443 HasAlternativeForm.setPosition(position); 444 } 445 void setHasLeadingZeros(const char *position) { 446 HasLeadingZeroes = true; 447 HasLeadingZeroes.setPosition(position); 448 } 449 void setUsesPositionalArg() { UsesPositionalArg = true; } 450 451 // Methods for querying the format specifier. 452 453 const PrintfConversionSpecifier &getConversionSpecifier() const { 454 return cast<PrintfConversionSpecifier>(CS); 455 } 456 457 void setPrecision(const OptionalAmount &Amt) { 458 Precision = Amt; 459 Precision.setUsesDotPrefix(); 460 } 461 462 const OptionalAmount &getPrecision() const { 463 return Precision; 464 } 465 466 bool consumesDataArgument() const { 467 return getConversionSpecifier().consumesDataArgument(); 468 } 469 470 /// \brief Returns the builtin type that a data argument 471 /// paired with this format specifier should have. This method 472 /// will return null if the format specifier does not have 473 /// a matching data argument or the matching argument matches 474 /// more than one type. 475 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 476 477 const OptionalFlag &hasThousandsGrouping() const { 478 return HasThousandsGrouping; 479 } 480 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 481 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 482 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 483 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 484 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 485 bool usesPositionalArg() const { return UsesPositionalArg; } 486 487 /// Changes the specifier and length according to a QualType, retaining any 488 /// flags or options. Returns true on success, or false when a conversion 489 /// was not successful. 490 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 491 bool IsObjCLiteral); 492 493 void toString(raw_ostream &os) const; 494 495 // Validation methods - to check if any element results in undefined behavior 496 bool hasValidPlusPrefix() const; 497 bool hasValidAlternativeForm() const; 498 bool hasValidLeadingZeros() const; 499 bool hasValidSpacePrefix() const; 500 bool hasValidLeftJustified() const; 501 bool hasValidThousandsGroupingPrefix() const; 502 503 bool hasValidPrecision() const; 504 bool hasValidFieldWidth() const; 505}; 506} // end analyze_printf namespace 507 508//===----------------------------------------------------------------------===// 509/// Pieces specific to fscanf format strings. 510 511namespace analyze_scanf { 512 513class ScanfConversionSpecifier : 514 public analyze_format_string::ConversionSpecifier { 515public: 516 ScanfConversionSpecifier() 517 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 518 519 ScanfConversionSpecifier(const char *pos, Kind k) 520 : ConversionSpecifier(false, pos, k) {} 521 522 void setEndScanList(const char *pos) { EndScanList = pos; } 523 524 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 525 return !CS->isPrintfKind(); 526 } 527}; 528 529using analyze_format_string::ArgType; 530using analyze_format_string::LengthModifier; 531using analyze_format_string::OptionalAmount; 532using analyze_format_string::OptionalFlag; 533 534class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 535 OptionalFlag SuppressAssignment; // '*' 536public: 537 ScanfSpecifier() : 538 FormatSpecifier(/* isPrintf = */ false), 539 SuppressAssignment("*") {} 540 541 void setSuppressAssignment(const char *position) { 542 SuppressAssignment = true; 543 SuppressAssignment.setPosition(position); 544 } 545 546 const OptionalFlag &getSuppressAssignment() const { 547 return SuppressAssignment; 548 } 549 550 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 551 CS = cs; 552 } 553 554 const ScanfConversionSpecifier &getConversionSpecifier() const { 555 return cast<ScanfConversionSpecifier>(CS); 556 } 557 558 bool consumesDataArgument() const { 559 return CS.consumesDataArgument() && !SuppressAssignment; 560 } 561 562 ArgType getArgType(ASTContext &Ctx) const; 563 564 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 565 566 void toString(raw_ostream &os) const; 567 568 static ScanfSpecifier Parse(const char *beg, const char *end); 569}; 570 571} // end analyze_scanf namespace 572 573//===----------------------------------------------------------------------===// 574// Parsing and processing of format strings (both fprintf and fscanf). 575 576namespace analyze_format_string { 577 578enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 579 580class FormatStringHandler { 581public: 582 FormatStringHandler() {} 583 virtual ~FormatStringHandler(); 584 585 virtual void HandleNullChar(const char *nullCharacter) {} 586 587 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 588 589 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 590 PositionContext p) {} 591 592 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 593 594 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 595 unsigned specifierLen) {} 596 597 // Printf-specific handlers. 598 599 virtual bool HandleInvalidPrintfConversionSpecifier( 600 const analyze_printf::PrintfSpecifier &FS, 601 const char *startSpecifier, 602 unsigned specifierLen) { 603 return true; 604 } 605 606 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 607 const char *startSpecifier, 608 unsigned specifierLen) { 609 return true; 610 } 611 612 // Scanf-specific handlers. 613 614 virtual bool HandleInvalidScanfConversionSpecifier( 615 const analyze_scanf::ScanfSpecifier &FS, 616 const char *startSpecifier, 617 unsigned specifierLen) { 618 return true; 619 } 620 621 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 622 const char *startSpecifier, 623 unsigned specifierLen) { 624 return true; 625 } 626 627 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 628}; 629 630bool ParsePrintfString(FormatStringHandler &H, 631 const char *beg, const char *end, const LangOptions &LO); 632 633bool ParseScanfString(FormatStringHandler &H, 634 const char *beg, const char *end, const LangOptions &LO); 635 636} // end analyze_format_string namespace 637} // end clang namespace 638#endif 639