// FormatString.h revision 35d353b47bce29200b910371dd9b8ba7f3058ab8
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 
62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll', 'q' (BSD, deprecated) 70 AsIntMax, // 'j' 71 AsSizeT, // 'z' 72 AsPtrDiff, // 't' 73 AsLongDouble, // 'L' 74 AsWideChar = AsLong // for '%ls', only makes sense for printf 75 }; 76 77 LengthModifier() 78 : Position(0), kind(None) {} 79 LengthModifier(const char *pos, Kind k) 80 : Position(pos), kind(k) {} 81 82 const char *getStart() const { 83 return Position; 84 } 85 86 unsigned getLength() const { 87 switch (kind) { 88 default: 89 return 1; 90 case AsLongLong: 91 case AsChar: 92 return 2; 93 case None: 94 return 0; 95 } 96 } 97 98 Kind getKind() const { return kind; } 99 void setKind(Kind k) { kind = k; } 100 101 const char *toString() const; 102 103private: 104 const char *Position; 105 Kind kind; 106}; 107 108class ArgTypeResult { 109public: 110 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 111 CStrTy, WCStrTy }; 112private: 113 const Kind K; 114 QualType T; 115 ArgTypeResult(bool) : K(InvalidTy) {} 116public: 117 ArgTypeResult(Kind k = UnknownTy) : K(k) {} 118 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {} 119 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {} 120 121 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 122 123 bool isValid() const { return K != InvalidTy; } 124 125 const QualType *getSpecificType() const { 126 return K == SpecificTy ? 
&T : 0; 127 } 128 129 bool matchesType(ASTContext &C, QualType argTy) const; 130 131 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 132 133 QualType getRepresentativeType(ASTContext &C) const; 134}; 135 136class OptionalAmount { 137public: 138 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 139 140 OptionalAmount(HowSpecified howSpecified, 141 unsigned amount, 142 const char *amountStart, 143 unsigned amountLength, 144 bool usesPositionalArg) 145 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 146 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 147 148 OptionalAmount(bool valid = true) 149 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 150 UsesPositionalArg(0), UsesDotPrefix(0) {} 151 152 bool isInvalid() const { 153 return hs == Invalid; 154 } 155 156 HowSpecified getHowSpecified() const { return hs; } 157 void setHowSpecified(HowSpecified h) { hs = h; } 158 159 bool hasDataArgument() const { return hs == Arg; } 160 161 unsigned getArgIndex() const { 162 assert(hasDataArgument()); 163 return amt; 164 } 165 166 unsigned getConstantAmount() const { 167 assert(hs == Constant); 168 return amt; 169 } 170 171 const char *getStart() const { 172 // We include the . character if it is given. 
173 return start - UsesDotPrefix; 174 } 175 176 unsigned getConstantLength() const { 177 assert(hs == Constant); 178 return length + UsesDotPrefix; 179 } 180 181 ArgTypeResult getArgType(ASTContext &Ctx) const; 182 183 void toString(llvm::raw_ostream &os) const; 184 185 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 186 unsigned getPositionalArgIndex() const { 187 assert(hasDataArgument()); 188 return amt + 1; 189 } 190 191 bool usesDotPrefix() const { return UsesDotPrefix; } 192 void setUsesDotPrefix() { UsesDotPrefix = true; } 193 194private: 195 const char *start; 196 unsigned length; 197 HowSpecified hs; 198 unsigned amt; 199 bool UsesPositionalArg : 1; 200 bool UsesDotPrefix; 201}; 202 203 204class FormatSpecifier { 205protected: 206 LengthModifier LM; 207 OptionalAmount FieldWidth; 208 /// Positional arguments, an IEEE extension: 209 /// IEEE Std 1003.1, 2004 Edition 210 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 211 bool UsesPositionalArg; 212 unsigned argIndex; 213public: 214 FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {} 215 216 void setLengthModifier(LengthModifier lm) { 217 LM = lm; 218 } 219 220 void setUsesPositionalArg() { UsesPositionalArg = true; } 221 222 void setArgIndex(unsigned i) { 223 argIndex = i; 224 } 225 226 unsigned getArgIndex() const { 227 return argIndex; 228 } 229 230 unsigned getPositionalArgIndex() const { 231 return argIndex + 1; 232 } 233 234 const LengthModifier &getLengthModifier() const { 235 return LM; 236 } 237 238 const OptionalAmount &getFieldWidth() const { 239 return FieldWidth; 240 } 241 242 void setFieldWidth(const OptionalAmount &Amt) { 243 FieldWidth = Amt; 244 } 245 246 bool usesPositionalArg() const { return UsesPositionalArg; } 247}; 248 249} // end analyze_format_string namespace 250 251//===----------------------------------------------------------------------===// 252/// Pieces specific to fprintf format strings. 
253 254namespace analyze_printf { 255 256class ConversionSpecifier { 257public: 258 enum Kind { 259 InvalidSpecifier = 0, 260 // C99 conversion specifiers. 261 dArg, // 'd' 262 cArg, // 'c' 263 iArg, // 'i', 264 oArg, // 'o', 265 uArg, // 'u', 266 xArg, // 'x', 267 XArg, // 'X', 268 fArg, // 'f', 269 FArg, // 'F', 270 eArg, // 'e', 271 EArg, // 'E', 272 gArg, // 'g', 273 GArg, // 'G', 274 aArg, // 'a', 275 AArg, // 'A', 276 sArg, // 's' 277 pArg, // 'p' 278 nArg, // 'n' 279 PercentArg, // '%' 280 // MacOS X unicode extensions. 281 CArg, // 'C' 282 UnicodeStrArg, // 'S' 283 // Objective-C specific specifiers. 284 ObjCObjArg, // '@' 285 // GlibC specific specifiers. 286 PrintErrno, // 'm' 287 // Specifier ranges. 288 IntArgBeg = dArg, 289 IntArgEnd = iArg, 290 UIntArgBeg = oArg, 291 UIntArgEnd = XArg, 292 DoubleArgBeg = fArg, 293 DoubleArgEnd = AArg, 294 C99Beg = IntArgBeg, 295 C99End = DoubleArgEnd, 296 ObjCBeg = ObjCObjArg, 297 ObjCEnd = ObjCObjArg 298 }; 299 300 ConversionSpecifier() 301 : Position(0), kind(InvalidSpecifier) {} 302 303 ConversionSpecifier(const char *pos, Kind k) 304 : Position(pos), kind(k) {} 305 306 const char *getStart() const { 307 return Position; 308 } 309 310 llvm::StringRef getCharacters() const { 311 return llvm::StringRef(getStart(), getLength()); 312 } 313 314 bool consumesDataArgument() const { 315 switch (kind) { 316 case PercentArg: 317 case PrintErrno: 318 return false; 319 default: 320 return true; 321 } 322 } 323 324 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 325 bool isIntArg() const { return kind >= dArg && kind <= iArg; } 326 bool isUIntArg() const { return kind >= oArg && kind <= XArg; } 327 bool isDoubleArg() const { return kind >= fArg && kind <= AArg; } 328 Kind getKind() const { return kind; } 329 void setKind(Kind k) { kind = k; } 330 unsigned getLength() const { 331 // Conversion specifiers currently only are represented by 332 // single characters, but we be flexible. 
333 return 1; 334 } 335 const char *toString() const; 336 337private: 338 const char *Position; 339 Kind kind; 340}; 341 342using analyze_format_string::ArgTypeResult; 343using analyze_format_string::LengthModifier; 344using analyze_format_string::OptionalAmount; 345using analyze_format_string::OptionalFlag; 346 347class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 348 OptionalFlag IsLeftJustified; // '-' 349 OptionalFlag HasPlusPrefix; // '+' 350 OptionalFlag HasSpacePrefix; // ' ' 351 OptionalFlag HasAlternativeForm; // '#' 352 OptionalFlag HasLeadingZeroes; // '0' 353 ConversionSpecifier CS; 354 OptionalAmount Precision; 355public: 356 PrintfSpecifier() : 357 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "), 358 HasAlternativeForm("#"), HasLeadingZeroes("0") {} 359 360 static PrintfSpecifier Parse(const char *beg, const char *end); 361 362 // Methods for incrementally constructing the PrintfSpecifier. 363 void setConversionSpecifier(const ConversionSpecifier &cs) { 364 CS = cs; 365 } 366 void setIsLeftJustified(const char *position) { 367 IsLeftJustified = true; 368 IsLeftJustified.setPosition(position); 369 } 370 void setHasPlusPrefix(const char *position) { 371 HasPlusPrefix = true; 372 HasPlusPrefix.setPosition(position); 373 } 374 void setHasSpacePrefix(const char *position) { 375 HasSpacePrefix = true; 376 HasSpacePrefix.setPosition(position); 377 } 378 void setHasAlternativeForm(const char *position) { 379 HasAlternativeForm = true; 380 HasAlternativeForm.setPosition(position); 381 } 382 void setHasLeadingZeros(const char *position) { 383 HasLeadingZeroes = true; 384 HasLeadingZeroes.setPosition(position); 385 } 386 void setUsesPositionalArg() { UsesPositionalArg = true; } 387 388 // Methods for querying the format specifier. 
389 390 const ConversionSpecifier &getConversionSpecifier() const { 391 return CS; 392 } 393 394 void setPrecision(const OptionalAmount &Amt) { 395 Precision = Amt; 396 Precision.setUsesDotPrefix(); 397 } 398 399 const OptionalAmount &getPrecision() const { 400 return Precision; 401 } 402 403 bool consumesDataArgument() const { 404 return CS.consumesDataArgument(); 405 } 406 407 /// \brief Returns the builtin type that a data argument 408 /// paired with this format specifier should have. This method 409 /// will return null if the format specifier does not have 410 /// a matching data argument or the matching argument matches 411 /// more than one type. 412 ArgTypeResult getArgType(ASTContext &Ctx) const; 413 414 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 415 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 416 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 417 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 418 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 419 bool usesPositionalArg() const { return UsesPositionalArg; } 420 421 /// Changes the specifier and length according to a QualType, retaining any 422 /// flags or options. Returns true on success, or false when a conversion 423 /// was not successful. 
424 bool fixType(QualType QT); 425 426 void toString(llvm::raw_ostream &os) const; 427 428 // Validation methods - to check if any element results in undefined behavior 429 bool hasValidPlusPrefix() const; 430 bool hasValidAlternativeForm() const; 431 bool hasValidLeadingZeros() const; 432 bool hasValidSpacePrefix() const; 433 bool hasValidLeftJustified() const; 434 435 bool hasValidLengthModifier() const; 436 bool hasValidPrecision() const; 437 bool hasValidFieldWidth() const; 438}; 439} // end analyze_printf namespace 440 441//===----------------------------------------------------------------------===// 442/// Pieces specific to fscanf format strings. 443 444namespace analyze_scanf { 445 446class ConversionSpecifier { 447public: 448 enum Kind { 449 InvalidSpecifier = 0, 450 // C99 conversion specifiers. 451 dArg, // 'd' 452 iArg, // 'i', 453 oArg, // 'o', 454 uArg, // 'u', 455 xArg, // 'x', 456 XArg, // 'X', 457 fArg, // 'f', 458 FArg, // 'F', 459 eArg, // 'e', 460 EArg, // 'E', 461 gArg, // 'g', 462 GArg, // 'G', 463 aArg, // 'a', 464 AArg, // 'A', 465 sArg, // 's', // match sequence of non-write-space characters 466 pArg, // 'p' 467 cArg, // 'c', differs from printf, writes array of characters 468 nArg, // 'n', differs from printf, writes back args consumed 469 PercentArg, // '%' 470 ScanListArg, // '[' followed by scan list 471 // IEEE Std 1003.1 extensions. 472 CArg, // 'C', same as writing 'lc' 473 SArg, // 'S', same as writing 'ls' 474 // Specifier ranges. 
475 IntArgBeg = dArg, 476 IntArgEnd = iArg, 477 UIntArgBeg = oArg, 478 UIntArgEnd = XArg, 479 DoubleArgBeg = fArg, 480 DoubleArgEnd = AArg 481 }; 482 483 ConversionSpecifier() 484 : Position(0), EndScanList(0), kind(InvalidSpecifier) {} 485 486 ConversionSpecifier(const char *pos, Kind k) 487 : Position(pos), EndScanList(0), kind(k) {} 488 489 const char *getStart() const { 490 return Position; 491 } 492 493 void setEndScanList(const char *pos) { EndScanList = pos; } 494 495 llvm::StringRef getCharacters() const { 496 return llvm::StringRef(getStart(), getLength()); 497 } 498 499 bool consumesDataArgument() const { 500 return kind != PercentArg; 501 } 502 503 bool isIntArg() const { return kind >= dArg && kind <= iArg; } 504 bool isUIntArg() const { return kind >= oArg && kind <= XArg; } 505 bool isDoubleArg() const { return kind >= fArg && kind <= AArg; } 506 Kind getKind() const { return kind; } 507 void setKind(Kind k) { kind = k; } 508 509 unsigned getLength() const { 510 return EndScanList ? 
EndScanList - Position : 1; 511 } 512 513 const char *toString() const; 514 515private: 516 const char *Position; 517 const char *EndScanList; 518 Kind kind; 519}; 520 521using analyze_format_string::LengthModifier; 522using analyze_format_string::OptionalAmount; 523using analyze_format_string::OptionalFlag; 524 525class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 526 OptionalFlag SuppressAssignment; // '*' 527 ConversionSpecifier CS; 528public: 529 ScanfSpecifier() : SuppressAssignment("*") {} 530 531 void setSuppressAssignment(const char *position) { 532 SuppressAssignment = true; 533 SuppressAssignment.setPosition(position); 534 } 535 536 const OptionalFlag &getSuppressAssignment() const { 537 return SuppressAssignment; 538 } 539 540 void setConversionSpecifier(const ConversionSpecifier &cs) { 541 CS = cs; 542 } 543 544 const ConversionSpecifier &getConversionSpecifier() const { 545 return CS; 546 } 547 548 bool consumesDataArgument() const { 549 return CS.consumesDataArgument() && !SuppressAssignment; 550 } 551 552 static ScanfSpecifier Parse(const char *beg, const char *end); 553 554}; 555 556} // end analyze_scanf namespace 557 558//===----------------------------------------------------------------------===// 559// Parsing and processing of format strings (both fprintf and fscanf). 560 561namespace analyze_format_string { 562 563enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 564 565class FormatStringHandler { 566public: 567 FormatStringHandler() {} 568 virtual ~FormatStringHandler(); 569 570 virtual void HandleNullChar(const char *nullCharacter) {} 571 572 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 573 PositionContext p) {} 574 575 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 576 577 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 578 unsigned specifierLen) {} 579 580 // Printf-specific handlers. 
581 582 virtual bool HandleInvalidPrintfConversionSpecifier( 583 const analyze_printf::PrintfSpecifier &FS, 584 const char *startSpecifier, 585 unsigned specifierLen) { 586 return true; 587 } 588 589 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 590 const char *startSpecifier, 591 unsigned specifierLen) { 592 return true; 593 } 594 595 // Scanf-specific handlers. 596 597 virtual bool HandleInvalidScanfConversionSpecifier( 598 const analyze_scanf::ScanfSpecifier &FS, 599 const char *startSpecifier, 600 unsigned specifierLen) { 601 return true; 602 } 603 604 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 605 const char *startSpecifier, 606 unsigned specifierLen) { 607 return true; 608 } 609 610 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 611}; 612 613bool ParsePrintfString(FormatStringHandler &H, 614 const char *beg, const char *end); 615 616bool ParseScanfString(FormatStringHandler &H, 617 const char *beg, const char *end); 618 619} // end analyze_format_string namespace 620} // end clang namespace 621#endif 622