FormatString.h revision 826a3457f737f1fc45a22954fd1bfde38160c165
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll', 'q' (BSD, deprecated) 70 AsIntMax, // 'j' 71 AsSizeT, // 'z' 72 AsPtrDiff, // 't' 73 AsLongDouble, // 'L' 74 AsWideChar = AsLong // for '%ls', only makes sense for printf 75 }; 76 77 LengthModifier() 78 : Position(0), kind(None) {} 79 LengthModifier(const char *pos, Kind k) 80 : Position(pos), kind(k) {} 81 82 const char *getStart() const { 83 return Position; 84 } 85 86 unsigned getLength() const { 87 switch (kind) { 88 default: 89 return 1; 90 case AsLongLong: 91 case AsChar: 92 return 2; 93 case None: 94 return 0; 95 } 96 } 97 98 Kind getKind() const { return kind; } 99 void setKind(Kind k) { kind = k; } 100 101 const char *toString() const; 102 103private: 104 const char *Position; 105 Kind kind; 106}; 107 108class ArgTypeResult { 109public: 110 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 111 CStrTy, WCStrTy }; 112private: 113 const Kind K; 114 QualType T; 115 ArgTypeResult(bool) : K(InvalidTy) {} 116public: 117 ArgTypeResult(Kind k = UnknownTy) : K(k) {} 118 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {} 119 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {} 120 121 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 122 123 bool isValid() const { return K != InvalidTy; } 124 125 const QualType *getSpecificType() const { 126 return K == SpecificTy ? &T : 0; 127 } 128 129 bool matchesType(ASTContext &C, QualType argTy) const; 130 131 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 132 133 QualType getRepresentativeType(ASTContext &C) const; 134}; 135 136class OptionalAmount { 137public: 138 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 139 140 OptionalAmount(HowSpecified howSpecified, 141 unsigned amount, 142 const char *amountStart, 143 unsigned amountLength, 144 bool usesPositionalArg) 145 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 146 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 147 148 OptionalAmount(bool valid = true) 149 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 150 UsesPositionalArg(0), UsesDotPrefix(0) {} 151 152 bool isInvalid() const { 153 return hs == Invalid; 154 } 155 156 HowSpecified getHowSpecified() const { return hs; } 157 void setHowSpecified(HowSpecified h) { hs = h; } 158 159 bool hasDataArgument() const { return hs == Arg; } 160 161 unsigned getArgIndex() const { 162 assert(hasDataArgument()); 163 return amt; 164 } 165 166 unsigned getConstantAmount() const { 167 assert(hs == Constant); 168 return amt; 169 } 170 171 const char *getStart() const { 172 // We include the . character if it is given. 173 return start - UsesDotPrefix; 174 } 175 176 unsigned getConstantLength() const { 177 assert(hs == Constant); 178 return length + UsesDotPrefix; 179 } 180 181 ArgTypeResult getArgType(ASTContext &Ctx) const; 182 183 void toString(llvm::raw_ostream &os) const; 184 185 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 186 unsigned getPositionalArgIndex() const { 187 assert(hasDataArgument()); 188 return amt + 1; 189 } 190 191 bool usesDotPrefix() const { return UsesDotPrefix; } 192 void setUsesDotPrefix() { UsesDotPrefix = true; } 193 194private: 195 const char *start; 196 unsigned length; 197 HowSpecified hs; 198 unsigned amt; 199 bool UsesPositionalArg : 1; 200 bool UsesDotPrefix; 201}; 202 203 204class FormatSpecifier { 205protected: 206 LengthModifier LM; 207 OptionalAmount FieldWidth; 208 /// Positional arguments, an IEEE extension: 209 /// IEEE Std 1003.1, 2004 Edition 210 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 211 bool UsesPositionalArg; 212 unsigned argIndex; 213public: 214 FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {} 215 216 void setLengthModifier(LengthModifier lm) { 217 LM = lm; 218 } 219 220 void setUsesPositionalArg() { UsesPositionalArg = true; } 221 222 void setArgIndex(unsigned i) { 223 // assert(CS.consumesDataArgument()); 224 argIndex = i; 225 } 226 227 unsigned getArgIndex() const { 228 //assert(CS.consumesDataArgument()); 229 return argIndex; 230 } 231 232 unsigned getPositionalArgIndex() const { 233 //assert(CS.consumesDataArgument()); 234 return argIndex + 1; 235 } 236 237 const LengthModifier &getLengthModifier() const { 238 return LM; 239 } 240 241 const OptionalAmount &getFieldWidth() const { 242 return FieldWidth; 243 } 244 245 void setFieldWidth(const OptionalAmount &Amt) { 246 FieldWidth = Amt; 247 } 248 249 bool usesPositionalArg() const { return UsesPositionalArg; } 250}; 251 252} // end analyze_format_string namespace 253 254//===----------------------------------------------------------------------===// 255/// Pieces specific to fprintf format strings. 256 257namespace analyze_printf { 258 259class ConversionSpecifier { 260public: 261 enum Kind { 262 InvalidSpecifier = 0, 263 // C99 conversion specifiers. 264 dArg, // 'd' 265 IntAsCharArg, // 'c' 266 iArg, // 'i', 267 oArg, // 'o', 268 uArg, // 'u', 269 xArg, // 'x', 270 XArg, // 'X', 271 fArg, // 'f', 272 FArg, // 'F', 273 eArg, // 'e', 274 EArg, // 'E', 275 gArg, // 'g', 276 GArg, // 'G', 277 aArg, // 'a', 278 AArg, // 'A', 279 CStrArg, // 's' 280 VoidPtrArg, // 'p' 281 OutIntPtrArg, // 'n' 282 PercentArg, // '%' 283 // MacOS X unicode extensions. 284 CArg, // 'C' 285 UnicodeStrArg, // 'S' 286 // Objective-C specific specifiers. 287 ObjCObjArg, // '@' 288 // GlibC specific specifiers. 289 PrintErrno, // 'm' 290 // Specifier ranges. 291 IntArgBeg = dArg, 292 IntArgEnd = iArg, 293 UIntArgBeg = oArg, 294 UIntArgEnd = XArg, 295 DoubleArgBeg = fArg, 296 DoubleArgEnd = AArg, 297 C99Beg = IntArgBeg, 298 C99End = DoubleArgEnd, 299 ObjCBeg = ObjCObjArg, 300 ObjCEnd = ObjCObjArg 301 }; 302 303 ConversionSpecifier() 304 : Position(0), kind(InvalidSpecifier) {} 305 306 ConversionSpecifier(const char *pos, Kind k) 307 : Position(pos), kind(k) {} 308 309 const char *getStart() const { 310 return Position; 311 } 312 313 llvm::StringRef getCharacters() const { 314 return llvm::StringRef(getStart(), getLength()); 315 } 316 317 bool consumesDataArgument() const { 318 switch (kind) { 319 case PercentArg: 320 case PrintErrno: 321 return false; 322 default: 323 return true; 324 } 325 } 326 327 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 328 bool isIntArg() const { return kind >= dArg && kind <= iArg; } 329 bool isUIntArg() const { return kind >= oArg && kind <= XArg; } 330 bool isDoubleArg() const { return kind >= fArg && kind <= AArg; } 331 Kind getKind() const { return kind; } 332 void setKind(Kind k) { kind = k; } 333 unsigned getLength() const { 334 // Conversion specifiers currently only are represented by 335 // single characters, but we be flexible. 336 return 1; 337 } 338 const char *toString() const; 339 340private: 341 const char *Position; 342 Kind kind; 343}; 344 345using analyze_format_string::ArgTypeResult; 346using analyze_format_string::LengthModifier; 347using analyze_format_string::OptionalAmount; 348using analyze_format_string::OptionalFlag; 349 350class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 351 OptionalFlag IsLeftJustified; // '-' 352 OptionalFlag HasPlusPrefix; // '+' 353 OptionalFlag HasSpacePrefix; // ' ' 354 OptionalFlag HasAlternativeForm; // '#' 355 OptionalFlag HasLeadingZeroes; // '0' 356 ConversionSpecifier CS; 357 OptionalAmount Precision; 358public: 359 PrintfSpecifier() : 360 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "), 361 HasAlternativeForm("#"), HasLeadingZeroes("0") {} 362 363 static PrintfSpecifier Parse(const char *beg, const char *end); 364 365 // Methods for incrementally constructing the PrintfSpecifier. 366 void setConversionSpecifier(const ConversionSpecifier &cs) { 367 CS = cs; 368 } 369 void setIsLeftJustified(const char *position) { 370 IsLeftJustified = true; 371 IsLeftJustified.setPosition(position); 372 } 373 void setHasPlusPrefix(const char *position) { 374 HasPlusPrefix = true; 375 HasPlusPrefix.setPosition(position); 376 } 377 void setHasSpacePrefix(const char *position) { 378 HasSpacePrefix = true; 379 HasSpacePrefix.setPosition(position); 380 } 381 void setHasAlternativeForm(const char *position) { 382 HasAlternativeForm = true; 383 HasAlternativeForm.setPosition(position); 384 } 385 void setHasLeadingZeros(const char *position) { 386 HasLeadingZeroes = true; 387 HasLeadingZeroes.setPosition(position); 388 } 389 void setUsesPositionalArg() { UsesPositionalArg = true; } 390 391 // Methods for querying the format specifier. 392 393 const ConversionSpecifier &getConversionSpecifier() const { 394 return CS; 395 } 396 397 void setPrecision(const OptionalAmount &Amt) { 398 Precision = Amt; 399 Precision.setUsesDotPrefix(); 400 } 401 402 const OptionalAmount &getPrecision() const { 403 return Precision; 404 } 405 406 /// \brief Returns the builtin type that a data argument 407 /// paired with this format specifier should have. This method 408 /// will return null if the format specifier does not have 409 /// a matching data argument or the matching argument matches 410 /// more than one type. 411 ArgTypeResult getArgType(ASTContext &Ctx) const; 412 413 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 414 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 415 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 416 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 417 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 418 bool usesPositionalArg() const { return UsesPositionalArg; } 419 420 /// Changes the specifier and length according to a QualType, retaining any 421 /// flags or options. Returns true on success, or false when a conversion 422 /// was not successful. 423 bool fixType(QualType QT); 424 425 void toString(llvm::raw_ostream &os) const; 426 427 // Validation methods - to check if any element results in undefined behavior 428 bool hasValidPlusPrefix() const; 429 bool hasValidAlternativeForm() const; 430 bool hasValidLeadingZeros() const; 431 bool hasValidSpacePrefix() const; 432 bool hasValidLeftJustified() const; 433 434 bool hasValidLengthModifier() const; 435 bool hasValidPrecision() const; 436 bool hasValidFieldWidth() const; 437}; 438} // end analyze_printf namespace 439 440//===----------------------------------------------------------------------===// 441/// Pieces specific to fscanf format strings. 442 443namespace analyze_scanf { 444 445class ConversionSpecifier { 446public: 447 enum Kind { 448 InvalidSpecifier = 0, 449 // C99 conversion specifiers. 450 dArg, // 'd' 451 iArg, // 'i', 452 oArg, // 'o', 453 uArg, // 'u', 454 xArg, // 'x', 455 XArg, // 'X', 456 fArg, // 'f', 457 FArg, // 'F', 458 eArg, // 'e', 459 EArg, // 'E', 460 gArg, // 'g', 461 GArg, // 'G', 462 aArg, // 'a', 463 AArg, // 'A', 464 sArg, // 's', // match sequence of non-write-space characters 465 VoidPtrArg, // 'p' 466 cArg, // 'c', differs from printf, writes array of characters 467 ConsumedSoFarArg, // 'n', differs from printf, writes back args consumed 468 PercentArg, // '%' 469 ScanListArg, // '[' followed by scan list 470 // IEEE Std 1003.1 extensions. 471 CArg, // 'C', same as writing 'lc' 472 SArg, // 'S', same as writing 'ls' 473 // Specifier ranges. 474 IntArgBeg = dArg, 475 IntArgEnd = iArg, 476 UIntArgBeg = oArg, 477 UIntArgEnd = XArg, 478 DoubleArgBeg = fArg, 479 DoubleArgEnd = AArg 480 }; 481 482 ConversionSpecifier() 483 : Position(0), EndScanList(0), kind(InvalidSpecifier) {} 484 485 ConversionSpecifier(const char *pos, Kind k) 486 : Position(pos), EndScanList(0), kind(k) {} 487 488 const char *getStart() const { 489 return Position; 490 } 491 492 void setEndScanList(const char *pos) { EndScanList = pos; } 493 494 llvm::StringRef getCharacters() const { 495 return llvm::StringRef(getStart(), getLength()); 496 } 497 498 bool consumesDataArgument() const { 499 return kind != PercentArg; 500 } 501 502 bool isIntArg() const { return kind >= dArg && kind <= iArg; } 503 bool isUIntArg() const { return kind >= oArg && kind <= XArg; } 504 bool isDoubleArg() const { return kind >= fArg && kind <= AArg; } 505 Kind getKind() const { return kind; } 506 void setKind(Kind k) { kind = k; } 507 508 unsigned getLength() const { 509 return EndScanList ? EndScanList - Position : 1; 510 } 511 512 const char *toString() const; 513 514private: 515 const char *Position; 516 const char *EndScanList; 517 Kind kind; 518}; 519 520using analyze_format_string::LengthModifier; 521using analyze_format_string::OptionalAmount; 522using analyze_format_string::OptionalFlag; 523 524class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 525 OptionalFlag SuppressAssignment; // '*' 526 ConversionSpecifier CS; 527public: 528 ScanfSpecifier() : SuppressAssignment("*") {} 529 530 void setSuppressAssignment(const char *position) { 531 SuppressAssignment = true; 532 SuppressAssignment.setPosition(position); 533 } 534 535 const OptionalFlag &getSuppressAssignment() const { 536 return SuppressAssignment; 537 } 538 539 void setConversionSpecifier(const ConversionSpecifier &cs) { 540 CS = cs; 541 } 542 543 const ConversionSpecifier &getConversionSpecifier() const { 544 return CS; 545 } 546 547 bool consumesDataArgument() const { 548 return CS.consumesDataArgument() && !SuppressAssignment; 549 } 550 551 static ScanfSpecifier Parse(const char *beg, const char *end); 552 553}; 554 555} // end analyze_scanf namespace 556 557//===----------------------------------------------------------------------===// 558// Parsing and processing of format strings (both fprintf and fscanf). 559 560namespace analyze_format_string { 561 562enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 563 564class FormatStringHandler { 565public: 566 FormatStringHandler() {} 567 virtual ~FormatStringHandler(); 568 569 virtual void HandleNullChar(const char *nullCharacter) {} 570 571 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 572 PositionContext p) {} 573 574 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 575 576 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 577 unsigned specifierLen) {} 578 579 // Printf-specific handlers. 580 581 virtual bool HandleInvalidPrintfConversionSpecifier( 582 const analyze_printf::PrintfSpecifier &FS, 583 const char *startSpecifier, 584 unsigned specifierLen) { 585 return true; 586 } 587 588 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 589 const char *startSpecifier, 590 unsigned specifierLen) { 591 return true; 592 } 593 594 // Scanf-specific handlers. 595 596 virtual bool HandleInvalidScanfConversionSpecifier( 597 const analyze_scanf::ScanfSpecifier &FS, 598 const char *startSpecifier, 599 unsigned specifierLen) { 600 return true; 601 } 602 603 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 604 const char *startSpecifier, 605 unsigned specifierLen) { 606 return true; 607 } 608 609 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 610}; 611 612bool ParsePrintfString(FormatStringHandler &H, 613 const char *beg, const char *end); 614 615bool ParseScanfString(FormatStringHandler &H, 616 const char *beg, const char *end); 617 618} // end analyze_format_string namespace 619} // end clang namespace 620#endif 621