FormatString.h revision 2da5036adaef57395270ef2dd82358fc513d8616
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll', 'q' (BSD, deprecated) 70 AsIntMax, // 'j' 71 AsSizeT, // 'z' 72 AsPtrDiff, // 't' 73 AsLongDouble, // 'L' 74 AsWideChar = AsLong // for '%ls', only makes sense for printf 75 }; 76 77 LengthModifier() 78 : Position(0), kind(None) {} 79 LengthModifier(const char *pos, Kind k) 80 : Position(pos), kind(k) {} 81 82 const char *getStart() const { 83 return Position; 84 } 85 86 unsigned getLength() const { 87 switch (kind) { 88 default: 89 return 1; 90 case AsLongLong: 91 case AsChar: 92 return 2; 93 case None: 94 return 0; 95 } 96 } 97 98 Kind getKind() const { return kind; } 99 void setKind(Kind k) { kind = k; } 100 101 const char *toString() const; 102 103private: 104 const char *Position; 105 Kind kind; 106}; 107 108class ArgTypeResult { 109public: 110 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 111 CStrTy, WCStrTy }; 112private: 113 const Kind K; 114 QualType T; 115 ArgTypeResult(bool) : K(InvalidTy) {} 116public: 117 ArgTypeResult(Kind k = UnknownTy) : K(k) {} 118 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {} 119 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {} 120 121 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 122 123 bool isValid() const { return K != InvalidTy; } 124 125 const QualType *getSpecificType() const { 126 return K == SpecificTy ? &T : 0; 127 } 128 129 bool matchesType(ASTContext &C, QualType argTy) const; 130 131 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 132 133 QualType getRepresentativeType(ASTContext &C) const; 134}; 135 136class OptionalAmount { 137public: 138 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 139 140 OptionalAmount(HowSpecified howSpecified, 141 unsigned amount, 142 const char *amountStart, 143 unsigned amountLength, 144 bool usesPositionalArg) 145 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 146 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 147 148 OptionalAmount(bool valid = true) 149 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 150 UsesPositionalArg(0), UsesDotPrefix(0) {} 151 152 bool isInvalid() const { 153 return hs == Invalid; 154 } 155 156 HowSpecified getHowSpecified() const { return hs; } 157 void setHowSpecified(HowSpecified h) { hs = h; } 158 159 bool hasDataArgument() const { return hs == Arg; } 160 161 unsigned getArgIndex() const { 162 assert(hasDataArgument()); 163 return amt; 164 } 165 166 unsigned getConstantAmount() const { 167 assert(hs == Constant); 168 return amt; 169 } 170 171 const char *getStart() const { 172 // We include the . character if it is given. 173 return start - UsesDotPrefix; 174 } 175 176 unsigned getConstantLength() const { 177 assert(hs == Constant); 178 return length + UsesDotPrefix; 179 } 180 181 ArgTypeResult getArgType(ASTContext &Ctx) const; 182 183 void toString(llvm::raw_ostream &os) const; 184 185 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 186 unsigned getPositionalArgIndex() const { 187 assert(hasDataArgument()); 188 return amt + 1; 189 } 190 191 bool usesDotPrefix() const { return UsesDotPrefix; } 192 void setUsesDotPrefix() { UsesDotPrefix = true; } 193 194private: 195 const char *start; 196 unsigned length; 197 HowSpecified hs; 198 unsigned amt; 199 bool UsesPositionalArg : 1; 200 bool UsesDotPrefix; 201}; 202 203 204class FormatSpecifier { 205protected: 206 LengthModifier LM; 207 OptionalAmount FieldWidth; 208 /// Positional arguments, an IEEE extension: 209 /// IEEE Std 1003.1, 2004 Edition 210 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 211 bool UsesPositionalArg; 212 unsigned argIndex; 213public: 214 FormatSpecifier() : UsesPositionalArg(false), argIndex(0) {} 215 216 void setLengthModifier(LengthModifier lm) { 217 LM = lm; 218 } 219 220 void setUsesPositionalArg() { UsesPositionalArg = true; } 221 222 void setArgIndex(unsigned i) { 223 argIndex = i; 224 } 225 226 unsigned getArgIndex() const { 227 return argIndex; 228 } 229 230 unsigned getPositionalArgIndex() const { 231 return argIndex + 1; 232 } 233 234 const LengthModifier &getLengthModifier() const { 235 return LM; 236 } 237 238 const OptionalAmount &getFieldWidth() const { 239 return FieldWidth; 240 } 241 242 void setFieldWidth(const OptionalAmount &Amt) { 243 FieldWidth = Amt; 244 } 245 246 bool usesPositionalArg() const { return UsesPositionalArg; } 247}; 248 249} // end analyze_format_string namespace 250 251//===----------------------------------------------------------------------===// 252/// Pieces specific to fprintf format strings. 253 254namespace analyze_printf { 255 256class ConversionSpecifier { 257public: 258 enum Kind { 259 InvalidSpecifier = 0, 260 // C99 conversion specifiers. 261 cArg, 262 dArg, 263 iArg, 264 IntArgBeg = cArg, IntArgEnd = iArg, 265 266 oArg, 267 uArg, 268 xArg, 269 XArg, 270 UIntArgBeg = oArg, UIntArgEnd = XArg, 271 272 fArg, 273 FArg, 274 eArg, 275 EArg, 276 gArg, 277 GArg, 278 aArg, 279 AArg, 280 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 281 282 sArg, 283 pArg, 284 nArg, 285 PercentArg, 286 CArg, 287 SArg, 288 289 // ** Printf-specific ** 290 291 // Objective-C specific specifiers. 292 ObjCObjArg, // '@' 293 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 294 295 // GlibC specific specifiers. 296 PrintErrno, // 'm' 297 298 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno 299 }; 300 301 ConversionSpecifier() 302 : Position(0), kind(InvalidSpecifier) {} 303 304 ConversionSpecifier(const char *pos, Kind k) 305 : Position(pos), kind(k) {} 306 307 const char *getStart() const { 308 return Position; 309 } 310 311 llvm::StringRef getCharacters() const { 312 return llvm::StringRef(getStart(), getLength()); 313 } 314 315 bool consumesDataArgument() const { 316 switch (kind) { 317 case PercentArg: 318 case PrintErrno: 319 return false; 320 default: 321 return true; 322 } 323 } 324 325 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 326 bool isIntArg() const { return kind >= dArg && kind <= iArg; } 327 bool isUIntArg() const { return kind >= oArg && kind <= XArg; } 328 bool isDoubleArg() const { return kind >= fArg && kind <= AArg; } 329 Kind getKind() const { return kind; } 330 void setKind(Kind k) { kind = k; } 331 unsigned getLength() const { 332 // Conversion specifiers currently only are represented by 333 // single characters, but we be flexible. 334 return 1; 335 } 336 const char *toString() const; 337 338private: 339 const char *Position; 340 Kind kind; 341}; 342 343using analyze_format_string::ArgTypeResult; 344using analyze_format_string::LengthModifier; 345using analyze_format_string::OptionalAmount; 346using analyze_format_string::OptionalFlag; 347 348class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 349 OptionalFlag IsLeftJustified; // '-' 350 OptionalFlag HasPlusPrefix; // '+' 351 OptionalFlag HasSpacePrefix; // ' ' 352 OptionalFlag HasAlternativeForm; // '#' 353 OptionalFlag HasLeadingZeroes; // '0' 354 ConversionSpecifier CS; 355 OptionalAmount Precision; 356public: 357 PrintfSpecifier() : 358 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "), 359 HasAlternativeForm("#"), HasLeadingZeroes("0") {} 360 361 static PrintfSpecifier Parse(const char *beg, const char *end); 362 363 // Methods for incrementally constructing the PrintfSpecifier. 364 void setConversionSpecifier(const ConversionSpecifier &cs) { 365 CS = cs; 366 } 367 void setIsLeftJustified(const char *position) { 368 IsLeftJustified = true; 369 IsLeftJustified.setPosition(position); 370 } 371 void setHasPlusPrefix(const char *position) { 372 HasPlusPrefix = true; 373 HasPlusPrefix.setPosition(position); 374 } 375 void setHasSpacePrefix(const char *position) { 376 HasSpacePrefix = true; 377 HasSpacePrefix.setPosition(position); 378 } 379 void setHasAlternativeForm(const char *position) { 380 HasAlternativeForm = true; 381 HasAlternativeForm.setPosition(position); 382 } 383 void setHasLeadingZeros(const char *position) { 384 HasLeadingZeroes = true; 385 HasLeadingZeroes.setPosition(position); 386 } 387 void setUsesPositionalArg() { UsesPositionalArg = true; } 388 389 // Methods for querying the format specifier. 390 391 const ConversionSpecifier &getConversionSpecifier() const { 392 return CS; 393 } 394 395 void setPrecision(const OptionalAmount &Amt) { 396 Precision = Amt; 397 Precision.setUsesDotPrefix(); 398 } 399 400 const OptionalAmount &getPrecision() const { 401 return Precision; 402 } 403 404 bool consumesDataArgument() const { 405 return CS.consumesDataArgument(); 406 } 407 408 /// \brief Returns the builtin type that a data argument 409 /// paired with this format specifier should have. This method 410 /// will return null if the format specifier does not have 411 /// a matching data argument or the matching argument matches 412 /// more than one type. 413 ArgTypeResult getArgType(ASTContext &Ctx) const; 414 415 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 416 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 417 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 418 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 419 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 420 bool usesPositionalArg() const { return UsesPositionalArg; } 421 422 /// Changes the specifier and length according to a QualType, retaining any 423 /// flags or options. Returns true on success, or false when a conversion 424 /// was not successful. 425 bool fixType(QualType QT); 426 427 void toString(llvm::raw_ostream &os) const; 428 429 // Validation methods - to check if any element results in undefined behavior 430 bool hasValidPlusPrefix() const; 431 bool hasValidAlternativeForm() const; 432 bool hasValidLeadingZeros() const; 433 bool hasValidSpacePrefix() const; 434 bool hasValidLeftJustified() const; 435 436 bool hasValidLengthModifier() const; 437 bool hasValidPrecision() const; 438 bool hasValidFieldWidth() const; 439}; 440} // end analyze_printf namespace 441 442//===----------------------------------------------------------------------===// 443/// Pieces specific to fscanf format strings. 444 445namespace analyze_scanf { 446 447class ConversionSpecifier { 448public: 449 enum Kind { 450 InvalidSpecifier = 0, 451 // C99 conversion specifiers. 452 dArg, // 'd' 453 iArg, // 'i', 454 oArg, // 'o', 455 uArg, // 'u', 456 xArg, // 'x', 457 XArg, // 'X', 458 fArg, // 'f', 459 FArg, // 'F', 460 eArg, // 'e', 461 EArg, // 'E', 462 gArg, // 'g', 463 GArg, // 'G', 464 aArg, // 'a', 465 AArg, // 'A', 466 sArg, // 's', // match sequence of non-write-space characters 467 pArg, // 'p' 468 cArg, // 'c', differs from printf, writes array of characters 469 nArg, // 'n', differs from printf, writes back args consumed 470 PercentArg, // '%' 471 ScanListArg, // '[' followed by scan list 472 // IEEE Std 1003.1 extensions. 473 CArg, // 'C', same as writing 'lc' 474 SArg, // 'S', same as writing 'ls' 475 // Specifier ranges. 476 IntArgBeg = dArg, 477 IntArgEnd = iArg, 478 UIntArgBeg = oArg, 479 UIntArgEnd = XArg, 480 DoubleArgBeg = fArg, 481 DoubleArgEnd = AArg 482 }; 483 484 ConversionSpecifier() 485 : Position(0), EndScanList(0), kind(InvalidSpecifier) {} 486 487 ConversionSpecifier(const char *pos, Kind k) 488 : Position(pos), EndScanList(0), kind(k) {} 489 490 const char *getStart() const { 491 return Position; 492 } 493 494 void setEndScanList(const char *pos) { EndScanList = pos; } 495 496 llvm::StringRef getCharacters() const { 497 return llvm::StringRef(getStart(), getLength()); 498 } 499 500 bool consumesDataArgument() const { 501 return kind != PercentArg; 502 } 503 504 bool isIntArg() const { return kind >= dArg && kind <= iArg; } 505 bool isUIntArg() const { return kind >= oArg && kind <= XArg; } 506 bool isDoubleArg() const { return kind >= fArg && kind <= AArg; } 507 Kind getKind() const { return kind; } 508 void setKind(Kind k) { kind = k; } 509 510 unsigned getLength() const { 511 return EndScanList ? EndScanList - Position : 1; 512 } 513 514 const char *toString() const; 515 516private: 517 const char *Position; 518 const char *EndScanList; 519 Kind kind; 520}; 521 522using analyze_format_string::LengthModifier; 523using analyze_format_string::OptionalAmount; 524using analyze_format_string::OptionalFlag; 525 526class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 527 OptionalFlag SuppressAssignment; // '*' 528 ConversionSpecifier CS; 529public: 530 ScanfSpecifier() : SuppressAssignment("*") {} 531 532 void setSuppressAssignment(const char *position) { 533 SuppressAssignment = true; 534 SuppressAssignment.setPosition(position); 535 } 536 537 const OptionalFlag &getSuppressAssignment() const { 538 return SuppressAssignment; 539 } 540 541 void setConversionSpecifier(const ConversionSpecifier &cs) { 542 CS = cs; 543 } 544 545 const ConversionSpecifier &getConversionSpecifier() const { 546 return CS; 547 } 548 549 bool consumesDataArgument() const { 550 return CS.consumesDataArgument() && !SuppressAssignment; 551 } 552 553 static ScanfSpecifier Parse(const char *beg, const char *end); 554 555}; 556 557} // end analyze_scanf namespace 558 559//===----------------------------------------------------------------------===// 560// Parsing and processing of format strings (both fprintf and fscanf). 561 562namespace analyze_format_string { 563 564enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 565 566class FormatStringHandler { 567public: 568 FormatStringHandler() {} 569 virtual ~FormatStringHandler(); 570 571 virtual void HandleNullChar(const char *nullCharacter) {} 572 573 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 574 PositionContext p) {} 575 576 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 577 578 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 579 unsigned specifierLen) {} 580 581 // Printf-specific handlers. 582 583 virtual bool HandleInvalidPrintfConversionSpecifier( 584 const analyze_printf::PrintfSpecifier &FS, 585 const char *startSpecifier, 586 unsigned specifierLen) { 587 return true; 588 } 589 590 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 591 const char *startSpecifier, 592 unsigned specifierLen) { 593 return true; 594 } 595 596 // Scanf-specific handlers. 597 598 virtual bool HandleInvalidScanfConversionSpecifier( 599 const analyze_scanf::ScanfSpecifier &FS, 600 const char *startSpecifier, 601 unsigned specifierLen) { 602 return true; 603 } 604 605 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 606 const char *startSpecifier, 607 unsigned specifierLen) { 608 return true; 609 } 610 611 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 612}; 613 614bool ParsePrintfString(FormatStringHandler &H, 615 const char *beg, const char *end); 616 617bool ParseScanfString(FormatStringHandler &H, 618 const char *beg, const char *end); 619 620} // end analyze_format_string namespace 621} // end clang namespace 622#endif 623