FormatString.h revision 96827eb52405a71c65c200949f3e644368e86454
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26//===----------------------------------------------------------------------===// 27/// Common components of both fprintf and fscanf format strings. 28namespace analyze_format_string { 29 30/// Class representing optional flags with location and representation 31/// information. 32class OptionalFlag { 33public: 34 OptionalFlag(const char *Representation) 35 : representation(Representation), flag(false) {} 36 bool isSet() { return flag; } 37 void set() { flag = true; } 38 void clear() { flag = false; } 39 void setPosition(const char *position) { 40 assert(position); 41 this->position = position; 42 } 43 const char *getPosition() const { 44 assert(position); 45 return position; 46 } 47 const char *toString() const { return representation; } 48 49 // Overloaded operators for bool like qualities 50 operator bool() const { return flag; } 51 OptionalFlag& operator=(const bool &rhs) { 52 flag = rhs; 53 return *this; // Return a reference to myself. 54 } 55private: 56 const char *representation; 57 const char *position; 58 bool flag; 59}; 60 61/// Represents the length modifier in a format string in scanf/printf. 62class LengthModifier { 63public: 64 enum Kind { 65 None, 66 AsChar, // 'hh' 67 AsShort, // 'h' 68 AsLong, // 'l' 69 AsLongLong, // 'll', 'q' (BSD, deprecated) 70 AsIntMax, // 'j' 71 AsSizeT, // 'z' 72 AsPtrDiff, // 't' 73 AsLongDouble, // 'L' 74 AsWideChar = AsLong // for '%ls', only makes sense for printf 75 }; 76 77 LengthModifier() 78 : Position(0), kind(None) {} 79 LengthModifier(const char *pos, Kind k) 80 : Position(pos), kind(k) {} 81 82 const char *getStart() const { 83 return Position; 84 } 85 86 unsigned getLength() const { 87 switch (kind) { 88 default: 89 return 1; 90 case AsLongLong: 91 case AsChar: 92 return 2; 93 case None: 94 return 0; 95 } 96 } 97 98 Kind getKind() const { return kind; } 99 void setKind(Kind k) { kind = k; } 100 101 const char *toString() const; 102 103private: 104 const char *Position; 105 Kind kind; 106}; 107 108class ConversionSpecifier { 109public: 110 enum Kind { 111 InvalidSpecifier = 0, 112 // C99 conversion specifiers. 113 cArg, 114 dArg, 115 iArg, 116 IntArgBeg = cArg, IntArgEnd = iArg, 117 118 oArg, 119 uArg, 120 xArg, 121 XArg, 122 UIntArgBeg = oArg, UIntArgEnd = XArg, 123 124 fArg, 125 FArg, 126 eArg, 127 EArg, 128 gArg, 129 GArg, 130 aArg, 131 AArg, 132 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 133 134 sArg, 135 pArg, 136 nArg, 137 PercentArg, 138 CArg, 139 SArg, 140 141 // ** Printf-specific ** 142 143 // Objective-C specific specifiers. 144 ObjCObjArg, // '@' 145 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 146 147 // GlibC specific specifiers. 148 PrintErrno, // 'm' 149 150 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 151 152 // ** Scanf-specific ** 153 ScanListArg, // '[' 154 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 155 }; 156 157 ConversionSpecifier(bool isPrintf) 158 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 159 160 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 161 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 162 163 const char *getStart() const { 164 return Position; 165 } 166 167 llvm::StringRef getCharacters() const { 168 return llvm::StringRef(getStart(), getLength()); 169 } 170 171 bool consumesDataArgument() const { 172 switch (kind) { 173 case PrintErrno: 174 assert(IsPrintf); 175 case PercentArg: 176 return false; 177 default: 178 return true; 179 } 180 } 181 182 Kind getKind() const { return kind; } 183 void setKind(Kind k) { kind = k; } 184 unsigned getLength() const { 185 return EndScanList ? EndScanList - Position : 1; 186 } 187 188 const char *toString() const; 189 190 bool isPrintfKind() const { return IsPrintf; } 191 192protected: 193 bool IsPrintf; 194 const char *Position; 195 const char *EndScanList; 196 Kind kind; 197}; 198 199class ArgTypeResult { 200public: 201 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 202 CStrTy, WCStrTy }; 203private: 204 const Kind K; 205 QualType T; 206 ArgTypeResult(bool) : K(InvalidTy) {} 207public: 208 ArgTypeResult(Kind k = UnknownTy) : K(k) {} 209 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {} 210 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {} 211 212 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 213 214 bool isValid() const { return K != InvalidTy; } 215 216 const QualType *getSpecificType() const { 217 return K == SpecificTy ? &T : 0; 218 } 219 220 bool matchesType(ASTContext &C, QualType argTy) const; 221 222 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 223 224 QualType getRepresentativeType(ASTContext &C) const; 225}; 226 227class OptionalAmount { 228public: 229 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 230 231 OptionalAmount(HowSpecified howSpecified, 232 unsigned amount, 233 const char *amountStart, 234 unsigned amountLength, 235 bool usesPositionalArg) 236 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 237 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 238 239 OptionalAmount(bool valid = true) 240 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 241 UsesPositionalArg(0), UsesDotPrefix(0) {} 242 243 bool isInvalid() const { 244 return hs == Invalid; 245 } 246 247 HowSpecified getHowSpecified() const { return hs; } 248 void setHowSpecified(HowSpecified h) { hs = h; } 249 250 bool hasDataArgument() const { return hs == Arg; } 251 252 unsigned getArgIndex() const { 253 assert(hasDataArgument()); 254 return amt; 255 } 256 257 unsigned getConstantAmount() const { 258 assert(hs == Constant); 259 return amt; 260 } 261 262 const char *getStart() const { 263 // We include the . character if it is given. 264 return start - UsesDotPrefix; 265 } 266 267 unsigned getConstantLength() const { 268 assert(hs == Constant); 269 return length + UsesDotPrefix; 270 } 271 272 ArgTypeResult getArgType(ASTContext &Ctx) const; 273 274 void toString(llvm::raw_ostream &os) const; 275 276 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 277 unsigned getPositionalArgIndex() const { 278 assert(hasDataArgument()); 279 return amt + 1; 280 } 281 282 bool usesDotPrefix() const { return UsesDotPrefix; } 283 void setUsesDotPrefix() { UsesDotPrefix = true; } 284 285private: 286 const char *start; 287 unsigned length; 288 HowSpecified hs; 289 unsigned amt; 290 bool UsesPositionalArg : 1; 291 bool UsesDotPrefix; 292}; 293 294 295class FormatSpecifier { 296protected: 297 LengthModifier LM; 298 OptionalAmount FieldWidth; 299 ConversionSpecifier CS; 300 /// Positional arguments, an IEEE extension: 301 /// IEEE Std 1003.1, 2004 Edition 302 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 303 bool UsesPositionalArg; 304 unsigned argIndex; 305public: 306 FormatSpecifier(bool isPrintf) 307 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 308 309 void setLengthModifier(LengthModifier lm) { 310 LM = lm; 311 } 312 313 void setUsesPositionalArg() { UsesPositionalArg = true; } 314 315 void setArgIndex(unsigned i) { 316 argIndex = i; 317 } 318 319 unsigned getArgIndex() const { 320 return argIndex; 321 } 322 323 unsigned getPositionalArgIndex() const { 324 return argIndex + 1; 325 } 326 327 const LengthModifier &getLengthModifier() const { 328 return LM; 329 } 330 331 const OptionalAmount &getFieldWidth() const { 332 return FieldWidth; 333 } 334 335 void setFieldWidth(const OptionalAmount &Amt) { 336 FieldWidth = Amt; 337 } 338 339 bool usesPositionalArg() const { return UsesPositionalArg; } 340 341 bool hasValidLengthModifier() const; 342}; 343 344} // end analyze_format_string namespace 345 346//===----------------------------------------------------------------------===// 347/// Pieces specific to fprintf format strings. 348 349namespace analyze_printf { 350 351class PrintfConversionSpecifier : 352 public analyze_format_string::ConversionSpecifier { 353public: 354 PrintfConversionSpecifier() 355 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 356 357 PrintfConversionSpecifier(const char *pos, Kind k) 358 : ConversionSpecifier(true, pos, k) {} 359 360 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 361 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 362 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 363 bool isDoubleArg() const { return kind >= DoubleArgBeg && 364 kind <= DoubleArgBeg; } 365 unsigned getLength() const { 366 // Conversion specifiers currently only are represented by 367 // single characters, but we be flexible. 368 return 1; 369 } 370 371 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 372 return CS->isPrintfKind(); 373 } 374}; 375 376using analyze_format_string::ArgTypeResult; 377using analyze_format_string::LengthModifier; 378using analyze_format_string::OptionalAmount; 379using analyze_format_string::OptionalFlag; 380 381class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 382 OptionalFlag IsLeftJustified; // '-' 383 OptionalFlag HasPlusPrefix; // '+' 384 OptionalFlag HasSpacePrefix; // ' ' 385 OptionalFlag HasAlternativeForm; // '#' 386 OptionalFlag HasLeadingZeroes; // '0' 387 OptionalAmount Precision; 388public: 389 PrintfSpecifier() : 390 FormatSpecifier(/* isPrintf = */ true), 391 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "), 392 HasAlternativeForm("#"), HasLeadingZeroes("0") {} 393 394 static PrintfSpecifier Parse(const char *beg, const char *end); 395 396 // Methods for incrementally constructing the PrintfSpecifier. 397 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 398 CS = cs; 399 } 400 void setIsLeftJustified(const char *position) { 401 IsLeftJustified = true; 402 IsLeftJustified.setPosition(position); 403 } 404 void setHasPlusPrefix(const char *position) { 405 HasPlusPrefix = true; 406 HasPlusPrefix.setPosition(position); 407 } 408 void setHasSpacePrefix(const char *position) { 409 HasSpacePrefix = true; 410 HasSpacePrefix.setPosition(position); 411 } 412 void setHasAlternativeForm(const char *position) { 413 HasAlternativeForm = true; 414 HasAlternativeForm.setPosition(position); 415 } 416 void setHasLeadingZeros(const char *position) { 417 HasLeadingZeroes = true; 418 HasLeadingZeroes.setPosition(position); 419 } 420 void setUsesPositionalArg() { UsesPositionalArg = true; } 421 422 // Methods for querying the format specifier. 423 424 const PrintfConversionSpecifier &getConversionSpecifier() const { 425 return cast<PrintfConversionSpecifier>(CS); 426 } 427 428 void setPrecision(const OptionalAmount &Amt) { 429 Precision = Amt; 430 Precision.setUsesDotPrefix(); 431 } 432 433 const OptionalAmount &getPrecision() const { 434 return Precision; 435 } 436 437 bool consumesDataArgument() const { 438 return getConversionSpecifier().consumesDataArgument(); 439 } 440 441 /// \brief Returns the builtin type that a data argument 442 /// paired with this format specifier should have. This method 443 /// will return null if the format specifier does not have 444 /// a matching data argument or the matching argument matches 445 /// more than one type. 446 ArgTypeResult getArgType(ASTContext &Ctx) const; 447 448 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 449 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 450 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 451 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 452 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 453 bool usesPositionalArg() const { return UsesPositionalArg; } 454 455 /// Changes the specifier and length according to a QualType, retaining any 456 /// flags or options. Returns true on success, or false when a conversion 457 /// was not successful. 458 bool fixType(QualType QT); 459 460 void toString(llvm::raw_ostream &os) const; 461 462 // Validation methods - to check if any element results in undefined behavior 463 bool hasValidPlusPrefix() const; 464 bool hasValidAlternativeForm() const; 465 bool hasValidLeadingZeros() const; 466 bool hasValidSpacePrefix() const; 467 bool hasValidLeftJustified() const; 468 469 bool hasValidPrecision() const; 470 bool hasValidFieldWidth() const; 471}; 472} // end analyze_printf namespace 473 474//===----------------------------------------------------------------------===// 475/// Pieces specific to fscanf format strings. 476 477namespace analyze_scanf { 478 479class ScanfConversionSpecifier : 480 public analyze_format_string::ConversionSpecifier { 481public: 482 ScanfConversionSpecifier() 483 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 484 485 ScanfConversionSpecifier(const char *pos, Kind k) 486 : ConversionSpecifier(false, pos, k) {} 487 488 void setEndScanList(const char *pos) { EndScanList = pos; } 489 490 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 491 return !CS->isPrintfKind(); 492 } 493}; 494 495using analyze_format_string::LengthModifier; 496using analyze_format_string::OptionalAmount; 497using analyze_format_string::OptionalFlag; 498 499class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 500 OptionalFlag SuppressAssignment; // '*' 501public: 502 ScanfSpecifier() : 503 FormatSpecifier(/* isPrintf = */ false), 504 SuppressAssignment("*") {} 505 506 void setSuppressAssignment(const char *position) { 507 SuppressAssignment = true; 508 SuppressAssignment.setPosition(position); 509 } 510 511 const OptionalFlag &getSuppressAssignment() const { 512 return SuppressAssignment; 513 } 514 515 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 516 CS = cs; 517 } 518 519 const ScanfConversionSpecifier &getConversionSpecifier() const { 520 return cast<ScanfConversionSpecifier>(CS); 521 } 522 523 bool consumesDataArgument() const { 524 return CS.consumesDataArgument() && !SuppressAssignment; 525 } 526 527 static ScanfSpecifier Parse(const char *beg, const char *end); 528}; 529 530} // end analyze_scanf namespace 531 532//===----------------------------------------------------------------------===// 533// Parsing and processing of format strings (both fprintf and fscanf). 534 535namespace analyze_format_string { 536 537enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 538 539class FormatStringHandler { 540public: 541 FormatStringHandler() {} 542 virtual ~FormatStringHandler(); 543 544 virtual void HandleNullChar(const char *nullCharacter) {} 545 546 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 547 PositionContext p) {} 548 549 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 550 551 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 552 unsigned specifierLen) {} 553 554 // Printf-specific handlers. 555 556 virtual bool HandleInvalidPrintfConversionSpecifier( 557 const analyze_printf::PrintfSpecifier &FS, 558 const char *startSpecifier, 559 unsigned specifierLen) { 560 return true; 561 } 562 563 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 564 const char *startSpecifier, 565 unsigned specifierLen) { 566 return true; 567 } 568 569 // Scanf-specific handlers. 570 571 virtual bool HandleInvalidScanfConversionSpecifier( 572 const analyze_scanf::ScanfSpecifier &FS, 573 const char *startSpecifier, 574 unsigned specifierLen) { 575 return true; 576 } 577 578 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 579 const char *startSpecifier, 580 unsigned specifierLen) { 581 return true; 582 } 583 584 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 585}; 586 587bool ParsePrintfString(FormatStringHandler &H, 588 const char *beg, const char *end); 589 590bool ParseScanfString(FormatStringHandler &H, 591 const char *beg, const char *end); 592 593} // end analyze_format_string namespace 594} // end clang namespace 595#endif 596