FormatString.h revision 5fdc1b993dcb01e8a994fdacfc4eb089832c82e3
1//= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines APIs for analyzing the format strings of printf, fscanf, 11// and friends. 12// 13// The structure of format strings for fprintf are described in C99 7.19.6.1. 14// 15// The structure of format strings for fscanf are described in C99 7.19.6.2. 16// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_H 20#define LLVM_CLANG_FORMAT_H 21 22#include "clang/AST/CanonicalType.h" 23 24namespace clang { 25 26class Sema; 27 28//===----------------------------------------------------------------------===// 29/// Common components of both fprintf and fscanf format strings. 30namespace analyze_format_string { 31 32/// Class representing optional flags with location and representation 33/// information. 34class OptionalFlag { 35public: 36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} 38 bool isSet() { return flag; } 39 void set() { flag = true; } 40 void clear() { flag = false; } 41 void setPosition(const char *position) { 42 assert(position); 43 this->position = position; 44 } 45 const char *getPosition() const { 46 assert(position); 47 return position; 48 } 49 const char *toString() const { return representation; } 50 51 // Overloaded operators for bool like qualities 52 operator bool() const { return flag; } 53 OptionalFlag& operator=(const bool &rhs) { 54 flag = rhs; 55 return *this; // Return a reference to myself. 56 } 57private: 58 const char *representation; 59 const char *position; 60 bool flag; 61}; 62 63/// Represents the length modifier in a format string in scanf/printf. 64class LengthModifier { 65public: 66 enum Kind { 67 None, 68 AsChar, // 'hh' 69 AsShort, // 'h' 70 AsLong, // 'l' 71 AsLongLong, // 'll', 'q' (BSD, deprecated) 72 AsIntMax, // 'j' 73 AsSizeT, // 'z' 74 AsPtrDiff, // 't' 75 AsLongDouble, // 'L' 76 AsWideChar = AsLong // for '%ls', only makes sense for printf 77 }; 78 79 LengthModifier() 80 : Position(0), kind(None) {} 81 LengthModifier(const char *pos, Kind k) 82 : Position(pos), kind(k) {} 83 84 const char *getStart() const { 85 return Position; 86 } 87 88 unsigned getLength() const { 89 switch (kind) { 90 default: 91 return 1; 92 case AsLongLong: 93 case AsChar: 94 return 2; 95 case None: 96 return 0; 97 } 98 } 99 100 Kind getKind() const { return kind; } 101 void setKind(Kind k) { kind = k; } 102 103 const char *toString() const; 104 105private: 106 const char *Position; 107 Kind kind; 108}; 109 110class ConversionSpecifier { 111public: 112 enum Kind { 113 InvalidSpecifier = 0, 114 // C99 conversion specifiers. 115 cArg, 116 dArg, 117 iArg, 118 IntArgBeg = cArg, IntArgEnd = iArg, 119 120 oArg, 121 uArg, 122 xArg, 123 XArg, 124 UIntArgBeg = oArg, UIntArgEnd = XArg, 125 126 fArg, 127 FArg, 128 eArg, 129 EArg, 130 gArg, 131 GArg, 132 aArg, 133 AArg, 134 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 135 136 sArg, 137 pArg, 138 nArg, 139 PercentArg, 140 CArg, 141 SArg, 142 143 // ** Printf-specific ** 144 145 // Objective-C specific specifiers. 146 ObjCObjArg, // '@' 147 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 148 149 // GlibC specific specifiers. 150 PrintErrno, // 'm' 151 152 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 153 154 // ** Scanf-specific ** 155 ScanListArg, // '[' 156 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 157 }; 158 159 ConversionSpecifier(bool isPrintf) 160 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 161 162 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 163 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 164 165 const char *getStart() const { 166 return Position; 167 } 168 169 StringRef getCharacters() const { 170 return StringRef(getStart(), getLength()); 171 } 172 173 bool consumesDataArgument() const { 174 switch (kind) { 175 case PrintErrno: 176 assert(IsPrintf); 177 case PercentArg: 178 return false; 179 default: 180 return true; 181 } 182 } 183 184 Kind getKind() const { return kind; } 185 void setKind(Kind k) { kind = k; } 186 unsigned getLength() const { 187 return EndScanList ? EndScanList - Position : 1; 188 } 189 190 const char *toString() const; 191 192 bool isPrintfKind() const { return IsPrintf; } 193 194protected: 195 bool IsPrintf; 196 const char *Position; 197 const char *EndScanList; 198 Kind kind; 199}; 200 201class ArgTypeResult { 202public: 203 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 204 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 205private: 206 const Kind K; 207 QualType T; 208 ArgTypeResult(bool) : K(InvalidTy) {} 209public: 210 ArgTypeResult(Kind k = UnknownTy) : K(k) {} 211 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {} 212 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {} 213 214 static ArgTypeResult Invalid() { return ArgTypeResult(true); } 215 216 bool isValid() const { return K != InvalidTy; } 217 218 const QualType *getSpecificType() const { 219 return K == SpecificTy ? &T : 0; 220 } 221 222 bool matchesType(ASTContext &C, QualType argTy) const; 223 224 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; } 225 226 QualType getRepresentativeType(ASTContext &C) const; 227}; 228 229class OptionalAmount { 230public: 231 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 232 233 OptionalAmount(HowSpecified howSpecified, 234 unsigned amount, 235 const char *amountStart, 236 unsigned amountLength, 237 bool usesPositionalArg) 238 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 239 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 240 241 OptionalAmount(bool valid = true) 242 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 243 UsesPositionalArg(0), UsesDotPrefix(0) {} 244 245 bool isInvalid() const { 246 return hs == Invalid; 247 } 248 249 HowSpecified getHowSpecified() const { return hs; } 250 void setHowSpecified(HowSpecified h) { hs = h; } 251 252 bool hasDataArgument() const { return hs == Arg; } 253 254 unsigned getArgIndex() const { 255 assert(hasDataArgument()); 256 return amt; 257 } 258 259 unsigned getConstantAmount() const { 260 assert(hs == Constant); 261 return amt; 262 } 263 264 const char *getStart() const { 265 // We include the . character if it is given. 266 return start - UsesDotPrefix; 267 } 268 269 unsigned getConstantLength() const { 270 assert(hs == Constant); 271 return length + UsesDotPrefix; 272 } 273 274 ArgTypeResult getArgType(ASTContext &Ctx) const; 275 276 void toString(raw_ostream &os) const; 277 278 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 279 unsigned getPositionalArgIndex() const { 280 assert(hasDataArgument()); 281 return amt + 1; 282 } 283 284 bool usesDotPrefix() const { return UsesDotPrefix; } 285 void setUsesDotPrefix() { UsesDotPrefix = true; } 286 287private: 288 const char *start; 289 unsigned length; 290 HowSpecified hs; 291 unsigned amt; 292 bool UsesPositionalArg : 1; 293 bool UsesDotPrefix; 294}; 295 296 297class FormatSpecifier { 298protected: 299 LengthModifier LM; 300 OptionalAmount FieldWidth; 301 ConversionSpecifier CS; 302 /// Positional arguments, an IEEE extension: 303 /// IEEE Std 1003.1, 2004 Edition 304 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 305 bool UsesPositionalArg; 306 unsigned argIndex; 307public: 308 FormatSpecifier(bool isPrintf) 309 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 310 311 void setLengthModifier(LengthModifier lm) { 312 LM = lm; 313 } 314 315 void setUsesPositionalArg() { UsesPositionalArg = true; } 316 317 void setArgIndex(unsigned i) { 318 argIndex = i; 319 } 320 321 unsigned getArgIndex() const { 322 return argIndex; 323 } 324 325 unsigned getPositionalArgIndex() const { 326 return argIndex + 1; 327 } 328 329 const LengthModifier &getLengthModifier() const { 330 return LM; 331 } 332 333 const OptionalAmount &getFieldWidth() const { 334 return FieldWidth; 335 } 336 337 void setFieldWidth(const OptionalAmount &Amt) { 338 FieldWidth = Amt; 339 } 340 341 bool usesPositionalArg() const { return UsesPositionalArg; } 342 343 bool hasValidLengthModifier() const; 344}; 345 346} // end analyze_format_string namespace 347 348//===----------------------------------------------------------------------===// 349/// Pieces specific to fprintf format strings. 350 351namespace analyze_printf { 352 353class PrintfConversionSpecifier : 354 public analyze_format_string::ConversionSpecifier { 355public: 356 PrintfConversionSpecifier() 357 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 358 359 PrintfConversionSpecifier(const char *pos, Kind k) 360 : ConversionSpecifier(true, pos, k) {} 361 362 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 363 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 364 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 365 bool isDoubleArg() const { return kind >= DoubleArgBeg && 366 kind <= DoubleArgBeg; } 367 unsigned getLength() const { 368 // Conversion specifiers currently only are represented by 369 // single characters, but we be flexible. 370 return 1; 371 } 372 373 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 374 return CS->isPrintfKind(); 375 } 376}; 377 378using analyze_format_string::ArgTypeResult; 379using analyze_format_string::LengthModifier; 380using analyze_format_string::OptionalAmount; 381using analyze_format_string::OptionalFlag; 382 383class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 384 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 385 OptionalFlag IsLeftJustified; // '-' 386 OptionalFlag HasPlusPrefix; // '+' 387 OptionalFlag HasSpacePrefix; // ' ' 388 OptionalFlag HasAlternativeForm; // '#' 389 OptionalFlag HasLeadingZeroes; // '0' 390 OptionalAmount Precision; 391public: 392 PrintfSpecifier() : 393 FormatSpecifier(/* isPrintf = */ true), 394 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 395 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 396 397 static PrintfSpecifier Parse(const char *beg, const char *end); 398 399 // Methods for incrementally constructing the PrintfSpecifier. 400 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 401 CS = cs; 402 } 403 void setHasThousandsGrouping(const char *position) { 404 HasThousandsGrouping = true; 405 HasThousandsGrouping.setPosition(position); 406 } 407 void setIsLeftJustified(const char *position) { 408 IsLeftJustified = true; 409 IsLeftJustified.setPosition(position); 410 } 411 void setHasPlusPrefix(const char *position) { 412 HasPlusPrefix = true; 413 HasPlusPrefix.setPosition(position); 414 } 415 void setHasSpacePrefix(const char *position) { 416 HasSpacePrefix = true; 417 HasSpacePrefix.setPosition(position); 418 } 419 void setHasAlternativeForm(const char *position) { 420 HasAlternativeForm = true; 421 HasAlternativeForm.setPosition(position); 422 } 423 void setHasLeadingZeros(const char *position) { 424 HasLeadingZeroes = true; 425 HasLeadingZeroes.setPosition(position); 426 } 427 void setUsesPositionalArg() { UsesPositionalArg = true; } 428 429 // Methods for querying the format specifier. 430 431 const PrintfConversionSpecifier &getConversionSpecifier() const { 432 return cast<PrintfConversionSpecifier>(CS); 433 } 434 435 void setPrecision(const OptionalAmount &Amt) { 436 Precision = Amt; 437 Precision.setUsesDotPrefix(); 438 } 439 440 const OptionalAmount &getPrecision() const { 441 return Precision; 442 } 443 444 bool consumesDataArgument() const { 445 return getConversionSpecifier().consumesDataArgument(); 446 } 447 448 /// \brief Returns the builtin type that a data argument 449 /// paired with this format specifier should have. This method 450 /// will return null if the format specifier does not have 451 /// a matching data argument or the matching argument matches 452 /// more than one type. 453 ArgTypeResult getArgType(Sema &S) const; 454 455 const OptionalFlag &hasThousandsGrouping() const { 456 return HasThousandsGrouping; 457 } 458 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 459 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 460 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 461 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 462 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 463 bool usesPositionalArg() const { return UsesPositionalArg; } 464 465 /// Changes the specifier and length according to a QualType, retaining any 466 /// flags or options. Returns true on success, or false when a conversion 467 /// was not successful. 468 bool fixType(QualType QT, const LangOptions &LangOpt); 469 470 void toString(raw_ostream &os) const; 471 472 // Validation methods - to check if any element results in undefined behavior 473 bool hasValidPlusPrefix() const; 474 bool hasValidAlternativeForm() const; 475 bool hasValidLeadingZeros() const; 476 bool hasValidSpacePrefix() const; 477 bool hasValidLeftJustified() const; 478 bool hasValidThousandsGroupingPrefix() const; 479 480 bool hasValidPrecision() const; 481 bool hasValidFieldWidth() const; 482}; 483} // end analyze_printf namespace 484 485//===----------------------------------------------------------------------===// 486/// Pieces specific to fscanf format strings. 487 488namespace analyze_scanf { 489 490class ScanfConversionSpecifier : 491 public analyze_format_string::ConversionSpecifier { 492public: 493 ScanfConversionSpecifier() 494 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 495 496 ScanfConversionSpecifier(const char *pos, Kind k) 497 : ConversionSpecifier(false, pos, k) {} 498 499 void setEndScanList(const char *pos) { EndScanList = pos; } 500 501 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 502 return !CS->isPrintfKind(); 503 } 504}; 505 506using analyze_format_string::LengthModifier; 507using analyze_format_string::OptionalAmount; 508using analyze_format_string::OptionalFlag; 509 510class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 511 OptionalFlag SuppressAssignment; // '*' 512public: 513 ScanfSpecifier() : 514 FormatSpecifier(/* isPrintf = */ false), 515 SuppressAssignment("*") {} 516 517 void setSuppressAssignment(const char *position) { 518 SuppressAssignment = true; 519 SuppressAssignment.setPosition(position); 520 } 521 522 const OptionalFlag &getSuppressAssignment() const { 523 return SuppressAssignment; 524 } 525 526 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 527 CS = cs; 528 } 529 530 const ScanfConversionSpecifier &getConversionSpecifier() const { 531 return cast<ScanfConversionSpecifier>(CS); 532 } 533 534 bool consumesDataArgument() const { 535 return CS.consumesDataArgument() && !SuppressAssignment; 536 } 537 538 static ScanfSpecifier Parse(const char *beg, const char *end); 539}; 540 541} // end analyze_scanf namespace 542 543//===----------------------------------------------------------------------===// 544// Parsing and processing of format strings (both fprintf and fscanf). 545 546namespace analyze_format_string { 547 548enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 549 550class FormatStringHandler { 551public: 552 FormatStringHandler() {} 553 virtual ~FormatStringHandler(); 554 555 virtual void HandleNullChar(const char *nullCharacter) {} 556 557 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 558 PositionContext p) {} 559 560 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 561 562 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 563 unsigned specifierLen) {} 564 565 // Printf-specific handlers. 566 567 virtual bool HandleInvalidPrintfConversionSpecifier( 568 const analyze_printf::PrintfSpecifier &FS, 569 const char *startSpecifier, 570 unsigned specifierLen) { 571 return true; 572 } 573 574 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 575 const char *startSpecifier, 576 unsigned specifierLen) { 577 return true; 578 } 579 580 // Scanf-specific handlers. 581 582 virtual bool HandleInvalidScanfConversionSpecifier( 583 const analyze_scanf::ScanfSpecifier &FS, 584 const char *startSpecifier, 585 unsigned specifierLen) { 586 return true; 587 } 588 589 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 590 const char *startSpecifier, 591 unsigned specifierLen) { 592 return true; 593 } 594 595 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 596}; 597 598bool ParsePrintfString(FormatStringHandler &H, 599 const char *beg, const char *end); 600 601bool ParseScanfString(FormatStringHandler &H, 602 const char *beg, const char *end); 603 604} // end analyze_format_string namespace 605} // end clang namespace 606#endif 607