// PrintfFormatString.cpp revision 58e1e54476d610d6c33ef483f216ed8a1282d35c
//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Handling of format strings in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
//
//===----------------------------------------------------------------------===//

#include "clang/Analysis/Analyses/FormatString.h"
#include "FormatStringParsing.h"

using clang::analyze_format_string::ArgType;
using clang::analyze_format_string::FormatStringHandler;
using clang::analyze_format_string::LengthModifier;
using clang::analyze_format_string::OptionalAmount;
using clang::analyze_format_string::ConversionSpecifier;
using clang::analyze_printf::PrintfSpecifier;

using namespace clang;

typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
        PrintfSpecifierResult;

//===----------------------------------------------------------------------===//
// Methods for parsing format strings.
32//===----------------------------------------------------------------------===// 33 34using analyze_format_string::ParseNonPositionAmount; 35 36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } else { 42 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 43 analyze_format_string::PrecisionPos); 44 if (Amt.isInvalid()) 45 return true; 46 FS.setPrecision(Amt); 47 } 48 return false; 49} 50 51static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 52 const char *&Beg, 53 const char *E, 54 unsigned &argIndex, 55 const LangOptions &LO) { 56 57 using namespace clang::analyze_format_string; 58 using namespace clang::analyze_printf; 59 60 const char *I = Beg; 61 const char *Start = 0; 62 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 63 64 // Look for a '%' character that indicates the start of a format specifier. 65 for ( ; I != E ; ++I) { 66 char c = *I; 67 if (c == '\0') { 68 // Detect spurious null characters, which are likely errors. 69 H.HandleNullChar(I); 70 return true; 71 } 72 if (c == '%') { 73 Start = I++; // Record the start of the format specifier. 74 break; 75 } 76 } 77 78 // No format specifier found? 79 if (!Start) 80 return false; 81 82 if (I == E) { 83 // No more characters left? 84 H.HandleIncompleteSpecifier(Start, E - Start); 85 return true; 86 } 87 88 PrintfSpecifier FS; 89 if (ParseArgPosition(H, FS, Start, I, E)) 90 return true; 91 92 if (I == E) { 93 // No more characters left? 94 H.HandleIncompleteSpecifier(Start, E - Start); 95 return true; 96 } 97 98 // Look for flags (if any). 99 bool hasMore = true; 100 for ( ; I != E; ++I) { 101 switch (*I) { 102 default: hasMore = false; break; 103 case '\'': 104 // FIXME: POSIX specific. Always accept? 
105 FS.setHasThousandsGrouping(I); 106 break; 107 case '-': FS.setIsLeftJustified(I); break; 108 case '+': FS.setHasPlusPrefix(I); break; 109 case ' ': FS.setHasSpacePrefix(I); break; 110 case '#': FS.setHasAlternativeForm(I); break; 111 case '0': FS.setHasLeadingZeros(I); break; 112 } 113 if (!hasMore) 114 break; 115 } 116 117 if (I == E) { 118 // No more characters left? 119 H.HandleIncompleteSpecifier(Start, E - Start); 120 return true; 121 } 122 123 // Look for the field width (if any). 124 if (ParseFieldWidth(H, FS, Start, I, E, 125 FS.usesPositionalArg() ? 0 : &argIndex)) 126 return true; 127 128 if (I == E) { 129 // No more characters left? 130 H.HandleIncompleteSpecifier(Start, E - Start); 131 return true; 132 } 133 134 // Look for the precision (if any). 135 if (*I == '.') { 136 ++I; 137 if (I == E) { 138 H.HandleIncompleteSpecifier(Start, E - Start); 139 return true; 140 } 141 142 if (ParsePrecision(H, FS, Start, I, E, 143 FS.usesPositionalArg() ? 0 : &argIndex)) 144 return true; 145 146 if (I == E) { 147 // No more characters left? 148 H.HandleIncompleteSpecifier(Start, E - Start); 149 return true; 150 } 151 } 152 153 // Look for the length modifier. 154 if (ParseLengthModifier(FS, I, E, LO) && I == E) { 155 // No more characters left? 156 H.HandleIncompleteSpecifier(Start, E - Start); 157 return true; 158 } 159 160 if (*I == '\0') { 161 // Detect spurious null characters, which are likely errors. 162 H.HandleNullChar(I); 163 return true; 164 } 165 166 // Finally, look for the conversion specifier. 167 const char *conversionPosition = I++; 168 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 169 switch (*conversionPosition) { 170 default: 171 break; 172 // C99: 7.19.6.1 (section 8). 
173 case '%': k = ConversionSpecifier::PercentArg; break; 174 case 'A': k = ConversionSpecifier::AArg; break; 175 case 'E': k = ConversionSpecifier::EArg; break; 176 case 'F': k = ConversionSpecifier::FArg; break; 177 case 'G': k = ConversionSpecifier::GArg; break; 178 case 'X': k = ConversionSpecifier::XArg; break; 179 case 'a': k = ConversionSpecifier::aArg; break; 180 case 'c': k = ConversionSpecifier::cArg; break; 181 case 'd': k = ConversionSpecifier::dArg; break; 182 case 'e': k = ConversionSpecifier::eArg; break; 183 case 'f': k = ConversionSpecifier::fArg; break; 184 case 'g': k = ConversionSpecifier::gArg; break; 185 case 'i': k = ConversionSpecifier::iArg; break; 186 case 'n': k = ConversionSpecifier::nArg; break; 187 case 'o': k = ConversionSpecifier::oArg; break; 188 case 'p': k = ConversionSpecifier::pArg; break; 189 case 's': k = ConversionSpecifier::sArg; break; 190 case 'u': k = ConversionSpecifier::uArg; break; 191 case 'x': k = ConversionSpecifier::xArg; break; 192 // POSIX specific. 193 case 'C': k = ConversionSpecifier::CArg; break; 194 case 'S': k = ConversionSpecifier::SArg; break; 195 // Objective-C. 196 case '@': k = ConversionSpecifier::ObjCObjArg; break; 197 // Glibc specific. 198 case 'm': k = ConversionSpecifier::PrintErrno; break; 199 } 200 PrintfConversionSpecifier CS(conversionPosition, k); 201 FS.setConversionSpecifier(CS); 202 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 203 FS.setArgIndex(argIndex++); 204 205 if (k == ConversionSpecifier::InvalidSpecifier) { 206 // Assume the conversion takes one argument. 207 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start); 208 } 209 return PrintfSpecifierResult(Start, FS); 210} 211 212bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 213 const char *I, 214 const char *E, 215 const LangOptions &LO) { 216 217 unsigned argIndex = 0; 218 219 // Keep looking for a format specifier until we have exhausted the string. 
220 while (I != E) { 221 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, 222 LO); 223 // Did a fail-stop error of any kind occur when parsing the specifier? 224 // If so, don't do any more processing. 225 if (FSR.shouldStop()) 226 return true;; 227 // Did we exhaust the string or encounter an error that 228 // we can recover from? 229 if (!FSR.hasValue()) 230 continue; 231 // We have a format specifier. Pass it to the callback. 232 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 233 I - FSR.getStart())) 234 return true; 235 } 236 assert(I == E && "Format string not exhausted"); 237 return false; 238} 239 240//===----------------------------------------------------------------------===// 241// Methods on PrintfSpecifier. 242//===----------------------------------------------------------------------===// 243 244ArgType PrintfSpecifier::getArgType(ASTContext &Ctx, 245 bool IsObjCLiteral) const { 246 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 247 248 if (!CS.consumesDataArgument()) 249 return ArgType::Invalid(); 250 251 if (CS.getKind() == ConversionSpecifier::cArg) 252 switch (LM.getKind()) { 253 case LengthModifier::None: return Ctx.IntTy; 254 case LengthModifier::AsLong: 255 return ArgType(ArgType::WIntTy, "wint_t"); 256 default: 257 return ArgType::Invalid(); 258 } 259 260 if (CS.isIntArg()) 261 switch (LM.getKind()) { 262 case LengthModifier::AsLongDouble: 263 // GNU extension. 264 return Ctx.LongLongTy; 265 case LengthModifier::None: return Ctx.IntTy; 266 case LengthModifier::AsChar: return ArgType::AnyCharTy; 267 case LengthModifier::AsShort: return Ctx.ShortTy; 268 case LengthModifier::AsLong: return Ctx.LongTy; 269 case LengthModifier::AsLongLong: 270 case LengthModifier::AsQuad: 271 return Ctx.LongLongTy; 272 case LengthModifier::AsIntMax: 273 return ArgType(Ctx.getIntMaxType(), "intmax_t"); 274 case LengthModifier::AsSizeT: 275 // FIXME: How to get the corresponding signed version of size_t? 
276 return ArgType(); 277 case LengthModifier::AsPtrDiff: 278 return ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"); 279 case LengthModifier::AsAllocate: 280 case LengthModifier::AsMAllocate: 281 return ArgType::Invalid(); 282 } 283 284 if (CS.isUIntArg()) 285 switch (LM.getKind()) { 286 case LengthModifier::AsLongDouble: 287 // GNU extension. 288 return Ctx.UnsignedLongLongTy; 289 case LengthModifier::None: return Ctx.UnsignedIntTy; 290 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 291 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 292 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 293 case LengthModifier::AsLongLong: 294 case LengthModifier::AsQuad: 295 return Ctx.UnsignedLongLongTy; 296 case LengthModifier::AsIntMax: 297 return ArgType(Ctx.getUIntMaxType(), "uintmax_t"); 298 case LengthModifier::AsSizeT: 299 return ArgType(Ctx.getSizeType(), "size_t"); 300 case LengthModifier::AsPtrDiff: 301 // FIXME: How to get the corresponding unsigned 302 // version of ptrdiff_t? 
303 return ArgType(); 304 case LengthModifier::AsAllocate: 305 case LengthModifier::AsMAllocate: 306 return ArgType::Invalid(); 307 } 308 309 if (CS.isDoubleArg()) { 310 if (LM.getKind() == LengthModifier::AsLongDouble) 311 return Ctx.LongDoubleTy; 312 return Ctx.DoubleTy; 313 } 314 315 switch (CS.getKind()) { 316 case ConversionSpecifier::sArg: 317 if (LM.getKind() == LengthModifier::AsWideChar) { 318 if (IsObjCLiteral) 319 return Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()); 320 return ArgType(ArgType::WCStrTy, "wchar_t *"); 321 } 322 return ArgType::CStrTy; 323 case ConversionSpecifier::SArg: 324 if (IsObjCLiteral) 325 return Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()); 326 return ArgType(ArgType::WCStrTy, "wchar_t *"); 327 case ConversionSpecifier::CArg: 328 if (IsObjCLiteral) 329 return Ctx.UnsignedShortTy; 330 return ArgType(Ctx.WCharTy, "wchar_t"); 331 case ConversionSpecifier::pArg: 332 return ArgType::CPointerTy; 333 case ConversionSpecifier::nArg: 334 return ArgType::PtrTo(Ctx.IntTy); 335 case ConversionSpecifier::ObjCObjArg: 336 return ArgType::ObjCPointerTy; 337 default: 338 break; 339 } 340 341 // FIXME: Handle other cases. 342 return ArgType(); 343} 344 345bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, 346 ASTContext &Ctx, bool IsObjCLiteral) { 347 // %n is different from other conversion specifiers; don't try to fix it. 348 if (CS.getKind() == ConversionSpecifier::nArg) 349 return false; 350 351 // Handle Objective-C objects first. Note that while the '%@' specifier will 352 // not warn for structure pointer or void pointer arguments (because that's 353 // how CoreFoundation objects are implemented), we only show a fixit for '%@' 354 // if we know it's an object (block, id, class, or __attribute__((NSObject))). 
355 if (QT->isObjCRetainableType()) { 356 if (!IsObjCLiteral) 357 return false; 358 359 CS.setKind(ConversionSpecifier::ObjCObjArg); 360 361 // Disable irrelevant flags 362 HasThousandsGrouping = false; 363 HasPlusPrefix = false; 364 HasSpacePrefix = false; 365 HasAlternativeForm = false; 366 HasLeadingZeroes = false; 367 Precision.setHowSpecified(OptionalAmount::NotSpecified); 368 LM.setKind(LengthModifier::None); 369 370 return true; 371 } 372 373 // Handle strings next (char *, wchar_t *) 374 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 375 CS.setKind(ConversionSpecifier::sArg); 376 377 // Disable irrelevant flags 378 HasAlternativeForm = 0; 379 HasLeadingZeroes = 0; 380 381 // Set the long length modifier for wide characters 382 if (QT->getPointeeType()->isWideCharType()) 383 LM.setKind(LengthModifier::AsWideChar); 384 else 385 LM.setKind(LengthModifier::None); 386 387 return true; 388 } 389 390 // If it's an enum, get its underlying type. 391 if (const EnumType *ETy = QT->getAs<EnumType>()) 392 QT = ETy->getDecl()->getIntegerType(); 393 394 // We can only work with builtin types. 395 const BuiltinType *BT = QT->getAs<BuiltinType>(); 396 if (!BT) 397 return false; 398 399 // Set length modifier 400 switch (BT->getKind()) { 401 case BuiltinType::Bool: 402 case BuiltinType::WChar_U: 403 case BuiltinType::WChar_S: 404 case BuiltinType::Char16: 405 case BuiltinType::Char32: 406 case BuiltinType::UInt128: 407 case BuiltinType::Int128: 408 case BuiltinType::Half: 409 // Various types which are non-trivial to correct. 410 return false; 411 412#define SIGNED_TYPE(Id, SingletonId) 413#define UNSIGNED_TYPE(Id, SingletonId) 414#define FLOATING_TYPE(Id, SingletonId) 415#define BUILTIN_TYPE(Id, SingletonId) \ 416 case BuiltinType::Id: 417#include "clang/AST/BuiltinTypes.def" 418 // Misc other stuff which doesn't make sense here. 
419 return false; 420 421 case BuiltinType::UInt: 422 case BuiltinType::Int: 423 case BuiltinType::Float: 424 case BuiltinType::Double: 425 LM.setKind(LengthModifier::None); 426 break; 427 428 case BuiltinType::Char_U: 429 case BuiltinType::UChar: 430 case BuiltinType::Char_S: 431 case BuiltinType::SChar: 432 LM.setKind(LengthModifier::AsChar); 433 break; 434 435 case BuiltinType::Short: 436 case BuiltinType::UShort: 437 LM.setKind(LengthModifier::AsShort); 438 break; 439 440 case BuiltinType::Long: 441 case BuiltinType::ULong: 442 LM.setKind(LengthModifier::AsLong); 443 break; 444 445 case BuiltinType::LongLong: 446 case BuiltinType::ULongLong: 447 LM.setKind(LengthModifier::AsLongLong); 448 break; 449 450 case BuiltinType::LongDouble: 451 LM.setKind(LengthModifier::AsLongDouble); 452 break; 453 } 454 455 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 456 if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) 457 namedTypeToLengthModifier(QT, LM); 458 459 // If fixing the length modifier was enough, we are done. 460 const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral); 461 if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT)) 462 return true; 463 464 // Set conversion specifier and disable any flags which do not apply to it. 465 // Let typedefs to char fall through to int, as %c is silly for uint8_t. 
466 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) { 467 CS.setKind(ConversionSpecifier::cArg); 468 LM.setKind(LengthModifier::None); 469 Precision.setHowSpecified(OptionalAmount::NotSpecified); 470 HasAlternativeForm = 0; 471 HasLeadingZeroes = 0; 472 HasPlusPrefix = 0; 473 } 474 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 475 else if (QT->isRealFloatingType()) { 476 CS.setKind(ConversionSpecifier::fArg); 477 } 478 else if (QT->isSignedIntegerType()) { 479 CS.setKind(ConversionSpecifier::dArg); 480 HasAlternativeForm = 0; 481 } 482 else if (QT->isUnsignedIntegerType()) { 483 CS.setKind(ConversionSpecifier::uArg); 484 HasAlternativeForm = 0; 485 HasPlusPrefix = 0; 486 } else { 487 llvm_unreachable("Unexpected type"); 488 } 489 490 return true; 491} 492 493void PrintfSpecifier::toString(raw_ostream &os) const { 494 // Whilst some features have no defined order, we are using the order 495 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1) 496 os << "%"; 497 498 // Positional args 499 if (usesPositionalArg()) { 500 os << getPositionalArgIndex() << "$"; 501 } 502 503 // Conversion flags 504 if (IsLeftJustified) os << "-"; 505 if (HasPlusPrefix) os << "+"; 506 if (HasSpacePrefix) os << " "; 507 if (HasAlternativeForm) os << "#"; 508 if (HasLeadingZeroes) os << "0"; 509 510 // Minimum field width 511 FieldWidth.toString(os); 512 // Precision 513 Precision.toString(os); 514 // Length modifier 515 os << LM.toString(); 516 // Conversion specifier 517 os << CS.toString(); 518} 519 520bool PrintfSpecifier::hasValidPlusPrefix() const { 521 if (!HasPlusPrefix) 522 return true; 523 524 // The plus prefix only makes sense for signed conversions 525 switch (CS.getKind()) { 526 case ConversionSpecifier::dArg: 527 case ConversionSpecifier::iArg: 528 case ConversionSpecifier::fArg: 529 case ConversionSpecifier::FArg: 530 case ConversionSpecifier::eArg: 531 case ConversionSpecifier::EArg: 532 case ConversionSpecifier::gArg: 533 
case ConversionSpecifier::GArg: 534 case ConversionSpecifier::aArg: 535 case ConversionSpecifier::AArg: 536 return true; 537 538 default: 539 return false; 540 } 541} 542 543bool PrintfSpecifier::hasValidAlternativeForm() const { 544 if (!HasAlternativeForm) 545 return true; 546 547 // Alternate form flag only valid with the oxXaAeEfFgG conversions 548 switch (CS.getKind()) { 549 case ConversionSpecifier::oArg: 550 case ConversionSpecifier::xArg: 551 case ConversionSpecifier::XArg: 552 case ConversionSpecifier::aArg: 553 case ConversionSpecifier::AArg: 554 case ConversionSpecifier::eArg: 555 case ConversionSpecifier::EArg: 556 case ConversionSpecifier::fArg: 557 case ConversionSpecifier::FArg: 558 case ConversionSpecifier::gArg: 559 case ConversionSpecifier::GArg: 560 return true; 561 562 default: 563 return false; 564 } 565} 566 567bool PrintfSpecifier::hasValidLeadingZeros() const { 568 if (!HasLeadingZeroes) 569 return true; 570 571 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 572 switch (CS.getKind()) { 573 case ConversionSpecifier::dArg: 574 case ConversionSpecifier::iArg: 575 case ConversionSpecifier::oArg: 576 case ConversionSpecifier::uArg: 577 case ConversionSpecifier::xArg: 578 case ConversionSpecifier::XArg: 579 case ConversionSpecifier::aArg: 580 case ConversionSpecifier::AArg: 581 case ConversionSpecifier::eArg: 582 case ConversionSpecifier::EArg: 583 case ConversionSpecifier::fArg: 584 case ConversionSpecifier::FArg: 585 case ConversionSpecifier::gArg: 586 case ConversionSpecifier::GArg: 587 return true; 588 589 default: 590 return false; 591 } 592} 593 594bool PrintfSpecifier::hasValidSpacePrefix() const { 595 if (!HasSpacePrefix) 596 return true; 597 598 // The space prefix only makes sense for signed conversions 599 switch (CS.getKind()) { 600 case ConversionSpecifier::dArg: 601 case ConversionSpecifier::iArg: 602 case ConversionSpecifier::fArg: 603 case ConversionSpecifier::FArg: 604 case ConversionSpecifier::eArg: 605 
case ConversionSpecifier::EArg: 606 case ConversionSpecifier::gArg: 607 case ConversionSpecifier::GArg: 608 case ConversionSpecifier::aArg: 609 case ConversionSpecifier::AArg: 610 return true; 611 612 default: 613 return false; 614 } 615} 616 617bool PrintfSpecifier::hasValidLeftJustified() const { 618 if (!IsLeftJustified) 619 return true; 620 621 // The left justified flag is valid for all conversions except n 622 switch (CS.getKind()) { 623 case ConversionSpecifier::nArg: 624 return false; 625 626 default: 627 return true; 628 } 629} 630 631bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const { 632 if (!HasThousandsGrouping) 633 return true; 634 635 switch (CS.getKind()) { 636 case ConversionSpecifier::dArg: 637 case ConversionSpecifier::iArg: 638 case ConversionSpecifier::uArg: 639 case ConversionSpecifier::fArg: 640 case ConversionSpecifier::FArg: 641 case ConversionSpecifier::gArg: 642 case ConversionSpecifier::GArg: 643 return true; 644 default: 645 return false; 646 } 647} 648 649bool PrintfSpecifier::hasValidPrecision() const { 650 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 651 return true; 652 653 // Precision is only valid with the diouxXaAeEfFgGs conversions 654 switch (CS.getKind()) { 655 case ConversionSpecifier::dArg: 656 case ConversionSpecifier::iArg: 657 case ConversionSpecifier::oArg: 658 case ConversionSpecifier::uArg: 659 case ConversionSpecifier::xArg: 660 case ConversionSpecifier::XArg: 661 case ConversionSpecifier::aArg: 662 case ConversionSpecifier::AArg: 663 case ConversionSpecifier::eArg: 664 case ConversionSpecifier::EArg: 665 case ConversionSpecifier::fArg: 666 case ConversionSpecifier::FArg: 667 case ConversionSpecifier::gArg: 668 case ConversionSpecifier::GArg: 669 case ConversionSpecifier::sArg: 670 return true; 671 672 default: 673 return false; 674 } 675} 676bool PrintfSpecifier::hasValidFieldWidth() const { 677 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 678 return true; 679 680 
// The field width is valid for all conversions except n 681 switch (CS.getKind()) { 682 case ConversionSpecifier::nArg: 683 return false; 684 685 default: 686 return true; 687 } 688} 689