// PrintfFormatString.cpp revision 630821869c4ec4604ab479d66e5ff81147a858e1
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_format_string::ConversionSpecifier; 23using clang::analyze_printf::PrintfSpecifier; 24 25using namespace clang; 26 27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 28 PrintfSpecifierResult; 29 30//===----------------------------------------------------------------------===// 31// Methods for parsing format strings. 
32//===----------------------------------------------------------------------===// 33 34using analyze_format_string::ParseNonPositionAmount; 35 36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } else { 42 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 43 analyze_format_string::PrecisionPos); 44 if (Amt.isInvalid()) 45 return true; 46 FS.setPrecision(Amt); 47 } 48 return false; 49} 50 51static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 52 const char *&Beg, 53 const char *E, 54 unsigned &argIndex) { 55 56 using namespace clang::analyze_format_string; 57 using namespace clang::analyze_printf; 58 59 const char *I = Beg; 60 const char *Start = 0; 61 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 62 63 // Look for a '%' character that indicates the start of a format specifier. 64 for ( ; I != E ; ++I) { 65 char c = *I; 66 if (c == '\0') { 67 // Detect spurious null characters, which are likely errors. 68 H.HandleNullChar(I); 69 return true; 70 } 71 if (c == '%') { 72 Start = I++; // Record the start of the format specifier. 73 break; 74 } 75 } 76 77 // No format specifier found? 78 if (!Start) 79 return false; 80 81 if (I == E) { 82 // No more characters left? 83 H.HandleIncompleteSpecifier(Start, E - Start); 84 return true; 85 } 86 87 PrintfSpecifier FS; 88 if (ParseArgPosition(H, FS, Start, I, E)) 89 return true; 90 91 if (I == E) { 92 // No more characters left? 93 H.HandleIncompleteSpecifier(Start, E - Start); 94 return true; 95 } 96 97 // Look for flags (if any). 98 bool hasMore = true; 99 for ( ; I != E; ++I) { 100 switch (*I) { 101 default: hasMore = false; break; 102 case '\'': 103 // FIXME: POSIX specific. Always accept? 
104 FS.setHasThousandsGrouping(I); 105 break; 106 case '-': FS.setIsLeftJustified(I); break; 107 case '+': FS.setHasPlusPrefix(I); break; 108 case ' ': FS.setHasSpacePrefix(I); break; 109 case '#': FS.setHasAlternativeForm(I); break; 110 case '0': FS.setHasLeadingZeros(I); break; 111 } 112 if (!hasMore) 113 break; 114 } 115 116 if (I == E) { 117 // No more characters left? 118 H.HandleIncompleteSpecifier(Start, E - Start); 119 return true; 120 } 121 122 // Look for the field width (if any). 123 if (ParseFieldWidth(H, FS, Start, I, E, 124 FS.usesPositionalArg() ? 0 : &argIndex)) 125 return true; 126 127 if (I == E) { 128 // No more characters left? 129 H.HandleIncompleteSpecifier(Start, E - Start); 130 return true; 131 } 132 133 // Look for the precision (if any). 134 if (*I == '.') { 135 ++I; 136 if (I == E) { 137 H.HandleIncompleteSpecifier(Start, E - Start); 138 return true; 139 } 140 141 if (ParsePrecision(H, FS, Start, I, E, 142 FS.usesPositionalArg() ? 0 : &argIndex)) 143 return true; 144 145 if (I == E) { 146 // No more characters left? 147 H.HandleIncompleteSpecifier(Start, E - Start); 148 return true; 149 } 150 } 151 152 // Look for the length modifier. 153 if (ParseLengthModifier(FS, I, E) && I == E) { 154 // No more characters left? 155 H.HandleIncompleteSpecifier(Start, E - Start); 156 return true; 157 } 158 159 if (*I == '\0') { 160 // Detect spurious null characters, which are likely errors. 161 H.HandleNullChar(I); 162 return true; 163 } 164 165 // Finally, look for the conversion specifier. 166 const char *conversionPosition = I++; 167 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 168 switch (*conversionPosition) { 169 default: 170 break; 171 // C99: 7.19.6.1 (section 8). 
172 case '%': k = ConversionSpecifier::PercentArg; break; 173 case 'A': k = ConversionSpecifier::AArg; break; 174 case 'E': k = ConversionSpecifier::EArg; break; 175 case 'F': k = ConversionSpecifier::FArg; break; 176 case 'G': k = ConversionSpecifier::GArg; break; 177 case 'X': k = ConversionSpecifier::XArg; break; 178 case 'a': k = ConversionSpecifier::aArg; break; 179 case 'c': k = ConversionSpecifier::cArg; break; 180 case 'd': k = ConversionSpecifier::dArg; break; 181 case 'e': k = ConversionSpecifier::eArg; break; 182 case 'f': k = ConversionSpecifier::fArg; break; 183 case 'g': k = ConversionSpecifier::gArg; break; 184 case 'i': k = ConversionSpecifier::iArg; break; 185 case 'n': k = ConversionSpecifier::nArg; break; 186 case 'o': k = ConversionSpecifier::oArg; break; 187 case 'p': k = ConversionSpecifier::pArg; break; 188 case 's': k = ConversionSpecifier::sArg; break; 189 case 'u': k = ConversionSpecifier::uArg; break; 190 case 'x': k = ConversionSpecifier::xArg; break; 191 // POSIX specific. 192 case 'C': k = ConversionSpecifier::CArg; break; 193 case 'S': k = ConversionSpecifier::SArg; break; 194 // Objective-C. 195 case '@': k = ConversionSpecifier::ObjCObjArg; break; 196 // Glibc specific. 197 case 'm': k = ConversionSpecifier::PrintErrno; break; 198 } 199 PrintfConversionSpecifier CS(conversionPosition, k); 200 FS.setConversionSpecifier(CS); 201 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 202 FS.setArgIndex(argIndex++); 203 204 if (k == ConversionSpecifier::InvalidSpecifier) { 205 // Assume the conversion takes one argument. 206 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start); 207 } 208 return PrintfSpecifierResult(Start, FS); 209} 210 211bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 212 const char *I, 213 const char *E) { 214 215 unsigned argIndex = 0; 216 217 // Keep looking for a format specifier until we have exhausted the string. 
218 while (I != E) { 219 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex); 220 // Did a fail-stop error of any kind occur when parsing the specifier? 221 // If so, don't do any more processing. 222 if (FSR.shouldStop()) 223 return true;; 224 // Did we exhaust the string or encounter an error that 225 // we can recover from? 226 if (!FSR.hasValue()) 227 continue; 228 // We have a format specifier. Pass it to the callback. 229 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 230 I - FSR.getStart())) 231 return true; 232 } 233 assert(I == E && "Format string not exhausted"); 234 return false; 235} 236 237//===----------------------------------------------------------------------===// 238// Methods on PrintfSpecifier. 239//===----------------------------------------------------------------------===// 240 241ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { 242 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 243 244 if (!CS.consumesDataArgument()) 245 return ArgTypeResult::Invalid(); 246 247 if (CS.getKind() == ConversionSpecifier::cArg) 248 switch (LM.getKind()) { 249 case LengthModifier::None: return Ctx.IntTy; 250 case LengthModifier::AsLong: 251 return ArgTypeResult(ArgTypeResult::WIntTy, "wint_t"); 252 default: 253 return ArgTypeResult::Invalid(); 254 } 255 256 if (CS.isIntArg()) 257 switch (LM.getKind()) { 258 case LengthModifier::AsLongDouble: 259 return ArgTypeResult::Invalid(); 260 case LengthModifier::None: return Ctx.IntTy; 261 case LengthModifier::AsChar: return ArgTypeResult::AnyCharTy; 262 case LengthModifier::AsShort: return Ctx.ShortTy; 263 case LengthModifier::AsLong: return Ctx.LongTy; 264 case LengthModifier::AsLongLong: return Ctx.LongLongTy; 265 case LengthModifier::AsIntMax: 266 return ArgTypeResult(Ctx.getIntMaxType(), "intmax_t"); 267 case LengthModifier::AsSizeT: 268 // FIXME: How to get the corresponding signed version of size_t? 
269 return ArgTypeResult(); 270 case LengthModifier::AsPtrDiff: 271 return ArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t"); 272 } 273 274 if (CS.isUIntArg()) 275 switch (LM.getKind()) { 276 case LengthModifier::AsLongDouble: 277 return ArgTypeResult::Invalid(); 278 case LengthModifier::None: return Ctx.UnsignedIntTy; 279 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 280 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 281 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 282 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy; 283 case LengthModifier::AsIntMax: 284 return ArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t"); 285 case LengthModifier::AsSizeT: 286 return ArgTypeResult(Ctx.getSizeType(), "size_t"); 287 case LengthModifier::AsPtrDiff: 288 // FIXME: How to get the corresponding unsigned 289 // version of ptrdiff_t? 290 return ArgTypeResult(); 291 } 292 293 if (CS.isDoubleArg()) { 294 if (LM.getKind() == LengthModifier::AsLongDouble) 295 return Ctx.LongDoubleTy; 296 return Ctx.DoubleTy; 297 } 298 299 switch (CS.getKind()) { 300 case ConversionSpecifier::sArg: 301 if (LM.getKind() == LengthModifier::AsWideChar) 302 return ArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t *"); 303 return ArgTypeResult::CStrTy; 304 case ConversionSpecifier::SArg: 305 // FIXME: This appears to be Mac OS X specific. 306 return ArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t *"); 307 case ConversionSpecifier::CArg: 308 return ArgTypeResult(Ctx.WCharTy, "wchar_t"); 309 case ConversionSpecifier::pArg: 310 return ArgTypeResult::CPointerTy; 311 default: 312 break; 313 } 314 315 // FIXME: Handle other cases. 
316 return ArgTypeResult(); 317} 318 319bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt) { 320 // Handle strings first (char *, wchar_t *) 321 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 322 CS.setKind(ConversionSpecifier::sArg); 323 324 // Disable irrelevant flags 325 HasAlternativeForm = 0; 326 HasLeadingZeroes = 0; 327 328 // Set the long length modifier for wide characters 329 if (QT->getPointeeType()->isWideCharType()) 330 LM.setKind(LengthModifier::AsWideChar); 331 else 332 LM.setKind(LengthModifier::None); 333 334 return true; 335 } 336 337 // We can only work with builtin types. 338 const BuiltinType *BT = QT->getAs<BuiltinType>(); 339 if (!BT) 340 return false; 341 342 // Set length modifier 343 switch (BT->getKind()) { 344 case BuiltinType::Bool: 345 case BuiltinType::WChar_U: 346 case BuiltinType::WChar_S: 347 case BuiltinType::Char16: 348 case BuiltinType::Char32: 349 case BuiltinType::UInt128: 350 case BuiltinType::Int128: 351 case BuiltinType::Half: 352 // Various types which are non-trivial to correct. 353 return false; 354 355#define SIGNED_TYPE(Id, SingletonId) 356#define UNSIGNED_TYPE(Id, SingletonId) 357#define FLOATING_TYPE(Id, SingletonId) 358#define BUILTIN_TYPE(Id, SingletonId) \ 359 case BuiltinType::Id: 360#include "clang/AST/BuiltinTypes.def" 361 // Misc other stuff which doesn't make sense here. 
362 return false; 363 364 case BuiltinType::UInt: 365 case BuiltinType::Int: 366 case BuiltinType::Float: 367 case BuiltinType::Double: 368 LM.setKind(LengthModifier::None); 369 break; 370 371 case BuiltinType::Char_U: 372 case BuiltinType::UChar: 373 case BuiltinType::Char_S: 374 case BuiltinType::SChar: 375 LM.setKind(LengthModifier::AsChar); 376 break; 377 378 case BuiltinType::Short: 379 case BuiltinType::UShort: 380 LM.setKind(LengthModifier::AsShort); 381 break; 382 383 case BuiltinType::Long: 384 case BuiltinType::ULong: 385 LM.setKind(LengthModifier::AsLong); 386 break; 387 388 case BuiltinType::LongLong: 389 case BuiltinType::ULongLong: 390 LM.setKind(LengthModifier::AsLongLong); 391 break; 392 393 case BuiltinType::LongDouble: 394 LM.setKind(LengthModifier::AsLongDouble); 395 break; 396 } 397 398 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 399 if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { 400 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); 401 if (Identifier->getName() == "size_t") { 402 LM.setKind(LengthModifier::AsSizeT); 403 } else if (Identifier->getName() == "ssize_t") { 404 // Not C99, but common in Unix. 405 LM.setKind(LengthModifier::AsSizeT); 406 } else if (Identifier->getName() == "intmax_t") { 407 LM.setKind(LengthModifier::AsIntMax); 408 } else if (Identifier->getName() == "uintmax_t") { 409 LM.setKind(LengthModifier::AsIntMax); 410 } else if (Identifier->getName() == "ptrdiff_t") { 411 LM.setKind(LengthModifier::AsPtrDiff); 412 } 413 } 414 415 // Set conversion specifier and disable any flags which do not apply to it. 416 // Let typedefs to char fall through to int, as %c is silly for uint8_t. 
417 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) { 418 CS.setKind(ConversionSpecifier::cArg); 419 LM.setKind(LengthModifier::None); 420 Precision.setHowSpecified(OptionalAmount::NotSpecified); 421 HasAlternativeForm = 0; 422 HasLeadingZeroes = 0; 423 HasPlusPrefix = 0; 424 } 425 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 426 else if (QT->isRealFloatingType()) { 427 CS.setKind(ConversionSpecifier::fArg); 428 } 429 else if (QT->isSignedIntegerType()) { 430 CS.setKind(ConversionSpecifier::dArg); 431 HasAlternativeForm = 0; 432 } 433 else if (QT->isUnsignedIntegerType()) { 434 // Preserve the original formatting, e.g. 'X', 'o'. 435 if (!cast<PrintfConversionSpecifier>(CS).isUIntArg()) 436 CS.setKind(ConversionSpecifier::uArg); 437 HasAlternativeForm = 0; 438 HasPlusPrefix = 0; 439 } else { 440 llvm_unreachable("Unexpected type"); 441 } 442 443 return true; 444} 445 446void PrintfSpecifier::toString(raw_ostream &os) const { 447 // Whilst some features have no defined order, we are using the order 448 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1) 449 os << "%"; 450 451 // Positional args 452 if (usesPositionalArg()) { 453 os << getPositionalArgIndex() << "$"; 454 } 455 456 // Conversion flags 457 if (IsLeftJustified) os << "-"; 458 if (HasPlusPrefix) os << "+"; 459 if (HasSpacePrefix) os << " "; 460 if (HasAlternativeForm) os << "#"; 461 if (HasLeadingZeroes) os << "0"; 462 463 // Minimum field width 464 FieldWidth.toString(os); 465 // Precision 466 Precision.toString(os); 467 // Length modifier 468 os << LM.toString(); 469 // Conversion specifier 470 os << CS.toString(); 471} 472 473bool PrintfSpecifier::hasValidPlusPrefix() const { 474 if (!HasPlusPrefix) 475 return true; 476 477 // The plus prefix only makes sense for signed conversions 478 switch (CS.getKind()) { 479 case ConversionSpecifier::dArg: 480 case ConversionSpecifier::iArg: 481 case ConversionSpecifier::fArg: 482 case ConversionSpecifier::FArg: 
483 case ConversionSpecifier::eArg: 484 case ConversionSpecifier::EArg: 485 case ConversionSpecifier::gArg: 486 case ConversionSpecifier::GArg: 487 case ConversionSpecifier::aArg: 488 case ConversionSpecifier::AArg: 489 return true; 490 491 default: 492 return false; 493 } 494} 495 496bool PrintfSpecifier::hasValidAlternativeForm() const { 497 if (!HasAlternativeForm) 498 return true; 499 500 // Alternate form flag only valid with the oxXaAeEfFgG conversions 501 switch (CS.getKind()) { 502 case ConversionSpecifier::oArg: 503 case ConversionSpecifier::xArg: 504 case ConversionSpecifier::XArg: 505 case ConversionSpecifier::aArg: 506 case ConversionSpecifier::AArg: 507 case ConversionSpecifier::eArg: 508 case ConversionSpecifier::EArg: 509 case ConversionSpecifier::fArg: 510 case ConversionSpecifier::FArg: 511 case ConversionSpecifier::gArg: 512 case ConversionSpecifier::GArg: 513 return true; 514 515 default: 516 return false; 517 } 518} 519 520bool PrintfSpecifier::hasValidLeadingZeros() const { 521 if (!HasLeadingZeroes) 522 return true; 523 524 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 525 switch (CS.getKind()) { 526 case ConversionSpecifier::dArg: 527 case ConversionSpecifier::iArg: 528 case ConversionSpecifier::oArg: 529 case ConversionSpecifier::uArg: 530 case ConversionSpecifier::xArg: 531 case ConversionSpecifier::XArg: 532 case ConversionSpecifier::aArg: 533 case ConversionSpecifier::AArg: 534 case ConversionSpecifier::eArg: 535 case ConversionSpecifier::EArg: 536 case ConversionSpecifier::fArg: 537 case ConversionSpecifier::FArg: 538 case ConversionSpecifier::gArg: 539 case ConversionSpecifier::GArg: 540 return true; 541 542 default: 543 return false; 544 } 545} 546 547bool PrintfSpecifier::hasValidSpacePrefix() const { 548 if (!HasSpacePrefix) 549 return true; 550 551 // The space prefix only makes sense for signed conversions 552 switch (CS.getKind()) { 553 case ConversionSpecifier::dArg: 554 case ConversionSpecifier::iArg: 555 
case ConversionSpecifier::fArg: 556 case ConversionSpecifier::FArg: 557 case ConversionSpecifier::eArg: 558 case ConversionSpecifier::EArg: 559 case ConversionSpecifier::gArg: 560 case ConversionSpecifier::GArg: 561 case ConversionSpecifier::aArg: 562 case ConversionSpecifier::AArg: 563 return true; 564 565 default: 566 return false; 567 } 568} 569 570bool PrintfSpecifier::hasValidLeftJustified() const { 571 if (!IsLeftJustified) 572 return true; 573 574 // The left justified flag is valid for all conversions except n 575 switch (CS.getKind()) { 576 case ConversionSpecifier::nArg: 577 return false; 578 579 default: 580 return true; 581 } 582} 583 584bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const { 585 if (!HasThousandsGrouping) 586 return true; 587 588 switch (CS.getKind()) { 589 case ConversionSpecifier::dArg: 590 case ConversionSpecifier::iArg: 591 case ConversionSpecifier::uArg: 592 case ConversionSpecifier::fArg: 593 case ConversionSpecifier::FArg: 594 case ConversionSpecifier::gArg: 595 case ConversionSpecifier::GArg: 596 return true; 597 default: 598 return false; 599 } 600} 601 602bool PrintfSpecifier::hasValidPrecision() const { 603 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 604 return true; 605 606 // Precision is only valid with the diouxXaAeEfFgGs conversions 607 switch (CS.getKind()) { 608 case ConversionSpecifier::dArg: 609 case ConversionSpecifier::iArg: 610 case ConversionSpecifier::oArg: 611 case ConversionSpecifier::uArg: 612 case ConversionSpecifier::xArg: 613 case ConversionSpecifier::XArg: 614 case ConversionSpecifier::aArg: 615 case ConversionSpecifier::AArg: 616 case ConversionSpecifier::eArg: 617 case ConversionSpecifier::EArg: 618 case ConversionSpecifier::fArg: 619 case ConversionSpecifier::FArg: 620 case ConversionSpecifier::gArg: 621 case ConversionSpecifier::GArg: 622 case ConversionSpecifier::sArg: 623 return true; 624 625 default: 626 return false; 627 } 628} 629bool 
PrintfSpecifier::hasValidFieldWidth() const { 630 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 631 return true; 632 633 // The field width is valid for all conversions except n 634 switch (CS.getKind()) { 635 case ConversionSpecifier::nArg: 636 return false; 637 638 default: 639 return true; 640 } 641} 642