// PrintfFormatString.cpp revision 7fdba13847f01cfcdc7c9261b13e99e3cbecb5fc
//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Handling of format string in printf and friends.  The structure of format
// strings for fprintf() is described in C99 7.19.6.1.
//
//===----------------------------------------------------------------------===//

#include "clang/Analysis/Analyses/FormatString.h"
#include "FormatStringParsing.h"

using clang::analyze_format_string::ArgTypeResult;
using clang::analyze_format_string::FormatStringHandler;
using clang::analyze_format_string::LengthModifier;
using clang::analyze_format_string::OptionalAmount;
using clang::analyze_format_string::ConversionSpecifier;
using clang::analyze_printf::PrintfSpecifier;

using namespace clang;

// Result type produced by the printf specifier parser in this file: the
// generic SpecifierResult template instantiated for PrintfSpecifier.
typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
        PrintfSpecifierResult;

//===----------------------------------------------------------------------===//
// Methods for parsing format strings.
32//===----------------------------------------------------------------------===// 33 34using analyze_format_string::ParseNonPositionAmount; 35 36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } 42 else { 43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 44 analyze_format_string::PrecisionPos); 45 if (Amt.isInvalid()) 46 return true; 47 FS.setPrecision(Amt); 48 } 49 return false; 50} 51 52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 53 const char *&Beg, 54 const char *E, 55 unsigned &argIndex) { 56 57 using namespace clang::analyze_format_string; 58 using namespace clang::analyze_printf; 59 60 const char *I = Beg; 61 const char *Start = 0; 62 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 63 64 // Look for a '%' character that indicates the start of a format specifier. 65 for ( ; I != E ; ++I) { 66 char c = *I; 67 if (c == '\0') { 68 // Detect spurious null characters, which are likely errors. 69 H.HandleNullChar(I); 70 return true; 71 } 72 if (c == '%') { 73 Start = I++; // Record the start of the format specifier. 74 break; 75 } 76 } 77 78 // No format specifier found? 79 if (!Start) 80 return false; 81 82 if (I == E) { 83 // No more characters left? 84 H.HandleIncompleteSpecifier(Start, E - Start); 85 return true; 86 } 87 88 PrintfSpecifier FS; 89 if (ParseArgPosition(H, FS, Start, I, E)) 90 return true; 91 92 if (I == E) { 93 // No more characters left? 94 H.HandleIncompleteSpecifier(Start, E - Start); 95 return true; 96 } 97 98 // Look for flags (if any). 
99 bool hasMore = true; 100 for ( ; I != E; ++I) { 101 switch (*I) { 102 default: hasMore = false; break; 103 case '-': FS.setIsLeftJustified(I); break; 104 case '+': FS.setHasPlusPrefix(I); break; 105 case ' ': FS.setHasSpacePrefix(I); break; 106 case '#': FS.setHasAlternativeForm(I); break; 107 case '0': FS.setHasLeadingZeros(I); break; 108 } 109 if (!hasMore) 110 break; 111 } 112 113 if (I == E) { 114 // No more characters left? 115 H.HandleIncompleteSpecifier(Start, E - Start); 116 return true; 117 } 118 119 // Look for the field width (if any). 120 if (ParseFieldWidth(H, FS, Start, I, E, 121 FS.usesPositionalArg() ? 0 : &argIndex)) 122 return true; 123 124 if (I == E) { 125 // No more characters left? 126 H.HandleIncompleteSpecifier(Start, E - Start); 127 return true; 128 } 129 130 // Look for the precision (if any). 131 if (*I == '.') { 132 ++I; 133 if (I == E) { 134 H.HandleIncompleteSpecifier(Start, E - Start); 135 return true; 136 } 137 138 if (ParsePrecision(H, FS, Start, I, E, 139 FS.usesPositionalArg() ? 0 : &argIndex)) 140 return true; 141 142 if (I == E) { 143 // No more characters left? 144 H.HandleIncompleteSpecifier(Start, E - Start); 145 return true; 146 } 147 } 148 149 // Look for the length modifier. 150 if (ParseLengthModifier(FS, I, E) && I == E) { 151 // No more characters left? 152 H.HandleIncompleteSpecifier(Start, E - Start); 153 return true; 154 } 155 156 if (*I == '\0') { 157 // Detect spurious null characters, which are likely errors. 158 H.HandleNullChar(I); 159 return true; 160 } 161 162 // Finally, look for the conversion specifier. 163 const char *conversionPosition = I++; 164 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 165 switch (*conversionPosition) { 166 default: 167 break; 168 // C99: 7.19.6.1 (section 8). 
169 case '%': k = ConversionSpecifier::PercentArg; break; 170 case 'A': k = ConversionSpecifier::AArg; break; 171 case 'E': k = ConversionSpecifier::EArg; break; 172 case 'F': k = ConversionSpecifier::FArg; break; 173 case 'G': k = ConversionSpecifier::GArg; break; 174 case 'X': k = ConversionSpecifier::XArg; break; 175 case 'a': k = ConversionSpecifier::aArg; break; 176 case 'c': k = ConversionSpecifier::cArg; break; 177 case 'd': k = ConversionSpecifier::dArg; break; 178 case 'e': k = ConversionSpecifier::eArg; break; 179 case 'f': k = ConversionSpecifier::fArg; break; 180 case 'g': k = ConversionSpecifier::gArg; break; 181 case 'i': k = ConversionSpecifier::iArg; break; 182 case 'n': k = ConversionSpecifier::nArg; break; 183 case 'o': k = ConversionSpecifier::oArg; break; 184 case 'p': k = ConversionSpecifier::pArg; break; 185 case 's': k = ConversionSpecifier::sArg; break; 186 case 'u': k = ConversionSpecifier::uArg; break; 187 case 'x': k = ConversionSpecifier::xArg; break; 188 // Mac OS X (unicode) specific 189 case 'C': k = ConversionSpecifier::CArg; break; 190 case 'S': k = ConversionSpecifier::SArg; break; 191 // Objective-C. 192 case '@': k = ConversionSpecifier::ObjCObjArg; break; 193 // Glibc specific. 194 case 'm': k = ConversionSpecifier::PrintErrno; break; 195 } 196 PrintfConversionSpecifier CS(conversionPosition, k); 197 FS.setConversionSpecifier(CS); 198 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 199 FS.setArgIndex(argIndex++); 200 201 if (k == ConversionSpecifier::InvalidSpecifier) { 202 // Assume the conversion takes one argument. 203 return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg); 204 } 205 return PrintfSpecifierResult(Start, FS); 206} 207 208bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 209 const char *I, 210 const char *E) { 211 212 unsigned argIndex = 0; 213 214 // Keep looking for a format specifier until we have exhausted the string. 
215 while (I != E) { 216 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex); 217 // Did a fail-stop error of any kind occur when parsing the specifier? 218 // If so, don't do any more processing. 219 if (FSR.shouldStop()) 220 return true;; 221 // Did we exhaust the string or encounter an error that 222 // we can recover from? 223 if (!FSR.hasValue()) 224 continue; 225 // We have a format specifier. Pass it to the callback. 226 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 227 I - FSR.getStart())) 228 return true; 229 } 230 assert(I == E && "Format string not exhausted"); 231 return false; 232} 233 234//===----------------------------------------------------------------------===// 235// Methods on ConversionSpecifier. 236//===----------------------------------------------------------------------===// 237const char *ConversionSpecifier::toString() const { 238 switch (kind) { 239 case dArg: return "d"; 240 case iArg: return "i"; 241 case oArg: return "o"; 242 case uArg: return "u"; 243 case xArg: return "x"; 244 case XArg: return "X"; 245 case fArg: return "f"; 246 case FArg: return "F"; 247 case eArg: return "e"; 248 case EArg: return "E"; 249 case gArg: return "g"; 250 case GArg: return "G"; 251 case aArg: return "a"; 252 case AArg: return "A"; 253 case cArg: return "c"; 254 case sArg: return "s"; 255 case pArg: return "p"; 256 case nArg: return "n"; 257 case PercentArg: return "%"; 258 case ScanListArg: return "["; 259 case InvalidSpecifier: return NULL; 260 261 // MacOS X unicode extensions. 262 case CArg: return "C"; 263 case SArg: return "S"; 264 265 // Objective-C specific specifiers. 266 case ObjCObjArg: return "@"; 267 268 // GlibC specific specifiers. 269 case PrintErrno: return "m"; 270 } 271 return NULL; 272} 273 274//===----------------------------------------------------------------------===// 275// Methods on PrintfSpecifier. 
276//===----------------------------------------------------------------------===// 277 278ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { 279 const PrintfConversionSpecifier &CS = getConversionSpecifier(); 280 281 if (!CS.consumesDataArgument()) 282 return ArgTypeResult::Invalid(); 283 284 if (CS.isIntArg()) 285 switch (LM.getKind()) { 286 case LengthModifier::AsLongDouble: 287 return ArgTypeResult::Invalid(); 288 case LengthModifier::None: return Ctx.IntTy; 289 case LengthModifier::AsChar: return Ctx.SignedCharTy; 290 case LengthModifier::AsShort: return Ctx.ShortTy; 291 case LengthModifier::AsLong: return Ctx.LongTy; 292 case LengthModifier::AsLongLong: return Ctx.LongLongTy; 293 case LengthModifier::AsIntMax: 294 // FIXME: Return unknown for now. 295 return ArgTypeResult(); 296 case LengthModifier::AsSizeT: return Ctx.getSizeType(); 297 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType(); 298 } 299 300 if (CS.isUIntArg()) 301 switch (LM.getKind()) { 302 case LengthModifier::AsLongDouble: 303 return ArgTypeResult::Invalid(); 304 case LengthModifier::None: return Ctx.UnsignedIntTy; 305 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 306 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 307 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 308 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy; 309 case LengthModifier::AsIntMax: 310 // FIXME: Return unknown for now. 311 return ArgTypeResult(); 312 case LengthModifier::AsSizeT: 313 // FIXME: How to get the corresponding unsigned 314 // version of size_t? 315 return ArgTypeResult(); 316 case LengthModifier::AsPtrDiff: 317 // FIXME: How to get the corresponding unsigned 318 // version of ptrdiff_t? 
319 return ArgTypeResult(); 320 } 321 322 if (CS.isDoubleArg()) { 323 if (LM.getKind() == LengthModifier::AsLongDouble) 324 return Ctx.LongDoubleTy; 325 return Ctx.DoubleTy; 326 } 327 328 switch (CS.getKind()) { 329 case ConversionSpecifier::sArg: 330 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ? 331 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 332 case ConversionSpecifier::SArg: 333 // FIXME: This appears to be Mac OS X specific. 334 return ArgTypeResult::WCStrTy; 335 case ConversionSpecifier::CArg: 336 return Ctx.WCharTy; 337 case ConversionSpecifier::pArg: 338 return ArgTypeResult::CPointerTy; 339 default: 340 break; 341 } 342 343 // FIXME: Handle other cases. 344 return ArgTypeResult(); 345} 346 347bool PrintfSpecifier::fixType(QualType QT) { 348 // Handle strings first (char *, wchar_t *) 349 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 350 CS.setKind(ConversionSpecifier::sArg); 351 352 // Disable irrelevant flags 353 HasAlternativeForm = 0; 354 HasLeadingZeroes = 0; 355 356 // Set the long length modifier for wide characters 357 if (QT->getPointeeType()->isWideCharType()) 358 LM.setKind(LengthModifier::AsWideChar); 359 360 return true; 361 } 362 363 // We can only work with builtin types. 
364 if (!QT->isBuiltinType()) 365 return false; 366 367 // Everything else should be a base type 368 const BuiltinType *BT = QT->getAs<BuiltinType>(); 369 370 // Set length modifier 371 switch (BT->getKind()) { 372 default: 373 // The rest of the conversions are either optional or for non-builtin types 374 LM.setKind(LengthModifier::None); 375 break; 376 377 case BuiltinType::WChar: 378 case BuiltinType::Long: 379 case BuiltinType::ULong: 380 LM.setKind(LengthModifier::AsLong); 381 break; 382 383 case BuiltinType::LongLong: 384 case BuiltinType::ULongLong: 385 LM.setKind(LengthModifier::AsLongLong); 386 break; 387 388 case BuiltinType::LongDouble: 389 LM.setKind(LengthModifier::AsLongDouble); 390 break; 391 } 392 393 // Set conversion specifier and disable any flags which do not apply to it. 394 if (QT->isAnyCharacterType()) { 395 CS.setKind(ConversionSpecifier::cArg); 396 Precision.setHowSpecified(OptionalAmount::NotSpecified); 397 HasAlternativeForm = 0; 398 HasLeadingZeroes = 0; 399 HasPlusPrefix = 0; 400 } 401 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 402 else if (QT->isRealFloatingType()) { 403 CS.setKind(ConversionSpecifier::fArg); 404 } 405 else if (QT->isPointerType()) { 406 CS.setKind(ConversionSpecifier::pArg); 407 Precision.setHowSpecified(OptionalAmount::NotSpecified); 408 HasAlternativeForm = 0; 409 HasLeadingZeroes = 0; 410 HasPlusPrefix = 0; 411 } 412 else if (QT->isSignedIntegerType()) { 413 CS.setKind(ConversionSpecifier::dArg); 414 HasAlternativeForm = 0; 415 } 416 else if (QT->isUnsignedIntegerType()) { 417 CS.setKind(ConversionSpecifier::uArg); 418 HasAlternativeForm = 0; 419 HasPlusPrefix = 0; 420 } 421 else { 422 return false; 423 } 424 425 return true; 426} 427 428void PrintfSpecifier::toString(llvm::raw_ostream &os) const { 429 // Whilst some features have no defined order, we are using the order 430 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1) 431 os << "%"; 432 433 // Positional args 
434 if (usesPositionalArg()) { 435 os << getPositionalArgIndex() << "$"; 436 } 437 438 // Conversion flags 439 if (IsLeftJustified) os << "-"; 440 if (HasPlusPrefix) os << "+"; 441 if (HasSpacePrefix) os << " "; 442 if (HasAlternativeForm) os << "#"; 443 if (HasLeadingZeroes) os << "0"; 444 445 // Minimum field width 446 FieldWidth.toString(os); 447 // Precision 448 Precision.toString(os); 449 // Length modifier 450 os << LM.toString(); 451 // Conversion specifier 452 os << CS.toString(); 453} 454 455bool PrintfSpecifier::hasValidPlusPrefix() const { 456 if (!HasPlusPrefix) 457 return true; 458 459 // The plus prefix only makes sense for signed conversions 460 switch (CS.getKind()) { 461 case ConversionSpecifier::dArg: 462 case ConversionSpecifier::iArg: 463 case ConversionSpecifier::fArg: 464 case ConversionSpecifier::FArg: 465 case ConversionSpecifier::eArg: 466 case ConversionSpecifier::EArg: 467 case ConversionSpecifier::gArg: 468 case ConversionSpecifier::GArg: 469 case ConversionSpecifier::aArg: 470 case ConversionSpecifier::AArg: 471 return true; 472 473 default: 474 return false; 475 } 476} 477 478bool PrintfSpecifier::hasValidAlternativeForm() const { 479 if (!HasAlternativeForm) 480 return true; 481 482 // Alternate form flag only valid with the oxaAeEfFgG conversions 483 switch (CS.getKind()) { 484 case ConversionSpecifier::oArg: 485 case ConversionSpecifier::xArg: 486 case ConversionSpecifier::aArg: 487 case ConversionSpecifier::AArg: 488 case ConversionSpecifier::eArg: 489 case ConversionSpecifier::EArg: 490 case ConversionSpecifier::fArg: 491 case ConversionSpecifier::FArg: 492 case ConversionSpecifier::gArg: 493 case ConversionSpecifier::GArg: 494 return true; 495 496 default: 497 return false; 498 } 499} 500 501bool PrintfSpecifier::hasValidLeadingZeros() const { 502 if (!HasLeadingZeroes) 503 return true; 504 505 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 506 switch (CS.getKind()) { 507 case ConversionSpecifier::dArg: 508 
case ConversionSpecifier::iArg: 509 case ConversionSpecifier::oArg: 510 case ConversionSpecifier::uArg: 511 case ConversionSpecifier::xArg: 512 case ConversionSpecifier::XArg: 513 case ConversionSpecifier::aArg: 514 case ConversionSpecifier::AArg: 515 case ConversionSpecifier::eArg: 516 case ConversionSpecifier::EArg: 517 case ConversionSpecifier::fArg: 518 case ConversionSpecifier::FArg: 519 case ConversionSpecifier::gArg: 520 case ConversionSpecifier::GArg: 521 return true; 522 523 default: 524 return false; 525 } 526} 527 528bool PrintfSpecifier::hasValidSpacePrefix() const { 529 if (!HasSpacePrefix) 530 return true; 531 532 // The space prefix only makes sense for signed conversions 533 switch (CS.getKind()) { 534 case ConversionSpecifier::dArg: 535 case ConversionSpecifier::iArg: 536 case ConversionSpecifier::fArg: 537 case ConversionSpecifier::FArg: 538 case ConversionSpecifier::eArg: 539 case ConversionSpecifier::EArg: 540 case ConversionSpecifier::gArg: 541 case ConversionSpecifier::GArg: 542 case ConversionSpecifier::aArg: 543 case ConversionSpecifier::AArg: 544 return true; 545 546 default: 547 return false; 548 } 549} 550 551bool PrintfSpecifier::hasValidLeftJustified() const { 552 if (!IsLeftJustified) 553 return true; 554 555 // The left justified flag is valid for all conversions except n 556 switch (CS.getKind()) { 557 case ConversionSpecifier::nArg: 558 return false; 559 560 default: 561 return true; 562 } 563} 564 565bool PrintfSpecifier::hasValidLengthModifier() const { 566 switch (LM.getKind()) { 567 case LengthModifier::None: 568 return true; 569 570 // Handle most integer flags 571 case LengthModifier::AsChar: 572 case LengthModifier::AsShort: 573 case LengthModifier::AsLongLong: 574 case LengthModifier::AsIntMax: 575 case LengthModifier::AsSizeT: 576 case LengthModifier::AsPtrDiff: 577 switch (CS.getKind()) { 578 case ConversionSpecifier::dArg: 579 case ConversionSpecifier::iArg: 580 case ConversionSpecifier::oArg: 581 case 
ConversionSpecifier::uArg: 582 case ConversionSpecifier::xArg: 583 case ConversionSpecifier::XArg: 584 case ConversionSpecifier::nArg: 585 return true; 586 default: 587 return false; 588 } 589 590 // Handle 'l' flag 591 case LengthModifier::AsLong: 592 switch (CS.getKind()) { 593 case ConversionSpecifier::dArg: 594 case ConversionSpecifier::iArg: 595 case ConversionSpecifier::oArg: 596 case ConversionSpecifier::uArg: 597 case ConversionSpecifier::xArg: 598 case ConversionSpecifier::XArg: 599 case ConversionSpecifier::aArg: 600 case ConversionSpecifier::AArg: 601 case ConversionSpecifier::fArg: 602 case ConversionSpecifier::FArg: 603 case ConversionSpecifier::eArg: 604 case ConversionSpecifier::EArg: 605 case ConversionSpecifier::gArg: 606 case ConversionSpecifier::GArg: 607 case ConversionSpecifier::nArg: 608 case ConversionSpecifier::cArg: 609 case ConversionSpecifier::sArg: 610 return true; 611 default: 612 return false; 613 } 614 615 case LengthModifier::AsLongDouble: 616 switch (CS.getKind()) { 617 case ConversionSpecifier::aArg: 618 case ConversionSpecifier::AArg: 619 case ConversionSpecifier::fArg: 620 case ConversionSpecifier::FArg: 621 case ConversionSpecifier::eArg: 622 case ConversionSpecifier::EArg: 623 case ConversionSpecifier::gArg: 624 case ConversionSpecifier::GArg: 625 return true; 626 default: 627 return false; 628 } 629 } 630 return false; 631} 632 633bool PrintfSpecifier::hasValidPrecision() const { 634 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 635 return true; 636 637 // Precision is only valid with the diouxXaAeEfFgGs conversions 638 switch (CS.getKind()) { 639 case ConversionSpecifier::dArg: 640 case ConversionSpecifier::iArg: 641 case ConversionSpecifier::oArg: 642 case ConversionSpecifier::uArg: 643 case ConversionSpecifier::xArg: 644 case ConversionSpecifier::XArg: 645 case ConversionSpecifier::aArg: 646 case ConversionSpecifier::AArg: 647 case ConversionSpecifier::eArg: 648 case ConversionSpecifier::EArg: 649 case 
ConversionSpecifier::fArg: 650 case ConversionSpecifier::FArg: 651 case ConversionSpecifier::gArg: 652 case ConversionSpecifier::GArg: 653 case ConversionSpecifier::sArg: 654 return true; 655 656 default: 657 return false; 658 } 659} 660bool PrintfSpecifier::hasValidFieldWidth() const { 661 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 662 return true; 663 664 // The field width is valid for all conversions except n 665 switch (CS.getKind()) { 666 case ConversionSpecifier::nArg: 667 return false; 668 669 default: 670 return true; 671 } 672} 673