PrintfFormatString.cpp revision 99196b1031d37d37f395a3291ccdd12a3fc01242
1//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_printf::ConversionSpecifier; 23using clang::analyze_printf::PrintfSpecifier; 24 25using namespace clang; 26 27typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier> 28 PrintfSpecifierResult; 29 30//===----------------------------------------------------------------------===// 31// Methods for parsing format strings. 32//===----------------------------------------------------------------------===// 33 34using analyze_format_string::ParseNonPositionAmount; 35 36static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS, 37 const char *Start, const char *&Beg, const char *E, 38 unsigned *argIndex) { 39 if (argIndex) { 40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 41 } 42 else { 43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 44 analyze_format_string::PrecisionPos); 45 if (Amt.isInvalid()) 46 return true; 47 FS.setPrecision(Amt); 48 } 49 return false; 50} 51 52static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H, 53 const char *&Beg, 54 const char *E, 55 unsigned &argIndex) { 56 57 using namespace clang::analyze_printf; 58 59 const char *I = Beg; 60 const char *Start = 0; 61 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 62 63 // Look for a '%' character that indicates the start of a format specifier. 64 for ( ; I != E ; ++I) { 65 char c = *I; 66 if (c == '\0') { 67 // Detect spurious null characters, which are likely errors. 68 H.HandleNullChar(I); 69 return true; 70 } 71 if (c == '%') { 72 Start = I++; // Record the start of the format specifier. 73 break; 74 } 75 } 76 77 // No format specifier found? 78 if (!Start) 79 return false; 80 81 if (I == E) { 82 // No more characters left? 83 H.HandleIncompleteSpecifier(Start, E - Start); 84 return true; 85 } 86 87 PrintfSpecifier FS; 88 if (ParseArgPosition(H, FS, Start, I, E)) 89 return true; 90 91 if (I == E) { 92 // No more characters left? 93 H.HandleIncompleteSpecifier(Start, E - Start); 94 return true; 95 } 96 97 // Look for flags (if any). 98 bool hasMore = true; 99 for ( ; I != E; ++I) { 100 switch (*I) { 101 default: hasMore = false; break; 102 case '-': FS.setIsLeftJustified(I); break; 103 case '+': FS.setHasPlusPrefix(I); break; 104 case ' ': FS.setHasSpacePrefix(I); break; 105 case '#': FS.setHasAlternativeForm(I); break; 106 case '0': FS.setHasLeadingZeros(I); break; 107 } 108 if (!hasMore) 109 break; 110 } 111 112 if (I == E) { 113 // No more characters left? 114 H.HandleIncompleteSpecifier(Start, E - Start); 115 return true; 116 } 117 118 // Look for the field width (if any). 119 if (ParseFieldWidth(H, FS, Start, I, E, 120 FS.usesPositionalArg() ? 0 : &argIndex)) 121 return true; 122 123 if (I == E) { 124 // No more characters left? 125 H.HandleIncompleteSpecifier(Start, E - Start); 126 return true; 127 } 128 129 // Look for the precision (if any). 130 if (*I == '.') { 131 ++I; 132 if (I == E) { 133 H.HandleIncompleteSpecifier(Start, E - Start); 134 return true; 135 } 136 137 if (ParsePrecision(H, FS, Start, I, E, 138 FS.usesPositionalArg() ? 0 : &argIndex)) 139 return true; 140 141 if (I == E) { 142 // No more characters left? 143 H.HandleIncompleteSpecifier(Start, E - Start); 144 return true; 145 } 146 } 147 148 // Look for the length modifier. 149 if (ParseLengthModifier(FS, I, E) && I == E) { 150 // No more characters left? 151 H.HandleIncompleteSpecifier(Start, E - Start); 152 return true; 153 } 154 155 if (*I == '\0') { 156 // Detect spurious null characters, which are likely errors. 157 H.HandleNullChar(I); 158 return true; 159 } 160 161 // Finally, look for the conversion specifier. 162 const char *conversionPosition = I++; 163 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 164 switch (*conversionPosition) { 165 default: 166 break; 167 // C99: 7.19.6.1 (section 8). 168 case '%': k = ConversionSpecifier::PercentArg; break; 169 case 'A': k = ConversionSpecifier::AArg; break; 170 case 'E': k = ConversionSpecifier::EArg; break; 171 case 'F': k = ConversionSpecifier::FArg; break; 172 case 'G': k = ConversionSpecifier::GArg; break; 173 case 'X': k = ConversionSpecifier::XArg; break; 174 case 'a': k = ConversionSpecifier::aArg; break; 175 case 'c': k = ConversionSpecifier::cArg; break; 176 case 'd': k = ConversionSpecifier::dArg; break; 177 case 'e': k = ConversionSpecifier::eArg; break; 178 case 'f': k = ConversionSpecifier::fArg; break; 179 case 'g': k = ConversionSpecifier::gArg; break; 180 case 'i': k = ConversionSpecifier::iArg; break; 181 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 182 case 'o': k = ConversionSpecifier::oArg; break; 183 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 184 case 's': k = ConversionSpecifier::sArg; break; 185 case 'u': k = ConversionSpecifier::uArg; break; 186 case 'x': k = ConversionSpecifier::xArg; break; 187 // Mac OS X (unicode) specific 188 case 'C': k = ConversionSpecifier::CArg; break; 189 case 'S': k = ConversionSpecifier::UnicodeStrArg; break; 190 // Objective-C. 191 case '@': k = ConversionSpecifier::ObjCObjArg; break; 192 // Glibc specific. 193 case 'm': k = ConversionSpecifier::PrintErrno; break; 194 } 195 ConversionSpecifier CS(conversionPosition, k); 196 FS.setConversionSpecifier(CS); 197 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 198 FS.setArgIndex(argIndex++); 199 200 if (k == ConversionSpecifier::InvalidSpecifier) { 201 // Assume the conversion takes one argument. 202 return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg); 203 } 204 return PrintfSpecifierResult(Start, FS); 205} 206 207bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, 208 const char *I, 209 const char *E) { 210 211 unsigned argIndex = 0; 212 213 // Keep looking for a format specifier until we have exhausted the string. 214 while (I != E) { 215 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex); 216 // Did a fail-stop error of any kind occur when parsing the specifier? 217 // If so, don't do any more processing. 218 if (FSR.shouldStop()) 219 return true;; 220 // Did we exhaust the string or encounter an error that 221 // we can recover from? 222 if (!FSR.hasValue()) 223 continue; 224 // We have a format specifier. Pass it to the callback. 225 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(), 226 I - FSR.getStart())) 227 return true; 228 } 229 assert(I == E && "Format string not exhausted"); 230 return false; 231} 232 233//===----------------------------------------------------------------------===// 234// Methods on ConversionSpecifier. 235//===----------------------------------------------------------------------===// 236const char *ConversionSpecifier::toString() const { 237 switch (kind) { 238 case dArg: return "d"; 239 case iArg: return "i"; 240 case oArg: return "o"; 241 case uArg: return "u"; 242 case xArg: return "x"; 243 case XArg: return "X"; 244 case fArg: return "f"; 245 case FArg: return "F"; 246 case eArg: return "e"; 247 case EArg: return "E"; 248 case gArg: return "g"; 249 case GArg: return "G"; 250 case aArg: return "a"; 251 case AArg: return "A"; 252 case cArg: return "c"; 253 case sArg: return "s"; 254 case VoidPtrArg: return "p"; 255 case OutIntPtrArg: return "n"; 256 case PercentArg: return "%"; 257 case InvalidSpecifier: return NULL; 258 259 // MacOS X unicode extensions. 260 case CArg: return "C"; 261 case UnicodeStrArg: return "S"; 262 263 // Objective-C specific specifiers. 264 case ObjCObjArg: return "@"; 265 266 // GlibC specific specifiers. 267 case PrintErrno: return "m"; 268 } 269 return NULL; 270} 271 272//===----------------------------------------------------------------------===// 273// Methods on PrintfSpecifier. 274//===----------------------------------------------------------------------===// 275 276ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const { 277 if (!CS.consumesDataArgument()) 278 return ArgTypeResult::Invalid(); 279 280 if (CS.isIntArg()) 281 switch (LM.getKind()) { 282 case LengthModifier::AsLongDouble: 283 return ArgTypeResult::Invalid(); 284 case LengthModifier::None: return Ctx.IntTy; 285 case LengthModifier::AsChar: return Ctx.SignedCharTy; 286 case LengthModifier::AsShort: return Ctx.ShortTy; 287 case LengthModifier::AsLong: return Ctx.LongTy; 288 case LengthModifier::AsLongLong: return Ctx.LongLongTy; 289 case LengthModifier::AsIntMax: 290 // FIXME: Return unknown for now. 291 return ArgTypeResult(); 292 case LengthModifier::AsSizeT: return Ctx.getSizeType(); 293 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType(); 294 } 295 296 if (CS.isUIntArg()) 297 switch (LM.getKind()) { 298 case LengthModifier::AsLongDouble: 299 return ArgTypeResult::Invalid(); 300 case LengthModifier::None: return Ctx.UnsignedIntTy; 301 case LengthModifier::AsChar: return Ctx.UnsignedCharTy; 302 case LengthModifier::AsShort: return Ctx.UnsignedShortTy; 303 case LengthModifier::AsLong: return Ctx.UnsignedLongTy; 304 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy; 305 case LengthModifier::AsIntMax: 306 // FIXME: Return unknown for now. 307 return ArgTypeResult(); 308 case LengthModifier::AsSizeT: 309 // FIXME: How to get the corresponding unsigned 310 // version of size_t? 311 return ArgTypeResult(); 312 case LengthModifier::AsPtrDiff: 313 // FIXME: How to get the corresponding unsigned 314 // version of ptrdiff_t? 315 return ArgTypeResult(); 316 } 317 318 if (CS.isDoubleArg()) { 319 if (LM.getKind() == LengthModifier::AsLongDouble) 320 return Ctx.LongDoubleTy; 321 return Ctx.DoubleTy; 322 } 323 324 switch (CS.getKind()) { 325 case ConversionSpecifier::sArg: 326 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ? 327 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 328 case ConversionSpecifier::UnicodeStrArg: 329 // FIXME: This appears to be Mac OS X specific. 330 return ArgTypeResult::WCStrTy; 331 case ConversionSpecifier::CArg: 332 return Ctx.WCharTy; 333 case ConversionSpecifier::VoidPtrArg: 334 return ArgTypeResult::CPointerTy; 335 default: 336 break; 337 } 338 339 // FIXME: Handle other cases. 340 return ArgTypeResult(); 341} 342 343bool PrintfSpecifier::fixType(QualType QT) { 344 // Handle strings first (char *, wchar_t *) 345 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { 346 CS.setKind(ConversionSpecifier::sArg); 347 348 // Disable irrelevant flags 349 HasAlternativeForm = 0; 350 HasLeadingZeroes = 0; 351 352 // Set the long length modifier for wide characters 353 if (QT->getPointeeType()->isWideCharType()) 354 LM.setKind(LengthModifier::AsWideChar); 355 356 return true; 357 } 358 359 // We can only work with builtin types. 360 if (!QT->isBuiltinType()) 361 return false; 362 363 // Everything else should be a base type 364 const BuiltinType *BT = QT->getAs<BuiltinType>(); 365 366 // Set length modifier 367 switch (BT->getKind()) { 368 default: 369 // The rest of the conversions are either optional or for non-builtin types 370 LM.setKind(LengthModifier::None); 371 break; 372 373 case BuiltinType::WChar: 374 case BuiltinType::Long: 375 case BuiltinType::ULong: 376 LM.setKind(LengthModifier::AsLong); 377 break; 378 379 case BuiltinType::LongLong: 380 case BuiltinType::ULongLong: 381 LM.setKind(LengthModifier::AsLongLong); 382 break; 383 384 case BuiltinType::LongDouble: 385 LM.setKind(LengthModifier::AsLongDouble); 386 break; 387 } 388 389 // Set conversion specifier and disable any flags which do not apply to it. 390 if (QT->isAnyCharacterType()) { 391 CS.setKind(ConversionSpecifier::cArg); 392 Precision.setHowSpecified(OptionalAmount::NotSpecified); 393 HasAlternativeForm = 0; 394 HasLeadingZeroes = 0; 395 HasPlusPrefix = 0; 396 } 397 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType 398 else if (QT->isRealFloatingType()) { 399 CS.setKind(ConversionSpecifier::fArg); 400 } 401 else if (QT->isPointerType()) { 402 CS.setKind(ConversionSpecifier::VoidPtrArg); 403 Precision.setHowSpecified(OptionalAmount::NotSpecified); 404 HasAlternativeForm = 0; 405 HasLeadingZeroes = 0; 406 HasPlusPrefix = 0; 407 } 408 else if (QT->isSignedIntegerType()) { 409 CS.setKind(ConversionSpecifier::dArg); 410 HasAlternativeForm = 0; 411 } 412 else if (QT->isUnsignedIntegerType()) { 413 CS.setKind(ConversionSpecifier::uArg); 414 HasAlternativeForm = 0; 415 HasPlusPrefix = 0; 416 } 417 else { 418 return false; 419 } 420 421 return true; 422} 423 424void PrintfSpecifier::toString(llvm::raw_ostream &os) const { 425 // Whilst some features have no defined order, we are using the order 426 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1) 427 os << "%"; 428 429 // Positional args 430 if (usesPositionalArg()) { 431 os << getPositionalArgIndex() << "$"; 432 } 433 434 // Conversion flags 435 if (IsLeftJustified) os << "-"; 436 if (HasPlusPrefix) os << "+"; 437 if (HasSpacePrefix) os << " "; 438 if (HasAlternativeForm) os << "#"; 439 if (HasLeadingZeroes) os << "0"; 440 441 // Minimum field width 442 FieldWidth.toString(os); 443 // Precision 444 Precision.toString(os); 445 // Length modifier 446 os << LM.toString(); 447 // Conversion specifier 448 os << CS.toString(); 449} 450 451bool PrintfSpecifier::hasValidPlusPrefix() const { 452 if (!HasPlusPrefix) 453 return true; 454 455 // The plus prefix only makes sense for signed conversions 456 switch (CS.getKind()) { 457 case ConversionSpecifier::dArg: 458 case ConversionSpecifier::iArg: 459 case ConversionSpecifier::fArg: 460 case ConversionSpecifier::FArg: 461 case ConversionSpecifier::eArg: 462 case ConversionSpecifier::EArg: 463 case ConversionSpecifier::gArg: 464 case ConversionSpecifier::GArg: 465 case ConversionSpecifier::aArg: 466 case ConversionSpecifier::AArg: 467 return true; 468 469 default: 470 return false; 471 } 472} 473 474bool PrintfSpecifier::hasValidAlternativeForm() const { 475 if (!HasAlternativeForm) 476 return true; 477 478 // Alternate form flag only valid with the oxaAeEfFgG conversions 479 switch (CS.getKind()) { 480 case ConversionSpecifier::oArg: 481 case ConversionSpecifier::xArg: 482 case ConversionSpecifier::aArg: 483 case ConversionSpecifier::AArg: 484 case ConversionSpecifier::eArg: 485 case ConversionSpecifier::EArg: 486 case ConversionSpecifier::fArg: 487 case ConversionSpecifier::FArg: 488 case ConversionSpecifier::gArg: 489 case ConversionSpecifier::GArg: 490 return true; 491 492 default: 493 return false; 494 } 495} 496 497bool PrintfSpecifier::hasValidLeadingZeros() const { 498 if (!HasLeadingZeroes) 499 return true; 500 501 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions 502 switch (CS.getKind()) { 503 case ConversionSpecifier::dArg: 504 case ConversionSpecifier::iArg: 505 case ConversionSpecifier::oArg: 506 case ConversionSpecifier::uArg: 507 case ConversionSpecifier::xArg: 508 case ConversionSpecifier::XArg: 509 case ConversionSpecifier::aArg: 510 case ConversionSpecifier::AArg: 511 case ConversionSpecifier::eArg: 512 case ConversionSpecifier::EArg: 513 case ConversionSpecifier::fArg: 514 case ConversionSpecifier::FArg: 515 case ConversionSpecifier::gArg: 516 case ConversionSpecifier::GArg: 517 return true; 518 519 default: 520 return false; 521 } 522} 523 524bool PrintfSpecifier::hasValidSpacePrefix() const { 525 if (!HasSpacePrefix) 526 return true; 527 528 // The space prefix only makes sense for signed conversions 529 switch (CS.getKind()) { 530 case ConversionSpecifier::dArg: 531 case ConversionSpecifier::iArg: 532 case ConversionSpecifier::fArg: 533 case ConversionSpecifier::FArg: 534 case ConversionSpecifier::eArg: 535 case ConversionSpecifier::EArg: 536 case ConversionSpecifier::gArg: 537 case ConversionSpecifier::GArg: 538 case ConversionSpecifier::aArg: 539 case ConversionSpecifier::AArg: 540 return true; 541 542 default: 543 return false; 544 } 545} 546 547bool PrintfSpecifier::hasValidLeftJustified() const { 548 if (!IsLeftJustified) 549 return true; 550 551 // The left justified flag is valid for all conversions except n 552 switch (CS.getKind()) { 553 case ConversionSpecifier::OutIntPtrArg: 554 return false; 555 556 default: 557 return true; 558 } 559} 560 561bool PrintfSpecifier::hasValidLengthModifier() const { 562 switch (LM.getKind()) { 563 case LengthModifier::None: 564 return true; 565 566 // Handle most integer flags 567 case LengthModifier::AsChar: 568 case LengthModifier::AsShort: 569 case LengthModifier::AsLongLong: 570 case LengthModifier::AsIntMax: 571 case LengthModifier::AsSizeT: 572 case LengthModifier::AsPtrDiff: 573 switch (CS.getKind()) { 574 case ConversionSpecifier::dArg: 575 case ConversionSpecifier::iArg: 576 case ConversionSpecifier::oArg: 577 case ConversionSpecifier::uArg: 578 case ConversionSpecifier::xArg: 579 case ConversionSpecifier::XArg: 580 case ConversionSpecifier::OutIntPtrArg: 581 return true; 582 default: 583 return false; 584 } 585 586 // Handle 'l' flag 587 case LengthModifier::AsLong: 588 switch (CS.getKind()) { 589 case ConversionSpecifier::dArg: 590 case ConversionSpecifier::iArg: 591 case ConversionSpecifier::oArg: 592 case ConversionSpecifier::uArg: 593 case ConversionSpecifier::xArg: 594 case ConversionSpecifier::XArg: 595 case ConversionSpecifier::aArg: 596 case ConversionSpecifier::AArg: 597 case ConversionSpecifier::fArg: 598 case ConversionSpecifier::FArg: 599 case ConversionSpecifier::eArg: 600 case ConversionSpecifier::EArg: 601 case ConversionSpecifier::gArg: 602 case ConversionSpecifier::GArg: 603 case ConversionSpecifier::OutIntPtrArg: 604 case ConversionSpecifier::cArg: 605 case ConversionSpecifier::sArg: 606 return true; 607 default: 608 return false; 609 } 610 611 case LengthModifier::AsLongDouble: 612 switch (CS.getKind()) { 613 case ConversionSpecifier::aArg: 614 case ConversionSpecifier::AArg: 615 case ConversionSpecifier::fArg: 616 case ConversionSpecifier::FArg: 617 case ConversionSpecifier::eArg: 618 case ConversionSpecifier::EArg: 619 case ConversionSpecifier::gArg: 620 case ConversionSpecifier::GArg: 621 return true; 622 default: 623 return false; 624 } 625 } 626 return false; 627} 628 629bool PrintfSpecifier::hasValidPrecision() const { 630 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified) 631 return true; 632 633 // Precision is only valid with the diouxXaAeEfFgGs conversions 634 switch (CS.getKind()) { 635 case ConversionSpecifier::dArg: 636 case ConversionSpecifier::iArg: 637 case ConversionSpecifier::oArg: 638 case ConversionSpecifier::uArg: 639 case ConversionSpecifier::xArg: 640 case ConversionSpecifier::XArg: 641 case ConversionSpecifier::aArg: 642 case ConversionSpecifier::AArg: 643 case ConversionSpecifier::eArg: 644 case ConversionSpecifier::EArg: 645 case ConversionSpecifier::fArg: 646 case ConversionSpecifier::FArg: 647 case ConversionSpecifier::gArg: 648 case ConversionSpecifier::GArg: 649 case ConversionSpecifier::sArg: 650 return true; 651 652 default: 653 return false; 654 } 655} 656bool PrintfSpecifier::hasValidFieldWidth() const { 657 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified) 658 return true; 659 660 // The field width is valid for all conversions except n 661 switch (CS.getKind()) { 662 case ConversionSpecifier::OutIntPtrArg: 663 return false; 664 665 default: 666 return true; 667 } 668} 669