FormatString.cpp revision 58e1e54476d610d6c33ef483f216ed8a1282d35c
// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Shared details for processing format strings of printf and scanf
// (and friends).
//
//===----------------------------------------------------------------------===//

#include "FormatStringParsing.h"
#include "clang/Basic/LangOptions.h"

using clang::analyze_format_string::ArgType;
using clang::analyze_format_string::FormatStringHandler;
using clang::analyze_format_string::FormatSpecifier;
using clang::analyze_format_string::LengthModifier;
using clang::analyze_format_string::OptionalAmount;
using clang::analyze_format_string::PositionContext;
using clang::analyze_format_string::ConversionSpecifier;
using namespace clang;

// Key function to FormatStringHandler.
//
// The destructor is defined out of line, with an empty body, in this
// translation unit.
FormatStringHandler::~FormatStringHandler() {}

//===----------------------------------------------------------------------===//
// Functions for parsing format string components in both printf and
// scanf format strings.
33//===----------------------------------------------------------------------===// 34 35OptionalAmount 36clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { 37 const char *I = Beg; 38 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 39 40 unsigned accumulator = 0; 41 bool hasDigits = false; 42 43 for ( ; I != E; ++I) { 44 char c = *I; 45 if (c >= '0' && c <= '9') { 46 hasDigits = true; 47 accumulator = (accumulator * 10) + (c - '0'); 48 continue; 49 } 50 51 if (hasDigits) 52 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, 53 false); 54 55 break; 56 } 57 58 return OptionalAmount(); 59} 60 61OptionalAmount 62clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, 63 const char *E, 64 unsigned &argIndex) { 65 if (*Beg == '*') { 66 ++Beg; 67 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); 68 } 69 70 return ParseAmount(Beg, E); 71} 72 73OptionalAmount 74clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, 75 const char *Start, 76 const char *&Beg, 77 const char *E, 78 PositionContext p) { 79 if (*Beg == '*') { 80 const char *I = Beg + 1; 81 const OptionalAmount &Amt = ParseAmount(I, E); 82 83 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 84 H.HandleInvalidPosition(Beg, I - Beg, p); 85 return OptionalAmount(false); 86 } 87 88 if (I == E) { 89 // No more characters left? 90 H.HandleIncompleteSpecifier(Start, E - Start); 91 return OptionalAmount(false); 92 } 93 94 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 95 96 if (*I == '$') { 97 // Handle positional arguments 98 99 // Special case: '*0$', since this is an easy mistake. 
100 if (Amt.getConstantAmount() == 0) { 101 H.HandleZeroPosition(Beg, I - Beg + 1); 102 return OptionalAmount(false); 103 } 104 105 const char *Tmp = Beg; 106 Beg = ++I; 107 108 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 109 Tmp, 0, true); 110 } 111 112 H.HandleInvalidPosition(Beg, I - Beg, p); 113 return OptionalAmount(false); 114 } 115 116 return ParseAmount(Beg, E); 117} 118 119 120bool 121clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, 122 FormatSpecifier &CS, 123 const char *Start, 124 const char *&Beg, const char *E, 125 unsigned *argIndex) { 126 // FIXME: Support negative field widths. 127 if (argIndex) { 128 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 129 } 130 else { 131 const OptionalAmount Amt = 132 ParsePositionAmount(H, Start, Beg, E, 133 analyze_format_string::FieldWidthPos); 134 135 if (Amt.isInvalid()) 136 return true; 137 CS.setFieldWidth(Amt); 138 } 139 return false; 140} 141 142bool 143clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, 144 FormatSpecifier &FS, 145 const char *Start, 146 const char *&Beg, 147 const char *E) { 148 const char *I = Beg; 149 150 const OptionalAmount &Amt = ParseAmount(I, E); 151 152 if (I == E) { 153 // No more characters left? 154 H.HandleIncompleteSpecifier(Start, E - Start); 155 return true; 156 } 157 158 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 159 // Warn that positional arguments are non-standard. 160 H.HandlePosition(Start, I - Start); 161 162 // Special case: '%0$', since this is an easy mistake. 163 if (Amt.getConstantAmount() == 0) { 164 H.HandleZeroPosition(Start, I - Start); 165 return true; 166 } 167 168 FS.setArgIndex(Amt.getConstantAmount() - 1); 169 FS.setUsesPositionalArg(); 170 // Update the caller's pointer if we decided to consume 171 // these characters. 
172 Beg = I; 173 return false; 174 } 175 176 return false; 177} 178 179bool 180clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, 181 const char *&I, 182 const char *E, 183 const LangOptions &LO, 184 bool IsScanf) { 185 LengthModifier::Kind lmKind = LengthModifier::None; 186 const char *lmPosition = I; 187 switch (*I) { 188 default: 189 return false; 190 case 'h': 191 ++I; 192 lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar) 193 : LengthModifier::AsShort; 194 break; 195 case 'l': 196 ++I; 197 lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong) 198 : LengthModifier::AsLong; 199 break; 200 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; 201 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; 202 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; 203 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; 204 case 'q': lmKind = LengthModifier::AsQuad; ++I; break; 205 case 'a': 206 if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) { 207 // For scanf in C90, look at the next character to see if this should 208 // be parsed as the GNU extension 'a' length modifier. If not, this 209 // will be parsed as a conversion specifier. 210 ++I; 211 if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { 212 lmKind = LengthModifier::AsAllocate; 213 break; 214 } 215 --I; 216 } 217 return false; 218 case 'm': 219 if (IsScanf) { 220 lmKind = LengthModifier::AsMAllocate; 221 ++I; 222 break; 223 } 224 return false; 225 } 226 LengthModifier lm(lmPosition, lmKind); 227 FS.setLengthModifier(lm); 228 return true; 229} 230 231//===----------------------------------------------------------------------===// 232// Methods on ArgType. 233//===----------------------------------------------------------------------===// 234 235bool ArgType::matchesType(ASTContext &C, QualType argTy) const { 236 if (Ptr) { 237 // It has to be a pointer. 
238 const PointerType *PT = argTy->getAs<PointerType>(); 239 if (!PT) 240 return false; 241 242 // We cannot write through a const qualified pointer. 243 if (PT->getPointeeType().isConstQualified()) 244 return false; 245 246 argTy = PT->getPointeeType(); 247 } 248 249 switch (K) { 250 case InvalidTy: 251 llvm_unreachable("ArgType must be valid"); 252 253 case UnknownTy: 254 return true; 255 256 case AnyCharTy: { 257 if (const EnumType *ETy = argTy->getAs<EnumType>()) 258 argTy = ETy->getDecl()->getIntegerType(); 259 260 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 261 switch (BT->getKind()) { 262 default: 263 break; 264 case BuiltinType::Char_S: 265 case BuiltinType::SChar: 266 case BuiltinType::UChar: 267 case BuiltinType::Char_U: 268 return true; 269 } 270 return false; 271 } 272 273 case SpecificTy: { 274 if (const EnumType *ETy = argTy->getAs<EnumType>()) 275 argTy = ETy->getDecl()->getIntegerType(); 276 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 277 278 if (T == argTy) 279 return true; 280 // Check for "compatible types". 
281 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 282 switch (BT->getKind()) { 283 default: 284 break; 285 case BuiltinType::Char_S: 286 case BuiltinType::SChar: 287 case BuiltinType::Char_U: 288 case BuiltinType::UChar: 289 return T == C.UnsignedCharTy || T == C.SignedCharTy; 290 case BuiltinType::Short: 291 return T == C.UnsignedShortTy; 292 case BuiltinType::UShort: 293 return T == C.ShortTy; 294 case BuiltinType::Int: 295 return T == C.UnsignedIntTy; 296 case BuiltinType::UInt: 297 return T == C.IntTy; 298 case BuiltinType::Long: 299 return T == C.UnsignedLongTy; 300 case BuiltinType::ULong: 301 return T == C.LongTy; 302 case BuiltinType::LongLong: 303 return T == C.UnsignedLongLongTy; 304 case BuiltinType::ULongLong: 305 return T == C.LongLongTy; 306 } 307 return false; 308 } 309 310 case CStrTy: { 311 const PointerType *PT = argTy->getAs<PointerType>(); 312 if (!PT) 313 return false; 314 QualType pointeeTy = PT->getPointeeType(); 315 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 316 switch (BT->getKind()) { 317 case BuiltinType::Void: 318 case BuiltinType::Char_U: 319 case BuiltinType::UChar: 320 case BuiltinType::Char_S: 321 case BuiltinType::SChar: 322 return true; 323 default: 324 break; 325 } 326 327 return false; 328 } 329 330 case WCStrTy: { 331 const PointerType *PT = argTy->getAs<PointerType>(); 332 if (!PT) 333 return false; 334 QualType pointeeTy = 335 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 336 return pointeeTy == C.getWCharType(); 337 } 338 339 case WIntTy: { 340 341 QualType PromoArg = 342 argTy->isPromotableIntegerType() 343 ? C.getPromotedIntegerType(argTy) : argTy; 344 345 QualType WInt = C.getCanonicalType(C.getWIntType()).getUnqualifiedType(); 346 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); 347 348 // If the promoted argument is the corresponding signed type of the 349 // wint_t type, then it should match. 
350 if (PromoArg->hasSignedIntegerRepresentation() && 351 C.getCorrespondingUnsignedType(PromoArg) == WInt) 352 return true; 353 354 return WInt == PromoArg; 355 } 356 357 case CPointerTy: 358 return argTy->isPointerType() || argTy->isObjCObjectPointerType() || 359 argTy->isBlockPointerType() || argTy->isNullPtrType(); 360 361 case ObjCPointerTy: { 362 if (argTy->getAs<ObjCObjectPointerType>() || 363 argTy->getAs<BlockPointerType>()) 364 return true; 365 366 // Handle implicit toll-free bridging. 367 if (const PointerType *PT = argTy->getAs<PointerType>()) { 368 // Things such as CFTypeRef are really just opaque pointers 369 // to C structs representing CF types that can often be bridged 370 // to Objective-C objects. Since the compiler doesn't know which 371 // structs can be toll-free bridged, we just accept them all. 372 QualType pointee = PT->getPointeeType(); 373 if (pointee->getAsStructureType() || pointee->isVoidType()) 374 return true; 375 } 376 return false; 377 } 378 } 379 380 llvm_unreachable("Invalid ArgType Kind!"); 381} 382 383QualType ArgType::getRepresentativeType(ASTContext &C) const { 384 QualType Res; 385 switch (K) { 386 case InvalidTy: 387 llvm_unreachable("No representative type for Invalid ArgType"); 388 case UnknownTy: 389 llvm_unreachable("No representative type for Unknown ArgType"); 390 case AnyCharTy: 391 Res = C.CharTy; 392 break; 393 case SpecificTy: 394 Res = T; 395 break; 396 case CStrTy: 397 Res = C.getPointerType(C.CharTy); 398 break; 399 case WCStrTy: 400 Res = C.getPointerType(C.getWCharType()); 401 break; 402 case ObjCPointerTy: 403 Res = C.ObjCBuiltinIdTy; 404 break; 405 case CPointerTy: 406 Res = C.VoidPtrTy; 407 break; 408 case WIntTy: { 409 Res = C.getWIntType(); 410 break; 411 } 412 } 413 414 if (Ptr) 415 Res = C.getPointerType(Res); 416 return Res; 417} 418 419std::string ArgType::getRepresentativeTypeName(ASTContext &C) const { 420 std::string S = getRepresentativeType(C).getAsString(); 421 422 std::string Alias; 423 if 
(Name) { 424 // Use a specific name for this type, e.g. "size_t". 425 Alias = Name; 426 if (Ptr) { 427 // If ArgType is actually a pointer to T, append an asterisk. 428 Alias += (Alias[Alias.size()-1] == '*') ? "*" : " *"; 429 } 430 // If Alias is the same as the underlying type, e.g. wchar_t, then drop it. 431 if (S == Alias) 432 Alias.clear(); 433 } 434 435 if (!Alias.empty()) 436 return std::string("'") + Alias + "' (aka '" + S + "')"; 437 return std::string("'") + S + "'"; 438} 439 440 441//===----------------------------------------------------------------------===// 442// Methods on OptionalAmount. 443//===----------------------------------------------------------------------===// 444 445ArgType 446analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { 447 return Ctx.IntTy; 448} 449 450//===----------------------------------------------------------------------===// 451// Methods on LengthModifier. 452//===----------------------------------------------------------------------===// 453 454const char * 455analyze_format_string::LengthModifier::toString() const { 456 switch (kind) { 457 case AsChar: 458 return "hh"; 459 case AsShort: 460 return "h"; 461 case AsLong: // or AsWideChar 462 return "l"; 463 case AsLongLong: 464 return "ll"; 465 case AsQuad: 466 return "q"; 467 case AsIntMax: 468 return "j"; 469 case AsSizeT: 470 return "z"; 471 case AsPtrDiff: 472 return "t"; 473 case AsLongDouble: 474 return "L"; 475 case AsAllocate: 476 return "a"; 477 case AsMAllocate: 478 return "m"; 479 case None: 480 return ""; 481 } 482 return NULL; 483} 484 485//===----------------------------------------------------------------------===// 486// Methods on ConversionSpecifier. 
487//===----------------------------------------------------------------------===// 488 489const char *ConversionSpecifier::toString() const { 490 switch (kind) { 491 case dArg: return "d"; 492 case iArg: return "i"; 493 case oArg: return "o"; 494 case uArg: return "u"; 495 case xArg: return "x"; 496 case XArg: return "X"; 497 case fArg: return "f"; 498 case FArg: return "F"; 499 case eArg: return "e"; 500 case EArg: return "E"; 501 case gArg: return "g"; 502 case GArg: return "G"; 503 case aArg: return "a"; 504 case AArg: return "A"; 505 case cArg: return "c"; 506 case sArg: return "s"; 507 case pArg: return "p"; 508 case nArg: return "n"; 509 case PercentArg: return "%"; 510 case ScanListArg: return "["; 511 case InvalidSpecifier: return NULL; 512 513 // MacOS X unicode extensions. 514 case CArg: return "C"; 515 case SArg: return "S"; 516 517 // Objective-C specific specifiers. 518 case ObjCObjArg: return "@"; 519 520 // GlibC specific specifiers. 521 case PrintErrno: return "m"; 522 } 523 return NULL; 524} 525 526//===----------------------------------------------------------------------===// 527// Methods on OptionalAmount. 
528//===----------------------------------------------------------------------===// 529 530void OptionalAmount::toString(raw_ostream &os) const { 531 switch (hs) { 532 case Invalid: 533 case NotSpecified: 534 return; 535 case Arg: 536 if (UsesDotPrefix) 537 os << "."; 538 if (usesPositionalArg()) 539 os << "*" << getPositionalArgIndex() << "$"; 540 else 541 os << "*"; 542 break; 543 case Constant: 544 if (UsesDotPrefix) 545 os << "."; 546 os << amt; 547 break; 548 } 549} 550 551bool FormatSpecifier::hasValidLengthModifier() const { 552 switch (LM.getKind()) { 553 case LengthModifier::None: 554 return true; 555 556 // Handle most integer flags 557 case LengthModifier::AsChar: 558 case LengthModifier::AsShort: 559 case LengthModifier::AsLongLong: 560 case LengthModifier::AsQuad: 561 case LengthModifier::AsIntMax: 562 case LengthModifier::AsSizeT: 563 case LengthModifier::AsPtrDiff: 564 switch (CS.getKind()) { 565 case ConversionSpecifier::dArg: 566 case ConversionSpecifier::iArg: 567 case ConversionSpecifier::oArg: 568 case ConversionSpecifier::uArg: 569 case ConversionSpecifier::xArg: 570 case ConversionSpecifier::XArg: 571 case ConversionSpecifier::nArg: 572 return true; 573 default: 574 return false; 575 } 576 577 // Handle 'l' flag 578 case LengthModifier::AsLong: 579 switch (CS.getKind()) { 580 case ConversionSpecifier::dArg: 581 case ConversionSpecifier::iArg: 582 case ConversionSpecifier::oArg: 583 case ConversionSpecifier::uArg: 584 case ConversionSpecifier::xArg: 585 case ConversionSpecifier::XArg: 586 case ConversionSpecifier::aArg: 587 case ConversionSpecifier::AArg: 588 case ConversionSpecifier::fArg: 589 case ConversionSpecifier::FArg: 590 case ConversionSpecifier::eArg: 591 case ConversionSpecifier::EArg: 592 case ConversionSpecifier::gArg: 593 case ConversionSpecifier::GArg: 594 case ConversionSpecifier::nArg: 595 case ConversionSpecifier::cArg: 596 case ConversionSpecifier::sArg: 597 case ConversionSpecifier::ScanListArg: 598 return true; 599 default: 
600 return false; 601 } 602 603 case LengthModifier::AsLongDouble: 604 switch (CS.getKind()) { 605 case ConversionSpecifier::aArg: 606 case ConversionSpecifier::AArg: 607 case ConversionSpecifier::fArg: 608 case ConversionSpecifier::FArg: 609 case ConversionSpecifier::eArg: 610 case ConversionSpecifier::EArg: 611 case ConversionSpecifier::gArg: 612 case ConversionSpecifier::GArg: 613 return true; 614 // GNU extension. 615 case ConversionSpecifier::dArg: 616 case ConversionSpecifier::iArg: 617 case ConversionSpecifier::oArg: 618 case ConversionSpecifier::uArg: 619 case ConversionSpecifier::xArg: 620 case ConversionSpecifier::XArg: 621 return true; 622 default: 623 return false; 624 } 625 626 case LengthModifier::AsAllocate: 627 switch (CS.getKind()) { 628 case ConversionSpecifier::sArg: 629 case ConversionSpecifier::SArg: 630 case ConversionSpecifier::ScanListArg: 631 return true; 632 default: 633 return false; 634 } 635 636 case LengthModifier::AsMAllocate: 637 switch (CS.getKind()) { 638 case ConversionSpecifier::cArg: 639 case ConversionSpecifier::CArg: 640 case ConversionSpecifier::sArg: 641 case ConversionSpecifier::SArg: 642 case ConversionSpecifier::ScanListArg: 643 return true; 644 default: 645 return false; 646 } 647 } 648 llvm_unreachable("Invalid LengthModifier Kind!"); 649} 650 651bool FormatSpecifier::hasStandardLengthModifier() const { 652 switch (LM.getKind()) { 653 case LengthModifier::None: 654 case LengthModifier::AsChar: 655 case LengthModifier::AsShort: 656 case LengthModifier::AsLong: 657 case LengthModifier::AsLongLong: 658 case LengthModifier::AsIntMax: 659 case LengthModifier::AsSizeT: 660 case LengthModifier::AsPtrDiff: 661 case LengthModifier::AsLongDouble: 662 return true; 663 case LengthModifier::AsAllocate: 664 case LengthModifier::AsMAllocate: 665 case LengthModifier::AsQuad: 666 return false; 667 } 668 llvm_unreachable("Invalid LengthModifier Kind!"); 669} 670 671bool FormatSpecifier::hasStandardConversionSpecifier(const LangOptions 
&LangOpt) const { 672 switch (CS.getKind()) { 673 case ConversionSpecifier::cArg: 674 case ConversionSpecifier::dArg: 675 case ConversionSpecifier::iArg: 676 case ConversionSpecifier::oArg: 677 case ConversionSpecifier::uArg: 678 case ConversionSpecifier::xArg: 679 case ConversionSpecifier::XArg: 680 case ConversionSpecifier::fArg: 681 case ConversionSpecifier::FArg: 682 case ConversionSpecifier::eArg: 683 case ConversionSpecifier::EArg: 684 case ConversionSpecifier::gArg: 685 case ConversionSpecifier::GArg: 686 case ConversionSpecifier::aArg: 687 case ConversionSpecifier::AArg: 688 case ConversionSpecifier::sArg: 689 case ConversionSpecifier::pArg: 690 case ConversionSpecifier::nArg: 691 case ConversionSpecifier::ObjCObjArg: 692 case ConversionSpecifier::ScanListArg: 693 case ConversionSpecifier::PercentArg: 694 return true; 695 case ConversionSpecifier::CArg: 696 case ConversionSpecifier::SArg: 697 return LangOpt.ObjC1 || LangOpt.ObjC2; 698 case ConversionSpecifier::InvalidSpecifier: 699 case ConversionSpecifier::PrintErrno: 700 return false; 701 } 702 llvm_unreachable("Invalid ConversionSpecifier Kind!"); 703} 704 705bool FormatSpecifier::hasStandardLengthConversionCombination() const { 706 if (LM.getKind() == LengthModifier::AsLongDouble) { 707 switch(CS.getKind()) { 708 case ConversionSpecifier::dArg: 709 case ConversionSpecifier::iArg: 710 case ConversionSpecifier::oArg: 711 case ConversionSpecifier::uArg: 712 case ConversionSpecifier::xArg: 713 case ConversionSpecifier::XArg: 714 return false; 715 default: 716 return true; 717 } 718 } 719 return true; 720} 721 722bool FormatSpecifier::namedTypeToLengthModifier(QualType QT, 723 LengthModifier &LM) { 724 assert(isa<TypedefType>(QT) && "Expected a TypedefType"); 725 const TypedefNameDecl *Typedef = cast<TypedefType>(QT)->getDecl(); 726 727 for (;;) { 728 const IdentifierInfo *Identifier = Typedef->getIdentifier(); 729 if (Identifier->getName() == "size_t") { 730 LM.setKind(LengthModifier::AsSizeT); 731 return 
true; 732 } else if (Identifier->getName() == "ssize_t") { 733 // Not C99, but common in Unix. 734 LM.setKind(LengthModifier::AsSizeT); 735 return true; 736 } else if (Identifier->getName() == "intmax_t") { 737 LM.setKind(LengthModifier::AsIntMax); 738 return true; 739 } else if (Identifier->getName() == "uintmax_t") { 740 LM.setKind(LengthModifier::AsIntMax); 741 return true; 742 } else if (Identifier->getName() == "ptrdiff_t") { 743 LM.setKind(LengthModifier::AsPtrDiff); 744 return true; 745 } 746 747 QualType T = Typedef->getUnderlyingType(); 748 if (!isa<TypedefType>(T)) 749 break; 750 751 Typedef = cast<TypedefType>(T)->getDecl(); 752 } 753 return false; 754} 755