// FormatString.cpp revision 32addd519c6699000ff79c387a1c87f0ab7c3698
1// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Shared details for processing format strings of printf and scanf 11// (and friends). 12// 13//===----------------------------------------------------------------------===// 14 15#include "FormatStringParsing.h" 16#include "clang/Basic/LangOptions.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::FormatSpecifier; 21using clang::analyze_format_string::LengthModifier; 22using clang::analyze_format_string::OptionalAmount; 23using clang::analyze_format_string::PositionContext; 24using clang::analyze_format_string::ConversionSpecifier; 25using namespace clang; 26 27// Key function to FormatStringHandler. 28FormatStringHandler::~FormatStringHandler() {} 29 30//===----------------------------------------------------------------------===// 31// Functions for parsing format strings components in both printf and 32// scanf format strings. 
33//===----------------------------------------------------------------------===// 34 35OptionalAmount 36clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { 37 const char *I = Beg; 38 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 39 40 unsigned accumulator = 0; 41 bool hasDigits = false; 42 43 for ( ; I != E; ++I) { 44 char c = *I; 45 if (c >= '0' && c <= '9') { 46 hasDigits = true; 47 accumulator = (accumulator * 10) + (c - '0'); 48 continue; 49 } 50 51 if (hasDigits) 52 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, 53 false); 54 55 break; 56 } 57 58 return OptionalAmount(); 59} 60 61OptionalAmount 62clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, 63 const char *E, 64 unsigned &argIndex) { 65 if (*Beg == '*') { 66 ++Beg; 67 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); 68 } 69 70 return ParseAmount(Beg, E); 71} 72 73OptionalAmount 74clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, 75 const char *Start, 76 const char *&Beg, 77 const char *E, 78 PositionContext p) { 79 if (*Beg == '*') { 80 const char *I = Beg + 1; 81 const OptionalAmount &Amt = ParseAmount(I, E); 82 83 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 84 H.HandleInvalidPosition(Beg, I - Beg, p); 85 return OptionalAmount(false); 86 } 87 88 if (I == E) { 89 // No more characters left? 90 H.HandleIncompleteSpecifier(Start, E - Start); 91 return OptionalAmount(false); 92 } 93 94 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 95 96 if (*I == '$') { 97 // Handle positional arguments 98 99 // Special case: '*0$', since this is an easy mistake. 
100 if (Amt.getConstantAmount() == 0) { 101 H.HandleZeroPosition(Beg, I - Beg + 1); 102 return OptionalAmount(false); 103 } 104 105 const char *Tmp = Beg; 106 Beg = ++I; 107 108 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 109 Tmp, 0, true); 110 } 111 112 H.HandleInvalidPosition(Beg, I - Beg, p); 113 return OptionalAmount(false); 114 } 115 116 return ParseAmount(Beg, E); 117} 118 119 120bool 121clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, 122 FormatSpecifier &CS, 123 const char *Start, 124 const char *&Beg, const char *E, 125 unsigned *argIndex) { 126 // FIXME: Support negative field widths. 127 if (argIndex) { 128 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 129 } 130 else { 131 const OptionalAmount Amt = 132 ParsePositionAmount(H, Start, Beg, E, 133 analyze_format_string::FieldWidthPos); 134 135 if (Amt.isInvalid()) 136 return true; 137 CS.setFieldWidth(Amt); 138 } 139 return false; 140} 141 142bool 143clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, 144 FormatSpecifier &FS, 145 const char *Start, 146 const char *&Beg, 147 const char *E) { 148 const char *I = Beg; 149 150 const OptionalAmount &Amt = ParseAmount(I, E); 151 152 if (I == E) { 153 // No more characters left? 154 H.HandleIncompleteSpecifier(Start, E - Start); 155 return true; 156 } 157 158 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 159 // Special case: '%0$', since this is an easy mistake. 160 if (Amt.getConstantAmount() == 0) { 161 H.HandleZeroPosition(Start, I - Start); 162 return true; 163 } 164 165 FS.setArgIndex(Amt.getConstantAmount() - 1); 166 FS.setUsesPositionalArg(); 167 // Update the caller's pointer if we decided to consume 168 // these characters. 
169 Beg = I; 170 return false; 171 } 172 173 return false; 174} 175 176bool 177clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, 178 const char *&I, 179 const char *E, 180 const LangOptions &LO, 181 bool IsScanf) { 182 LengthModifier::Kind lmKind = LengthModifier::None; 183 const char *lmPosition = I; 184 switch (*I) { 185 default: 186 return false; 187 case 'h': 188 ++I; 189 lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar) 190 : LengthModifier::AsShort; 191 break; 192 case 'l': 193 ++I; 194 lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong) 195 : LengthModifier::AsLong; 196 break; 197 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; 198 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; 199 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; 200 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; 201 case 'q': lmKind = LengthModifier::AsQuad; ++I; break; 202 case 'a': 203 if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) { 204 // For scanf in C90, look at the next character to see if this should 205 // be parsed as the GNU extension 'a' length modifier. If not, this 206 // will be parsed as a conversion specifier. 207 ++I; 208 if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { 209 lmKind = LengthModifier::AsAllocate; 210 break; 211 } 212 --I; 213 } 214 return false; 215 case 'm': 216 if (IsScanf) { 217 lmKind = LengthModifier::AsMAllocate; 218 ++I; 219 break; 220 } 221 return false; 222 } 223 LengthModifier lm(lmPosition, lmKind); 224 FS.setLengthModifier(lm); 225 return true; 226} 227 228//===----------------------------------------------------------------------===// 229// Methods on ArgTypeResult. 
230//===----------------------------------------------------------------------===// 231 232bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 233 switch (K) { 234 case InvalidTy: 235 llvm_unreachable("ArgTypeResult must be valid"); 236 237 case UnknownTy: 238 return true; 239 240 case AnyCharTy: { 241 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 242 switch (BT->getKind()) { 243 default: 244 break; 245 case BuiltinType::Char_S: 246 case BuiltinType::SChar: 247 case BuiltinType::UChar: 248 case BuiltinType::Char_U: 249 return true; 250 } 251 return false; 252 } 253 254 case SpecificTy: { 255 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 256 if (T == argTy) 257 return true; 258 // Check for "compatible types". 259 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 260 switch (BT->getKind()) { 261 default: 262 break; 263 case BuiltinType::Char_S: 264 case BuiltinType::SChar: 265 return T == C.UnsignedCharTy; 266 case BuiltinType::Char_U: 267 case BuiltinType::UChar: 268 return T == C.SignedCharTy; 269 case BuiltinType::Short: 270 return T == C.UnsignedShortTy; 271 case BuiltinType::UShort: 272 return T == C.ShortTy; 273 case BuiltinType::Int: 274 return T == C.UnsignedIntTy; 275 case BuiltinType::UInt: 276 return T == C.IntTy; 277 case BuiltinType::Long: 278 return T == C.UnsignedLongTy; 279 case BuiltinType::ULong: 280 return T == C.LongTy; 281 case BuiltinType::LongLong: 282 return T == C.UnsignedLongLongTy; 283 case BuiltinType::ULongLong: 284 return T == C.LongLongTy; 285 } 286 return false; 287 } 288 289 case CStrTy: { 290 const PointerType *PT = argTy->getAs<PointerType>(); 291 if (!PT) 292 return false; 293 QualType pointeeTy = PT->getPointeeType(); 294 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 295 switch (BT->getKind()) { 296 case BuiltinType::Void: 297 case BuiltinType::Char_U: 298 case BuiltinType::UChar: 299 case BuiltinType::Char_S: 300 case BuiltinType::SChar: 301 return true; 302 default: 
303 break; 304 } 305 306 return false; 307 } 308 309 case WCStrTy: { 310 const PointerType *PT = argTy->getAs<PointerType>(); 311 if (!PT) 312 return false; 313 QualType pointeeTy = 314 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 315 return pointeeTy == C.getWCharType(); 316 } 317 318 case WIntTy: { 319 // Instead of doing a lookup for the definition of 'wint_t' (which 320 // is defined by the system headers) instead see if wchar_t and 321 // the argument type promote to the same type. 322 QualType PromoWChar = 323 C.getWCharType()->isPromotableIntegerType() 324 ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); 325 QualType PromoArg = 326 argTy->isPromotableIntegerType() 327 ? C.getPromotedIntegerType(argTy) : argTy; 328 329 PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); 330 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); 331 332 return PromoWChar == PromoArg; 333 } 334 335 case CPointerTy: 336 return argTy->isPointerType() || argTy->isObjCObjectPointerType() || 337 argTy->isNullPtrType(); 338 339 case ObjCPointerTy: { 340 if (argTy->getAs<ObjCObjectPointerType>() || 341 argTy->getAs<BlockPointerType>()) 342 return true; 343 344 // Handle implicit toll-free bridging. 345 if (const PointerType *PT = argTy->getAs<PointerType>()) { 346 // Things such as CFTypeRef are really just opaque pointers 347 // to C structs representing CF types that can often be bridged 348 // to Objective-C objects. Since the compiler doesn't know which 349 // structs can be toll-free bridged, we just accept them all. 
350 QualType pointee = PT->getPointeeType(); 351 if (pointee->getAsStructureType() || pointee->isVoidType()) 352 return true; 353 } 354 return false; 355 } 356 } 357 358 llvm_unreachable("Invalid ArgTypeResult Kind!"); 359} 360 361QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 362 switch (K) { 363 case InvalidTy: 364 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 365 case UnknownTy: 366 return QualType(); 367 case AnyCharTy: 368 return C.CharTy; 369 case SpecificTy: 370 return T; 371 case CStrTy: 372 return C.getPointerType(C.CharTy); 373 case WCStrTy: 374 return C.getPointerType(C.getWCharType()); 375 case ObjCPointerTy: 376 return C.ObjCBuiltinIdTy; 377 case CPointerTy: 378 return C.VoidPtrTy; 379 case WIntTy: { 380 QualType WC = C.getWCharType(); 381 return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; 382 } 383 } 384 385 llvm_unreachable("Invalid ArgTypeResult Kind!"); 386} 387 388std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const { 389 std::string S = getRepresentativeType(C).getAsString(); 390 if (Name && S != Name) 391 return std::string("'") + Name + "' (aka '" + S + "')"; 392 return std::string("'") + S + "'"; 393} 394 395 396//===----------------------------------------------------------------------===// 397// Methods on OptionalAmount. 398//===----------------------------------------------------------------------===// 399 400ArgTypeResult 401analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { 402 return Ctx.IntTy; 403} 404 405//===----------------------------------------------------------------------===// 406// Methods on LengthModifier. 
407//===----------------------------------------------------------------------===// 408 409const char * 410analyze_format_string::LengthModifier::toString() const { 411 switch (kind) { 412 case AsChar: 413 return "hh"; 414 case AsShort: 415 return "h"; 416 case AsLong: // or AsWideChar 417 return "l"; 418 case AsLongLong: 419 return "ll"; 420 case AsQuad: 421 return "q"; 422 case AsIntMax: 423 return "j"; 424 case AsSizeT: 425 return "z"; 426 case AsPtrDiff: 427 return "t"; 428 case AsLongDouble: 429 return "L"; 430 case AsAllocate: 431 return "a"; 432 case AsMAllocate: 433 return "m"; 434 case None: 435 return ""; 436 } 437 return NULL; 438} 439 440//===----------------------------------------------------------------------===// 441// Methods on ConversionSpecifier. 442//===----------------------------------------------------------------------===// 443 444const char *ConversionSpecifier::toString() const { 445 switch (kind) { 446 case dArg: return "d"; 447 case iArg: return "i"; 448 case oArg: return "o"; 449 case uArg: return "u"; 450 case xArg: return "x"; 451 case XArg: return "X"; 452 case fArg: return "f"; 453 case FArg: return "F"; 454 case eArg: return "e"; 455 case EArg: return "E"; 456 case gArg: return "g"; 457 case GArg: return "G"; 458 case aArg: return "a"; 459 case AArg: return "A"; 460 case cArg: return "c"; 461 case sArg: return "s"; 462 case pArg: return "p"; 463 case nArg: return "n"; 464 case PercentArg: return "%"; 465 case ScanListArg: return "["; 466 case InvalidSpecifier: return NULL; 467 468 // MacOS X unicode extensions. 469 case CArg: return "C"; 470 case SArg: return "S"; 471 472 // Objective-C specific specifiers. 473 case ObjCObjArg: return "@"; 474 475 // GlibC specific specifiers. 476 case PrintErrno: return "m"; 477 } 478 return NULL; 479} 480 481//===----------------------------------------------------------------------===// 482// Methods on OptionalAmount. 
483//===----------------------------------------------------------------------===// 484 485void OptionalAmount::toString(raw_ostream &os) const { 486 switch (hs) { 487 case Invalid: 488 case NotSpecified: 489 return; 490 case Arg: 491 if (UsesDotPrefix) 492 os << "."; 493 if (usesPositionalArg()) 494 os << "*" << getPositionalArgIndex() << "$"; 495 else 496 os << "*"; 497 break; 498 case Constant: 499 if (UsesDotPrefix) 500 os << "."; 501 os << amt; 502 break; 503 } 504} 505 506bool FormatSpecifier::hasValidLengthModifier() const { 507 switch (LM.getKind()) { 508 case LengthModifier::None: 509 return true; 510 511 // Handle most integer flags 512 case LengthModifier::AsChar: 513 case LengthModifier::AsShort: 514 case LengthModifier::AsLongLong: 515 case LengthModifier::AsQuad: 516 case LengthModifier::AsIntMax: 517 case LengthModifier::AsSizeT: 518 case LengthModifier::AsPtrDiff: 519 switch (CS.getKind()) { 520 case ConversionSpecifier::dArg: 521 case ConversionSpecifier::iArg: 522 case ConversionSpecifier::oArg: 523 case ConversionSpecifier::uArg: 524 case ConversionSpecifier::xArg: 525 case ConversionSpecifier::XArg: 526 case ConversionSpecifier::nArg: 527 return true; 528 default: 529 return false; 530 } 531 532 // Handle 'l' flag 533 case LengthModifier::AsLong: 534 switch (CS.getKind()) { 535 case ConversionSpecifier::dArg: 536 case ConversionSpecifier::iArg: 537 case ConversionSpecifier::oArg: 538 case ConversionSpecifier::uArg: 539 case ConversionSpecifier::xArg: 540 case ConversionSpecifier::XArg: 541 case ConversionSpecifier::aArg: 542 case ConversionSpecifier::AArg: 543 case ConversionSpecifier::fArg: 544 case ConversionSpecifier::FArg: 545 case ConversionSpecifier::eArg: 546 case ConversionSpecifier::EArg: 547 case ConversionSpecifier::gArg: 548 case ConversionSpecifier::GArg: 549 case ConversionSpecifier::nArg: 550 case ConversionSpecifier::cArg: 551 case ConversionSpecifier::sArg: 552 case ConversionSpecifier::ScanListArg: 553 return true; 554 default: 
555 return false; 556 } 557 558 case LengthModifier::AsLongDouble: 559 switch (CS.getKind()) { 560 case ConversionSpecifier::aArg: 561 case ConversionSpecifier::AArg: 562 case ConversionSpecifier::fArg: 563 case ConversionSpecifier::FArg: 564 case ConversionSpecifier::eArg: 565 case ConversionSpecifier::EArg: 566 case ConversionSpecifier::gArg: 567 case ConversionSpecifier::GArg: 568 return true; 569 // GNU extension. 570 case ConversionSpecifier::dArg: 571 case ConversionSpecifier::iArg: 572 case ConversionSpecifier::oArg: 573 case ConversionSpecifier::uArg: 574 case ConversionSpecifier::xArg: 575 case ConversionSpecifier::XArg: 576 return true; 577 default: 578 return false; 579 } 580 581 case LengthModifier::AsAllocate: 582 switch (CS.getKind()) { 583 case ConversionSpecifier::sArg: 584 case ConversionSpecifier::SArg: 585 case ConversionSpecifier::ScanListArg: 586 return true; 587 default: 588 return false; 589 } 590 591 case LengthModifier::AsMAllocate: 592 switch (CS.getKind()) { 593 case ConversionSpecifier::cArg: 594 case ConversionSpecifier::CArg: 595 case ConversionSpecifier::sArg: 596 case ConversionSpecifier::SArg: 597 case ConversionSpecifier::ScanListArg: 598 return true; 599 default: 600 return false; 601 } 602 } 603 llvm_unreachable("Invalid LengthModifier Kind!"); 604} 605