PrintfFormatString.cpp revision d49d87719b8e272134e76601e3efc0197785aa8a
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/PrintfFormatString.h" 16#include "clang/AST/ASTContext.h" 17 18using clang::analyze_printf::ArgTypeResult; 19using clang::analyze_printf::FormatSpecifier; 20using clang::analyze_printf::FormatStringHandler; 21using clang::analyze_printf::OptionalAmount; 22using clang::analyze_printf::PositionContext; 23 24using namespace clang; 25 26namespace { 27class FormatSpecifierResult { 28 FormatSpecifier FS; 29 const char *Start; 30 bool Stop; 31public: 32 FormatSpecifierResult(bool stop = false) 33 : Start(0), Stop(stop) {} 34 FormatSpecifierResult(const char *start, 35 const FormatSpecifier &fs) 36 : FS(fs), Start(start), Stop(false) {} 37 38 39 const char *getStart() const { return Start; } 40 bool shouldStop() const { return Stop; } 41 bool hasValue() const { return Start != 0; } 42 const FormatSpecifier &getValue() const { 43 assert(hasValue()); 44 return FS; 45 } 46 const FormatSpecifier &getValue() { return FS; } 47}; 48} // end anonymous namespace 49 50template <typename T> 51class UpdateOnReturn { 52 T &ValueToUpdate; 53 const T &ValueToCopy; 54public: 55 UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) 56 : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} 57 58 ~UpdateOnReturn() { 59 ValueToUpdate = ValueToCopy; 60 } 61}; 62 63//===----------------------------------------------------------------------===// 64// Methods for parsing format strings. 65//===----------------------------------------------------------------------===// 66 67static OptionalAmount ParseAmount(const char *&Beg, const char *E) { 68 const char *I = Beg; 69 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 70 71 unsigned accumulator = 0; 72 bool hasDigits = false; 73 74 for ( ; I != E; ++I) { 75 char c = *I; 76 if (c >= '0' && c <= '9') { 77 hasDigits = true; 78 accumulator += (accumulator * 10) + (c - '0'); 79 continue; 80 } 81 82 if (hasDigits) 83 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg); 84 85 break; 86 } 87 88 return OptionalAmount(); 89} 90 91static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, 92 unsigned &argIndex) { 93 if (*Beg == '*') { 94 ++Beg; 95 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg); 96 } 97 98 return ParseAmount(Beg, E); 99} 100 101static OptionalAmount ParsePositionAmount(FormatStringHandler &H, 102 const char *Start, 103 const char *&Beg, const char *E, 104 PositionContext p) { 105 if (*Beg == '*') { 106 const char *I = Beg + 1; 107 const OptionalAmount &Amt = ParseAmount(I, E); 108 109 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 110 H.HandleInvalidPosition(Beg, I - Beg, p); 111 return OptionalAmount(false); 112 } 113 114 if (I== E) { 115 // No more characters left? 116 H.HandleIncompleteFormatSpecifier(Start, E - Start); 117 return OptionalAmount(false); 118 } 119 120 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 121 122 if (*I == '$') { 123 // Special case: '*0$', since this is an easy mistake. 124 if (Amt.getConstantAmount() == 0) { 125 H.HandleZeroPosition(Beg, I - Beg + 1); 126 return OptionalAmount(false); 127 } 128 129 const char *Tmp = Beg; 130 Beg = ++I; 131 132 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 133 Tmp); 134 } 135 136 H.HandleInvalidPosition(Beg, I - Beg, p); 137 return OptionalAmount(false); 138 } 139 140 return ParseAmount(Beg, E); 141} 142 143static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS, 144 const char *Start, const char *&Beg, const char *E, 145 unsigned *argIndex) { 146 if (argIndex) { 147 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 148 } 149 else { 150 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 151 analyze_printf::PrecisionPos); 152 if (Amt.isInvalid()) 153 return true; 154 FS.setPrecision(Amt); 155 } 156 return false; 157} 158 159static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS, 160 const char *Start, const char *&Beg, const char *E, 161 unsigned *argIndex) { 162 // FIXME: Support negative field widths. 163 if (argIndex) { 164 FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 165 } 166 else { 167 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 168 analyze_printf::FieldWidthPos); 169 if (Amt.isInvalid()) 170 return true; 171 FS.setFieldWidth(Amt); 172 } 173 return false; 174} 175 176 177static bool ParseArgPosition(FormatStringHandler &H, 178 FormatSpecifier &FS, const char *Start, 179 const char *&Beg, const char *E) { 180 181 using namespace clang::analyze_printf; 182 const char *I = Beg; 183 184 const OptionalAmount &Amt = ParseAmount(I, E); 185 186 if (I == E) { 187 // No more characters left? 188 H.HandleIncompleteFormatSpecifier(Start, E - Start); 189 return true; 190 } 191 192 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 193 // Special case: '%0$', since this is an easy mistake. 194 if (Amt.getConstantAmount() == 0) { 195 H.HandleZeroPosition(Start, I - Start); 196 return true; 197 } 198 199 FS.setArgIndex(Amt.getConstantAmount() - 1); 200 FS.setUsesPositionalArg(); 201 // Update the caller's pointer if we decided to consume 202 // these characters. 203 Beg = I; 204 return false; 205 } 206 207 return false; 208} 209 210static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, 211 const char *&Beg, 212 const char *E, 213 unsigned &argIndex) { 214 215 using namespace clang::analyze_printf; 216 217 const char *I = Beg; 218 const char *Start = 0; 219 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 220 221 // Look for a '%' character that indicates the start of a format specifier. 222 for ( ; I != E ; ++I) { 223 char c = *I; 224 if (c == '\0') { 225 // Detect spurious null characters, which are likely errors. 226 H.HandleNullChar(I); 227 return true; 228 } 229 if (c == '%') { 230 Start = I++; // Record the start of the format specifier. 231 break; 232 } 233 } 234 235 // No format specifier found? 236 if (!Start) 237 return false; 238 239 if (I == E) { 240 // No more characters left? 241 H.HandleIncompleteFormatSpecifier(Start, E - Start); 242 return true; 243 } 244 245 FormatSpecifier FS; 246 if (ParseArgPosition(H, FS, Start, I, E)) 247 return true; 248 249 if (I == E) { 250 // No more characters left? 251 H.HandleIncompleteFormatSpecifier(Start, E - Start); 252 return true; 253 } 254 255 // Look for flags (if any). 256 bool hasMore = true; 257 for ( ; I != E; ++I) { 258 switch (*I) { 259 default: hasMore = false; break; 260 case '-': FS.setIsLeftJustified(); break; 261 case '+': FS.setHasPlusPrefix(); break; 262 case ' ': FS.setHasSpacePrefix(); break; 263 case '#': FS.setHasAlternativeForm(); break; 264 case '0': FS.setHasLeadingZeros(); break; 265 } 266 if (!hasMore) 267 break; 268 } 269 270 if (I == E) { 271 // No more characters left? 272 H.HandleIncompleteFormatSpecifier(Start, E - Start); 273 return true; 274 } 275 276 // Look for the field width (if any). 277 if (ParseFieldWidth(H, FS, Start, I, E, 278 FS.usesPositionalArg() ? 0 : &argIndex)) 279 return true; 280 281 if (I == E) { 282 // No more characters left? 283 H.HandleIncompleteFormatSpecifier(Start, E - Start); 284 return true; 285 } 286 287 // Look for the precision (if any). 288 if (*I == '.') { 289 ++I; 290 if (I == E) { 291 H.HandleIncompleteFormatSpecifier(Start, E - Start); 292 return true; 293 } 294 295 if (ParsePrecision(H, FS, Start, I, E, 296 FS.usesPositionalArg() ? 0 : &argIndex)) 297 return true; 298 299 if (I == E) { 300 // No more characters left? 301 H.HandleIncompleteFormatSpecifier(Start, E - Start); 302 return true; 303 } 304 } 305 306 // Look for the length modifier. 307 LengthModifier lm = None; 308 switch (*I) { 309 default: 310 break; 311 case 'h': 312 ++I; 313 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; 314 break; 315 case 'l': 316 ++I; 317 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; 318 break; 319 case 'j': lm = AsIntMax; ++I; break; 320 case 'z': lm = AsSizeT; ++I; break; 321 case 't': lm = AsPtrDiff; ++I; break; 322 case 'L': lm = AsLongDouble; ++I; break; 323 case 'q': lm = AsLongLong; ++I; break; 324 } 325 FS.setLengthModifier(lm); 326 327 if (I == E) { 328 // No more characters left? 329 H.HandleIncompleteFormatSpecifier(Start, E - Start); 330 return true; 331 } 332 333 if (*I == '\0') { 334 // Detect spurious null characters, which are likely errors. 335 H.HandleNullChar(I); 336 return true; 337 } 338 339 // Finally, look for the conversion specifier. 340 const char *conversionPosition = I++; 341 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 342 switch (*conversionPosition) { 343 default: 344 break; 345 // C99: 7.19.6.1 (section 8). 346 case '%': k = ConversionSpecifier::PercentArg; break; 347 case 'A': k = ConversionSpecifier::AArg; break; 348 case 'E': k = ConversionSpecifier::EArg; break; 349 case 'F': k = ConversionSpecifier::FArg; break; 350 case 'G': k = ConversionSpecifier::GArg; break; 351 case 'X': k = ConversionSpecifier::XArg; break; 352 case 'a': k = ConversionSpecifier::aArg; break; 353 case 'c': k = ConversionSpecifier::IntAsCharArg; break; 354 case 'd': k = ConversionSpecifier::dArg; break; 355 case 'e': k = ConversionSpecifier::eArg; break; 356 case 'f': k = ConversionSpecifier::fArg; break; 357 case 'g': k = ConversionSpecifier::gArg; break; 358 case 'i': k = ConversionSpecifier::iArg; break; 359 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 360 case 'o': k = ConversionSpecifier::oArg; break; 361 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 362 case 's': k = ConversionSpecifier::CStrArg; break; 363 case 'u': k = ConversionSpecifier::uArg; break; 364 case 'x': k = ConversionSpecifier::xArg; break; 365 // Mac OS X (unicode) specific 366 case 'C': k = ConversionSpecifier::CArg; break; 367 case 'S': k = ConversionSpecifier::UnicodeStrArg; break; 368 // Objective-C. 369 case '@': k = ConversionSpecifier::ObjCObjArg; break; 370 // Glibc specific. 371 case 'm': k = ConversionSpecifier::PrintErrno; break; 372 } 373 ConversionSpecifier CS(conversionPosition, k); 374 FS.setConversionSpecifier(CS); 375 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 376 FS.setArgIndex(argIndex++); 377 378 if (k == ConversionSpecifier::InvalidSpecifier) { 379 // Assume the conversion takes one argument. 380 return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); 381 } 382 return FormatSpecifierResult(Start, FS); 383} 384 385bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, 386 const char *I, const char *E) { 387 388 unsigned argIndex = 0; 389 390 // Keep looking for a format specifier until we have exhausted the string. 391 while (I != E) { 392 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex); 393 // Did a fail-stop error of any kind occur when parsing the specifier? 394 // If so, don't do any more processing. 395 if (FSR.shouldStop()) 396 return true;; 397 // Did we exhaust the string or encounter an error that 398 // we can recover from? 399 if (!FSR.hasValue()) 400 continue; 401 // We have a format specifier. Pass it to the callback. 402 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), 403 I - FSR.getStart())) 404 return true; 405 } 406 assert(I == E && "Format string not exhausted"); 407 return false; 408} 409 410FormatStringHandler::~FormatStringHandler() {} 411 412//===----------------------------------------------------------------------===// 413// Methods on ArgTypeResult. 414//===----------------------------------------------------------------------===// 415 416bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 417 assert(isValid()); 418 419 if (K == UnknownTy) 420 return true; 421 422 if (K == SpecificTy) { 423 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 424 425 if (T == argTy) 426 return true; 427 428 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 429 switch (BT->getKind()) { 430 default: 431 break; 432 case BuiltinType::Char_S: 433 case BuiltinType::SChar: 434 return T == C.UnsignedCharTy; 435 case BuiltinType::Char_U: 436 case BuiltinType::UChar: 437 return T == C.SignedCharTy; 438 case BuiltinType::Short: 439 return T == C.UnsignedShortTy; 440 case BuiltinType::UShort: 441 return T == C.ShortTy; 442 case BuiltinType::Int: 443 return T == C.UnsignedIntTy; 444 case BuiltinType::UInt: 445 return T == C.IntTy; 446 case BuiltinType::Long: 447 return T == C.UnsignedLongTy; 448 case BuiltinType::ULong: 449 return T == C.LongTy; 450 case BuiltinType::LongLong: 451 return T == C.UnsignedLongLongTy; 452 case BuiltinType::ULongLong: 453 return T == C.LongLongTy; 454 } 455 456 return false; 457 } 458 459 if (K == CStrTy) { 460 const PointerType *PT = argTy->getAs<PointerType>(); 461 if (!PT) 462 return false; 463 464 QualType pointeeTy = PT->getPointeeType(); 465 466 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 467 switch (BT->getKind()) { 468 case BuiltinType::Void: 469 case BuiltinType::Char_U: 470 case BuiltinType::UChar: 471 case BuiltinType::Char_S: 472 case BuiltinType::SChar: 473 return true; 474 default: 475 break; 476 } 477 478 return false; 479 } 480 481 if (K == WCStrTy) { 482 const PointerType *PT = argTy->getAs<PointerType>(); 483 if (!PT) 484 return false; 485 486 QualType pointeeTy = 487 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 488 489 return pointeeTy == C.getWCharType(); 490 } 491 492 return false; 493} 494 495QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 496 assert(isValid()); 497 if (K == SpecificTy) 498 return T; 499 if (K == CStrTy) 500 return C.getPointerType(C.CharTy); 501 if (K == WCStrTy) 502 return C.getPointerType(C.getWCharType()); 503 if (K == ObjCPointerTy) 504 return C.ObjCBuiltinIdTy; 505 506 return QualType(); 507} 508 509//===----------------------------------------------------------------------===// 510// Methods on OptionalAmount. 511//===----------------------------------------------------------------------===// 512 513ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const { 514 return Ctx.IntTy; 515} 516 517//===----------------------------------------------------------------------===// 518// Methods on FormatSpecifier. 519//===----------------------------------------------------------------------===// 520 521ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { 522 if (!CS.consumesDataArgument()) 523 return ArgTypeResult::Invalid(); 524 525 if (CS.isIntArg()) 526 switch (LM) { 527 case AsLongDouble: 528 return ArgTypeResult::Invalid(); 529 case None: return Ctx.IntTy; 530 case AsChar: return Ctx.SignedCharTy; 531 case AsShort: return Ctx.ShortTy; 532 case AsLong: return Ctx.LongTy; 533 case AsLongLong: return Ctx.LongLongTy; 534 case AsIntMax: 535 // FIXME: Return unknown for now. 536 return ArgTypeResult(); 537 case AsSizeT: return Ctx.getSizeType(); 538 case AsPtrDiff: return Ctx.getPointerDiffType(); 539 } 540 541 if (CS.isUIntArg()) 542 switch (LM) { 543 case AsLongDouble: 544 return ArgTypeResult::Invalid(); 545 case None: return Ctx.UnsignedIntTy; 546 case AsChar: return Ctx.UnsignedCharTy; 547 case AsShort: return Ctx.UnsignedShortTy; 548 case AsLong: return Ctx.UnsignedLongTy; 549 case AsLongLong: return Ctx.UnsignedLongLongTy; 550 case AsIntMax: 551 // FIXME: Return unknown for now. 552 return ArgTypeResult(); 553 case AsSizeT: 554 // FIXME: How to get the corresponding unsigned 555 // version of size_t? 556 return ArgTypeResult(); 557 case AsPtrDiff: 558 // FIXME: How to get the corresponding unsigned 559 // version of ptrdiff_t? 560 return ArgTypeResult(); 561 } 562 563 if (CS.isDoubleArg()) { 564 if (LM == AsLongDouble) 565 return Ctx.LongDoubleTy; 566 return Ctx.DoubleTy; 567 } 568 569 switch (CS.getKind()) { 570 case ConversionSpecifier::CStrArg: 571 return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 572 case ConversionSpecifier::UnicodeStrArg: 573 // FIXME: This appears to be Mac OS X specific. 574 return ArgTypeResult::WCStrTy; 575 case ConversionSpecifier::CArg: 576 return Ctx.WCharTy; 577 default: 578 break; 579 } 580 581 // FIXME: Handle other cases. 582 return ArgTypeResult(); 583} 584 585