PrintfFormatString.cpp revision efaff195ba1fa55b6fe0b0b2435b81451387d241
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/PrintfFormatString.h" 16#include "clang/AST/ASTContext.h" 17 18using clang::analyze_printf::ArgTypeResult; 19using clang::analyze_printf::FormatSpecifier; 20using clang::analyze_printf::FormatStringHandler; 21using clang::analyze_printf::OptionalAmount; 22using clang::analyze_printf::PositionContext; 23 24using namespace clang; 25 26namespace { 27class FormatSpecifierResult { 28 FormatSpecifier FS; 29 const char *Start; 30 bool Stop; 31public: 32 FormatSpecifierResult(bool stop = false) 33 : Start(0), Stop(stop) {} 34 FormatSpecifierResult(const char *start, 35 const FormatSpecifier &fs) 36 : FS(fs), Start(start), Stop(false) {} 37 38 39 const char *getStart() const { return Start; } 40 bool shouldStop() const { return Stop; } 41 bool hasValue() const { return Start != 0; } 42 const FormatSpecifier &getValue() const { 43 assert(hasValue()); 44 return FS; 45 } 46 const FormatSpecifier &getValue() { return FS; } 47}; 48} // end anonymous namespace 49 50template <typename T> 51class UpdateOnReturn { 52 T &ValueToUpdate; 53 const T &ValueToCopy; 54public: 55 UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) 56 : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} 57 58 ~UpdateOnReturn() { 59 ValueToUpdate = ValueToCopy; 60 } 61}; 62 63//===----------------------------------------------------------------------===// 64// Methods for parsing format strings. 65//===----------------------------------------------------------------------===// 66 67static OptionalAmount ParseAmount(const char *&Beg, const char *E) { 68 const char *I = Beg; 69 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 70 71 unsigned accumulator = 0; 72 73 for ( ; I != E; ++I) { 74 char c = *I; 75 if (c >= '0' && c <= '9') { 76 // Ignore '0' on the first character. 77 if (c == '0' && I == Beg) 78 break; 79 accumulator += (accumulator * 10) + (c - '0'); 80 continue; 81 } 82 83 if (accumulator) 84 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg); 85 86 break; 87 } 88 89 return OptionalAmount(); 90} 91 92static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E, 93 unsigned &argIndex) { 94 if (*Beg == '*') { 95 ++Beg; 96 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg); 97 } 98 99 return ParseAmount(Beg, E); 100} 101 102static OptionalAmount ParsePositionAmount(FormatStringHandler &H, 103 const char *Start, 104 const char *&Beg, const char *E, 105 PositionContext p) { 106 if (*Beg == '*') { 107 const char *I = Beg + 1; 108 const OptionalAmount &Amt = ParseAmount(I, E); 109 110 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 111 H.HandleInvalidPosition(Beg, I - Beg, p); 112 return OptionalAmount(false); 113 } 114 115 if (I== E) { 116 // No more characters left? 117 H.HandleIncompleteFormatSpecifier(Start, E - Start); 118 return OptionalAmount(false); 119 } 120 121 if (*I == '$') { 122 const char *Tmp = Beg; 123 Beg = ++I; 124 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 125 Tmp); 126 } 127 128 H.HandleInvalidPosition(Beg, I - Beg, p); 129 return OptionalAmount(false); 130 } 131 132 return ParseAmount(Beg, E); 133} 134 135static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS, 136 const char *Start, const char *&Beg, const char *E, 137 unsigned *argIndex) { 138 if (argIndex) { 139 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex)); 140 } 141 else { 142 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 143 analyze_printf::PrecisionPos); 144 if (Amt.isInvalid()) 145 return true; 146 FS.setPrecision(Amt); 147 } 148 return false; 149} 150 151static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS, 152 const char *Start, const char *&Beg, const char *E, 153 unsigned *argIndex) { 154 // FIXME: Support negative field widths. 155 if (argIndex) { 156 FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 157 } 158 else { 159 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, 160 analyze_printf::FieldWidthPos); 161 if (Amt.isInvalid()) 162 return true; 163 FS.setFieldWidth(Amt); 164 } 165 return false; 166} 167 168 169static bool ParseArgPosition(FormatStringHandler &H, 170 FormatSpecifier &FS, const char *Start, 171 const char *&Beg, const char *E) { 172 173 using namespace clang::analyze_printf; 174 const char *I = Beg; 175 176 const OptionalAmount &Amt = ParseAmount(I, E); 177 178 if (I == E) { 179 // No more characters left? 180 H.HandleIncompleteFormatSpecifier(Start, E - Start); 181 return true; 182 } 183 184 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 185 FS.setArgIndex(Amt.getConstantAmount() - 1); 186 FS.setUsesPositionalArg(); 187 // Update the caller's pointer if we decided to consume 188 // these characters. 189 Beg = I; 190 return false; 191 } 192 193 // Special case: '%0$', since this is an easy mistake. 194 if (*I == '0' && (I+1) != E && *(I+1) == '$') { 195 H.HandleZeroPosition(Start, I - Start + 2); 196 return true; 197 } 198 199 return false; 200} 201 202static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, 203 const char *&Beg, 204 const char *E, 205 unsigned &argIndex) { 206 207 using namespace clang::analyze_printf; 208 209 const char *I = Beg; 210 const char *Start = 0; 211 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 212 213 // Look for a '%' character that indicates the start of a format specifier. 214 for ( ; I != E ; ++I) { 215 char c = *I; 216 if (c == '\0') { 217 // Detect spurious null characters, which are likely errors. 218 H.HandleNullChar(I); 219 return true; 220 } 221 if (c == '%') { 222 Start = I++; // Record the start of the format specifier. 223 break; 224 } 225 } 226 227 // No format specifier found? 228 if (!Start) 229 return false; 230 231 if (I == E) { 232 // No more characters left? 233 H.HandleIncompleteFormatSpecifier(Start, E - Start); 234 return true; 235 } 236 237 FormatSpecifier FS; 238 if (ParseArgPosition(H, FS, Start, I, E)) 239 return true; 240 241 if (I == E) { 242 // No more characters left? 243 H.HandleIncompleteFormatSpecifier(Start, E - Start); 244 return true; 245 } 246 247 // Look for flags (if any). 248 bool hasMore = true; 249 for ( ; I != E; ++I) { 250 switch (*I) { 251 default: hasMore = false; break; 252 case '-': FS.setIsLeftJustified(); break; 253 case '+': FS.setHasPlusPrefix(); break; 254 case ' ': FS.setHasSpacePrefix(); break; 255 case '#': FS.setHasAlternativeForm(); break; 256 case '0': FS.setHasLeadingZeros(); break; 257 } 258 if (!hasMore) 259 break; 260 } 261 262 if (I == E) { 263 // No more characters left? 264 H.HandleIncompleteFormatSpecifier(Start, E - Start); 265 return true; 266 } 267 268 // Look for the field width (if any). 269 if (ParseFieldWidth(H, FS, Start, I, E, 270 FS.usesPositionalArg() ? 0 : &argIndex)) 271 return true; 272 273 if (I == E) { 274 // No more characters left? 275 H.HandleIncompleteFormatSpecifier(Start, E - Start); 276 return true; 277 } 278 279 // Look for the precision (if any). 280 if (*I == '.') { 281 ++I; 282 if (I == E) { 283 H.HandleIncompleteFormatSpecifier(Start, E - Start); 284 return true; 285 } 286 287 if (ParsePrecision(H, FS, Start, I, E, 288 FS.usesPositionalArg() ? 0 : &argIndex)) 289 return true; 290 291 if (I == E) { 292 // No more characters left? 293 H.HandleIncompleteFormatSpecifier(Start, E - Start); 294 return true; 295 } 296 } 297 298 // Look for the length modifier. 299 LengthModifier lm = None; 300 switch (*I) { 301 default: 302 break; 303 case 'h': 304 ++I; 305 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; 306 break; 307 case 'l': 308 ++I; 309 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; 310 break; 311 case 'j': lm = AsIntMax; ++I; break; 312 case 'z': lm = AsSizeT; ++I; break; 313 case 't': lm = AsPtrDiff; ++I; break; 314 case 'L': lm = AsLongDouble; ++I; break; 315 case 'q': lm = AsLongLong; ++I; break; 316 } 317 FS.setLengthModifier(lm); 318 319 if (I == E) { 320 // No more characters left? 321 H.HandleIncompleteFormatSpecifier(Start, E - Start); 322 return true; 323 } 324 325 if (*I == '\0') { 326 // Detect spurious null characters, which are likely errors. 327 H.HandleNullChar(I); 328 return true; 329 } 330 331 // Finally, look for the conversion specifier. 332 const char *conversionPosition = I++; 333 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 334 switch (*conversionPosition) { 335 default: 336 break; 337 // C99: 7.19.6.1 (section 8). 338 case '%': k = ConversionSpecifier::PercentArg; break; 339 case 'A': k = ConversionSpecifier::AArg; break; 340 case 'E': k = ConversionSpecifier::EArg; break; 341 case 'F': k = ConversionSpecifier::FArg; break; 342 case 'G': k = ConversionSpecifier::GArg; break; 343 case 'X': k = ConversionSpecifier::XArg; break; 344 case 'a': k = ConversionSpecifier::aArg; break; 345 case 'c': k = ConversionSpecifier::IntAsCharArg; break; 346 case 'd': k = ConversionSpecifier::dArg; break; 347 case 'e': k = ConversionSpecifier::eArg; break; 348 case 'f': k = ConversionSpecifier::fArg; break; 349 case 'g': k = ConversionSpecifier::gArg; break; 350 case 'i': k = ConversionSpecifier::iArg; break; 351 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 352 case 'o': k = ConversionSpecifier::oArg; break; 353 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 354 case 's': k = ConversionSpecifier::CStrArg; break; 355 case 'u': k = ConversionSpecifier::uArg; break; 356 case 'x': k = ConversionSpecifier::xArg; break; 357 // Mac OS X (unicode) specific 358 case 'C': k = ConversionSpecifier::CArg; break; 359 case 'S': k = ConversionSpecifier::UnicodeStrArg; break; 360 // Objective-C. 361 case '@': k = ConversionSpecifier::ObjCObjArg; break; 362 // Glibc specific. 363 case 'm': k = ConversionSpecifier::PrintErrno; break; 364 } 365 ConversionSpecifier CS(conversionPosition, k); 366 FS.setConversionSpecifier(CS); 367 if (CS.consumesDataArgument() && !FS.usesPositionalArg()) 368 FS.setArgIndex(argIndex++); 369 370 if (k == ConversionSpecifier::InvalidSpecifier) { 371 // Assume the conversion takes one argument. 372 return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); 373 } 374 return FormatSpecifierResult(Start, FS); 375} 376 377bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, 378 const char *I, const char *E) { 379 380 unsigned argIndex = 0; 381 382 // Keep looking for a format specifier until we have exhausted the string. 383 while (I != E) { 384 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex); 385 // Did a fail-stop error of any kind occur when parsing the specifier? 386 // If so, don't do any more processing. 387 if (FSR.shouldStop()) 388 return true;; 389 // Did we exhaust the string or encounter an error that 390 // we can recover from? 391 if (!FSR.hasValue()) 392 continue; 393 // We have a format specifier. Pass it to the callback. 394 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), 395 I - FSR.getStart())) 396 return true; 397 } 398 assert(I == E && "Format string not exhausted"); 399 return false; 400} 401 402FormatStringHandler::~FormatStringHandler() {} 403 404//===----------------------------------------------------------------------===// 405// Methods on ArgTypeResult. 406//===----------------------------------------------------------------------===// 407 408bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 409 assert(isValid()); 410 411 if (K == UnknownTy) 412 return true; 413 414 if (K == SpecificTy) { 415 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 416 417 if (T == argTy) 418 return true; 419 420 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 421 switch (BT->getKind()) { 422 default: 423 break; 424 case BuiltinType::Char_S: 425 case BuiltinType::SChar: 426 return T == C.UnsignedCharTy; 427 case BuiltinType::Char_U: 428 case BuiltinType::UChar: 429 return T == C.SignedCharTy; 430 case BuiltinType::Short: 431 return T == C.UnsignedShortTy; 432 case BuiltinType::UShort: 433 return T == C.ShortTy; 434 case BuiltinType::Int: 435 return T == C.UnsignedIntTy; 436 case BuiltinType::UInt: 437 return T == C.IntTy; 438 case BuiltinType::Long: 439 return T == C.UnsignedLongTy; 440 case BuiltinType::ULong: 441 return T == C.LongTy; 442 case BuiltinType::LongLong: 443 return T == C.UnsignedLongLongTy; 444 case BuiltinType::ULongLong: 445 return T == C.LongLongTy; 446 } 447 448 return false; 449 } 450 451 if (K == CStrTy) { 452 const PointerType *PT = argTy->getAs<PointerType>(); 453 if (!PT) 454 return false; 455 456 QualType pointeeTy = PT->getPointeeType(); 457 458 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 459 switch (BT->getKind()) { 460 case BuiltinType::Void: 461 case BuiltinType::Char_U: 462 case BuiltinType::UChar: 463 case BuiltinType::Char_S: 464 case BuiltinType::SChar: 465 return true; 466 default: 467 break; 468 } 469 470 return false; 471 } 472 473 if (K == WCStrTy) { 474 const PointerType *PT = argTy->getAs<PointerType>(); 475 if (!PT) 476 return false; 477 478 QualType pointeeTy = 479 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 480 481 return pointeeTy == C.getWCharType(); 482 } 483 484 return false; 485} 486 487QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 488 assert(isValid()); 489 if (K == SpecificTy) 490 return T; 491 if (K == CStrTy) 492 return C.getPointerType(C.CharTy); 493 if (K == WCStrTy) 494 return C.getPointerType(C.getWCharType()); 495 if (K == ObjCPointerTy) 496 return C.ObjCBuiltinIdTy; 497 498 return QualType(); 499} 500 501//===----------------------------------------------------------------------===// 502// Methods on OptionalAmount. 503//===----------------------------------------------------------------------===// 504 505ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const { 506 return Ctx.IntTy; 507} 508 509//===----------------------------------------------------------------------===// 510// Methods on FormatSpecifier. 511//===----------------------------------------------------------------------===// 512 513ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { 514 if (!CS.consumesDataArgument()) 515 return ArgTypeResult::Invalid(); 516 517 if (CS.isIntArg()) 518 switch (LM) { 519 case AsLongDouble: 520 return ArgTypeResult::Invalid(); 521 case None: return Ctx.IntTy; 522 case AsChar: return Ctx.SignedCharTy; 523 case AsShort: return Ctx.ShortTy; 524 case AsLong: return Ctx.LongTy; 525 case AsLongLong: return Ctx.LongLongTy; 526 case AsIntMax: 527 // FIXME: Return unknown for now. 528 return ArgTypeResult(); 529 case AsSizeT: return Ctx.getSizeType(); 530 case AsPtrDiff: return Ctx.getPointerDiffType(); 531 } 532 533 if (CS.isUIntArg()) 534 switch (LM) { 535 case AsLongDouble: 536 return ArgTypeResult::Invalid(); 537 case None: return Ctx.UnsignedIntTy; 538 case AsChar: return Ctx.UnsignedCharTy; 539 case AsShort: return Ctx.UnsignedShortTy; 540 case AsLong: return Ctx.UnsignedLongTy; 541 case AsLongLong: return Ctx.UnsignedLongLongTy; 542 case AsIntMax: 543 // FIXME: Return unknown for now. 544 return ArgTypeResult(); 545 case AsSizeT: 546 // FIXME: How to get the corresponding unsigned 547 // version of size_t? 548 return ArgTypeResult(); 549 case AsPtrDiff: 550 // FIXME: How to get the corresponding unsigned 551 // version of ptrdiff_t? 552 return ArgTypeResult(); 553 } 554 555 if (CS.isDoubleArg()) { 556 if (LM == AsLongDouble) 557 return Ctx.LongDoubleTy; 558 return Ctx.DoubleTy; 559 } 560 561 switch (CS.getKind()) { 562 case ConversionSpecifier::CStrArg: 563 return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 564 case ConversionSpecifier::UnicodeStrArg: 565 // FIXME: This appears to be Mac OS X specific. 566 return ArgTypeResult::WCStrTy; 567 case ConversionSpecifier::CArg: 568 return Ctx.WCharTy; 569 default: 570 break; 571 } 572 573 // FIXME: Handle other cases. 574 return ArgTypeResult(); 575} 576 577