// ScanfFormatString.cpp revision 9d24c2cbd9cf1b7c165ccb13221f2efb2f4b49b0
1//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in scanf and friends. The structure of format 11// strings for fscanf() are described in C99 7.19.6.2. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_format_string::ConversionSpecifier; 23using clang::analyze_scanf::ScanfArgTypeResult; 24using clang::analyze_scanf::ScanfConversionSpecifier; 25using clang::analyze_scanf::ScanfSpecifier; 26using clang::UpdateOnReturn; 27using namespace clang; 28 29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30 ScanfSpecifierResult; 31 32static bool ParseScanList(FormatStringHandler &H, 33 ScanfConversionSpecifier &CS, 34 const char *&Beg, const char *E) { 35 const char *I = Beg; 36 const char *start = I - 1; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 // No more characters? 40 if (I == E) { 41 H.HandleIncompleteScanList(start, I); 42 return true; 43 } 44 45 // Special case: ']' is the first character. 46 if (*I == ']') { 47 if (++I == E) { 48 H.HandleIncompleteScanList(start, I - 1); 49 return true; 50 } 51 } 52 53 // Look for a ']' character which denotes the end of the scan list. 
54 while (*I != ']') { 55 if (++I == E) { 56 H.HandleIncompleteScanList(start, I - 1); 57 return true; 58 } 59 } 60 61 CS.setEndScanList(I); 62 return false; 63} 64 65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 66// We can possibly refactor. 67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 68 const char *&Beg, 69 const char *E, 70 unsigned &argIndex, 71 const LangOptions &LO) { 72 73 using namespace clang::analyze_scanf; 74 const char *I = Beg; 75 const char *Start = 0; 76 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 77 78 // Look for a '%' character that indicates the start of a format specifier. 79 for ( ; I != E ; ++I) { 80 char c = *I; 81 if (c == '\0') { 82 // Detect spurious null characters, which are likely errors. 83 H.HandleNullChar(I); 84 return true; 85 } 86 if (c == '%') { 87 Start = I++; // Record the start of the format specifier. 88 break; 89 } 90 } 91 92 // No format specifier found? 93 if (!Start) 94 return false; 95 96 if (I == E) { 97 // No more characters left? 98 H.HandleIncompleteSpecifier(Start, E - Start); 99 return true; 100 } 101 102 ScanfSpecifier FS; 103 if (ParseArgPosition(H, FS, Start, I, E)) 104 return true; 105 106 if (I == E) { 107 // No more characters left? 108 H.HandleIncompleteSpecifier(Start, E - Start); 109 return true; 110 } 111 112 // Look for '*' flag if it is present. 113 if (*I == '*') { 114 FS.setSuppressAssignment(I); 115 if (++I == E) { 116 H.HandleIncompleteSpecifier(Start, E - Start); 117 return true; 118 } 119 } 120 121 // Look for the field width (if any). Unlike printf, this is either 122 // a fixed integer or isn't present. 123 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 124 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 125 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 126 FS.setFieldWidth(Amt); 127 128 if (I == E) { 129 // No more characters left? 
130 H.HandleIncompleteSpecifier(Start, E - Start); 131 return true; 132 } 133 } 134 135 // Look for the length modifier. 136 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 137 // No more characters left? 138 H.HandleIncompleteSpecifier(Start, E - Start); 139 return true; 140 } 141 142 // Detect spurious null characters, which are likely errors. 143 if (*I == '\0') { 144 H.HandleNullChar(I); 145 return true; 146 } 147 148 // Finally, look for the conversion specifier. 149 const char *conversionPosition = I++; 150 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 151 switch (*conversionPosition) { 152 default: 153 break; 154 case '%': k = ConversionSpecifier::PercentArg; break; 155 case 'A': k = ConversionSpecifier::AArg; break; 156 case 'E': k = ConversionSpecifier::EArg; break; 157 case 'F': k = ConversionSpecifier::FArg; break; 158 case 'G': k = ConversionSpecifier::GArg; break; 159 case 'X': k = ConversionSpecifier::XArg; break; 160 case 'a': k = ConversionSpecifier::aArg; break; 161 case 'd': k = ConversionSpecifier::dArg; break; 162 case 'e': k = ConversionSpecifier::eArg; break; 163 case 'f': k = ConversionSpecifier::fArg; break; 164 case 'g': k = ConversionSpecifier::gArg; break; 165 case 'i': k = ConversionSpecifier::iArg; break; 166 case 'n': k = ConversionSpecifier::nArg; break; 167 case 'c': k = ConversionSpecifier::cArg; break; 168 case 'C': k = ConversionSpecifier::CArg; break; 169 case 'S': k = ConversionSpecifier::SArg; break; 170 case '[': k = ConversionSpecifier::ScanListArg; break; 171 case 'u': k = ConversionSpecifier::uArg; break; 172 case 'x': k = ConversionSpecifier::xArg; break; 173 case 'o': k = ConversionSpecifier::oArg; break; 174 case 's': k = ConversionSpecifier::sArg; break; 175 case 'p': k = ConversionSpecifier::pArg; break; 176 } 177 ScanfConversionSpecifier CS(conversionPosition, k); 178 if (k == ScanfConversionSpecifier::ScanListArg) { 179 if (ParseScanList(H, CS, I, E)) 180 return 
true; 181 } 182 FS.setConversionSpecifier(CS); 183 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 184 && !FS.usesPositionalArg()) 185 FS.setArgIndex(argIndex++); 186 187 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 188 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 189 190 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 191 // Assume the conversion takes one argument. 192 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 193 } 194 return ScanfSpecifierResult(Start, FS); 195} 196 197ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { 198 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 199 200 if (!CS.consumesDataArgument()) 201 return ScanfArgTypeResult::Invalid(); 202 203 switch(CS.getKind()) { 204 // Signed int. 205 case ConversionSpecifier::dArg: 206 case ConversionSpecifier::iArg: 207 switch (LM.getKind()) { 208 case LengthModifier::None: return ArgTypeResult(Ctx.IntTy); 209 case LengthModifier::AsChar: 210 return ArgTypeResult(ArgTypeResult::AnyCharTy); 211 case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy); 212 case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy); 213 case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy); 214 case LengthModifier::AsIntMax: 215 return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *"); 216 case LengthModifier::AsSizeT: 217 // FIXME: ssize_t. 218 return ScanfArgTypeResult(); 219 case LengthModifier::AsPtrDiff: 220 return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *"); 221 case LengthModifier::AsLongDouble: 222 // GNU extension. 223 return ArgTypeResult(Ctx.LongLongTy); 224 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); 225 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid(); 226 } 227 228 // Unsigned int. 
229 case ConversionSpecifier::oArg: 230 case ConversionSpecifier::uArg: 231 case ConversionSpecifier::xArg: 232 case ConversionSpecifier::XArg: 233 switch (LM.getKind()) { 234 case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy); 235 case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy); 236 case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy); 237 case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy); 238 case LengthModifier::AsLongLong: 239 return ArgTypeResult(Ctx.UnsignedLongLongTy); 240 case LengthModifier::AsIntMax: 241 return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *"); 242 case LengthModifier::AsSizeT: 243 return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *"); 244 case LengthModifier::AsPtrDiff: 245 // FIXME: Unsigned version of ptrdiff_t? 246 return ScanfArgTypeResult(); 247 case LengthModifier::AsLongDouble: 248 // GNU extension. 249 return ArgTypeResult(Ctx.UnsignedLongLongTy); 250 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); 251 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid(); 252 } 253 254 // Float. 255 case ConversionSpecifier::aArg: 256 case ConversionSpecifier::AArg: 257 case ConversionSpecifier::eArg: 258 case ConversionSpecifier::EArg: 259 case ConversionSpecifier::fArg: 260 case ConversionSpecifier::FArg: 261 case ConversionSpecifier::gArg: 262 case ConversionSpecifier::GArg: 263 switch (LM.getKind()) { 264 case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy); 265 case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy); 266 case LengthModifier::AsLongDouble: 267 return ArgTypeResult(Ctx.LongDoubleTy); 268 default: 269 return ScanfArgTypeResult::Invalid(); 270 } 271 272 // Char, string and scanlist. 
273 case ConversionSpecifier::cArg: 274 case ConversionSpecifier::sArg: 275 case ConversionSpecifier::ScanListArg: 276 switch (LM.getKind()) { 277 case LengthModifier::None: return ScanfArgTypeResult::CStrTy; 278 case LengthModifier::AsLong: 279 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 280 case LengthModifier::AsAllocate: 281 case LengthModifier::AsMAllocate: 282 return ScanfArgTypeResult(ArgTypeResult::CStrTy); 283 default: 284 return ScanfArgTypeResult::Invalid(); 285 } 286 case ConversionSpecifier::CArg: 287 case ConversionSpecifier::SArg: 288 // FIXME: Mac OS X specific? 289 switch (LM.getKind()) { 290 case LengthModifier::None: 291 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 292 case LengthModifier::AsAllocate: 293 case LengthModifier::AsMAllocate: 294 return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **"); 295 default: 296 return ScanfArgTypeResult::Invalid(); 297 } 298 299 // Pointer. 300 case ConversionSpecifier::pArg: 301 return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy)); 302 303 default: 304 break; 305 } 306 307 return ScanfArgTypeResult(); 308} 309 310bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt) 311{ 312 if (!QT->isPointerType()) 313 return false; 314 315 QualType PT = QT->getPointeeType(); 316 const BuiltinType *BT = PT->getAs<BuiltinType>(); 317 if (!BT) 318 return false; 319 320 // Pointer to a character. 321 if (PT->isAnyCharacterType()) { 322 CS.setKind(ConversionSpecifier::sArg); 323 if (PT->isWideCharType()) 324 LM.setKind(LengthModifier::AsWideChar); 325 else 326 LM.setKind(LengthModifier::None); 327 return true; 328 } 329 330 // Figure out the length modifier. 
331 switch (BT->getKind()) { 332 // no modifier 333 case BuiltinType::UInt: 334 case BuiltinType::Int: 335 case BuiltinType::Float: 336 LM.setKind(LengthModifier::None); 337 break; 338 339 // hh 340 case BuiltinType::Char_U: 341 case BuiltinType::UChar: 342 case BuiltinType::Char_S: 343 case BuiltinType::SChar: 344 LM.setKind(LengthModifier::AsChar); 345 break; 346 347 // h 348 case BuiltinType::Short: 349 case BuiltinType::UShort: 350 LM.setKind(LengthModifier::AsShort); 351 break; 352 353 // l 354 case BuiltinType::Long: 355 case BuiltinType::ULong: 356 case BuiltinType::Double: 357 LM.setKind(LengthModifier::AsLong); 358 break; 359 360 // ll 361 case BuiltinType::LongLong: 362 case BuiltinType::ULongLong: 363 LM.setKind(LengthModifier::AsLongLong); 364 break; 365 366 // L 367 case BuiltinType::LongDouble: 368 LM.setKind(LengthModifier::AsLongDouble); 369 break; 370 371 // Don't know. 372 default: 373 return false; 374 } 375 376 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 377 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { 378 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); 379 if (Identifier->getName() == "size_t") { 380 LM.setKind(LengthModifier::AsSizeT); 381 } else if (Identifier->getName() == "ssize_t") { 382 // Not C99, but common in Unix. 383 LM.setKind(LengthModifier::AsSizeT); 384 } else if (Identifier->getName() == "intmax_t") { 385 LM.setKind(LengthModifier::AsIntMax); 386 } else if (Identifier->getName() == "uintmax_t") { 387 LM.setKind(LengthModifier::AsIntMax); 388 } else if (Identifier->getName() == "ptrdiff_t") { 389 LM.setKind(LengthModifier::AsPtrDiff); 390 } 391 } 392 393 // Figure out the conversion specifier. 394 if (PT->isRealFloatingType()) 395 CS.setKind(ConversionSpecifier::fArg); 396 else if (PT->isSignedIntegerType()) 397 CS.setKind(ConversionSpecifier::dArg); 398 else if (PT->isUnsignedIntegerType()) { 399 // Preserve the original formatting, e.g. 'X', 'o'. 
400 if (!CS.isUIntArg()) { 401 CS.setKind(ConversionSpecifier::uArg); 402 } 403 } else 404 llvm_unreachable("Unexpected type"); 405 406 return true; 407} 408 409void ScanfSpecifier::toString(raw_ostream &os) const { 410 os << "%"; 411 412 if (usesPositionalArg()) 413 os << getPositionalArgIndex() << "$"; 414 if (SuppressAssignment) 415 os << "*"; 416 417 FieldWidth.toString(os); 418 os << LM.toString(); 419 os << CS.toString(); 420} 421 422bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 423 const char *I, 424 const char *E, 425 const LangOptions &LO) { 426 427 unsigned argIndex = 0; 428 429 // Keep looking for a format specifier until we have exhausted the string. 430 while (I != E) { 431 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 432 LO); 433 // Did a fail-stop error of any kind occur when parsing the specifier? 434 // If so, don't do any more processing. 435 if (FSR.shouldStop()) 436 return true;; 437 // Did we exhaust the string or encounter an error that 438 // we can recover from? 439 if (!FSR.hasValue()) 440 continue; 441 // We have a format specifier. Pass it to the callback. 
442 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 443 I - FSR.getStart())) { 444 return true; 445 } 446 } 447 assert(I == E && "Format string not exhausted"); 448 return false; 449} 450 451bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const { 452 switch (K) { 453 case InvalidTy: 454 llvm_unreachable("ArgTypeResult must be valid"); 455 case UnknownTy: 456 return true; 457 case CStrTy: 458 return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy); 459 case WCStrTy: 460 return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy); 461 case PtrToArgTypeResultTy: { 462 const PointerType *PT = argTy->getAs<PointerType>(); 463 if (!PT) 464 return false; 465 return A.matchesType(C, PT->getPointeeType()); 466 } 467 } 468 469 llvm_unreachable("Invalid ScanfArgTypeResult Kind!"); 470} 471 472QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const { 473 switch (K) { 474 case InvalidTy: 475 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 476 case UnknownTy: 477 return QualType(); 478 case CStrTy: 479 return C.getPointerType(C.CharTy); 480 case WCStrTy: 481 return C.getPointerType(C.getWCharType()); 482 case PtrToArgTypeResultTy: 483 return C.getPointerType(A.getRepresentativeType(C)); 484 } 485 486 llvm_unreachable("Invalid ScanfArgTypeResult Kind!"); 487} 488 489std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const { 490 std::string S = getRepresentativeType(C).getAsString(); 491 if (!Name) 492 return std::string("'") + S + "'"; 493 return std::string("'") + Name + "' (aka '" + S + "')"; 494} 495