ScanfFormatString.cpp revision d02deebce5f1b283101e035a7f5d5bab0d2068ec
1//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in scanf and friends. The structure of format 11// strings for fscanf() are described in C99 7.19.6.2. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_format_string::ConversionSpecifier; 23using clang::analyze_scanf::ScanfArgTypeResult; 24using clang::analyze_scanf::ScanfConversionSpecifier; 25using clang::analyze_scanf::ScanfSpecifier; 26using clang::UpdateOnReturn; 27using namespace clang; 28 29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30 ScanfSpecifierResult; 31 32static bool ParseScanList(FormatStringHandler &H, 33 ScanfConversionSpecifier &CS, 34 const char *&Beg, const char *E) { 35 const char *I = Beg; 36 const char *start = I - 1; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 // No more characters? 40 if (I == E) { 41 H.HandleIncompleteScanList(start, I); 42 return true; 43 } 44 45 // Special case: ']' is the first character. 46 if (*I == ']') { 47 if (++I == E) { 48 H.HandleIncompleteScanList(start, I - 1); 49 return true; 50 } 51 } 52 53 // Look for a ']' character which denotes the end of the scan list. 54 while (*I != ']') { 55 if (++I == E) { 56 H.HandleIncompleteScanList(start, I - 1); 57 return true; 58 } 59 } 60 61 CS.setEndScanList(I); 62 return false; 63} 64 65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 66// We can possibly refactor. 67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 68 const char *&Beg, 69 const char *E, 70 unsigned &argIndex, 71 const LangOptions &LO) { 72 73 using namespace clang::analyze_scanf; 74 const char *I = Beg; 75 const char *Start = 0; 76 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 77 78 // Look for a '%' character that indicates the start of a format specifier. 79 for ( ; I != E ; ++I) { 80 char c = *I; 81 if (c == '\0') { 82 // Detect spurious null characters, which are likely errors. 83 H.HandleNullChar(I); 84 return true; 85 } 86 if (c == '%') { 87 Start = I++; // Record the start of the format specifier. 88 break; 89 } 90 } 91 92 // No format specifier found? 93 if (!Start) 94 return false; 95 96 if (I == E) { 97 // No more characters left? 98 H.HandleIncompleteSpecifier(Start, E - Start); 99 return true; 100 } 101 102 ScanfSpecifier FS; 103 if (ParseArgPosition(H, FS, Start, I, E)) 104 return true; 105 106 if (I == E) { 107 // No more characters left? 108 H.HandleIncompleteSpecifier(Start, E - Start); 109 return true; 110 } 111 112 // Look for '*' flag if it is present. 113 if (*I == '*') { 114 FS.setSuppressAssignment(I); 115 if (++I == E) { 116 H.HandleIncompleteSpecifier(Start, E - Start); 117 return true; 118 } 119 } 120 121 // Look for the field width (if any). Unlike printf, this is either 122 // a fixed integer or isn't present. 123 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 124 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 125 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 126 FS.setFieldWidth(Amt); 127 128 if (I == E) { 129 // No more characters left? 130 H.HandleIncompleteSpecifier(Start, E - Start); 131 return true; 132 } 133 } 134 135 // Look for the length modifier. 136 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 137 // No more characters left? 138 H.HandleIncompleteSpecifier(Start, E - Start); 139 return true; 140 } 141 142 // Detect spurious null characters, which are likely errors. 143 if (*I == '\0') { 144 H.HandleNullChar(I); 145 return true; 146 } 147 148 // Finally, look for the conversion specifier. 149 const char *conversionPosition = I++; 150 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 151 switch (*conversionPosition) { 152 default: 153 break; 154 case '%': k = ConversionSpecifier::PercentArg; break; 155 case 'A': k = ConversionSpecifier::AArg; break; 156 case 'E': k = ConversionSpecifier::EArg; break; 157 case 'F': k = ConversionSpecifier::FArg; break; 158 case 'G': k = ConversionSpecifier::GArg; break; 159 case 'X': k = ConversionSpecifier::XArg; break; 160 case 'a': k = ConversionSpecifier::aArg; break; 161 case 'd': k = ConversionSpecifier::dArg; break; 162 case 'e': k = ConversionSpecifier::eArg; break; 163 case 'f': k = ConversionSpecifier::fArg; break; 164 case 'g': k = ConversionSpecifier::gArg; break; 165 case 'i': k = ConversionSpecifier::iArg; break; 166 case 'n': k = ConversionSpecifier::nArg; break; 167 case 'c': k = ConversionSpecifier::cArg; break; 168 case 'C': k = ConversionSpecifier::CArg; break; 169 case 'S': k = ConversionSpecifier::SArg; break; 170 case '[': k = ConversionSpecifier::ScanListArg; break; 171 case 'u': k = ConversionSpecifier::uArg; break; 172 case 'x': k = ConversionSpecifier::xArg; break; 173 case 'o': k = ConversionSpecifier::oArg; break; 174 case 's': k = ConversionSpecifier::sArg; break; 175 case 'p': k = ConversionSpecifier::pArg; break; 176 } 177 ScanfConversionSpecifier CS(conversionPosition, k); 178 if (k == ScanfConversionSpecifier::ScanListArg) { 179 if (!ParseScanList(H, CS, I, E)) 180 return true; 181 } 182 FS.setConversionSpecifier(CS); 183 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 184 && !FS.usesPositionalArg()) 185 FS.setArgIndex(argIndex++); 186 187 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 188 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 189 190 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 191 // Assume the conversion takes one argument. 192 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 193 } 194 return ScanfSpecifierResult(Start, FS); 195} 196 197ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { 198 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 199 200 if (!CS.consumesDataArgument()) 201 return ScanfArgTypeResult::Invalid(); 202 203 switch(CS.getKind()) { 204 // Signed int. 205 case ConversionSpecifier::dArg: 206 case ConversionSpecifier::iArg: 207 switch (LM.getKind()) { 208 case LengthModifier::None: return ArgTypeResult(Ctx.IntTy); 209 case LengthModifier::AsChar: 210 return ArgTypeResult(ArgTypeResult::AnyCharTy); 211 case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy); 212 case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy); 213 case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy); 214 case LengthModifier::AsIntMax: 215 return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *"); 216 case LengthModifier::AsSizeT: 217 // FIXME: ssize_t. 218 return ScanfArgTypeResult(); 219 case LengthModifier::AsPtrDiff: 220 return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *"); 221 case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid(); 222 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); 223 } 224 225 // Unsigned int. 226 case ConversionSpecifier::oArg: 227 case ConversionSpecifier::uArg: 228 case ConversionSpecifier::xArg: 229 case ConversionSpecifier::XArg: 230 switch (LM.getKind()) { 231 case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy); 232 case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy); 233 case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy); 234 case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy); 235 case LengthModifier::AsLongLong: 236 return ArgTypeResult(Ctx.UnsignedLongLongTy); 237 case LengthModifier::AsIntMax: 238 return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *"); 239 case LengthModifier::AsSizeT: 240 return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *"); 241 case LengthModifier::AsPtrDiff: 242 // FIXME: Unsigned version of ptrdiff_t? 243 return ScanfArgTypeResult(); 244 case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid(); 245 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); 246 } 247 248 // Float. 249 case ConversionSpecifier::aArg: 250 case ConversionSpecifier::AArg: 251 case ConversionSpecifier::eArg: 252 case ConversionSpecifier::EArg: 253 case ConversionSpecifier::fArg: 254 case ConversionSpecifier::FArg: 255 case ConversionSpecifier::gArg: 256 case ConversionSpecifier::GArg: 257 switch (LM.getKind()) { 258 case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy); 259 case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy); 260 case LengthModifier::AsLongDouble: 261 return ArgTypeResult(Ctx.LongDoubleTy); 262 default: 263 return ScanfArgTypeResult::Invalid(); 264 } 265 266 // Char, string and scanlist. 267 case ConversionSpecifier::cArg: 268 case ConversionSpecifier::sArg: 269 case ConversionSpecifier::ScanListArg: 270 switch (LM.getKind()) { 271 case LengthModifier::None: return ScanfArgTypeResult::CStrTy; 272 case LengthModifier::AsLong: 273 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 274 default: 275 return ScanfArgTypeResult::Invalid(); 276 } 277 case ConversionSpecifier::CArg: 278 case ConversionSpecifier::SArg: 279 // FIXME: Mac OS X specific? 280 if (LM.getKind() == LengthModifier::None) 281 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 282 return ScanfArgTypeResult::Invalid(); 283 284 // Pointer. 285 case ConversionSpecifier::pArg: 286 return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy)); 287 288 default: 289 break; 290 } 291 292 return ScanfArgTypeResult(); 293} 294 295bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt) 296{ 297 if (!QT->isPointerType()) 298 return false; 299 300 QualType PT = QT->getPointeeType(); 301 const BuiltinType *BT = PT->getAs<BuiltinType>(); 302 if (!BT) 303 return false; 304 305 // Pointer to a character. 306 if (PT->isAnyCharacterType()) { 307 CS.setKind(ConversionSpecifier::sArg); 308 if (PT->isWideCharType()) 309 LM.setKind(LengthModifier::AsWideChar); 310 else 311 LM.setKind(LengthModifier::None); 312 return true; 313 } 314 315 // Figure out the length modifier. 316 switch (BT->getKind()) { 317 // no modifier 318 case BuiltinType::UInt: 319 case BuiltinType::Int: 320 case BuiltinType::Float: 321 LM.setKind(LengthModifier::None); 322 break; 323 324 // hh 325 case BuiltinType::Char_U: 326 case BuiltinType::UChar: 327 case BuiltinType::Char_S: 328 case BuiltinType::SChar: 329 LM.setKind(LengthModifier::AsChar); 330 break; 331 332 // h 333 case BuiltinType::Short: 334 case BuiltinType::UShort: 335 LM.setKind(LengthModifier::AsShort); 336 break; 337 338 // l 339 case BuiltinType::Long: 340 case BuiltinType::ULong: 341 case BuiltinType::Double: 342 LM.setKind(LengthModifier::AsLong); 343 break; 344 345 // ll 346 case BuiltinType::LongLong: 347 case BuiltinType::ULongLong: 348 LM.setKind(LengthModifier::AsLongLong); 349 break; 350 351 // L 352 case BuiltinType::LongDouble: 353 LM.setKind(LengthModifier::AsLongDouble); 354 break; 355 356 // Don't know. 357 default: 358 return false; 359 } 360 361 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 362 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { 363 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); 364 if (Identifier->getName() == "size_t") { 365 LM.setKind(LengthModifier::AsSizeT); 366 } else if (Identifier->getName() == "ssize_t") { 367 // Not C99, but common in Unix. 368 LM.setKind(LengthModifier::AsSizeT); 369 } else if (Identifier->getName() == "intmax_t") { 370 LM.setKind(LengthModifier::AsIntMax); 371 } else if (Identifier->getName() == "uintmax_t") { 372 LM.setKind(LengthModifier::AsIntMax); 373 } else if (Identifier->getName() == "ptrdiff_t") { 374 LM.setKind(LengthModifier::AsPtrDiff); 375 } 376 } 377 378 // Figure out the conversion specifier. 379 if (PT->isRealFloatingType()) 380 CS.setKind(ConversionSpecifier::fArg); 381 else if (PT->isSignedIntegerType()) 382 CS.setKind(ConversionSpecifier::dArg); 383 else if (PT->isUnsignedIntegerType()) { 384 // Preserve the original formatting, e.g. 'X', 'o'. 385 if (!CS.isUIntArg()) { 386 CS.setKind(ConversionSpecifier::uArg); 387 } 388 } else 389 llvm_unreachable("Unexpected type"); 390 391 return true; 392} 393 394void ScanfSpecifier::toString(raw_ostream &os) const { 395 os << "%"; 396 397 if (usesPositionalArg()) 398 os << getPositionalArgIndex() << "$"; 399 if (SuppressAssignment) 400 os << "*"; 401 402 FieldWidth.toString(os); 403 os << LM.toString(); 404 os << CS.toString(); 405} 406 407bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 408 const char *I, 409 const char *E, 410 const LangOptions &LO) { 411 412 unsigned argIndex = 0; 413 414 // Keep looking for a format specifier until we have exhausted the string. 415 while (I != E) { 416 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 417 LO); 418 // Did a fail-stop error of any kind occur when parsing the specifier? 419 // If so, don't do any more processing. 420 if (FSR.shouldStop()) 421 return true;; 422 // Did we exhaust the string or encounter an error that 423 // we can recover from? 424 if (!FSR.hasValue()) 425 continue; 426 // We have a format specifier. Pass it to the callback. 427 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 428 I - FSR.getStart())) { 429 return true; 430 } 431 } 432 assert(I == E && "Format string not exhausted"); 433 return false; 434} 435 436bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const { 437 switch (K) { 438 case InvalidTy: 439 llvm_unreachable("ArgTypeResult must be valid"); 440 case UnknownTy: 441 return true; 442 case CStrTy: 443 return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy); 444 case WCStrTy: 445 return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy); 446 case PtrToArgTypeResultTy: { 447 const PointerType *PT = argTy->getAs<PointerType>(); 448 if (!PT) 449 return false; 450 return A.matchesType(C, PT->getPointeeType()); 451 } 452 } 453 454 return false; // Unreachable, but we still get a warning. 455} 456 457QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const { 458 switch (K) { 459 case InvalidTy: 460 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 461 case UnknownTy: 462 return QualType(); 463 case CStrTy: 464 return C.getPointerType(C.CharTy); 465 case WCStrTy: 466 return C.getPointerType(C.getWCharType()); 467 case PtrToArgTypeResultTy: 468 return C.getPointerType(A.getRepresentativeType(C)); 469 } 470 471 return QualType(); // Not reachable. 472} 473 474std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const { 475 std::string S = getRepresentativeType(C).getAsString(); 476 if (!Name) 477 return std::string("'") + S + "'"; 478 return std::string("'") + Name + "' (aka '" + S + "')"; 479} 480