ScanfFormatString.cpp revision 6fcd932dfd6835f70cc00d6f7c6789793f6d7b66
1//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in scanf and friends. The structure of format 11// strings for fscanf() are described in C99 7.19.6.2. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_format_string::ConversionSpecifier; 23using clang::analyze_scanf::ScanfArgTypeResult; 24using clang::analyze_scanf::ScanfConversionSpecifier; 25using clang::analyze_scanf::ScanfSpecifier; 26using clang::UpdateOnReturn; 27using namespace clang; 28 29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30 ScanfSpecifierResult; 31 32static bool ParseScanList(FormatStringHandler &H, 33 ScanfConversionSpecifier &CS, 34 const char *&Beg, const char *E) { 35 const char *I = Beg; 36 const char *start = I - 1; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 // No more characters? 40 if (I == E) { 41 H.HandleIncompleteScanList(start, I); 42 return true; 43 } 44 45 // Special case: ']' is the first character. 46 if (*I == ']') { 47 if (++I == E) { 48 H.HandleIncompleteScanList(start, I - 1); 49 return true; 50 } 51 } 52 53 // Look for a ']' character which denotes the end of the scan list. 54 while (*I != ']') { 55 if (++I == E) { 56 H.HandleIncompleteScanList(start, I - 1); 57 return true; 58 } 59 } 60 61 CS.setEndScanList(I); 62 return false; 63} 64 65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 66// We can possibly refactor. 67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 68 const char *&Beg, 69 const char *E, 70 unsigned &argIndex) { 71 72 using namespace clang::analyze_scanf; 73 const char *I = Beg; 74 const char *Start = 0; 75 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 76 77 // Look for a '%' character that indicates the start of a format specifier. 78 for ( ; I != E ; ++I) { 79 char c = *I; 80 if (c == '\0') { 81 // Detect spurious null characters, which are likely errors. 82 H.HandleNullChar(I); 83 return true; 84 } 85 if (c == '%') { 86 Start = I++; // Record the start of the format specifier. 87 break; 88 } 89 } 90 91 // No format specifier found? 92 if (!Start) 93 return false; 94 95 if (I == E) { 96 // No more characters left? 97 H.HandleIncompleteSpecifier(Start, E - Start); 98 return true; 99 } 100 101 ScanfSpecifier FS; 102 if (ParseArgPosition(H, FS, Start, I, E)) 103 return true; 104 105 if (I == E) { 106 // No more characters left? 107 H.HandleIncompleteSpecifier(Start, E - Start); 108 return true; 109 } 110 111 // Look for '*' flag if it is present. 112 if (*I == '*') { 113 FS.setSuppressAssignment(I); 114 if (++I == E) { 115 H.HandleIncompleteSpecifier(Start, E - Start); 116 return true; 117 } 118 } 119 120 // Look for the field width (if any). Unlike printf, this is either 121 // a fixed integer or isn't present. 122 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 123 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 124 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 125 FS.setFieldWidth(Amt); 126 127 if (I == E) { 128 // No more characters left? 129 H.HandleIncompleteSpecifier(Start, E - Start); 130 return true; 131 } 132 } 133 134 // Look for the length modifier. 135 if (ParseLengthModifier(FS, I, E) && I == E) { 136 // No more characters left? 137 H.HandleIncompleteSpecifier(Start, E - Start); 138 return true; 139 } 140 141 // Detect spurious null characters, which are likely errors. 142 if (*I == '\0') { 143 H.HandleNullChar(I); 144 return true; 145 } 146 147 // Finally, look for the conversion specifier. 148 const char *conversionPosition = I++; 149 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 150 switch (*conversionPosition) { 151 default: 152 break; 153 case '%': k = ConversionSpecifier::PercentArg; break; 154 case 'A': k = ConversionSpecifier::AArg; break; 155 case 'E': k = ConversionSpecifier::EArg; break; 156 case 'F': k = ConversionSpecifier::FArg; break; 157 case 'G': k = ConversionSpecifier::GArg; break; 158 case 'X': k = ConversionSpecifier::XArg; break; 159 case 'a': k = ConversionSpecifier::aArg; break; 160 case 'd': k = ConversionSpecifier::dArg; break; 161 case 'e': k = ConversionSpecifier::eArg; break; 162 case 'f': k = ConversionSpecifier::fArg; break; 163 case 'g': k = ConversionSpecifier::gArg; break; 164 case 'i': k = ConversionSpecifier::iArg; break; 165 case 'n': k = ConversionSpecifier::nArg; break; 166 case 'c': k = ConversionSpecifier::cArg; break; 167 case 'C': k = ConversionSpecifier::CArg; break; 168 case 'S': k = ConversionSpecifier::SArg; break; 169 case '[': k = ConversionSpecifier::ScanListArg; break; 170 case 'u': k = ConversionSpecifier::uArg; break; 171 case 'x': k = ConversionSpecifier::xArg; break; 172 case 'o': k = ConversionSpecifier::oArg; break; 173 case 's': k = ConversionSpecifier::sArg; break; 174 case 'p': k = ConversionSpecifier::pArg; break; 175 } 176 ScanfConversionSpecifier CS(conversionPosition, k); 177 if (k == ScanfConversionSpecifier::ScanListArg) { 178 if (!ParseScanList(H, CS, I, E)) 179 return true; 180 } 181 FS.setConversionSpecifier(CS); 182 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 183 && !FS.usesPositionalArg()) 184 FS.setArgIndex(argIndex++); 185 186 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 187 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 188 189 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 190 // Assume the conversion takes one argument. 191 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 192 } 193 return ScanfSpecifierResult(Start, FS); 194} 195 196ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { 197 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 198 199 if (!CS.consumesDataArgument()) 200 return ScanfArgTypeResult::Invalid(); 201 202 switch(CS.getKind()) { 203 // Signed int. 204 case ConversionSpecifier::dArg: 205 case ConversionSpecifier::iArg: 206 switch (LM.getKind()) { 207 case LengthModifier::None: return ArgTypeResult(Ctx.IntTy); 208 case LengthModifier::AsChar: 209 return ArgTypeResult(ArgTypeResult::AnyCharTy); 210 case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy); 211 case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy); 212 case LengthModifier::AsLongLong: return ArgTypeResult(Ctx.LongLongTy); 213 case LengthModifier::AsIntMax: 214 return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *"); 215 case LengthModifier::AsSizeT: 216 // FIXME: ssize_t. 217 return ScanfArgTypeResult(); 218 case LengthModifier::AsPtrDiff: 219 return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *"); 220 case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid(); 221 } 222 223 // Unsigned int. 224 case ConversionSpecifier::oArg: 225 case ConversionSpecifier::uArg: 226 case ConversionSpecifier::xArg: 227 case ConversionSpecifier::XArg: 228 switch (LM.getKind()) { 229 case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy); 230 case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy); 231 case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy); 232 case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy); 233 case LengthModifier::AsLongLong: 234 return ArgTypeResult(Ctx.UnsignedLongLongTy); 235 case LengthModifier::AsIntMax: 236 return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *"); 237 case LengthModifier::AsSizeT: 238 return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *"); 239 case LengthModifier::AsPtrDiff: 240 // FIXME: Unsigned version of ptrdiff_t? 241 return ScanfArgTypeResult(); 242 case LengthModifier::AsLongDouble: return ScanfArgTypeResult::Invalid(); 243 } 244 245 // Float. 246 case ConversionSpecifier::aArg: 247 case ConversionSpecifier::AArg: 248 case ConversionSpecifier::eArg: 249 case ConversionSpecifier::EArg: 250 case ConversionSpecifier::fArg: 251 case ConversionSpecifier::FArg: 252 case ConversionSpecifier::gArg: 253 case ConversionSpecifier::GArg: 254 switch (LM.getKind()) { 255 case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy); 256 case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy); 257 case LengthModifier::AsLongDouble: 258 return ArgTypeResult(Ctx.LongDoubleTy); 259 default: 260 return ScanfArgTypeResult::Invalid(); 261 } 262 263 // Char, string and scanlist. 264 case ConversionSpecifier::cArg: 265 case ConversionSpecifier::sArg: 266 case ConversionSpecifier::ScanListArg: 267 switch (LM.getKind()) { 268 case LengthModifier::None: return ScanfArgTypeResult::CStrTy; 269 case LengthModifier::AsLong: 270 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 271 default: 272 return ScanfArgTypeResult::Invalid(); 273 } 274 case ConversionSpecifier::CArg: 275 case ConversionSpecifier::SArg: 276 // FIXME: Mac OS X specific? 277 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); 278 279 // Pointer. 280 case ConversionSpecifier::pArg: 281 return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy)); 282 283 default: 284 break; 285 } 286 287 return ScanfArgTypeResult(); 288} 289 290bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt) 291{ 292 if (!QT->isPointerType()) 293 return false; 294 295 QualType PT = QT->getPointeeType(); 296 const BuiltinType *BT = PT->getAs<BuiltinType>(); 297 if (!BT) 298 return false; 299 300 // Pointer to a character. 301 if (PT->isAnyCharacterType()) { 302 CS.setKind(ConversionSpecifier::sArg); 303 if (PT->isWideCharType()) 304 LM.setKind(LengthModifier::AsWideChar); 305 else 306 LM.setKind(LengthModifier::None); 307 return true; 308 } 309 310 // Figure out the length modifier. 311 switch (BT->getKind()) { 312 // no modifier 313 case BuiltinType::UInt: 314 case BuiltinType::Int: 315 case BuiltinType::Float: 316 LM.setKind(LengthModifier::None); 317 break; 318 319 // hh 320 case BuiltinType::Char_U: 321 case BuiltinType::UChar: 322 case BuiltinType::Char_S: 323 case BuiltinType::SChar: 324 LM.setKind(LengthModifier::AsChar); 325 break; 326 327 // h 328 case BuiltinType::Short: 329 case BuiltinType::UShort: 330 LM.setKind(LengthModifier::AsShort); 331 break; 332 333 // l 334 case BuiltinType::Long: 335 case BuiltinType::ULong: 336 case BuiltinType::Double: 337 LM.setKind(LengthModifier::AsLong); 338 break; 339 340 // ll 341 case BuiltinType::LongLong: 342 case BuiltinType::ULongLong: 343 LM.setKind(LengthModifier::AsLongLong); 344 break; 345 346 // L 347 case BuiltinType::LongDouble: 348 LM.setKind(LengthModifier::AsLongDouble); 349 break; 350 351 // Don't know. 352 default: 353 return false; 354 } 355 356 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 357 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { 358 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); 359 if (Identifier->getName() == "size_t") { 360 LM.setKind(LengthModifier::AsSizeT); 361 } else if (Identifier->getName() == "ssize_t") { 362 // Not C99, but common in Unix. 363 LM.setKind(LengthModifier::AsSizeT); 364 } else if (Identifier->getName() == "intmax_t") { 365 LM.setKind(LengthModifier::AsIntMax); 366 } else if (Identifier->getName() == "uintmax_t") { 367 LM.setKind(LengthModifier::AsIntMax); 368 } else if (Identifier->getName() == "ptrdiff_t") { 369 LM.setKind(LengthModifier::AsPtrDiff); 370 } 371 } 372 373 // Figure out the conversion specifier. 374 if (PT->isRealFloatingType()) 375 CS.setKind(ConversionSpecifier::fArg); 376 else if (PT->isSignedIntegerType()) 377 CS.setKind(ConversionSpecifier::dArg); 378 else if (PT->isUnsignedIntegerType()) { 379 // Preserve the original formatting, e.g. 'X', 'o'. 380 if (!CS.isUIntArg()) { 381 CS.setKind(ConversionSpecifier::uArg); 382 } 383 } else 384 llvm_unreachable("Unexpected type"); 385 386 return true; 387} 388 389void ScanfSpecifier::toString(raw_ostream &os) const { 390 os << "%"; 391 392 if (usesPositionalArg()) 393 os << getPositionalArgIndex() << "$"; 394 if (SuppressAssignment) 395 os << "*"; 396 397 FieldWidth.toString(os); 398 os << LM.toString(); 399 os << CS.toString(); 400} 401 402bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 403 const char *I, 404 const char *E) { 405 406 unsigned argIndex = 0; 407 408 // Keep looking for a format specifier until we have exhausted the string. 409 while (I != E) { 410 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex); 411 // Did a fail-stop error of any kind occur when parsing the specifier? 412 // If so, don't do any more processing. 413 if (FSR.shouldStop()) 414 return true;; 415 // Did we exhaust the string or encounter an error that 416 // we can recover from? 417 if (!FSR.hasValue()) 418 continue; 419 // We have a format specifier. Pass it to the callback. 420 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 421 I - FSR.getStart())) { 422 return true; 423 } 424 } 425 assert(I == E && "Format string not exhausted"); 426 return false; 427} 428 429bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const { 430 switch (K) { 431 case InvalidTy: 432 llvm_unreachable("ArgTypeResult must be valid"); 433 case UnknownTy: 434 return true; 435 case CStrTy: 436 return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy); 437 case WCStrTy: 438 return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy); 439 case PtrToArgTypeResultTy: { 440 const PointerType *PT = argTy->getAs<PointerType>(); 441 if (!PT) 442 return false; 443 return A.matchesType(C, PT->getPointeeType()); 444 } 445 } 446 447 return false; // Unreachable, but we still get a warning. 448} 449 450QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const { 451 switch (K) { 452 case InvalidTy: 453 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 454 case UnknownTy: 455 return QualType(); 456 case CStrTy: 457 return C.getPointerType(C.CharTy); 458 case WCStrTy: 459 return C.getPointerType(C.getWCharType()); 460 case PtrToArgTypeResultTy: 461 return C.getPointerType(A.getRepresentativeType(C)); 462 } 463 464 return QualType(); // Not reachable. 465} 466 467std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const { 468 std::string S = getRepresentativeType(C).getAsString(); 469 if (!Name) 470 return std::string("'") + S + "'"; 471 return std::string("'") + Name + "' (aka '" + S + "')"; 472} 473