ScanfFormatString.cpp revision 651f13cea278ec967336033dd032faef0e9fc2ec
1//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in scanf and friends. The structure of format 11// strings for fscanf() are described in C99 7.19.6.2. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17#include "clang/Basic/TargetInfo.h" 18 19using clang::analyze_format_string::ArgType; 20using clang::analyze_format_string::FormatStringHandler; 21using clang::analyze_format_string::LengthModifier; 22using clang::analyze_format_string::OptionalAmount; 23using clang::analyze_format_string::ConversionSpecifier; 24using clang::analyze_scanf::ScanfConversionSpecifier; 25using clang::analyze_scanf::ScanfSpecifier; 26using clang::UpdateOnReturn; 27using namespace clang; 28 29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 30 ScanfSpecifierResult; 31 32static bool ParseScanList(FormatStringHandler &H, 33 ScanfConversionSpecifier &CS, 34 const char *&Beg, const char *E) { 35 const char *I = Beg; 36 const char *start = I - 1; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 // No more characters? 40 if (I == E) { 41 H.HandleIncompleteScanList(start, I); 42 return true; 43 } 44 45 // Special case: ']' is the first character. 46 if (*I == ']') { 47 if (++I == E) { 48 H.HandleIncompleteScanList(start, I - 1); 49 return true; 50 } 51 } 52 53 // Look for a ']' character which denotes the end of the scan list. 54 while (*I != ']') { 55 if (++I == E) { 56 H.HandleIncompleteScanList(start, I - 1); 57 return true; 58 } 59 } 60 61 CS.setEndScanList(I); 62 return false; 63} 64 65// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 66// We can possibly refactor. 67static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 68 const char *&Beg, 69 const char *E, 70 unsigned &argIndex, 71 const LangOptions &LO, 72 const TargetInfo &Target) { 73 74 using namespace clang::analyze_scanf; 75 const char *I = Beg; 76 const char *Start = 0; 77 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 78 79 // Look for a '%' character that indicates the start of a format specifier. 80 for ( ; I != E ; ++I) { 81 char c = *I; 82 if (c == '\0') { 83 // Detect spurious null characters, which are likely errors. 84 H.HandleNullChar(I); 85 return true; 86 } 87 if (c == '%') { 88 Start = I++; // Record the start of the format specifier. 89 break; 90 } 91 } 92 93 // No format specifier found? 94 if (!Start) 95 return false; 96 97 if (I == E) { 98 // No more characters left? 99 H.HandleIncompleteSpecifier(Start, E - Start); 100 return true; 101 } 102 103 ScanfSpecifier FS; 104 if (ParseArgPosition(H, FS, Start, I, E)) 105 return true; 106 107 if (I == E) { 108 // No more characters left? 109 H.HandleIncompleteSpecifier(Start, E - Start); 110 return true; 111 } 112 113 // Look for '*' flag if it is present. 114 if (*I == '*') { 115 FS.setSuppressAssignment(I); 116 if (++I == E) { 117 H.HandleIncompleteSpecifier(Start, E - Start); 118 return true; 119 } 120 } 121 122 // Look for the field width (if any). Unlike printf, this is either 123 // a fixed integer or isn't present. 124 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 125 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 126 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 127 FS.setFieldWidth(Amt); 128 129 if (I == E) { 130 // No more characters left? 131 H.HandleIncompleteSpecifier(Start, E - Start); 132 return true; 133 } 134 } 135 136 // Look for the length modifier. 137 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 138 // No more characters left? 139 H.HandleIncompleteSpecifier(Start, E - Start); 140 return true; 141 } 142 143 // Detect spurious null characters, which are likely errors. 144 if (*I == '\0') { 145 H.HandleNullChar(I); 146 return true; 147 } 148 149 // Finally, look for the conversion specifier. 150 const char *conversionPosition = I++; 151 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 152 switch (*conversionPosition) { 153 default: 154 break; 155 case '%': k = ConversionSpecifier::PercentArg; break; 156 case 'A': k = ConversionSpecifier::AArg; break; 157 case 'E': k = ConversionSpecifier::EArg; break; 158 case 'F': k = ConversionSpecifier::FArg; break; 159 case 'G': k = ConversionSpecifier::GArg; break; 160 case 'X': k = ConversionSpecifier::XArg; break; 161 case 'a': k = ConversionSpecifier::aArg; break; 162 case 'd': k = ConversionSpecifier::dArg; break; 163 case 'e': k = ConversionSpecifier::eArg; break; 164 case 'f': k = ConversionSpecifier::fArg; break; 165 case 'g': k = ConversionSpecifier::gArg; break; 166 case 'i': k = ConversionSpecifier::iArg; break; 167 case 'n': k = ConversionSpecifier::nArg; break; 168 case 'c': k = ConversionSpecifier::cArg; break; 169 case 'C': k = ConversionSpecifier::CArg; break; 170 case 'S': k = ConversionSpecifier::SArg; break; 171 case '[': k = ConversionSpecifier::ScanListArg; break; 172 case 'u': k = ConversionSpecifier::uArg; break; 173 case 'x': k = ConversionSpecifier::xArg; break; 174 case 'o': k = ConversionSpecifier::oArg; break; 175 case 's': k = ConversionSpecifier::sArg; break; 176 case 'p': k = ConversionSpecifier::pArg; break; 177 // Apple extensions 178 // Apple-specific 179 case 'D': 180 if (Target.getTriple().isOSDarwin()) 181 k = ConversionSpecifier::DArg; 182 break; 183 case 'O': 184 if (Target.getTriple().isOSDarwin()) 185 k = ConversionSpecifier::OArg; 186 break; 187 case 'U': 188 if (Target.getTriple().isOSDarwin()) 189 k = ConversionSpecifier::UArg; 190 break; 191 } 192 ScanfConversionSpecifier CS(conversionPosition, k); 193 if (k == ScanfConversionSpecifier::ScanListArg) { 194 if (ParseScanList(H, CS, I, E)) 195 return true; 196 } 197 FS.setConversionSpecifier(CS); 198 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 199 && !FS.usesPositionalArg()) 200 FS.setArgIndex(argIndex++); 201 202 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 203 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 204 205 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 206 // Assume the conversion takes one argument. 207 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 208 } 209 return ScanfSpecifierResult(Start, FS); 210} 211 212ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 213 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 214 215 if (!CS.consumesDataArgument()) 216 return ArgType::Invalid(); 217 218 switch(CS.getKind()) { 219 // Signed int. 220 case ConversionSpecifier::dArg: 221 case ConversionSpecifier::DArg: 222 case ConversionSpecifier::iArg: 223 switch (LM.getKind()) { 224 case LengthModifier::None: 225 return ArgType::PtrTo(Ctx.IntTy); 226 case LengthModifier::AsChar: 227 return ArgType::PtrTo(ArgType::AnyCharTy); 228 case LengthModifier::AsShort: 229 return ArgType::PtrTo(Ctx.ShortTy); 230 case LengthModifier::AsLong: 231 return ArgType::PtrTo(Ctx.LongTy); 232 case LengthModifier::AsLongLong: 233 case LengthModifier::AsQuad: 234 return ArgType::PtrTo(Ctx.LongLongTy); 235 case LengthModifier::AsInt64: 236 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 237 case LengthModifier::AsIntMax: 238 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 239 case LengthModifier::AsSizeT: 240 // FIXME: ssize_t. 241 return ArgType(); 242 case LengthModifier::AsPtrDiff: 243 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 244 case LengthModifier::AsLongDouble: 245 // GNU extension. 246 return ArgType::PtrTo(Ctx.LongLongTy); 247 case LengthModifier::AsAllocate: 248 case LengthModifier::AsMAllocate: 249 case LengthModifier::AsInt32: 250 case LengthModifier::AsInt3264: 251 return ArgType::Invalid(); 252 } 253 254 // Unsigned int. 255 case ConversionSpecifier::oArg: 256 case ConversionSpecifier::OArg: 257 case ConversionSpecifier::uArg: 258 case ConversionSpecifier::UArg: 259 case ConversionSpecifier::xArg: 260 case ConversionSpecifier::XArg: 261 switch (LM.getKind()) { 262 case LengthModifier::None: 263 return ArgType::PtrTo(Ctx.UnsignedIntTy); 264 case LengthModifier::AsChar: 265 return ArgType::PtrTo(Ctx.UnsignedCharTy); 266 case LengthModifier::AsShort: 267 return ArgType::PtrTo(Ctx.UnsignedShortTy); 268 case LengthModifier::AsLong: 269 return ArgType::PtrTo(Ctx.UnsignedLongTy); 270 case LengthModifier::AsLongLong: 271 case LengthModifier::AsQuad: 272 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 273 case LengthModifier::AsInt64: 274 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); 275 case LengthModifier::AsIntMax: 276 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 277 case LengthModifier::AsSizeT: 278 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 279 case LengthModifier::AsPtrDiff: 280 // FIXME: Unsigned version of ptrdiff_t? 281 return ArgType(); 282 case LengthModifier::AsLongDouble: 283 // GNU extension. 284 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 285 case LengthModifier::AsAllocate: 286 case LengthModifier::AsMAllocate: 287 case LengthModifier::AsInt32: 288 case LengthModifier::AsInt3264: 289 return ArgType::Invalid(); 290 } 291 292 // Float. 293 case ConversionSpecifier::aArg: 294 case ConversionSpecifier::AArg: 295 case ConversionSpecifier::eArg: 296 case ConversionSpecifier::EArg: 297 case ConversionSpecifier::fArg: 298 case ConversionSpecifier::FArg: 299 case ConversionSpecifier::gArg: 300 case ConversionSpecifier::GArg: 301 switch (LM.getKind()) { 302 case LengthModifier::None: 303 return ArgType::PtrTo(Ctx.FloatTy); 304 case LengthModifier::AsLong: 305 return ArgType::PtrTo(Ctx.DoubleTy); 306 case LengthModifier::AsLongDouble: 307 return ArgType::PtrTo(Ctx.LongDoubleTy); 308 default: 309 return ArgType::Invalid(); 310 } 311 312 // Char, string and scanlist. 313 case ConversionSpecifier::cArg: 314 case ConversionSpecifier::sArg: 315 case ConversionSpecifier::ScanListArg: 316 switch (LM.getKind()) { 317 case LengthModifier::None: 318 return ArgType::PtrTo(ArgType::AnyCharTy); 319 case LengthModifier::AsLong: 320 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 321 case LengthModifier::AsAllocate: 322 case LengthModifier::AsMAllocate: 323 return ArgType::PtrTo(ArgType::CStrTy); 324 default: 325 return ArgType::Invalid(); 326 } 327 case ConversionSpecifier::CArg: 328 case ConversionSpecifier::SArg: 329 // FIXME: Mac OS X specific? 330 switch (LM.getKind()) { 331 case LengthModifier::None: 332 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 333 case LengthModifier::AsAllocate: 334 case LengthModifier::AsMAllocate: 335 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 336 default: 337 return ArgType::Invalid(); 338 } 339 340 // Pointer. 341 case ConversionSpecifier::pArg: 342 return ArgType::PtrTo(ArgType::CPointerTy); 343 344 // Write-back. 345 case ConversionSpecifier::nArg: 346 switch (LM.getKind()) { 347 case LengthModifier::None: 348 return ArgType::PtrTo(Ctx.IntTy); 349 case LengthModifier::AsChar: 350 return ArgType::PtrTo(Ctx.SignedCharTy); 351 case LengthModifier::AsShort: 352 return ArgType::PtrTo(Ctx.ShortTy); 353 case LengthModifier::AsLong: 354 return ArgType::PtrTo(Ctx.LongTy); 355 case LengthModifier::AsLongLong: 356 case LengthModifier::AsQuad: 357 return ArgType::PtrTo(Ctx.LongLongTy); 358 case LengthModifier::AsInt64: 359 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 360 case LengthModifier::AsIntMax: 361 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 362 case LengthModifier::AsSizeT: 363 return ArgType(); // FIXME: ssize_t 364 case LengthModifier::AsPtrDiff: 365 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 366 case LengthModifier::AsLongDouble: 367 return ArgType(); // FIXME: Is this a known extension? 368 case LengthModifier::AsAllocate: 369 case LengthModifier::AsMAllocate: 370 case LengthModifier::AsInt32: 371 case LengthModifier::AsInt3264: 372 return ArgType::Invalid(); 373 } 374 375 default: 376 break; 377 } 378 379 return ArgType(); 380} 381 382bool ScanfSpecifier::fixType(QualType QT, QualType RawQT, 383 const LangOptions &LangOpt, 384 ASTContext &Ctx) { 385 386 // %n is different from other conversion specifiers; don't try to fix it. 387 if (CS.getKind() == ConversionSpecifier::nArg) 388 return false; 389 390 if (!QT->isPointerType()) 391 return false; 392 393 QualType PT = QT->getPointeeType(); 394 395 // If it's an enum, get its underlying type. 396 if (const EnumType *ETy = PT->getAs<EnumType>()) 397 PT = ETy->getDecl()->getIntegerType(); 398 399 const BuiltinType *BT = PT->getAs<BuiltinType>(); 400 if (!BT) 401 return false; 402 403 // Pointer to a character. 404 if (PT->isAnyCharacterType()) { 405 CS.setKind(ConversionSpecifier::sArg); 406 if (PT->isWideCharType()) 407 LM.setKind(LengthModifier::AsWideChar); 408 else 409 LM.setKind(LengthModifier::None); 410 411 // If we know the target array length, we can use it as a field width. 412 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) { 413 if (CAT->getSizeModifier() == ArrayType::Normal) 414 FieldWidth = OptionalAmount(OptionalAmount::Constant, 415 CAT->getSize().getZExtValue() - 1, 416 "", 0, false); 417 418 } 419 return true; 420 } 421 422 // Figure out the length modifier. 423 switch (BT->getKind()) { 424 // no modifier 425 case BuiltinType::UInt: 426 case BuiltinType::Int: 427 case BuiltinType::Float: 428 LM.setKind(LengthModifier::None); 429 break; 430 431 // hh 432 case BuiltinType::Char_U: 433 case BuiltinType::UChar: 434 case BuiltinType::Char_S: 435 case BuiltinType::SChar: 436 LM.setKind(LengthModifier::AsChar); 437 break; 438 439 // h 440 case BuiltinType::Short: 441 case BuiltinType::UShort: 442 LM.setKind(LengthModifier::AsShort); 443 break; 444 445 // l 446 case BuiltinType::Long: 447 case BuiltinType::ULong: 448 case BuiltinType::Double: 449 LM.setKind(LengthModifier::AsLong); 450 break; 451 452 // ll 453 case BuiltinType::LongLong: 454 case BuiltinType::ULongLong: 455 LM.setKind(LengthModifier::AsLongLong); 456 break; 457 458 // L 459 case BuiltinType::LongDouble: 460 LM.setKind(LengthModifier::AsLongDouble); 461 break; 462 463 // Don't know. 464 default: 465 return false; 466 } 467 468 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 469 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11)) 470 namedTypeToLengthModifier(PT, LM); 471 472 // If fixing the length modifier was enough, we are done. 473 if (hasValidLengthModifier(Ctx.getTargetInfo())) { 474 const analyze_scanf::ArgType &AT = getArgType(Ctx); 475 if (AT.isValid() && AT.matchesType(Ctx, QT)) 476 return true; 477 } 478 479 // Figure out the conversion specifier. 480 if (PT->isRealFloatingType()) 481 CS.setKind(ConversionSpecifier::fArg); 482 else if (PT->isSignedIntegerType()) 483 CS.setKind(ConversionSpecifier::dArg); 484 else if (PT->isUnsignedIntegerType()) 485 CS.setKind(ConversionSpecifier::uArg); 486 else 487 llvm_unreachable("Unexpected type"); 488 489 return true; 490} 491 492void ScanfSpecifier::toString(raw_ostream &os) const { 493 os << "%"; 494 495 if (usesPositionalArg()) 496 os << getPositionalArgIndex() << "$"; 497 if (SuppressAssignment) 498 os << "*"; 499 500 FieldWidth.toString(os); 501 os << LM.toString(); 502 os << CS.toString(); 503} 504 505bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 506 const char *I, 507 const char *E, 508 const LangOptions &LO, 509 const TargetInfo &Target) { 510 511 unsigned argIndex = 0; 512 513 // Keep looking for a format specifier until we have exhausted the string. 514 while (I != E) { 515 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 516 LO, Target); 517 // Did a fail-stop error of any kind occur when parsing the specifier? 518 // If so, don't do any more processing. 519 if (FSR.shouldStop()) 520 return true; 521 // Did we exhaust the string or encounter an error that 522 // we can recover from? 523 if (!FSR.hasValue()) 524 continue; 525 // We have a format specifier. Pass it to the callback. 526 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 527 I - FSR.getStart())) { 528 return true; 529 } 530 } 531 assert(I == E && "Format string not exhausted"); 532 return false; 533} 534