// PrintfFormatString.cpp revision 7f70dc85d5055c19c8003f43a59135de211ad1b9
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/PrintfFormatString.h" 16#include "clang/AST/ASTContext.h" 17 18using clang::analyze_printf::FormatSpecifier; 19using clang::analyze_printf::OptionalAmount; 20using clang::analyze_printf::ArgTypeResult; 21using clang::analyze_printf::FormatStringHandler; 22using namespace clang; 23 24namespace { 25class FormatSpecifierResult { 26 FormatSpecifier FS; 27 const char *Start; 28 bool Stop; 29public: 30 FormatSpecifierResult(bool stop = false) 31 : Start(0), Stop(stop) {} 32 FormatSpecifierResult(const char *start, 33 const FormatSpecifier &fs) 34 : FS(fs), Start(start), Stop(false) {} 35 36 37 const char *getStart() const { return Start; } 38 bool shouldStop() const { return Stop; } 39 bool hasValue() const { return Start != 0; } 40 const FormatSpecifier &getValue() const { 41 assert(hasValue()); 42 return FS; 43 } 44 const FormatSpecifier &getValue() { return FS; } 45}; 46} // end anonymous namespace 47 48template <typename T> 49class UpdateOnReturn { 50 T &ValueToUpdate; 51 const T &ValueToCopy; 52public: 53 UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) 54 : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} 55 56 ~UpdateOnReturn() { 57 ValueToUpdate = ValueToCopy; 58 } 59}; 60 61//===----------------------------------------------------------------------===// 62// Methods for parsing format strings. 
63//===----------------------------------------------------------------------===// 64 65static OptionalAmount ParseAmount(const char *&Beg, const char *E, 66 unsigned &argIndex) { 67 const char *I = Beg; 68 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 69 70 bool foundDigits = false; 71 unsigned accumulator = 0; 72 73 for ( ; I != E; ++I) { 74 char c = *I; 75 if (c >= '0' && c <= '9') { 76 foundDigits = true; 77 accumulator += (accumulator * 10) + (c - '0'); 78 continue; 79 } 80 81 if (foundDigits) 82 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg); 83 84 if (c == '*') { 85 ++I; 86 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg); 87 } 88 89 break; 90 } 91 92 return OptionalAmount(); 93} 94 95static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, 96 const char *&Beg, 97 const char *E, 98 unsigned &argIndex) { 99 100 using namespace clang::analyze_printf; 101 102 const char *I = Beg; 103 const char *Start = 0; 104 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 105 106 // Look for a '%' character that indicates the start of a format specifier. 107 for ( ; I != E ; ++I) { 108 char c = *I; 109 if (c == '\0') { 110 // Detect spurious null characters, which are likely errors. 111 H.HandleNullChar(I); 112 return true; 113 } 114 if (c == '%') { 115 Start = I++; // Record the start of the format specifier. 116 break; 117 } 118 } 119 120 // No format specifier found? 121 if (!Start) 122 return false; 123 124 if (I == E) { 125 // No more characters left? 126 H.HandleIncompleteFormatSpecifier(Start, E - Start); 127 return true; 128 } 129 130 FormatSpecifier FS; 131 132 // Look for flags (if any). 
133 bool hasMore = true; 134 for ( ; I != E; ++I) { 135 switch (*I) { 136 default: hasMore = false; break; 137 case '-': FS.setIsLeftJustified(); break; 138 case '+': FS.setHasPlusPrefix(); break; 139 case ' ': FS.setHasSpacePrefix(); break; 140 case '#': FS.setHasAlternativeForm(); break; 141 case '0': FS.setHasLeadingZeros(); break; 142 } 143 if (!hasMore) 144 break; 145 } 146 147 if (I == E) { 148 // No more characters left? 149 H.HandleIncompleteFormatSpecifier(Start, E - Start); 150 return true; 151 } 152 153 // Look for the field width (if any). 154 FS.setFieldWidth(ParseAmount(I, E, argIndex)); 155 156 if (I == E) { 157 // No more characters left? 158 H.HandleIncompleteFormatSpecifier(Start, E - Start); 159 return true; 160 } 161 162 // Look for the precision (if any). 163 if (*I == '.') { 164 ++I; 165 if (I == E) { 166 H.HandleIncompleteFormatSpecifier(Start, E - Start); 167 return true; 168 } 169 170 FS.setPrecision(ParseAmount(I, E, argIndex)); 171 172 if (I == E) { 173 // No more characters left? 174 H.HandleIncompleteFormatSpecifier(Start, E - Start); 175 return true; 176 } 177 } 178 179 // Look for the length modifier. 180 LengthModifier lm = None; 181 switch (*I) { 182 default: 183 break; 184 case 'h': 185 ++I; 186 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; 187 break; 188 case 'l': 189 ++I; 190 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; 191 break; 192 case 'j': lm = AsIntMax; ++I; break; 193 case 'z': lm = AsSizeT; ++I; break; 194 case 't': lm = AsPtrDiff; ++I; break; 195 case 'L': lm = AsLongDouble; ++I; break; 196 case 'q': lm = AsLongLong; ++I; break; 197 } 198 FS.setLengthModifier(lm); 199 200 if (I == E) { 201 // No more characters left? 202 H.HandleIncompleteFormatSpecifier(Start, E - Start); 203 return true; 204 } 205 206 if (*I == '\0') { 207 // Detect spurious null characters, which are likely errors. 208 H.HandleNullChar(I); 209 return true; 210 } 211 212 // Finally, look for the conversion specifier. 
213 const char *conversionPosition = I++; 214 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 215 switch (*conversionPosition) { 216 default: 217 break; 218 // C99: 7.19.6.1 (section 8). 219 case '%': k = ConversionSpecifier::PercentArg; break; 220 case 'A': k = ConversionSpecifier::AArg; break; 221 case 'E': k = ConversionSpecifier::EArg; break; 222 case 'F': k = ConversionSpecifier::FArg; break; 223 case 'G': k = ConversionSpecifier::GArg; break; 224 case 'X': k = ConversionSpecifier::XArg; break; 225 case 'a': k = ConversionSpecifier::aArg; break; 226 case 'c': k = ConversionSpecifier::IntAsCharArg; break; 227 case 'd': k = ConversionSpecifier::dArg; break; 228 case 'e': k = ConversionSpecifier::eArg; break; 229 case 'f': k = ConversionSpecifier::fArg; break; 230 case 'g': k = ConversionSpecifier::gArg; break; 231 case 'i': k = ConversionSpecifier::iArg; break; 232 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 233 case 'o': k = ConversionSpecifier::oArg; break; 234 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 235 case 's': k = ConversionSpecifier::CStrArg; break; 236 case 'u': k = ConversionSpecifier::uArg; break; 237 case 'x': k = ConversionSpecifier::xArg; break; 238 // Mac OS X (unicode) specific 239 case 'C': k = ConversionSpecifier::CArg; break; 240 case 'S': k = ConversionSpecifier::UnicodeStrArg; break; 241 // Objective-C. 242 case '@': k = ConversionSpecifier::ObjCObjArg; break; 243 // Glibc specific. 244 case 'm': k = ConversionSpecifier::PrintErrno; break; 245 } 246 ConversionSpecifier CS(conversionPosition, k); 247 FS.setConversionSpecifier(CS); 248 if (CS.consumesDataArgument()) 249 FS.setArgIndex(argIndex++); 250 251 if (k == ConversionSpecifier::InvalidSpecifier) { 252 // Assume the conversion takes one argument. 
253 return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); 254 } 255 return FormatSpecifierResult(Start, FS); 256} 257 258bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, 259 const char *I, const char *E) { 260 261 unsigned argIndex = 0; 262 263 // Keep looking for a format specifier until we have exhausted the string. 264 while (I != E) { 265 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex); 266 // Did a fail-stop error of any kind occur when parsing the specifier? 267 // If so, don't do any more processing. 268 if (FSR.shouldStop()) 269 return true;; 270 // Did we exhaust the string or encounter an error that 271 // we can recover from? 272 if (!FSR.hasValue()) 273 continue; 274 // We have a format specifier. Pass it to the callback. 275 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), 276 I - FSR.getStart())) 277 return true; 278 } 279 assert(I == E && "Format string not exhausted"); 280 return false; 281} 282 283FormatStringHandler::~FormatStringHandler() {} 284 285//===----------------------------------------------------------------------===// 286// Methods on ArgTypeResult. 
287//===----------------------------------------------------------------------===// 288 289bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 290 assert(isValid()); 291 292 if (K == UnknownTy) 293 return true; 294 295 if (K == SpecificTy) { 296 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 297 298 if (T == argTy) 299 return true; 300 301 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 302 switch (BT->getKind()) { 303 default: 304 break; 305 case BuiltinType::Char_S: 306 case BuiltinType::SChar: 307 return T == C.UnsignedCharTy; 308 case BuiltinType::Char_U: 309 case BuiltinType::UChar: 310 return T == C.SignedCharTy; 311 case BuiltinType::Short: 312 return T == C.UnsignedShortTy; 313 case BuiltinType::UShort: 314 return T == C.ShortTy; 315 case BuiltinType::Int: 316 return T == C.UnsignedIntTy; 317 case BuiltinType::UInt: 318 return T == C.IntTy; 319 case BuiltinType::Long: 320 return T == C.UnsignedLongTy; 321 case BuiltinType::ULong: 322 return T == C.LongTy; 323 case BuiltinType::LongLong: 324 return T == C.UnsignedLongLongTy; 325 case BuiltinType::ULongLong: 326 return T == C.LongLongTy; 327 } 328 329 return false; 330 } 331 332 if (K == CStrTy) { 333 const PointerType *PT = argTy->getAs<PointerType>(); 334 if (!PT) 335 return false; 336 337 QualType pointeeTy = PT->getPointeeType(); 338 339 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 340 switch (BT->getKind()) { 341 case BuiltinType::Void: 342 case BuiltinType::Char_U: 343 case BuiltinType::UChar: 344 case BuiltinType::Char_S: 345 case BuiltinType::SChar: 346 return true; 347 default: 348 break; 349 } 350 351 return false; 352 } 353 354 if (K == WCStrTy) { 355 const PointerType *PT = argTy->getAs<PointerType>(); 356 if (!PT) 357 return false; 358 359 QualType pointeeTy = 360 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 361 362 return pointeeTy == C.getWCharType(); 363 } 364 365 return false; 366} 367 368QualType 
ArgTypeResult::getRepresentativeType(ASTContext &C) const { 369 assert(isValid()); 370 if (K == SpecificTy) 371 return T; 372 if (K == CStrTy) 373 return C.getPointerType(C.CharTy); 374 if (K == WCStrTy) 375 return C.getPointerType(C.getWCharType()); 376 if (K == ObjCPointerTy) 377 return C.ObjCBuiltinIdTy; 378 379 return QualType(); 380} 381 382//===----------------------------------------------------------------------===// 383// Methods on OptionalAmount. 384//===----------------------------------------------------------------------===// 385 386ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const { 387 return Ctx.IntTy; 388} 389 390//===----------------------------------------------------------------------===// 391// Methods on FormatSpecifier. 392//===----------------------------------------------------------------------===// 393 394ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { 395 if (!CS.consumesDataArgument()) 396 return ArgTypeResult::Invalid(); 397 398 if (CS.isIntArg()) 399 switch (LM) { 400 case AsLongDouble: 401 return ArgTypeResult::Invalid(); 402 case None: return Ctx.IntTy; 403 case AsChar: return Ctx.SignedCharTy; 404 case AsShort: return Ctx.ShortTy; 405 case AsLong: return Ctx.LongTy; 406 case AsLongLong: return Ctx.LongLongTy; 407 case AsIntMax: 408 // FIXME: Return unknown for now. 409 return ArgTypeResult(); 410 case AsSizeT: return Ctx.getSizeType(); 411 case AsPtrDiff: return Ctx.getPointerDiffType(); 412 } 413 414 if (CS.isUIntArg()) 415 switch (LM) { 416 case AsLongDouble: 417 return ArgTypeResult::Invalid(); 418 case None: return Ctx.UnsignedIntTy; 419 case AsChar: return Ctx.UnsignedCharTy; 420 case AsShort: return Ctx.UnsignedShortTy; 421 case AsLong: return Ctx.UnsignedLongTy; 422 case AsLongLong: return Ctx.UnsignedLongLongTy; 423 case AsIntMax: 424 // FIXME: Return unknown for now. 425 return ArgTypeResult(); 426 case AsSizeT: 427 // FIXME: How to get the corresponding unsigned 428 // version of size_t? 
429 return ArgTypeResult(); 430 case AsPtrDiff: 431 // FIXME: How to get the corresponding unsigned 432 // version of ptrdiff_t? 433 return ArgTypeResult(); 434 } 435 436 if (CS.isDoubleArg()) { 437 if (LM == AsLongDouble) 438 return Ctx.LongDoubleTy; 439 return Ctx.DoubleTy; 440 } 441 442 switch (CS.getKind()) { 443 case ConversionSpecifier::CStrArg: 444 return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); 445 case ConversionSpecifier::UnicodeStrArg: 446 // FIXME: This appears to be Mac OS X specific. 447 return ArgTypeResult::WCStrTy; 448 case ConversionSpecifier::CArg: 449 return Ctx.WCharTy; 450 default: 451 break; 452 } 453 454 // FIXME: Handle other cases. 455 return ArgTypeResult(); 456} 457 458