// FormatString.cpp revision 9dbe16eb808ed3b58be6be48bf4ae7317db63e89
// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Shared details for processing format strings of printf and scanf
// (and friends).
//
//===----------------------------------------------------------------------===//

#include "FormatStringParsing.h"

using clang::analyze_format_string::ArgTypeResult;
using clang::analyze_format_string::FormatStringHandler;
using clang::analyze_format_string::FormatSpecifier;
using clang::analyze_format_string::LengthModifier;
using clang::analyze_format_string::OptionalAmount;
using clang::analyze_format_string::PositionContext;
using clang::analyze_format_string::ConversionSpecifier;
using namespace clang;

// Key function to FormatStringHandler.
//
// The destructor is deliberately given an (empty) out-of-line definition in
// this file.  NOTE(review): presumably the destructor is declared virtual in
// the header, so that defining it here pins the class's vtable to this
// translation unit -- confirm against the class declaration.
FormatStringHandler::~FormatStringHandler() {}

//===----------------------------------------------------------------------===//
// Functions for parsing format strings components in both printf and
// scanf format strings.
32//===----------------------------------------------------------------------===// 33 34OptionalAmount 35clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { 36 const char *I = Beg; 37 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38 39 unsigned accumulator = 0; 40 bool hasDigits = false; 41 42 for ( ; I != E; ++I) { 43 char c = *I; 44 if (c >= '0' && c <= '9') { 45 hasDigits = true; 46 accumulator = (accumulator * 10) + (c - '0'); 47 continue; 48 } 49 50 if (hasDigits) 51 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, 52 false); 53 54 break; 55 } 56 57 return OptionalAmount(); 58} 59 60OptionalAmount 61clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, 62 const char *E, 63 unsigned &argIndex) { 64 if (*Beg == '*') { 65 ++Beg; 66 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); 67 } 68 69 return ParseAmount(Beg, E); 70} 71 72OptionalAmount 73clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, 74 const char *Start, 75 const char *&Beg, 76 const char *E, 77 PositionContext p) { 78 if (*Beg == '*') { 79 const char *I = Beg + 1; 80 const OptionalAmount &Amt = ParseAmount(I, E); 81 82 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 83 H.HandleInvalidPosition(Beg, I - Beg, p); 84 return OptionalAmount(false); 85 } 86 87 if (I == E) { 88 // No more characters left? 89 H.HandleIncompleteSpecifier(Start, E - Start); 90 return OptionalAmount(false); 91 } 92 93 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 94 95 if (*I == '$') { 96 // Handle positional arguments 97 98 // Special case: '*0$', since this is an easy mistake. 
99 if (Amt.getConstantAmount() == 0) { 100 H.HandleZeroPosition(Beg, I - Beg + 1); 101 return OptionalAmount(false); 102 } 103 104 const char *Tmp = Beg; 105 Beg = ++I; 106 107 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 108 Tmp, 0, true); 109 } 110 111 H.HandleInvalidPosition(Beg, I - Beg, p); 112 return OptionalAmount(false); 113 } 114 115 return ParseAmount(Beg, E); 116} 117 118 119bool 120clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, 121 FormatSpecifier &CS, 122 const char *Start, 123 const char *&Beg, const char *E, 124 unsigned *argIndex) { 125 // FIXME: Support negative field widths. 126 if (argIndex) { 127 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 128 } 129 else { 130 const OptionalAmount Amt = 131 ParsePositionAmount(H, Start, Beg, E, 132 analyze_format_string::FieldWidthPos); 133 134 if (Amt.isInvalid()) 135 return true; 136 CS.setFieldWidth(Amt); 137 } 138 return false; 139} 140 141bool 142clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, 143 FormatSpecifier &FS, 144 const char *Start, 145 const char *&Beg, 146 const char *E) { 147 const char *I = Beg; 148 149 const OptionalAmount &Amt = ParseAmount(I, E); 150 151 if (I == E) { 152 // No more characters left? 153 H.HandleIncompleteSpecifier(Start, E - Start); 154 return true; 155 } 156 157 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 158 // Special case: '%0$', since this is an easy mistake. 159 if (Amt.getConstantAmount() == 0) { 160 H.HandleZeroPosition(Start, I - Start); 161 return true; 162 } 163 164 FS.setArgIndex(Amt.getConstantAmount() - 1); 165 FS.setUsesPositionalArg(); 166 // Update the caller's pointer if we decided to consume 167 // these characters. 
168 Beg = I; 169 return false; 170 } 171 172 return false; 173} 174 175bool 176clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, 177 const char *&I, 178 const char *E) { 179 LengthModifier::Kind lmKind = LengthModifier::None; 180 const char *lmPosition = I; 181 switch (*I) { 182 default: 183 return false; 184 case 'h': 185 ++I; 186 lmKind = (I != E && *I == 'h') ? 187 ++I, LengthModifier::AsChar : LengthModifier::AsShort; 188 break; 189 case 'l': 190 ++I; 191 lmKind = (I != E && *I == 'l') ? 192 ++I, LengthModifier::AsLongLong : LengthModifier::AsLong; 193 break; 194 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; 195 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; 196 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; 197 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; 198 case 'q': lmKind = LengthModifier::AsLongLong; ++I; break; 199 } 200 LengthModifier lm(lmPosition, lmKind); 201 FS.setLengthModifier(lm); 202 return true; 203} 204 205//===----------------------------------------------------------------------===// 206// Methods on ArgTypeResult. 207//===----------------------------------------------------------------------===// 208 209static bool hasSameSize(ASTContext &astContext, QualType typeA, QualType typeB) { 210 return astContext.getTypeSize(typeA) == astContext.getTypeSize(typeB); 211} 212 213bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 214 switch (K) { 215 case InvalidTy: 216 assert(false && "ArgTypeResult must be valid"); 217 return true; 218 219 case UnknownTy: 220 return true; 221 222 case SpecificTy: { 223 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 224 if (T == argTy) 225 return true; 226 // Check for "compatible types". 
227 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) { 228 if (!T->isIntegerType()) 229 return false; 230 switch (BT->getKind()) { 231 default: 232 break; 233 case BuiltinType::Char_S: 234 case BuiltinType::SChar: 235 case BuiltinType::Char_U: 236 case BuiltinType::UChar: 237 return hasSameSize(C, T, C.UnsignedCharTy); 238 case BuiltinType::Short: 239 case BuiltinType::UShort: 240 return hasSameSize(C, T, C.ShortTy); 241 case BuiltinType::Int: 242 case BuiltinType::UInt: 243 return hasSameSize(C, T, C.IntTy); 244 case BuiltinType::Long: 245 case BuiltinType::ULong: 246 return hasSameSize(C, T, C.LongTy); 247 case BuiltinType::LongLong: 248 case BuiltinType::ULongLong: 249 return hasSameSize(C, T, C.LongLongTy); 250 } 251 } 252 return false; 253 } 254 255 case CStrTy: { 256 const PointerType *PT = argTy->getAs<PointerType>(); 257 if (!PT) 258 return false; 259 QualType pointeeTy = PT->getPointeeType(); 260 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 261 switch (BT->getKind()) { 262 case BuiltinType::Void: 263 case BuiltinType::Char_U: 264 case BuiltinType::UChar: 265 case BuiltinType::Char_S: 266 case BuiltinType::SChar: 267 return true; 268 default: 269 break; 270 } 271 272 return false; 273 } 274 275 case WCStrTy: { 276 const PointerType *PT = argTy->getAs<PointerType>(); 277 if (!PT) 278 return false; 279 QualType pointeeTy = 280 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 281 return pointeeTy == C.getWCharType(); 282 } 283 284 case WIntTy: { 285 // Instead of doing a lookup for the definition of 'wint_t' (which 286 // is defined by the system headers) instead see if wchar_t and 287 // the argument type promote to the same type. 288 QualType PromoWChar = 289 C.getWCharType()->isPromotableIntegerType() 290 ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); 291 QualType PromoArg = 292 argTy->isPromotableIntegerType() 293 ? 
C.getPromotedIntegerType(argTy) : argTy; 294 295 PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); 296 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); 297 298 return PromoWChar == PromoArg; 299 } 300 301 case CPointerTy: 302 return argTy->isPointerType() || argTy->isObjCObjectPointerType() || 303 argTy->isNullPtrType(); 304 305 case ObjCPointerTy: 306 return argTy->getAs<ObjCObjectPointerType>() != NULL; 307 } 308 309 // FIXME: Should be unreachable, but Clang is currently emitting 310 // a warning. 311 return false; 312} 313 314QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 315 switch (K) { 316 case InvalidTy: 317 assert(false && "No representative type for Invalid ArgTypeResult"); 318 // Fall-through. 319 case UnknownTy: 320 return QualType(); 321 case SpecificTy: 322 return T; 323 case CStrTy: 324 return C.getPointerType(C.CharTy); 325 case WCStrTy: 326 return C.getPointerType(C.getWCharType()); 327 case ObjCPointerTy: 328 return C.ObjCBuiltinIdTy; 329 case CPointerTy: 330 return C.VoidPtrTy; 331 case WIntTy: { 332 QualType WC = C.getWCharType(); 333 return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; 334 } 335 } 336 337 // FIXME: Should be unreachable, but Clang is currently emitting 338 // a warning. 339 return QualType(); 340} 341 342//===----------------------------------------------------------------------===// 343// Methods on OptionalAmount. 344//===----------------------------------------------------------------------===// 345 346ArgTypeResult 347analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { 348 return Ctx.IntTy; 349} 350 351//===----------------------------------------------------------------------===// 352// Methods on LengthModifier. 
353//===----------------------------------------------------------------------===// 354 355const char * 356analyze_format_string::LengthModifier::toString() const { 357 switch (kind) { 358 case AsChar: 359 return "hh"; 360 case AsShort: 361 return "h"; 362 case AsLong: // or AsWideChar 363 return "l"; 364 case AsLongLong: 365 return "ll"; 366 case AsIntMax: 367 return "j"; 368 case AsSizeT: 369 return "z"; 370 case AsPtrDiff: 371 return "t"; 372 case AsLongDouble: 373 return "L"; 374 case None: 375 return ""; 376 } 377 return NULL; 378} 379 380//===----------------------------------------------------------------------===// 381// Methods on OptionalAmount. 382//===----------------------------------------------------------------------===// 383 384void OptionalAmount::toString(llvm::raw_ostream &os) const { 385 switch (hs) { 386 case Invalid: 387 case NotSpecified: 388 return; 389 case Arg: 390 if (UsesDotPrefix) 391 os << "."; 392 if (usesPositionalArg()) 393 os << "*" << getPositionalArgIndex() << "$"; 394 else 395 os << "*"; 396 break; 397 case Constant: 398 if (UsesDotPrefix) 399 os << "."; 400 os << amt; 401 break; 402 } 403} 404 405//===----------------------------------------------------------------------===// 406// Methods on ConversionSpecifier. 
407//===----------------------------------------------------------------------===// 408 409bool FormatSpecifier::hasValidLengthModifier() const { 410 switch (LM.getKind()) { 411 case LengthModifier::None: 412 return true; 413 414 // Handle most integer flags 415 case LengthModifier::AsChar: 416 case LengthModifier::AsShort: 417 case LengthModifier::AsLongLong: 418 case LengthModifier::AsIntMax: 419 case LengthModifier::AsSizeT: 420 case LengthModifier::AsPtrDiff: 421 switch (CS.getKind()) { 422 case ConversionSpecifier::dArg: 423 case ConversionSpecifier::iArg: 424 case ConversionSpecifier::oArg: 425 case ConversionSpecifier::uArg: 426 case ConversionSpecifier::xArg: 427 case ConversionSpecifier::XArg: 428 case ConversionSpecifier::nArg: 429 return true; 430 default: 431 return false; 432 } 433 434 // Handle 'l' flag 435 case LengthModifier::AsLong: 436 switch (CS.getKind()) { 437 case ConversionSpecifier::dArg: 438 case ConversionSpecifier::iArg: 439 case ConversionSpecifier::oArg: 440 case ConversionSpecifier::uArg: 441 case ConversionSpecifier::xArg: 442 case ConversionSpecifier::XArg: 443 case ConversionSpecifier::aArg: 444 case ConversionSpecifier::AArg: 445 case ConversionSpecifier::fArg: 446 case ConversionSpecifier::FArg: 447 case ConversionSpecifier::eArg: 448 case ConversionSpecifier::EArg: 449 case ConversionSpecifier::gArg: 450 case ConversionSpecifier::GArg: 451 case ConversionSpecifier::nArg: 452 case ConversionSpecifier::cArg: 453 case ConversionSpecifier::sArg: 454 return true; 455 default: 456 return false; 457 } 458 459 case LengthModifier::AsLongDouble: 460 switch (CS.getKind()) { 461 case ConversionSpecifier::aArg: 462 case ConversionSpecifier::AArg: 463 case ConversionSpecifier::fArg: 464 case ConversionSpecifier::FArg: 465 case ConversionSpecifier::eArg: 466 case ConversionSpecifier::EArg: 467 case ConversionSpecifier::gArg: 468 case ConversionSpecifier::GArg: 469 return true; 470 default: 471 return false; 472 } 473 } 474 return false; 475} 
476 477 478