FormatString.cpp revision 6ca4a9ae99d65948e578d3e7d1f58ab6a947d2d7
1a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- 2a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 30406ce1417f76f2034833414dcecc9f56253640cVikas Arora// The LLVM Compiler Infrastructure 40406ce1417f76f2034833414dcecc9f56253640cVikas Arora// 50406ce1417f76f2034833414dcecc9f56253640cVikas Arora// This file is distributed under the University of Illinois Open Source 60406ce1417f76f2034833414dcecc9f56253640cVikas Arora// License. See LICENSE.TXT for details. 70406ce1417f76f2034833414dcecc9f56253640cVikas Arora// 8a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//===----------------------------------------------------------------------===// 9a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 10a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Shared details for processing format strings of printf and scanf 11a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// (and friends). 12a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// 13a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//===----------------------------------------------------------------------===// 14a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 15a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora#include "FormatStringParsing.h" 16a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 17a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorausing clang::analyze_format_string::ArgTypeResult; 18a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorausing clang::analyze_format_string::FormatStringHandler; 19a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorausing clang::analyze_format_string::FormatSpecifier; 20a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorausing clang::analyze_format_string::LengthModifier; 21a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorausing clang::analyze_format_string::OptionalAmount; 22a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorausing clang::analyze_format_string::PositionContext; 23a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorausing clang::analyze_format_string::ConversionSpecifier; 24a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arorausing namespace clang; 25a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 26a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Key function to FormatStringHandler. 27a2415724fb3466168b2af5b08bd94ba732c0e753Vikas AroraFormatStringHandler::~FormatStringHandler() {} 28a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 29a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//===----------------------------------------------------------------------===// 30a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// Functions for parsing format strings components in both printf and 31a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora// scanf format strings. 32a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora//===----------------------------------------------------------------------===// 33a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 34a2415724fb3466168b2af5b08bd94ba732c0e753Vikas AroraOptionalAmount 35af51b94a435132e9014c324e25fb686b3d07a8c8Vikas Aroraclang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { 36a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora const char *I = Beg; 37a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora UpdateOnReturn <const char*> UpdateBeg(Beg, I); 38a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora 39a2415724fb3466168b2af5b08bd94ba732c0e753Vikas Arora unsigned accumulator = 0; 40 bool hasDigits = false; 41 42 for ( ; I != E; ++I) { 43 char c = *I; 44 if (c >= '0' && c <= '9') { 45 hasDigits = true; 46 accumulator = (accumulator * 10) + (c - '0'); 47 continue; 48 } 49 50 if (hasDigits) 51 return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, 52 false); 53 54 break; 55 } 56 57 return OptionalAmount(); 58} 59 60OptionalAmount 61clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, 62 const char *E, 63 unsigned &argIndex) { 64 if (*Beg == '*') { 65 ++Beg; 66 return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); 67 } 68 69 return ParseAmount(Beg, E); 70} 71 72OptionalAmount 73clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, 74 const char *Start, 75 const char *&Beg, 76 const char *E, 77 PositionContext p) { 78 if (*Beg == '*') { 79 const char *I = Beg + 1; 80 const OptionalAmount &Amt = ParseAmount(I, E); 81 82 if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { 83 H.HandleInvalidPosition(Beg, I - Beg, p); 84 return OptionalAmount(false); 85 } 86 87 if (I == E) { 88 // No more characters left? 89 H.HandleIncompleteSpecifier(Start, E - Start); 90 return OptionalAmount(false); 91 } 92 93 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 94 95 if (*I == '$') { 96 // Handle positional arguments 97 98 // Special case: '*0$', since this is an easy mistake. 99 if (Amt.getConstantAmount() == 0) { 100 H.HandleZeroPosition(Beg, I - Beg + 1); 101 return OptionalAmount(false); 102 } 103 104 const char *Tmp = Beg; 105 Beg = ++I; 106 107 return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, 108 Tmp, 0, true); 109 } 110 111 H.HandleInvalidPosition(Beg, I - Beg, p); 112 return OptionalAmount(false); 113 } 114 115 return ParseAmount(Beg, E); 116} 117 118 119bool 120clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, 121 FormatSpecifier &CS, 122 const char *Start, 123 const char *&Beg, const char *E, 124 unsigned *argIndex) { 125 // FIXME: Support negative field widths. 126 if (argIndex) { 127 CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); 128 } 129 else { 130 const OptionalAmount Amt = 131 ParsePositionAmount(H, Start, Beg, E, 132 analyze_format_string::FieldWidthPos); 133 134 if (Amt.isInvalid()) 135 return true; 136 CS.setFieldWidth(Amt); 137 } 138 return false; 139} 140 141bool 142clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, 143 FormatSpecifier &FS, 144 const char *Start, 145 const char *&Beg, 146 const char *E) { 147 const char *I = Beg; 148 149 const OptionalAmount &Amt = ParseAmount(I, E); 150 151 if (I == E) { 152 // No more characters left? 153 H.HandleIncompleteSpecifier(Start, E - Start); 154 return true; 155 } 156 157 if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { 158 // Special case: '%0$', since this is an easy mistake. 159 if (Amt.getConstantAmount() == 0) { 160 H.HandleZeroPosition(Start, I - Start); 161 return true; 162 } 163 164 FS.setArgIndex(Amt.getConstantAmount() - 1); 165 FS.setUsesPositionalArg(); 166 // Update the caller's pointer if we decided to consume 167 // these characters. 168 Beg = I; 169 return false; 170 } 171 172 return false; 173} 174 175bool 176clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, 177 const char *&I, 178 const char *E) { 179 LengthModifier::Kind lmKind = LengthModifier::None; 180 const char *lmPosition = I; 181 switch (*I) { 182 default: 183 return false; 184 case 'h': 185 ++I; 186 lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar) 187 : LengthModifier::AsShort; 188 break; 189 case 'l': 190 ++I; 191 lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong) 192 : LengthModifier::AsLong; 193 break; 194 case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; 195 case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; 196 case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; 197 case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; 198 case 'q': lmKind = LengthModifier::AsLongLong; ++I; break; 199 } 200 LengthModifier lm(lmPosition, lmKind); 201 FS.setLengthModifier(lm); 202 return true; 203} 204 205//===----------------------------------------------------------------------===// 206// Methods on ArgTypeResult. 207//===----------------------------------------------------------------------===// 208 209bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { 210 switch (K) { 211 case InvalidTy: 212 llvm_unreachable("ArgTypeResult must be valid"); 213 214 case UnknownTy: 215 return true; 216 217 case AnyCharTy: { 218 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 219 switch (BT->getKind()) { 220 default: 221 break; 222 case BuiltinType::Char_S: 223 case BuiltinType::SChar: 224 case BuiltinType::UChar: 225 case BuiltinType::Char_U: 226 return true; 227 } 228 return false; 229 } 230 231 case SpecificTy: { 232 argTy = C.getCanonicalType(argTy).getUnqualifiedType(); 233 if (T == argTy) 234 return true; 235 // Check for "compatible types". 236 if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) 237 switch (BT->getKind()) { 238 default: 239 break; 240 case BuiltinType::Char_S: 241 case BuiltinType::SChar: 242 return T == C.UnsignedCharTy; 243 case BuiltinType::Char_U: 244 case BuiltinType::UChar: 245 return T == C.SignedCharTy; 246 case BuiltinType::Short: 247 return T == C.UnsignedShortTy; 248 case BuiltinType::UShort: 249 return T == C.ShortTy; 250 case BuiltinType::Int: 251 return T == C.UnsignedIntTy; 252 case BuiltinType::UInt: 253 return T == C.IntTy; 254 case BuiltinType::Long: 255 return T == C.UnsignedLongTy; 256 case BuiltinType::ULong: 257 return T == C.LongTy; 258 case BuiltinType::LongLong: 259 return T == C.UnsignedLongLongTy; 260 case BuiltinType::ULongLong: 261 return T == C.LongLongTy; 262 } 263 return false; 264 } 265 266 case CStrTy: { 267 const PointerType *PT = argTy->getAs<PointerType>(); 268 if (!PT) 269 return false; 270 QualType pointeeTy = PT->getPointeeType(); 271 if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) 272 switch (BT->getKind()) { 273 case BuiltinType::Void: 274 case BuiltinType::Char_U: 275 case BuiltinType::UChar: 276 case BuiltinType::Char_S: 277 case BuiltinType::SChar: 278 return true; 279 default: 280 break; 281 } 282 283 return false; 284 } 285 286 case WCStrTy: { 287 const PointerType *PT = argTy->getAs<PointerType>(); 288 if (!PT) 289 return false; 290 QualType pointeeTy = 291 C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); 292 return pointeeTy == C.getWCharType(); 293 } 294 295 case WIntTy: { 296 // Instead of doing a lookup for the definition of 'wint_t' (which 297 // is defined by the system headers) instead see if wchar_t and 298 // the argument type promote to the same type. 299 QualType PromoWChar = 300 C.getWCharType()->isPromotableIntegerType() 301 ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); 302 QualType PromoArg = 303 argTy->isPromotableIntegerType() 304 ? C.getPromotedIntegerType(argTy) : argTy; 305 306 PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); 307 PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); 308 309 return PromoWChar == PromoArg; 310 } 311 312 case CPointerTy: 313 return argTy->isPointerType() || argTy->isObjCObjectPointerType() || 314 argTy->isNullPtrType(); 315 316 case ObjCPointerTy: 317 return argTy->getAs<ObjCObjectPointerType>() != NULL; 318 } 319 320 // FIXME: Should be unreachable, but Clang is currently emitting 321 // a warning. 322 return false; 323} 324 325QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { 326 switch (K) { 327 case InvalidTy: 328 llvm_unreachable("No representative type for Invalid ArgTypeResult"); 329 case UnknownTy: 330 return QualType(); 331 case AnyCharTy: 332 return C.CharTy; 333 case SpecificTy: 334 return T; 335 case CStrTy: 336 return C.getPointerType(C.CharTy); 337 case WCStrTy: 338 return C.getPointerType(C.getWCharType()); 339 case ObjCPointerTy: 340 return C.ObjCBuiltinIdTy; 341 case CPointerTy: 342 return C.VoidPtrTy; 343 case WIntTy: { 344 QualType WC = C.getWCharType(); 345 return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; 346 } 347 } 348 349 // FIXME: Should be unreachable, but Clang is currently emitting 350 // a warning. 351 return QualType(); 352} 353 354//===----------------------------------------------------------------------===// 355// Methods on OptionalAmount. 356//===----------------------------------------------------------------------===// 357 358ArgTypeResult 359analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { 360 return Ctx.IntTy; 361} 362 363//===----------------------------------------------------------------------===// 364// Methods on LengthModifier. 365//===----------------------------------------------------------------------===// 366 367const char * 368analyze_format_string::LengthModifier::toString() const { 369 switch (kind) { 370 case AsChar: 371 return "hh"; 372 case AsShort: 373 return "h"; 374 case AsLong: // or AsWideChar 375 return "l"; 376 case AsLongLong: 377 return "ll"; 378 case AsIntMax: 379 return "j"; 380 case AsSizeT: 381 return "z"; 382 case AsPtrDiff: 383 return "t"; 384 case AsLongDouble: 385 return "L"; 386 case None: 387 return ""; 388 } 389 return NULL; 390} 391 392//===----------------------------------------------------------------------===// 393// Methods on OptionalAmount. 394//===----------------------------------------------------------------------===// 395 396void OptionalAmount::toString(raw_ostream &os) const { 397 switch (hs) { 398 case Invalid: 399 case NotSpecified: 400 return; 401 case Arg: 402 if (UsesDotPrefix) 403 os << "."; 404 if (usesPositionalArg()) 405 os << "*" << getPositionalArgIndex() << "$"; 406 else 407 os << "*"; 408 break; 409 case Constant: 410 if (UsesDotPrefix) 411 os << "."; 412 os << amt; 413 break; 414 } 415} 416 417//===----------------------------------------------------------------------===// 418// Methods on ConversionSpecifier. 419//===----------------------------------------------------------------------===// 420 421bool FormatSpecifier::hasValidLengthModifier() const { 422 switch (LM.getKind()) { 423 case LengthModifier::None: 424 return true; 425 426 // Handle most integer flags 427 case LengthModifier::AsChar: 428 case LengthModifier::AsShort: 429 case LengthModifier::AsLongLong: 430 case LengthModifier::AsIntMax: 431 case LengthModifier::AsSizeT: 432 case LengthModifier::AsPtrDiff: 433 switch (CS.getKind()) { 434 case ConversionSpecifier::dArg: 435 case ConversionSpecifier::iArg: 436 case ConversionSpecifier::oArg: 437 case ConversionSpecifier::uArg: 438 case ConversionSpecifier::xArg: 439 case ConversionSpecifier::XArg: 440 case ConversionSpecifier::nArg: 441 return true; 442 default: 443 return false; 444 } 445 446 // Handle 'l' flag 447 case LengthModifier::AsLong: 448 switch (CS.getKind()) { 449 case ConversionSpecifier::dArg: 450 case ConversionSpecifier::iArg: 451 case ConversionSpecifier::oArg: 452 case ConversionSpecifier::uArg: 453 case ConversionSpecifier::xArg: 454 case ConversionSpecifier::XArg: 455 case ConversionSpecifier::aArg: 456 case ConversionSpecifier::AArg: 457 case ConversionSpecifier::fArg: 458 case ConversionSpecifier::FArg: 459 case ConversionSpecifier::eArg: 460 case ConversionSpecifier::EArg: 461 case ConversionSpecifier::gArg: 462 case ConversionSpecifier::GArg: 463 case ConversionSpecifier::nArg: 464 case ConversionSpecifier::cArg: 465 case ConversionSpecifier::sArg: 466 return true; 467 default: 468 return false; 469 } 470 471 case LengthModifier::AsLongDouble: 472 switch (CS.getKind()) { 473 case ConversionSpecifier::aArg: 474 case ConversionSpecifier::AArg: 475 case ConversionSpecifier::fArg: 476 case ConversionSpecifier::FArg: 477 case ConversionSpecifier::eArg: 478 case ConversionSpecifier::EArg: 479 case ConversionSpecifier::gArg: 480 case ConversionSpecifier::GArg: 481 return true; 482 default: 483 return false; 484 } 485 } 486 return false; 487} 488 489 490