PrintfFormatString.cpp revision f911eba72e6d7275e5cfdb79ab23fb2aa9cc01d0
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/PrintfFormatString.h" 16#include "clang/AST/ASTContext.h" 17 18using clang::analyze_printf::FormatSpecifier; 19using clang::analyze_printf::OptionalAmount; 20using clang::analyze_printf::ArgTypeResult; 21using namespace clang; 22 23namespace { 24class FormatSpecifierResult { 25 FormatSpecifier FS; 26 const char *Start; 27 bool Stop; 28public: 29 FormatSpecifierResult(bool stop = false) 30 : Start(0), Stop(stop) {} 31 FormatSpecifierResult(const char *start, 32 const FormatSpecifier &fs) 33 : FS(fs), Start(start), Stop(false) {} 34 35 36 const char *getStart() const { return Start; } 37 bool shouldStop() const { return Stop; } 38 bool hasValue() const { return Start != 0; } 39 const FormatSpecifier &getValue() const { 40 assert(hasValue()); 41 return FS; 42 } 43 const FormatSpecifier &getValue() { return FS; } 44}; 45} // end anonymous namespace 46 47template <typename T> 48class UpdateOnReturn { 49 T &ValueToUpdate; 50 const T &ValueToCopy; 51public: 52 UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) 53 : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} 54 55 ~UpdateOnReturn() { 56 ValueToUpdate = ValueToCopy; 57 } 58}; 59 60static OptionalAmount ParseAmount(const char *&Beg, const char *E) { 61 const char *I = Beg; 62 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 63 64 bool foundDigits = false; 65 unsigned accumulator = 0; 66 67 for ( ; I != E; ++I) { 68 char c = *I; 69 if (c >= '0' && c <= '9') { 70 foundDigits = true; 71 accumulator += (accumulator * 10) + (c - '0'); 72 continue; 73 } 74 75 if (foundDigits) 76 return OptionalAmount(accumulator, Beg); 77 78 if (c == '*') { 79 ++I; 80 return OptionalAmount(OptionalAmount::Arg, Beg); 81 } 82 83 break; 84 } 85 86 return OptionalAmount(); 87} 88 89static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, 90 const char *&Beg, const char *E) { 91 92 using namespace clang::analyze_printf; 93 94 const char *I = Beg; 95 const char *Start = 0; 96 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 97 98 // Look for a '%' character that indicates the start of a format specifier. 99 for ( ; I != E ; ++I) { 100 char c = *I; 101 if (c == '\0') { 102 // Detect spurious null characters, which are likely errors. 103 H.HandleNullChar(I); 104 return true; 105 } 106 if (c == '%') { 107 Start = I++; // Record the start of the format specifier. 108 break; 109 } 110 } 111 112 // No format specifier found? 113 if (!Start) 114 return false; 115 116 if (I == E) { 117 // No more characters left? 118 H.HandleIncompleteFormatSpecifier(Start, E - Start); 119 return true; 120 } 121 122 FormatSpecifier FS; 123 124 // Look for flags (if any). 125 bool hasMore = true; 126 for ( ; I != E; ++I) { 127 switch (*I) { 128 default: hasMore = false; break; 129 case '-': FS.setIsLeftJustified(); break; 130 case '+': FS.setHasPlusPrefix(); break; 131 case ' ': FS.setHasSpacePrefix(); break; 132 case '#': FS.setHasAlternativeForm(); break; 133 case '0': FS.setHasLeadingZeros(); break; 134 } 135 if (!hasMore) 136 break; 137 } 138 139 if (I == E) { 140 // No more characters left? 141 H.HandleIncompleteFormatSpecifier(Start, E - Start); 142 return true; 143 } 144 145 // Look for the field width (if any). 146 FS.setFieldWidth(ParseAmount(I, E)); 147 148 if (I == E) { 149 // No more characters left? 150 H.HandleIncompleteFormatSpecifier(Start, E - Start); 151 return true; 152 } 153 154 // Look for the precision (if any). 155 if (*I == '.') { 156 ++I; 157 if (I == E) { 158 H.HandleIncompleteFormatSpecifier(Start, E - Start); 159 return true; 160 } 161 162 FS.setPrecision(ParseAmount(I, E)); 163 164 if (I == E) { 165 // No more characters left? 166 H.HandleIncompleteFormatSpecifier(Start, E - Start); 167 return true; 168 } 169 } 170 171 // Look for the length modifier. 172 LengthModifier lm = None; 173 switch (*I) { 174 default: 175 break; 176 case 'h': 177 ++I; 178 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; 179 break; 180 case 'l': 181 ++I; 182 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; 183 break; 184 case 'j': lm = AsIntMax; ++I; break; 185 case 'z': lm = AsSizeT; ++I; break; 186 case 't': lm = AsPtrDiff; ++I; break; 187 case 'L': lm = AsLongDouble; ++I; break; 188 case 'q': lm = AsLongLong; ++I; break; 189 } 190 FS.setLengthModifier(lm); 191 192 if (I == E) { 193 // No more characters left? 194 H.HandleIncompleteFormatSpecifier(Start, E - Start); 195 return true; 196 } 197 198 if (*I == '\0') { 199 // Detect spurious null characters, which are likely errors. 200 H.HandleNullChar(I); 201 return true; 202 } 203 204 // Finally, look for the conversion specifier. 205 const char *conversionPosition = I++; 206 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 207 switch (*conversionPosition) { 208 default: 209 break; 210 // C99: 7.19.6.1 (section 8). 211 case 'd': k = ConversionSpecifier::dArg; break; 212 case 'i': k = ConversionSpecifier::iArg; break; 213 case 'o': k = ConversionSpecifier::oArg; break; 214 case 'u': k = ConversionSpecifier::uArg; break; 215 case 'x': k = ConversionSpecifier::xArg; break; 216 case 'X': k = ConversionSpecifier::XArg; break; 217 case 'f': k = ConversionSpecifier::fArg; break; 218 case 'F': k = ConversionSpecifier::FArg; break; 219 case 'e': k = ConversionSpecifier::eArg; break; 220 case 'E': k = ConversionSpecifier::EArg; break; 221 case 'g': k = ConversionSpecifier::gArg; break; 222 case 'G': k = ConversionSpecifier::GArg; break; 223 case 'a': k = ConversionSpecifier::aArg; break; 224 case 'A': k = ConversionSpecifier::AArg; break; 225 case 'c': k = ConversionSpecifier::IntAsCharArg; break; 226 case 's': k = ConversionSpecifier::CStrArg; break; 227 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 228 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 229 case '%': k = ConversionSpecifier::PercentArg; break; 230 // Objective-C. 231 case '@': k = ConversionSpecifier::ObjCObjArg; break; 232 // Glibc specific. 233 case 'm': k = ConversionSpecifier::PrintErrno; break; 234 } 235 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k)); 236 237 if (k == ConversionSpecifier::InvalidSpecifier) { 238 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); 239 return false; // Keep processing format specifiers. 240 } 241 return FormatSpecifierResult(Start, FS); 242} 243 244bool clang::ParseFormatString(FormatStringHandler &H, 245 const char *I, const char *E) { 246 // Keep looking for a format specifier until we have exhausted the string. 247 while (I != E) { 248 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E); 249 // Did a fail-stop error of any kind occur when parsing the specifier? 250 // If so, don't do any more processing. 251 if (FSR.shouldStop()) 252 return true;; 253 // Did we exhaust the string or encounter an error that 254 // we can recover from? 255 if (!FSR.hasValue()) 256 continue; 257 // We have a format specifier. Pass it to the callback. 258 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), 259 I - FSR.getStart())) 260 return true; 261 } 262 assert(I == E && "Format string not exhausted"); 263 return false; 264} 265 266FormatStringHandler::~FormatStringHandler() {} 267 268//===----------------------------------------------------------------------===// 269// Methods on FormatSpecifier. 270//===----------------------------------------------------------------------===// 271 272ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { 273 if (!CS.consumesDataArgument()) 274 return ArgTypeResult::Invalid(); 275 276 if (CS.isIntArg()) 277 switch (LM) { 278 case AsLongDouble: 279 return ArgTypeResult::Invalid(); 280 case None: return Ctx.IntTy; 281 case AsChar: return Ctx.SignedCharTy; 282 case AsShort: return Ctx.ShortTy; 283 case AsLong: return Ctx.LongTy; 284 case AsLongLong: return Ctx.LongLongTy; 285 case AsIntMax: 286 // FIXME: Return unknown for now. 287 return ArgTypeResult(); 288 case AsSizeT: return Ctx.getSizeType(); 289 case AsPtrDiff: return Ctx.getPointerDiffType(); 290 } 291 292 if (CS.isUIntArg()) 293 switch (LM) { 294 case AsLongDouble: 295 return ArgTypeResult::Invalid(); 296 case None: return Ctx.UnsignedIntTy; 297 case AsChar: return Ctx.UnsignedCharTy; 298 case AsShort: return Ctx.UnsignedShortTy; 299 case AsLong: return Ctx.UnsignedLongTy; 300 case AsLongLong: return Ctx.UnsignedLongLongTy; 301 case AsIntMax: 302 // FIXME: Return unknown for now. 303 return ArgTypeResult(); 304 case AsSizeT: 305 // FIXME: How to get the corresponding unsigned 306 // version of size_t? 307 return ArgTypeResult(); 308 case AsPtrDiff: 309 // FIXME: How to get the corresponding unsigned 310 // version of ptrdiff_t? 311 return ArgTypeResult(); 312 } 313 314 if (CS.isDoubleArg()) { 315 if (LM == AsLongDouble) 316 return Ctx.LongDoubleTy; 317 return Ctx.DoubleTy; 318 } 319 320 // FIXME: Handle other cases. 321 return ArgTypeResult(); 322} 323 324