PrintfFormatString.cpp revision 2e8f8dc12c16b7a499d4898ad55cafe1c71b1e25
1//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in printf and friends. The structure of format 11// strings for fprintf() are described in C99 7.19.6.1. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/PrintfFormatString.h" 16#include "clang/AST/ASTContext.h" 17 18using clang::analyze_printf::FormatSpecifier; 19using clang::analyze_printf::OptionalAmount; 20using clang::analyze_printf::ArgTypeResult; 21using clang::analyze_printf::FormatStringHandler; 22using namespace clang; 23 24namespace { 25class FormatSpecifierResult { 26 FormatSpecifier FS; 27 const char *Start; 28 bool Stop; 29public: 30 FormatSpecifierResult(bool stop = false) 31 : Start(0), Stop(stop) {} 32 FormatSpecifierResult(const char *start, 33 const FormatSpecifier &fs) 34 : FS(fs), Start(start), Stop(false) {} 35 36 37 const char *getStart() const { return Start; } 38 bool shouldStop() const { return Stop; } 39 bool hasValue() const { return Start != 0; } 40 const FormatSpecifier &getValue() const { 41 assert(hasValue()); 42 return FS; 43 } 44 const FormatSpecifier &getValue() { return FS; } 45}; 46} // end anonymous namespace 47 48template <typename T> 49class UpdateOnReturn { 50 T &ValueToUpdate; 51 const T &ValueToCopy; 52public: 53 UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) 54 : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} 55 56 ~UpdateOnReturn() { 57 ValueToUpdate = ValueToCopy; 58 } 59}; 60 61static OptionalAmount ParseAmount(const char *&Beg, const char *E) { 62 const char *I = Beg; 63 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 64 65 bool foundDigits = false; 66 unsigned accumulator = 0; 67 68 for ( ; I != E; ++I) { 69 char c = *I; 70 if (c >= '0' && c <= '9') { 71 foundDigits = true; 72 accumulator += (accumulator * 10) + (c - '0'); 73 continue; 74 } 75 76 if (foundDigits) 77 return OptionalAmount(accumulator, Beg); 78 79 if (c == '*') { 80 ++I; 81 return OptionalAmount(OptionalAmount::Arg, Beg); 82 } 83 84 break; 85 } 86 87 return OptionalAmount(); 88} 89 90static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, 91 const char *&Beg, 92 const char *E) { 93 94 using namespace clang::analyze_printf; 95 96 const char *I = Beg; 97 const char *Start = 0; 98 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 99 100 // Look for a '%' character that indicates the start of a format specifier. 101 for ( ; I != E ; ++I) { 102 char c = *I; 103 if (c == '\0') { 104 // Detect spurious null characters, which are likely errors. 105 H.HandleNullChar(I); 106 return true; 107 } 108 if (c == '%') { 109 Start = I++; // Record the start of the format specifier. 110 break; 111 } 112 } 113 114 // No format specifier found? 115 if (!Start) 116 return false; 117 118 if (I == E) { 119 // No more characters left? 120 H.HandleIncompleteFormatSpecifier(Start, E - Start); 121 return true; 122 } 123 124 FormatSpecifier FS; 125 126 // Look for flags (if any). 127 bool hasMore = true; 128 for ( ; I != E; ++I) { 129 switch (*I) { 130 default: hasMore = false; break; 131 case '-': FS.setIsLeftJustified(); break; 132 case '+': FS.setHasPlusPrefix(); break; 133 case ' ': FS.setHasSpacePrefix(); break; 134 case '#': FS.setHasAlternativeForm(); break; 135 case '0': FS.setHasLeadingZeros(); break; 136 } 137 if (!hasMore) 138 break; 139 } 140 141 if (I == E) { 142 // No more characters left? 143 H.HandleIncompleteFormatSpecifier(Start, E - Start); 144 return true; 145 } 146 147 // Look for the field width (if any). 148 FS.setFieldWidth(ParseAmount(I, E)); 149 150 if (I == E) { 151 // No more characters left? 152 H.HandleIncompleteFormatSpecifier(Start, E - Start); 153 return true; 154 } 155 156 // Look for the precision (if any). 157 if (*I == '.') { 158 ++I; 159 if (I == E) { 160 H.HandleIncompleteFormatSpecifier(Start, E - Start); 161 return true; 162 } 163 164 FS.setPrecision(ParseAmount(I, E)); 165 166 if (I == E) { 167 // No more characters left? 168 H.HandleIncompleteFormatSpecifier(Start, E - Start); 169 return true; 170 } 171 } 172 173 // Look for the length modifier. 174 LengthModifier lm = None; 175 switch (*I) { 176 default: 177 break; 178 case 'h': 179 ++I; 180 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; 181 break; 182 case 'l': 183 ++I; 184 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; 185 break; 186 case 'j': lm = AsIntMax; ++I; break; 187 case 'z': lm = AsSizeT; ++I; break; 188 case 't': lm = AsPtrDiff; ++I; break; 189 case 'L': lm = AsLongDouble; ++I; break; 190 case 'q': lm = AsLongLong; ++I; break; 191 } 192 FS.setLengthModifier(lm); 193 194 if (I == E) { 195 // No more characters left? 196 H.HandleIncompleteFormatSpecifier(Start, E - Start); 197 return true; 198 } 199 200 if (*I == '\0') { 201 // Detect spurious null characters, which are likely errors. 202 H.HandleNullChar(I); 203 return true; 204 } 205 206 // Finally, look for the conversion specifier. 207 const char *conversionPosition = I++; 208 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 209 switch (*conversionPosition) { 210 default: 211 break; 212 // C99: 7.19.6.1 (section 8). 213 case 'd': k = ConversionSpecifier::dArg; break; 214 case 'i': k = ConversionSpecifier::iArg; break; 215 case 'o': k = ConversionSpecifier::oArg; break; 216 case 'u': k = ConversionSpecifier::uArg; break; 217 case 'x': k = ConversionSpecifier::xArg; break; 218 case 'X': k = ConversionSpecifier::XArg; break; 219 case 'f': k = ConversionSpecifier::fArg; break; 220 case 'F': k = ConversionSpecifier::FArg; break; 221 case 'e': k = ConversionSpecifier::eArg; break; 222 case 'E': k = ConversionSpecifier::EArg; break; 223 case 'g': k = ConversionSpecifier::gArg; break; 224 case 'G': k = ConversionSpecifier::GArg; break; 225 case 'a': k = ConversionSpecifier::aArg; break; 226 case 'A': k = ConversionSpecifier::AArg; break; 227 case 'c': k = ConversionSpecifier::IntAsCharArg; break; 228 case 's': k = ConversionSpecifier::CStrArg; break; 229 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 230 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 231 case '%': k = ConversionSpecifier::PercentArg; break; 232 // Objective-C. 233 case '@': k = ConversionSpecifier::ObjCObjArg; break; 234 // Glibc specific. 235 case 'm': k = ConversionSpecifier::PrintErrno; break; 236 } 237 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k)); 238 239 if (k == ConversionSpecifier::InvalidSpecifier) { 240 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); 241 return false; // Keep processing format specifiers. 242 } 243 return FormatSpecifierResult(Start, FS); 244} 245 246bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, 247 const char *I, const char *E) { 248 // Keep looking for a format specifier until we have exhausted the string. 249 while (I != E) { 250 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E); 251 // Did a fail-stop error of any kind occur when parsing the specifier? 252 // If so, don't do any more processing. 253 if (FSR.shouldStop()) 254 return true;; 255 // Did we exhaust the string or encounter an error that 256 // we can recover from? 257 if (!FSR.hasValue()) 258 continue; 259 // We have a format specifier. Pass it to the callback. 260 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), 261 I - FSR.getStart())) 262 return true; 263 } 264 assert(I == E && "Format string not exhausted"); 265 return false; 266} 267 268FormatStringHandler::~FormatStringHandler() {} 269 270//===----------------------------------------------------------------------===// 271// Methods on FormatSpecifier. 272//===----------------------------------------------------------------------===// 273 274ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { 275 if (!CS.consumesDataArgument()) 276 return ArgTypeResult::Invalid(); 277 278 if (CS.isIntArg()) 279 switch (LM) { 280 case AsLongDouble: 281 return ArgTypeResult::Invalid(); 282 case None: return Ctx.IntTy; 283 case AsChar: return Ctx.SignedCharTy; 284 case AsShort: return Ctx.ShortTy; 285 case AsLong: return Ctx.LongTy; 286 case AsLongLong: return Ctx.LongLongTy; 287 case AsIntMax: 288 // FIXME: Return unknown for now. 289 return ArgTypeResult(); 290 case AsSizeT: return Ctx.getSizeType(); 291 case AsPtrDiff: return Ctx.getPointerDiffType(); 292 } 293 294 if (CS.isUIntArg()) 295 switch (LM) { 296 case AsLongDouble: 297 return ArgTypeResult::Invalid(); 298 case None: return Ctx.UnsignedIntTy; 299 case AsChar: return Ctx.UnsignedCharTy; 300 case AsShort: return Ctx.UnsignedShortTy; 301 case AsLong: return Ctx.UnsignedLongTy; 302 case AsLongLong: return Ctx.UnsignedLongLongTy; 303 case AsIntMax: 304 // FIXME: Return unknown for now. 305 return ArgTypeResult(); 306 case AsSizeT: 307 // FIXME: How to get the corresponding unsigned 308 // version of size_t? 309 return ArgTypeResult(); 310 case AsPtrDiff: 311 // FIXME: How to get the corresponding unsigned 312 // version of ptrdiff_t? 313 return ArgTypeResult(); 314 } 315 316 if (CS.isDoubleArg()) { 317 if (LM == AsLongDouble) 318 return Ctx.LongDoubleTy; 319 return Ctx.DoubleTy; 320 } 321 322 // FIXME: Handle other cases. 323 return ArgTypeResult(); 324} 325 326