// PrintfFormatString.cpp revision 4dcb18ff9d92c66c78077ac5cae4b83af37292e4
1c1b5d092a0f89db5356ae79d8cc4213118f230ddChris Lattner//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==// 2f976c856fcc5055f3fc7d9f070d72c2d027c1d9dMisha Brukman// 3b576c94c15af9a440f69d9d03c2afead7971118cJohn Criswell// The LLVM Compiler Infrastructure 4b576c94c15af9a440f69d9d03c2afead7971118cJohn Criswell// 54ee451de366474b9c228b4e5fa573795a715216dChris Lattner// This file is distributed under the University of Illinois Open Source 64ee451de366474b9c228b4e5fa573795a715216dChris Lattner// License. See LICENSE.TXT for details. 7f976c856fcc5055f3fc7d9f070d72c2d027c1d9dMisha Brukman// 8b576c94c15af9a440f69d9d03c2afead7971118cJohn Criswell//===----------------------------------------------------------------------===// 9c1b5d092a0f89db5356ae79d8cc4213118f230ddChris Lattner// 1013a253aae77594bb3fd804417e4aa3d4ffe0229bChris Lattner// Handling of format string in printf and friends. The structure of format 11c1b5d092a0f89db5356ae79d8cc4213118f230ddChris Lattner// strings for fprintf() are described in C99 7.19.6.1. 12c1b5d092a0f89db5356ae79d8cc4213118f230ddChris Lattner// 13c1b5d092a0f89db5356ae79d8cc4213118f230ddChris Lattner//===----------------------------------------------------------------------===// 1413a253aae77594bb3fd804417e4aa3d4ffe0229bChris Lattner 15551ccae044b0ff658fe629dd67edd5ffe75d10e8Reid Spencer#include "clang/Analysis/Analyses/PrintfFormatString.h" 161f6efa3996dd1929fbc129203ce5009b620e6969Michael J. Spencer 17d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruthusing clang::analyze_printf::FormatSpecifier; 181f6efa3996dd1929fbc129203ce5009b620e6969Michael J. 
Spencerusing clang::analyze_printf::OptionalAmount; 19d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruthusing namespace clang; 204b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth 212cdd21c2e4d855500dfb53f77aa74da53ccf9de6Chris Lattnernamespace { 22d0fde30ce850b78371fd1386338350591f9ff494Brian Gaekeclass FormatSpecifierResult { 23e7d3af54737b7787c92ea03588dd4301b29d0899Julien Lerouge FormatSpecifier FS; 2461ffc0c7fd9a23dc423305f144948fbae9956bf6Owen Anderson const char *Start; 254b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth bool Stop; 2613a253aae77594bb3fd804417e4aa3d4ffe0229bChris Lattnerpublic: 27a9d1f2c559ef4b2549e29288fe6944e68913ba0fOwen Anderson FormatSpecifierResult(bool stop = false) 2817aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner : Start(0), Stop(stop) {} 2917aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner FormatSpecifierResult(const char *start, 30d9ea85ab01fb0f2929ed50223d3758dceea8bcbdChris Lattner const FormatSpecifier &fs) 31d9ea85ab01fb0f2929ed50223d3758dceea8bcbdChris Lattner : FS(fs), Start(start), Stop(false) {} 3217aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner 3317aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner 3417aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner const char *getStart() const { return Start; } 35c1b5d092a0f89db5356ae79d8cc4213118f230ddChris Lattner bool shouldStop() const { return Stop; } 364b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth bool hasValue() const { return Start != 0; } 3717aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner const FormatSpecifier &getValue() const { 38a9d1f2c559ef4b2549e29288fe6944e68913ba0fOwen Anderson assert(hasValue()); 39e7d3af54737b7787c92ea03588dd4301b29d0899Julien Lerouge return FS; 404b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth } 414b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth const FormatSpecifier &getValue() { return FS; } 4217aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattner}; 
43a9d1f2c559ef4b2549e29288fe6944e68913ba0fOwen Anderson} // end anonymous namespace 44e7d3af54737b7787c92ea03588dd4301b29d0899Julien Lerouge 45e7d3af54737b7787c92ea03588dd4301b29d0899Julien Lerougetemplate <typename T> 4617aa9d3f53d0afe6a5188fd5f76f0738cb7e6a07Chris Lattnerclass UpdateOnReturn { 474b934766bde8989b4eeb3f4a1cc222327e262379Andrew Lenharth T &ValueToUpdate; 48 const T &ValueToCopy; 49public: 50 UpdateOnReturn(T &valueToUpdate, const T &valueToCopy) 51 : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {} 52 53 ~UpdateOnReturn() { 54 ValueToUpdate = ValueToCopy; 55 } 56}; 57 58static OptionalAmount ParseAmount(const char *&Beg, const char *E) { 59 const char *I = Beg; 60 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 61 62 bool foundDigits = false; 63 unsigned accumulator = 0; 64 65 for ( ; I != E; ++I) { 66 char c = *I; 67 if (c >= '0' && c <= '9') { 68 foundDigits = true; 69 accumulator += (accumulator * 10) + (c - '0'); 70 continue; 71 } 72 73 if (foundDigits) 74 return OptionalAmount(accumulator, Beg); 75 76 if (c == '*') { 77 ++I; 78 return OptionalAmount(OptionalAmount::Arg, Beg); 79 } 80 81 break; 82 } 83 84 return OptionalAmount(); 85} 86 87static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, 88 const char *&Beg, const char *E) { 89 90 using namespace clang::analyze_printf; 91 92 const char *I = Beg; 93 const char *Start = 0; 94 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 95 96 // Look for a '%' character that indicates the start of a format specifier. 97 for ( ; I != E ; ++I) { 98 char c = *I; 99 if (c == '\0') { 100 // Detect spurious null characters, which are likely errors. 101 H.HandleNullChar(I); 102 return true; 103 } 104 if (c == '%') { 105 Start = I++; // Record the start of the format specifier. 106 break; 107 } 108 } 109 110 // No format specifier found? 111 if (!Start) 112 return false; 113 114 if (I == E) { 115 // No more characters left? 
116 H.HandleIncompleteFormatSpecifier(Start, E - Start); 117 return true; 118 } 119 120 FormatSpecifier FS; 121 122 // Look for flags (if any). 123 bool hasMore = true; 124 for ( ; I != E; ++I) { 125 switch (*I) { 126 default: hasMore = false; break; 127 case '-': FS.setIsLeftJustified(); break; 128 case '+': FS.setHasPlusPrefix(); break; 129 case ' ': FS.setHasSpacePrefix(); break; 130 case '#': FS.setHasAlternativeForm(); break; 131 case '0': FS.setHasLeadingZeros(); break; 132 } 133 if (!hasMore) 134 break; 135 } 136 137 if (I == E) { 138 // No more characters left? 139 H.HandleIncompleteFormatSpecifier(Start, E - Start); 140 return true; 141 } 142 143 // Look for the field width (if any). 144 FS.setFieldWidth(ParseAmount(I, E)); 145 146 if (I == E) { 147 // No more characters left? 148 H.HandleIncompleteFormatSpecifier(Start, E - Start); 149 return true; 150 } 151 152 // Look for the precision (if any). 153 if (*I == '.') { 154 ++I; 155 if (I == E) { 156 H.HandleIncompleteFormatSpecifier(Start, E - Start); 157 return true; 158 } 159 160 FS.setPrecision(ParseAmount(I, E)); 161 162 if (I == E) { 163 // No more characters left? 164 H.HandleIncompleteFormatSpecifier(Start, E - Start); 165 return true; 166 } 167 } 168 169 // Look for the length modifier. 170 LengthModifier lm = None; 171 switch (*I) { 172 default: 173 break; 174 case 'h': 175 ++I; 176 lm = (I != E && *I == 'h') ? ++I, AsChar : AsShort; 177 break; 178 case 'l': 179 ++I; 180 lm = (I != E && *I == 'l') ? ++I, AsLongLong : AsLong; 181 break; 182 case 'j': lm = AsIntMax; ++I; break; 183 case 'z': lm = AsSizeT; ++I; break; 184 case 't': lm = AsPtrDiff; ++I; break; 185 case 'L': lm = AsLongDouble; ++I; break; 186 } 187 FS.setLengthModifier(lm); 188 189 if (I == E) { 190 // No more characters left? 191 H.HandleIncompleteFormatSpecifier(Start, E - Start); 192 return true; 193 } 194 195 if (*I == '\0') { 196 // Detect spurious null characters, which are likely errors. 
197 H.HandleNullChar(I); 198 return true; 199 } 200 201 // Finally, look for the conversion specifier. 202 const char *conversionPosition = I++; 203 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 204 switch (*conversionPosition) { 205 default: 206 break; 207 // C99: 7.19.6.1 (section 8). 208 case 'd': k = ConversionSpecifier::dArg; break; 209 case 'i': k = ConversionSpecifier::iArg; break; 210 case 'o': k = ConversionSpecifier::oArg; break; 211 case 'u': k = ConversionSpecifier::uArg; break; 212 case 'x': k = ConversionSpecifier::xArg; break; 213 case 'X': k = ConversionSpecifier::XArg; break; 214 case 'f': k = ConversionSpecifier::fArg; break; 215 case 'F': k = ConversionSpecifier::FArg; break; 216 case 'e': k = ConversionSpecifier::eArg; break; 217 case 'E': k = ConversionSpecifier::EArg; break; 218 case 'g': k = ConversionSpecifier::gArg; break; 219 case 'G': k = ConversionSpecifier::GArg; break; 220 case 'a': k = ConversionSpecifier::aArg; break; 221 case 'A': k = ConversionSpecifier::AArg; break; 222 case 'c': k = ConversionSpecifier::IntAsCharArg; break; 223 case 's': k = ConversionSpecifier::CStrArg; break; 224 case 'p': k = ConversionSpecifier::VoidPtrArg; break; 225 case 'n': k = ConversionSpecifier::OutIntPtrArg; break; 226 case '%': k = ConversionSpecifier::PercentArg; break; 227 // Objective-C. 228 case '@': k = ConversionSpecifier::ObjCObjArg; break; 229 // Glibc specific. 230 case 'm': k = ConversionSpecifier::PrintErrno; break; 231 } 232 FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k)); 233 234 if (k == ConversionSpecifier::InvalidSpecifier) { 235 H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); 236 return false; // Keep processing format specifiers. 237 } 238 return FormatSpecifierResult(Start, FS); 239} 240 241bool clang::ParseFormatString(FormatStringHandler &H, 242 const char *I, const char *E) { 243 // Keep looking for a format specifier until we have exhausted the string. 
244 while (I != E) { 245 const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E); 246 // Did a fail-stop error of any kind occur when parsing the specifier? 247 // If so, don't do any more processing. 248 if (FSR.shouldStop()) 249 return true;; 250 // Did we exhaust the string or encounter an error that 251 // we can recover from? 252 if (!FSR.hasValue()) 253 continue; 254 // We have a format specifier. Pass it to the callback. 255 if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), 256 I - FSR.getStart())) 257 return true; 258 } 259 assert(I == E && "Format string not exhausted"); 260 return false; 261} 262 263FormatStringHandler::~FormatStringHandler() {} 264