ScanfFormatString.cpp revision be86ecc82772546d9e404a32002e446ac0c725d9
1//= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Handling of format string in scanf and friends. The structure of format 11// strings for fscanf() are described in C99 7.19.6.2. 12// 13//===----------------------------------------------------------------------===// 14 15#include "clang/Analysis/Analyses/FormatString.h" 16#include "FormatStringParsing.h" 17 18using clang::analyze_format_string::ArgTypeResult; 19using clang::analyze_format_string::FormatStringHandler; 20using clang::analyze_format_string::LengthModifier; 21using clang::analyze_format_string::OptionalAmount; 22using clang::analyze_scanf::ConversionSpecifier; 23using clang::analyze_scanf::ScanfSpecifier; 24 25typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 26 ScanfSpecifierResult; 27 28static bool ParseScanList(FormatStringHandler &H, 29 ConversionSpecifier &CS, 30 const char *&Beg, const char *E) { 31 const char *I = Beg; 32 const char *start = I - 1; 33 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 34 35 // No more characters? 36 if (I == E) { 37 H.HandleIncompleteScanList(start, I); 38 return true; 39 } 40 41 // Special case: ']' is the first character. 42 if (*I == ']') { 43 if (++I == E) { 44 H.HandleIncompleteScanList(start, I - 1); 45 return true; 46 } 47 } 48 49 // Look for a ']' character which denotes the end of the scan list. 50 while (*I != ']') { 51 if (++I == E) { 52 H.HandleIncompleteScanList(start, I - 1); 53 return true; 54 } 55 } 56 57 CS.setEndScanList(I); 58 return false; 59} 60 61// FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 62// We can possibly refactor. 63static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 64 const char *&Beg, 65 const char *E, 66 unsigned &argIndex) { 67 68 using namespace clang::analyze_scanf; 69 const char *I = Beg; 70 const char *Start = 0; 71 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 72 73 // Look for a '%' character that indicates the start of a format specifier. 74 for ( ; I != E ; ++I) { 75 char c = *I; 76 if (c == '\0') { 77 // Detect spurious null characters, which are likely errors. 78 H.HandleNullChar(I); 79 return true; 80 } 81 if (c == '%') { 82 Start = I++; // Record the start of the format specifier. 83 break; 84 } 85 } 86 87 // No format specifier found? 88 if (!Start) 89 return false; 90 91 if (I == E) { 92 // No more characters left? 93 H.HandleIncompleteSpecifier(Start, E - Start); 94 return true; 95 } 96 97 ScanfSpecifier FS; 98 if (ParseArgPosition(H, FS, Start, I, E)) 99 return true; 100 101 if (I == E) { 102 // No more characters left? 103 H.HandleIncompleteSpecifier(Start, E - Start); 104 return true; 105 } 106 107 // Look for '*' flag if it is present. 108 if (*I == '*') { 109 FS.setSuppressAssignment(I); 110 if (++I == E) { 111 H.HandleIncompleteSpecifier(Start, E - Start); 112 return true; 113 } 114 } 115 116 // Look for the field width (if any). Unlike printf, this is either 117 // a fixed integer or isn't present. 118 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 119 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 120 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 121 FS.setFieldWidth(Amt); 122 123 if (I == E) { 124 // No more characters left? 125 H.HandleIncompleteSpecifier(Start, E - Start); 126 return true; 127 } 128 } 129 130 // Look for the length modifier. 131 if (ParseLengthModifier(FS, I, E) && I == E) { 132 // No more characters left? 133 H.HandleIncompleteSpecifier(Start, E - Start); 134 return true; 135 } 136 137 // Detect spurious null characters, which are likely errors. 138 if (*I == '\0') { 139 H.HandleNullChar(I); 140 return true; 141 } 142 143 // Finally, look for the conversion specifier. 144 const char *conversionPosition = I++; 145 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier; 146 switch (*conversionPosition) { 147 default: 148 break; 149 case '%': k = ConversionSpecifier::PercentArg; break; 150 case 'A': k = ConversionSpecifier::AArg; break; 151 case 'E': k = ConversionSpecifier::EArg; break; 152 case 'F': k = ConversionSpecifier::FArg; break; 153 case 'G': k = ConversionSpecifier::GArg; break; 154 case 'X': k = ConversionSpecifier::XArg; break; 155 case 'a': k = ConversionSpecifier::aArg; break; 156 case 'd': k = ConversionSpecifier::dArg; break; 157 case 'e': k = ConversionSpecifier::eArg; break; 158 case 'f': k = ConversionSpecifier::fArg; break; 159 case 'g': k = ConversionSpecifier::gArg; break; 160 case 'i': k = ConversionSpecifier::iArg; break; 161 case 'n': k = ConversionSpecifier::ConsumedSoFarArg; break; 162 case 'c': k = ConversionSpecifier::cArg; break; 163 case 'C': k = ConversionSpecifier::CArg; break; 164 case 'S': k = ConversionSpecifier::SArg; break; 165 case '[': k = ConversionSpecifier::ScanListArg; break; 166 case 'u': k = ConversionSpecifier::uArg; break; 167 case 'x': k = ConversionSpecifier::xArg; break; 168 case 'o': k = ConversionSpecifier::oArg; break; 169 case 's': k = ConversionSpecifier::sArg; break; 170 } 171 ConversionSpecifier CS(conversionPosition, k); 172 if (k == ConversionSpecifier::ScanListArg) { 173 if (!ParseScanList(H, CS, I, E)) 174 return true; 175 } 176 FS.setConversionSpecifier(CS); 177 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 178 && !FS.usesPositionalArg()) 179 FS.setArgIndex(argIndex++); 180 181 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 182 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 183 184 if (k == ConversionSpecifier::InvalidSpecifier) { 185 // Assume the conversion takes one argument. 186 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 187 } 188 return ScanfSpecifierResult(Start, FS); 189} 190 191bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 192 const char *I, 193 const char *E) { 194 195 unsigned argIndex = 0; 196 197 // Keep looking for a format specifier until we have exhausted the string. 198 while (I != E) { 199 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex); 200 // Did a fail-stop error of any kind occur when parsing the specifier? 201 // If so, don't do any more processing. 202 if (FSR.shouldStop()) 203 return true;; 204 // Did we exhaust the string or encounter an error that 205 // we can recover from? 206 if (!FSR.hasValue()) 207 continue; 208 // We have a format specifier. Pass it to the callback. 209 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 210 I - FSR.getStart())) { 211 return true; 212 } 213 } 214 assert(I == E && "Format string not exhausted"); 215 return false; 216} 217 218 219