ScanfFormatString.cpp revision 6bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89
//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17#include "clang/Basic/TargetInfo.h"
18
19using clang::analyze_format_string::ArgType;
20using clang::analyze_format_string::FormatStringHandler;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::ConversionSpecifier;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Special case: "^]" are the first characters.
54  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55    I += 2;
56    if (I == E) {
57      H.HandleIncompleteScanList(start, I - 1);
58      return true;
59    }
60  }
61
62  // Look for a ']' character which denotes the end of the scan list.
63  while (*I != ']') {
64    if (++I == E) {
65      H.HandleIncompleteScanList(start, I - 1);
66      return true;
67    }
68  }
69
70  CS.setEndScanList(I);
71  return false;
72}
73
74// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75// We can possibly refactor.
76static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
77                                                const char *&Beg,
78                                                const char *E,
79                                                unsigned &argIndex,
80                                                const LangOptions &LO,
81                                                const TargetInfo &Target) {
82
83  using namespace clang::analyze_scanf;
84  const char *I = Beg;
85  const char *Start = nullptr;
86  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87
88    // Look for a '%' character that indicates the start of a format specifier.
89  for ( ; I != E ; ++I) {
90    char c = *I;
91    if (c == '\0') {
92        // Detect spurious null characters, which are likely errors.
93      H.HandleNullChar(I);
94      return true;
95    }
96    if (c == '%') {
97      Start = I++;  // Record the start of the format specifier.
98      break;
99    }
100  }
101
102    // No format specifier found?
103  if (!Start)
104    return false;
105
106  if (I == E) {
107      // No more characters left?
108    H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  ScanfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114    return true;
115
116  if (I == E) {
117      // No more characters left?
118    H.HandleIncompleteSpecifier(Start, E - Start);
119    return true;
120  }
121
122  // Look for '*' flag if it is present.
123  if (*I == '*') {
124    FS.setSuppressAssignment(I);
125    if (++I == E) {
126      H.HandleIncompleteSpecifier(Start, E - Start);
127      return true;
128    }
129  }
130
131  // Look for the field width (if any).  Unlike printf, this is either
132  // a fixed integer or isn't present.
133  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
134  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136    FS.setFieldWidth(Amt);
137
138    if (I == E) {
139      // No more characters left?
140      H.HandleIncompleteSpecifier(Start, E - Start);
141      return true;
142    }
143  }
144
145  // Look for the length modifier.
146  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147      // No more characters left?
148    H.HandleIncompleteSpecifier(Start, E - Start);
149    return true;
150  }
151
152  // Detect spurious null characters, which are likely errors.
153  if (*I == '\0') {
154    H.HandleNullChar(I);
155    return true;
156  }
157
158  // Finally, look for the conversion specifier.
159  const char *conversionPosition = I++;
160  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161  switch (*conversionPosition) {
162    default:
163      break;
164    case '%': k = ConversionSpecifier::PercentArg;   break;
165    case 'A': k = ConversionSpecifier::AArg; break;
166    case 'E': k = ConversionSpecifier::EArg; break;
167    case 'F': k = ConversionSpecifier::FArg; break;
168    case 'G': k = ConversionSpecifier::GArg; break;
169    case 'X': k = ConversionSpecifier::XArg; break;
170    case 'a': k = ConversionSpecifier::aArg; break;
171    case 'd': k = ConversionSpecifier::dArg; break;
172    case 'e': k = ConversionSpecifier::eArg; break;
173    case 'f': k = ConversionSpecifier::fArg; break;
174    case 'g': k = ConversionSpecifier::gArg; break;
175    case 'i': k = ConversionSpecifier::iArg; break;
176    case 'n': k = ConversionSpecifier::nArg; break;
177    case 'c': k = ConversionSpecifier::cArg; break;
178    case 'C': k = ConversionSpecifier::CArg; break;
179    case 'S': k = ConversionSpecifier::SArg; break;
180    case '[': k = ConversionSpecifier::ScanListArg; break;
181    case 'u': k = ConversionSpecifier::uArg; break;
182    case 'x': k = ConversionSpecifier::xArg; break;
183    case 'o': k = ConversionSpecifier::oArg; break;
184    case 's': k = ConversionSpecifier::sArg; break;
185    case 'p': k = ConversionSpecifier::pArg; break;
186    // Apple extensions
187      // Apple-specific
188    case 'D':
189      if (Target.getTriple().isOSDarwin())
190        k = ConversionSpecifier::DArg;
191      break;
192    case 'O':
193      if (Target.getTriple().isOSDarwin())
194        k = ConversionSpecifier::OArg;
195      break;
196    case 'U':
197      if (Target.getTriple().isOSDarwin())
198        k = ConversionSpecifier::UArg;
199      break;
200  }
201  ScanfConversionSpecifier CS(conversionPosition, k);
202  if (k == ScanfConversionSpecifier::ScanListArg) {
203    if (ParseScanList(H, CS, I, E))
204      return true;
205  }
206  FS.setConversionSpecifier(CS);
207  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208      && !FS.usesPositionalArg())
209    FS.setArgIndex(argIndex++);
210
211  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
212  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213
214  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215    // Assume the conversion takes one argument.
216    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
217  }
218  return ScanfSpecifierResult(Start, FS);
219}
220
221ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
222  const ScanfConversionSpecifier &CS = getConversionSpecifier();
223
224  if (!CS.consumesDataArgument())
225    return ArgType::Invalid();
226
227  switch(CS.getKind()) {
228    // Signed int.
229    case ConversionSpecifier::dArg:
230    case ConversionSpecifier::DArg:
231    case ConversionSpecifier::iArg:
232      switch (LM.getKind()) {
233        case LengthModifier::None:
234          return ArgType::PtrTo(Ctx.IntTy);
235        case LengthModifier::AsChar:
236          return ArgType::PtrTo(ArgType::AnyCharTy);
237        case LengthModifier::AsShort:
238          return ArgType::PtrTo(Ctx.ShortTy);
239        case LengthModifier::AsLong:
240          return ArgType::PtrTo(Ctx.LongTy);
241        case LengthModifier::AsLongLong:
242        case LengthModifier::AsQuad:
243          return ArgType::PtrTo(Ctx.LongLongTy);
244        case LengthModifier::AsInt64:
245          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
246        case LengthModifier::AsIntMax:
247          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
248        case LengthModifier::AsSizeT:
249          // FIXME: ssize_t.
250          return ArgType();
251        case LengthModifier::AsPtrDiff:
252          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
253        case LengthModifier::AsLongDouble:
254          // GNU extension.
255          return ArgType::PtrTo(Ctx.LongLongTy);
256        case LengthModifier::AsAllocate:
257        case LengthModifier::AsMAllocate:
258        case LengthModifier::AsInt32:
259        case LengthModifier::AsInt3264:
260          return ArgType::Invalid();
261      }
262
263    // Unsigned int.
264    case ConversionSpecifier::oArg:
265    case ConversionSpecifier::OArg:
266    case ConversionSpecifier::uArg:
267    case ConversionSpecifier::UArg:
268    case ConversionSpecifier::xArg:
269    case ConversionSpecifier::XArg:
270      switch (LM.getKind()) {
271        case LengthModifier::None:
272          return ArgType::PtrTo(Ctx.UnsignedIntTy);
273        case LengthModifier::AsChar:
274          return ArgType::PtrTo(Ctx.UnsignedCharTy);
275        case LengthModifier::AsShort:
276          return ArgType::PtrTo(Ctx.UnsignedShortTy);
277        case LengthModifier::AsLong:
278          return ArgType::PtrTo(Ctx.UnsignedLongTy);
279        case LengthModifier::AsLongLong:
280        case LengthModifier::AsQuad:
281          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
282        case LengthModifier::AsInt64:
283          return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
284        case LengthModifier::AsIntMax:
285          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
286        case LengthModifier::AsSizeT:
287          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
288        case LengthModifier::AsPtrDiff:
289          // FIXME: Unsigned version of ptrdiff_t?
290          return ArgType();
291        case LengthModifier::AsLongDouble:
292          // GNU extension.
293          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
294        case LengthModifier::AsAllocate:
295        case LengthModifier::AsMAllocate:
296        case LengthModifier::AsInt32:
297        case LengthModifier::AsInt3264:
298          return ArgType::Invalid();
299      }
300
301    // Float.
302    case ConversionSpecifier::aArg:
303    case ConversionSpecifier::AArg:
304    case ConversionSpecifier::eArg:
305    case ConversionSpecifier::EArg:
306    case ConversionSpecifier::fArg:
307    case ConversionSpecifier::FArg:
308    case ConversionSpecifier::gArg:
309    case ConversionSpecifier::GArg:
310      switch (LM.getKind()) {
311        case LengthModifier::None:
312          return ArgType::PtrTo(Ctx.FloatTy);
313        case LengthModifier::AsLong:
314          return ArgType::PtrTo(Ctx.DoubleTy);
315        case LengthModifier::AsLongDouble:
316          return ArgType::PtrTo(Ctx.LongDoubleTy);
317        default:
318          return ArgType::Invalid();
319      }
320
321    // Char, string and scanlist.
322    case ConversionSpecifier::cArg:
323    case ConversionSpecifier::sArg:
324    case ConversionSpecifier::ScanListArg:
325      switch (LM.getKind()) {
326        case LengthModifier::None:
327          return ArgType::PtrTo(ArgType::AnyCharTy);
328        case LengthModifier::AsLong:
329          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
330        case LengthModifier::AsAllocate:
331        case LengthModifier::AsMAllocate:
332          return ArgType::PtrTo(ArgType::CStrTy);
333        default:
334          return ArgType::Invalid();
335      }
336    case ConversionSpecifier::CArg:
337    case ConversionSpecifier::SArg:
338      // FIXME: Mac OS X specific?
339      switch (LM.getKind()) {
340        case LengthModifier::None:
341          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
342        case LengthModifier::AsAllocate:
343        case LengthModifier::AsMAllocate:
344          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
345        default:
346          return ArgType::Invalid();
347      }
348
349    // Pointer.
350    case ConversionSpecifier::pArg:
351      return ArgType::PtrTo(ArgType::CPointerTy);
352
353    // Write-back.
354    case ConversionSpecifier::nArg:
355      switch (LM.getKind()) {
356        case LengthModifier::None:
357          return ArgType::PtrTo(Ctx.IntTy);
358        case LengthModifier::AsChar:
359          return ArgType::PtrTo(Ctx.SignedCharTy);
360        case LengthModifier::AsShort:
361          return ArgType::PtrTo(Ctx.ShortTy);
362        case LengthModifier::AsLong:
363          return ArgType::PtrTo(Ctx.LongTy);
364        case LengthModifier::AsLongLong:
365        case LengthModifier::AsQuad:
366          return ArgType::PtrTo(Ctx.LongLongTy);
367        case LengthModifier::AsInt64:
368          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
369        case LengthModifier::AsIntMax:
370          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
371        case LengthModifier::AsSizeT:
372          return ArgType(); // FIXME: ssize_t
373        case LengthModifier::AsPtrDiff:
374          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
375        case LengthModifier::AsLongDouble:
376          return ArgType(); // FIXME: Is this a known extension?
377        case LengthModifier::AsAllocate:
378        case LengthModifier::AsMAllocate:
379        case LengthModifier::AsInt32:
380        case LengthModifier::AsInt3264:
381          return ArgType::Invalid();
382        }
383
384    default:
385      break;
386  }
387
388  return ArgType();
389}
390
391bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
392                             const LangOptions &LangOpt,
393                             ASTContext &Ctx) {
394
395  // %n is different from other conversion specifiers; don't try to fix it.
396  if (CS.getKind() == ConversionSpecifier::nArg)
397    return false;
398
399  if (!QT->isPointerType())
400    return false;
401
402  QualType PT = QT->getPointeeType();
403
404  // If it's an enum, get its underlying type.
405  if (const EnumType *ETy = PT->getAs<EnumType>())
406    PT = ETy->getDecl()->getIntegerType();
407
408  const BuiltinType *BT = PT->getAs<BuiltinType>();
409  if (!BT)
410    return false;
411
412  // Pointer to a character.
413  if (PT->isAnyCharacterType()) {
414    CS.setKind(ConversionSpecifier::sArg);
415    if (PT->isWideCharType())
416      LM.setKind(LengthModifier::AsWideChar);
417    else
418      LM.setKind(LengthModifier::None);
419
420    // If we know the target array length, we can use it as a field width.
421    if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
422      if (CAT->getSizeModifier() == ArrayType::Normal)
423        FieldWidth = OptionalAmount(OptionalAmount::Constant,
424                                    CAT->getSize().getZExtValue() - 1,
425                                    "", 0, false);
426
427    }
428    return true;
429  }
430
431  // Figure out the length modifier.
432  switch (BT->getKind()) {
433    // no modifier
434    case BuiltinType::UInt:
435    case BuiltinType::Int:
436    case BuiltinType::Float:
437      LM.setKind(LengthModifier::None);
438      break;
439
440    // hh
441    case BuiltinType::Char_U:
442    case BuiltinType::UChar:
443    case BuiltinType::Char_S:
444    case BuiltinType::SChar:
445      LM.setKind(LengthModifier::AsChar);
446      break;
447
448    // h
449    case BuiltinType::Short:
450    case BuiltinType::UShort:
451      LM.setKind(LengthModifier::AsShort);
452      break;
453
454    // l
455    case BuiltinType::Long:
456    case BuiltinType::ULong:
457    case BuiltinType::Double:
458      LM.setKind(LengthModifier::AsLong);
459      break;
460
461    // ll
462    case BuiltinType::LongLong:
463    case BuiltinType::ULongLong:
464      LM.setKind(LengthModifier::AsLongLong);
465      break;
466
467    // L
468    case BuiltinType::LongDouble:
469      LM.setKind(LengthModifier::AsLongDouble);
470      break;
471
472    // Don't know.
473    default:
474      return false;
475  }
476
477  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
478  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
479    namedTypeToLengthModifier(PT, LM);
480
481  // If fixing the length modifier was enough, we are done.
482  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
483    const analyze_scanf::ArgType &AT = getArgType(Ctx);
484    if (AT.isValid() && AT.matchesType(Ctx, QT))
485      return true;
486  }
487
488  // Figure out the conversion specifier.
489  if (PT->isRealFloatingType())
490    CS.setKind(ConversionSpecifier::fArg);
491  else if (PT->isSignedIntegerType())
492    CS.setKind(ConversionSpecifier::dArg);
493  else if (PT->isUnsignedIntegerType())
494    CS.setKind(ConversionSpecifier::uArg);
495  else
496    llvm_unreachable("Unexpected type");
497
498  return true;
499}
500
501void ScanfSpecifier::toString(raw_ostream &os) const {
502  os << "%";
503
504  if (usesPositionalArg())
505    os << getPositionalArgIndex() << "$";
506  if (SuppressAssignment)
507    os << "*";
508
509  FieldWidth.toString(os);
510  os << LM.toString();
511  os << CS.toString();
512}
513
514bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
515                                                    const char *I,
516                                                    const char *E,
517                                                    const LangOptions &LO,
518                                                    const TargetInfo &Target) {
519
520  unsigned argIndex = 0;
521
522  // Keep looking for a format specifier until we have exhausted the string.
523  while (I != E) {
524    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
525                                                          LO, Target);
526    // Did a fail-stop error of any kind occur when parsing the specifier?
527    // If so, don't do any more processing.
528    if (FSR.shouldStop())
529      return true;
530      // Did we exhaust the string or encounter an error that
531      // we can recover from?
532    if (!FSR.hasValue())
533      continue;
534      // We have a format specifier.  Pass it to the callback.
535    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
536                                I - FSR.getStart())) {
537      return true;
538    }
539  }
540  assert(I == E && "Format string not exhausted");
541  return false;
542}
543