//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17#include "clang/Basic/TargetInfo.h"
18
19using clang::analyze_format_string::ArgType;
20using clang::analyze_format_string::FormatStringHandler;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::ConversionSpecifier;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Special case: "^]" are the first characters.
54  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55    I += 2;
56    if (I == E) {
57      H.HandleIncompleteScanList(start, I - 1);
58      return true;
59    }
60  }
61
62  // Look for a ']' character which denotes the end of the scan list.
63  while (*I != ']') {
64    if (++I == E) {
65      H.HandleIncompleteScanList(start, I - 1);
66      return true;
67    }
68  }
69
70  CS.setEndScanList(I);
71  return false;
72}
73
74// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75// We can possibly refactor.
76static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
77                                                const char *&Beg,
78                                                const char *E,
79                                                unsigned &argIndex,
80                                                const LangOptions &LO,
81                                                const TargetInfo &Target) {
82
83  using namespace clang::analyze_scanf;
84  const char *I = Beg;
85  const char *Start = nullptr;
86  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87
88    // Look for a '%' character that indicates the start of a format specifier.
89  for ( ; I != E ; ++I) {
90    char c = *I;
91    if (c == '\0') {
92        // Detect spurious null characters, which are likely errors.
93      H.HandleNullChar(I);
94      return true;
95    }
96    if (c == '%') {
97      Start = I++;  // Record the start of the format specifier.
98      break;
99    }
100  }
101
102    // No format specifier found?
103  if (!Start)
104    return false;
105
106  if (I == E) {
107      // No more characters left?
108    H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  ScanfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114    return true;
115
116  if (I == E) {
117      // No more characters left?
118    H.HandleIncompleteSpecifier(Start, E - Start);
119    return true;
120  }
121
122  // Look for '*' flag if it is present.
123  if (*I == '*') {
124    FS.setSuppressAssignment(I);
125    if (++I == E) {
126      H.HandleIncompleteSpecifier(Start, E - Start);
127      return true;
128    }
129  }
130
131  // Look for the field width (if any).  Unlike printf, this is either
132  // a fixed integer or isn't present.
133  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
134  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136    FS.setFieldWidth(Amt);
137
138    if (I == E) {
139      // No more characters left?
140      H.HandleIncompleteSpecifier(Start, E - Start);
141      return true;
142    }
143  }
144
145  // Look for the length modifier.
146  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147      // No more characters left?
148    H.HandleIncompleteSpecifier(Start, E - Start);
149    return true;
150  }
151
152  // Detect spurious null characters, which are likely errors.
153  if (*I == '\0') {
154    H.HandleNullChar(I);
155    return true;
156  }
157
158  // Finally, look for the conversion specifier.
159  const char *conversionPosition = I++;
160  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161  switch (*conversionPosition) {
162    default:
163      break;
164    case '%': k = ConversionSpecifier::PercentArg;   break;
165    case 'A': k = ConversionSpecifier::AArg; break;
166    case 'E': k = ConversionSpecifier::EArg; break;
167    case 'F': k = ConversionSpecifier::FArg; break;
168    case 'G': k = ConversionSpecifier::GArg; break;
169    case 'X': k = ConversionSpecifier::XArg; break;
170    case 'a': k = ConversionSpecifier::aArg; break;
171    case 'd': k = ConversionSpecifier::dArg; break;
172    case 'e': k = ConversionSpecifier::eArg; break;
173    case 'f': k = ConversionSpecifier::fArg; break;
174    case 'g': k = ConversionSpecifier::gArg; break;
175    case 'i': k = ConversionSpecifier::iArg; break;
176    case 'n': k = ConversionSpecifier::nArg; break;
177    case 'c': k = ConversionSpecifier::cArg; break;
178    case 'C': k = ConversionSpecifier::CArg; break;
179    case 'S': k = ConversionSpecifier::SArg; break;
180    case '[': k = ConversionSpecifier::ScanListArg; break;
181    case 'u': k = ConversionSpecifier::uArg; break;
182    case 'x': k = ConversionSpecifier::xArg; break;
183    case 'o': k = ConversionSpecifier::oArg; break;
184    case 's': k = ConversionSpecifier::sArg; break;
185    case 'p': k = ConversionSpecifier::pArg; break;
186    // Apple extensions
187      // Apple-specific
188    case 'D':
189      if (Target.getTriple().isOSDarwin())
190        k = ConversionSpecifier::DArg;
191      break;
192    case 'O':
193      if (Target.getTriple().isOSDarwin())
194        k = ConversionSpecifier::OArg;
195      break;
196    case 'U':
197      if (Target.getTriple().isOSDarwin())
198        k = ConversionSpecifier::UArg;
199      break;
200  }
201  ScanfConversionSpecifier CS(conversionPosition, k);
202  if (k == ScanfConversionSpecifier::ScanListArg) {
203    if (ParseScanList(H, CS, I, E))
204      return true;
205  }
206  FS.setConversionSpecifier(CS);
207  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208      && !FS.usesPositionalArg())
209    FS.setArgIndex(argIndex++);
210
211  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
212  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213
214  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215    // Assume the conversion takes one argument.
216    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
217  }
218  return ScanfSpecifierResult(Start, FS);
219}
220
221ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
222  const ScanfConversionSpecifier &CS = getConversionSpecifier();
223
224  if (!CS.consumesDataArgument())
225    return ArgType::Invalid();
226
227  switch(CS.getKind()) {
228    // Signed int.
229    case ConversionSpecifier::dArg:
230    case ConversionSpecifier::DArg:
231    case ConversionSpecifier::iArg:
232      switch (LM.getKind()) {
233        case LengthModifier::None:
234          return ArgType::PtrTo(Ctx.IntTy);
235        case LengthModifier::AsChar:
236          return ArgType::PtrTo(ArgType::AnyCharTy);
237        case LengthModifier::AsShort:
238          return ArgType::PtrTo(Ctx.ShortTy);
239        case LengthModifier::AsLong:
240          return ArgType::PtrTo(Ctx.LongTy);
241        case LengthModifier::AsLongLong:
242        case LengthModifier::AsQuad:
243          return ArgType::PtrTo(Ctx.LongLongTy);
244        case LengthModifier::AsInt64:
245          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
246        case LengthModifier::AsIntMax:
247          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
248        case LengthModifier::AsSizeT:
249          // FIXME: ssize_t.
250          return ArgType();
251        case LengthModifier::AsPtrDiff:
252          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
253        case LengthModifier::AsLongDouble:
254          // GNU extension.
255          return ArgType::PtrTo(Ctx.LongLongTy);
256        case LengthModifier::AsAllocate:
257        case LengthModifier::AsMAllocate:
258        case LengthModifier::AsInt32:
259        case LengthModifier::AsInt3264:
260        case LengthModifier::AsWide:
261          return ArgType::Invalid();
262      }
263
264    // Unsigned int.
265    case ConversionSpecifier::oArg:
266    case ConversionSpecifier::OArg:
267    case ConversionSpecifier::uArg:
268    case ConversionSpecifier::UArg:
269    case ConversionSpecifier::xArg:
270    case ConversionSpecifier::XArg:
271      switch (LM.getKind()) {
272        case LengthModifier::None:
273          return ArgType::PtrTo(Ctx.UnsignedIntTy);
274        case LengthModifier::AsChar:
275          return ArgType::PtrTo(Ctx.UnsignedCharTy);
276        case LengthModifier::AsShort:
277          return ArgType::PtrTo(Ctx.UnsignedShortTy);
278        case LengthModifier::AsLong:
279          return ArgType::PtrTo(Ctx.UnsignedLongTy);
280        case LengthModifier::AsLongLong:
281        case LengthModifier::AsQuad:
282          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
283        case LengthModifier::AsInt64:
284          return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
285        case LengthModifier::AsIntMax:
286          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
287        case LengthModifier::AsSizeT:
288          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
289        case LengthModifier::AsPtrDiff:
290          // FIXME: Unsigned version of ptrdiff_t?
291          return ArgType();
292        case LengthModifier::AsLongDouble:
293          // GNU extension.
294          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
295        case LengthModifier::AsAllocate:
296        case LengthModifier::AsMAllocate:
297        case LengthModifier::AsInt32:
298        case LengthModifier::AsInt3264:
299        case LengthModifier::AsWide:
300          return ArgType::Invalid();
301      }
302
303    // Float.
304    case ConversionSpecifier::aArg:
305    case ConversionSpecifier::AArg:
306    case ConversionSpecifier::eArg:
307    case ConversionSpecifier::EArg:
308    case ConversionSpecifier::fArg:
309    case ConversionSpecifier::FArg:
310    case ConversionSpecifier::gArg:
311    case ConversionSpecifier::GArg:
312      switch (LM.getKind()) {
313        case LengthModifier::None:
314          return ArgType::PtrTo(Ctx.FloatTy);
315        case LengthModifier::AsLong:
316          return ArgType::PtrTo(Ctx.DoubleTy);
317        case LengthModifier::AsLongDouble:
318          return ArgType::PtrTo(Ctx.LongDoubleTy);
319        default:
320          return ArgType::Invalid();
321      }
322
323    // Char, string and scanlist.
324    case ConversionSpecifier::cArg:
325    case ConversionSpecifier::sArg:
326    case ConversionSpecifier::ScanListArg:
327      switch (LM.getKind()) {
328        case LengthModifier::None:
329          return ArgType::PtrTo(ArgType::AnyCharTy);
330        case LengthModifier::AsLong:
331        case LengthModifier::AsWide:
332          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
333        case LengthModifier::AsAllocate:
334        case LengthModifier::AsMAllocate:
335          return ArgType::PtrTo(ArgType::CStrTy);
336        case LengthModifier::AsShort:
337          if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
338            return ArgType::PtrTo(ArgType::AnyCharTy);
339        default:
340          return ArgType::Invalid();
341      }
342    case ConversionSpecifier::CArg:
343    case ConversionSpecifier::SArg:
344      // FIXME: Mac OS X specific?
345      switch (LM.getKind()) {
346        case LengthModifier::None:
347        case LengthModifier::AsWide:
348          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
349        case LengthModifier::AsAllocate:
350        case LengthModifier::AsMAllocate:
351          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
352        case LengthModifier::AsShort:
353          if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
354            return ArgType::PtrTo(ArgType::AnyCharTy);
355        default:
356          return ArgType::Invalid();
357      }
358
359    // Pointer.
360    case ConversionSpecifier::pArg:
361      return ArgType::PtrTo(ArgType::CPointerTy);
362
363    // Write-back.
364    case ConversionSpecifier::nArg:
365      switch (LM.getKind()) {
366        case LengthModifier::None:
367          return ArgType::PtrTo(Ctx.IntTy);
368        case LengthModifier::AsChar:
369          return ArgType::PtrTo(Ctx.SignedCharTy);
370        case LengthModifier::AsShort:
371          return ArgType::PtrTo(Ctx.ShortTy);
372        case LengthModifier::AsLong:
373          return ArgType::PtrTo(Ctx.LongTy);
374        case LengthModifier::AsLongLong:
375        case LengthModifier::AsQuad:
376          return ArgType::PtrTo(Ctx.LongLongTy);
377        case LengthModifier::AsInt64:
378          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
379        case LengthModifier::AsIntMax:
380          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
381        case LengthModifier::AsSizeT:
382          return ArgType(); // FIXME: ssize_t
383        case LengthModifier::AsPtrDiff:
384          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
385        case LengthModifier::AsLongDouble:
386          return ArgType(); // FIXME: Is this a known extension?
387        case LengthModifier::AsAllocate:
388        case LengthModifier::AsMAllocate:
389        case LengthModifier::AsInt32:
390        case LengthModifier::AsInt3264:
391        case LengthModifier::AsWide:
392          return ArgType::Invalid();
393        }
394
395    default:
396      break;
397  }
398
399  return ArgType();
400}
401
402bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
403                             const LangOptions &LangOpt,
404                             ASTContext &Ctx) {
405
406  // %n is different from other conversion specifiers; don't try to fix it.
407  if (CS.getKind() == ConversionSpecifier::nArg)
408    return false;
409
410  if (!QT->isPointerType())
411    return false;
412
413  QualType PT = QT->getPointeeType();
414
415  // If it's an enum, get its underlying type.
416  if (const EnumType *ETy = PT->getAs<EnumType>())
417    PT = ETy->getDecl()->getIntegerType();
418
419  const BuiltinType *BT = PT->getAs<BuiltinType>();
420  if (!BT)
421    return false;
422
423  // Pointer to a character.
424  if (PT->isAnyCharacterType()) {
425    CS.setKind(ConversionSpecifier::sArg);
426    if (PT->isWideCharType())
427      LM.setKind(LengthModifier::AsWideChar);
428    else
429      LM.setKind(LengthModifier::None);
430
431    // If we know the target array length, we can use it as a field width.
432    if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
433      if (CAT->getSizeModifier() == ArrayType::Normal)
434        FieldWidth = OptionalAmount(OptionalAmount::Constant,
435                                    CAT->getSize().getZExtValue() - 1,
436                                    "", 0, false);
437
438    }
439    return true;
440  }
441
442  // Figure out the length modifier.
443  switch (BT->getKind()) {
444    // no modifier
445    case BuiltinType::UInt:
446    case BuiltinType::Int:
447    case BuiltinType::Float:
448      LM.setKind(LengthModifier::None);
449      break;
450
451    // hh
452    case BuiltinType::Char_U:
453    case BuiltinType::UChar:
454    case BuiltinType::Char_S:
455    case BuiltinType::SChar:
456      LM.setKind(LengthModifier::AsChar);
457      break;
458
459    // h
460    case BuiltinType::Short:
461    case BuiltinType::UShort:
462      LM.setKind(LengthModifier::AsShort);
463      break;
464
465    // l
466    case BuiltinType::Long:
467    case BuiltinType::ULong:
468    case BuiltinType::Double:
469      LM.setKind(LengthModifier::AsLong);
470      break;
471
472    // ll
473    case BuiltinType::LongLong:
474    case BuiltinType::ULongLong:
475      LM.setKind(LengthModifier::AsLongLong);
476      break;
477
478    // L
479    case BuiltinType::LongDouble:
480      LM.setKind(LengthModifier::AsLongDouble);
481      break;
482
483    // Don't know.
484    default:
485      return false;
486  }
487
488  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
489  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
490    namedTypeToLengthModifier(PT, LM);
491
492  // If fixing the length modifier was enough, we are done.
493  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
494    const analyze_scanf::ArgType &AT = getArgType(Ctx);
495    if (AT.isValid() && AT.matchesType(Ctx, QT))
496      return true;
497  }
498
499  // Figure out the conversion specifier.
500  if (PT->isRealFloatingType())
501    CS.setKind(ConversionSpecifier::fArg);
502  else if (PT->isSignedIntegerType())
503    CS.setKind(ConversionSpecifier::dArg);
504  else if (PT->isUnsignedIntegerType())
505    CS.setKind(ConversionSpecifier::uArg);
506  else
507    llvm_unreachable("Unexpected type");
508
509  return true;
510}
511
512void ScanfSpecifier::toString(raw_ostream &os) const {
513  os << "%";
514
515  if (usesPositionalArg())
516    os << getPositionalArgIndex() << "$";
517  if (SuppressAssignment)
518    os << "*";
519
520  FieldWidth.toString(os);
521  os << LM.toString();
522  os << CS.toString();
523}
524
525bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
526                                                    const char *I,
527                                                    const char *E,
528                                                    const LangOptions &LO,
529                                                    const TargetInfo &Target) {
530
531  unsigned argIndex = 0;
532
533  // Keep looking for a format specifier until we have exhausted the string.
534  while (I != E) {
535    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
536                                                          LO, Target);
537    // Did a fail-stop error of any kind occur when parsing the specifier?
538    // If so, don't do any more processing.
539    if (FSR.shouldStop())
540      return true;
541      // Did we exhaust the string or encounter an error that
542      // we can recover from?
543    if (!FSR.hasValue())
544      continue;
545      // We have a format specifier.  Pass it to the callback.
546    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
547                                I - FSR.getStart())) {
548      return true;
549    }
550  }
551  assert(I == E && "Format string not exhausted");
552  return false;
553}
554