//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Analysis/Analyses/FormatString.h"
16#include "FormatStringParsing.h"
17#include "clang/Basic/TargetInfo.h"
18
19using clang::analyze_format_string::ArgType;
20using clang::analyze_format_string::FormatStringHandler;
21using clang::analyze_format_string::LengthModifier;
22using clang::analyze_format_string::OptionalAmount;
23using clang::analyze_format_string::ConversionSpecifier;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using clang::UpdateOnReturn;
27using namespace clang;
28
29typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
30        ScanfSpecifierResult;
31
32static bool ParseScanList(FormatStringHandler &H,
33                          ScanfConversionSpecifier &CS,
34                          const char *&Beg, const char *E) {
35  const char *I = Beg;
36  const char *start = I - 1;
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38
39  // No more characters?
40  if (I == E) {
41    H.HandleIncompleteScanList(start, I);
42    return true;
43  }
44
45  // Special case: ']' is the first character.
46  if (*I == ']') {
47    if (++I == E) {
48      H.HandleIncompleteScanList(start, I - 1);
49      return true;
50    }
51  }
52
53  // Special case: "^]" are the first characters.
54  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55    I += 2;
56    if (I == E) {
57      H.HandleIncompleteScanList(start, I - 1);
58      return true;
59    }
60  }
61
62  // Look for a ']' character which denotes the end of the scan list.
63  while (*I != ']') {
64    if (++I == E) {
65      H.HandleIncompleteScanList(start, I - 1);
66      return true;
67    }
68  }
69
70  CS.setEndScanList(I);
71  return false;
72}
73
74// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75// We can possibly refactor.
76static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
77                                                const char *&Beg,
78                                                const char *E,
79                                                unsigned &argIndex,
80                                                const LangOptions &LO,
81                                                const TargetInfo &Target) {
82  using namespace clang::analyze_format_string;
83  using namespace clang::analyze_scanf;
84  const char *I = Beg;
85  const char *Start = nullptr;
86  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87
88    // Look for a '%' character that indicates the start of a format specifier.
89  for ( ; I != E ; ++I) {
90    char c = *I;
91    if (c == '\0') {
92        // Detect spurious null characters, which are likely errors.
93      H.HandleNullChar(I);
94      return true;
95    }
96    if (c == '%') {
97      Start = I++;  // Record the start of the format specifier.
98      break;
99    }
100  }
101
102    // No format specifier found?
103  if (!Start)
104    return false;
105
106  if (I == E) {
107      // No more characters left?
108    H.HandleIncompleteSpecifier(Start, E - Start);
109    return true;
110  }
111
112  ScanfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114    return true;
115
116  if (I == E) {
117      // No more characters left?
118    H.HandleIncompleteSpecifier(Start, E - Start);
119    return true;
120  }
121
122  // Look for '*' flag if it is present.
123  if (*I == '*') {
124    FS.setSuppressAssignment(I);
125    if (++I == E) {
126      H.HandleIncompleteSpecifier(Start, E - Start);
127      return true;
128    }
129  }
130
131  // Look for the field width (if any).  Unlike printf, this is either
132  // a fixed integer or isn't present.
133  const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
134  if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135    assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136    FS.setFieldWidth(Amt);
137
138    if (I == E) {
139      // No more characters left?
140      H.HandleIncompleteSpecifier(Start, E - Start);
141      return true;
142    }
143  }
144
145  // Look for the length modifier.
146  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147      // No more characters left?
148    H.HandleIncompleteSpecifier(Start, E - Start);
149    return true;
150  }
151
152  // Detect spurious null characters, which are likely errors.
153  if (*I == '\0') {
154    H.HandleNullChar(I);
155    return true;
156  }
157
158  // Finally, look for the conversion specifier.
159  const char *conversionPosition = I++;
160  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161  switch (*conversionPosition) {
162    default:
163      break;
164    case '%': k = ConversionSpecifier::PercentArg;   break;
165    case 'A': k = ConversionSpecifier::AArg; break;
166    case 'E': k = ConversionSpecifier::EArg; break;
167    case 'F': k = ConversionSpecifier::FArg; break;
168    case 'G': k = ConversionSpecifier::GArg; break;
169    case 'X': k = ConversionSpecifier::XArg; break;
170    case 'a': k = ConversionSpecifier::aArg; break;
171    case 'd': k = ConversionSpecifier::dArg; break;
172    case 'e': k = ConversionSpecifier::eArg; break;
173    case 'f': k = ConversionSpecifier::fArg; break;
174    case 'g': k = ConversionSpecifier::gArg; break;
175    case 'i': k = ConversionSpecifier::iArg; break;
176    case 'n': k = ConversionSpecifier::nArg; break;
177    case 'c': k = ConversionSpecifier::cArg; break;
178    case 'C': k = ConversionSpecifier::CArg; break;
179    case 'S': k = ConversionSpecifier::SArg; break;
180    case '[': k = ConversionSpecifier::ScanListArg; break;
181    case 'u': k = ConversionSpecifier::uArg; break;
182    case 'x': k = ConversionSpecifier::xArg; break;
183    case 'o': k = ConversionSpecifier::oArg; break;
184    case 's': k = ConversionSpecifier::sArg; break;
185    case 'p': k = ConversionSpecifier::pArg; break;
186    // Apple extensions
187      // Apple-specific
188    case 'D':
189      if (Target.getTriple().isOSDarwin())
190        k = ConversionSpecifier::DArg;
191      break;
192    case 'O':
193      if (Target.getTriple().isOSDarwin())
194        k = ConversionSpecifier::OArg;
195      break;
196    case 'U':
197      if (Target.getTriple().isOSDarwin())
198        k = ConversionSpecifier::UArg;
199      break;
200  }
201  ScanfConversionSpecifier CS(conversionPosition, k);
202  if (k == ScanfConversionSpecifier::ScanListArg) {
203    if (ParseScanList(H, CS, I, E))
204      return true;
205  }
206  FS.setConversionSpecifier(CS);
207  if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208      && !FS.usesPositionalArg())
209    FS.setArgIndex(argIndex++);
210
211  // FIXME: '%' and '*' doesn't make sense.  Issue a warning.
212  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213
214  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215    unsigned Len = I - Beg;
216    if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217      CS.setEndScanList(Beg + Len);
218      FS.setConversionSpecifier(CS);
219    }
220    // Assume the conversion takes one argument.
221    return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222  }
223  return ScanfSpecifierResult(Start, FS);
224}
225
226ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227  const ScanfConversionSpecifier &CS = getConversionSpecifier();
228
229  if (!CS.consumesDataArgument())
230    return ArgType::Invalid();
231
232  switch(CS.getKind()) {
233    // Signed int.
234    case ConversionSpecifier::dArg:
235    case ConversionSpecifier::DArg:
236    case ConversionSpecifier::iArg:
237      switch (LM.getKind()) {
238        case LengthModifier::None:
239          return ArgType::PtrTo(Ctx.IntTy);
240        case LengthModifier::AsChar:
241          return ArgType::PtrTo(ArgType::AnyCharTy);
242        case LengthModifier::AsShort:
243          return ArgType::PtrTo(Ctx.ShortTy);
244        case LengthModifier::AsLong:
245          return ArgType::PtrTo(Ctx.LongTy);
246        case LengthModifier::AsLongLong:
247        case LengthModifier::AsQuad:
248          return ArgType::PtrTo(Ctx.LongLongTy);
249        case LengthModifier::AsInt64:
250          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
251        case LengthModifier::AsIntMax:
252          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
253        case LengthModifier::AsSizeT:
254          // FIXME: ssize_t.
255          return ArgType();
256        case LengthModifier::AsPtrDiff:
257          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
258        case LengthModifier::AsLongDouble:
259          // GNU extension.
260          return ArgType::PtrTo(Ctx.LongLongTy);
261        case LengthModifier::AsAllocate:
262        case LengthModifier::AsMAllocate:
263        case LengthModifier::AsInt32:
264        case LengthModifier::AsInt3264:
265        case LengthModifier::AsWide:
266          return ArgType::Invalid();
267      }
268
269    // Unsigned int.
270    case ConversionSpecifier::oArg:
271    case ConversionSpecifier::OArg:
272    case ConversionSpecifier::uArg:
273    case ConversionSpecifier::UArg:
274    case ConversionSpecifier::xArg:
275    case ConversionSpecifier::XArg:
276      switch (LM.getKind()) {
277        case LengthModifier::None:
278          return ArgType::PtrTo(Ctx.UnsignedIntTy);
279        case LengthModifier::AsChar:
280          return ArgType::PtrTo(Ctx.UnsignedCharTy);
281        case LengthModifier::AsShort:
282          return ArgType::PtrTo(Ctx.UnsignedShortTy);
283        case LengthModifier::AsLong:
284          return ArgType::PtrTo(Ctx.UnsignedLongTy);
285        case LengthModifier::AsLongLong:
286        case LengthModifier::AsQuad:
287          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
288        case LengthModifier::AsInt64:
289          return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290        case LengthModifier::AsIntMax:
291          return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292        case LengthModifier::AsSizeT:
293          return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294        case LengthModifier::AsPtrDiff:
295          // FIXME: Unsigned version of ptrdiff_t?
296          return ArgType();
297        case LengthModifier::AsLongDouble:
298          // GNU extension.
299          return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
300        case LengthModifier::AsAllocate:
301        case LengthModifier::AsMAllocate:
302        case LengthModifier::AsInt32:
303        case LengthModifier::AsInt3264:
304        case LengthModifier::AsWide:
305          return ArgType::Invalid();
306      }
307
308    // Float.
309    case ConversionSpecifier::aArg:
310    case ConversionSpecifier::AArg:
311    case ConversionSpecifier::eArg:
312    case ConversionSpecifier::EArg:
313    case ConversionSpecifier::fArg:
314    case ConversionSpecifier::FArg:
315    case ConversionSpecifier::gArg:
316    case ConversionSpecifier::GArg:
317      switch (LM.getKind()) {
318        case LengthModifier::None:
319          return ArgType::PtrTo(Ctx.FloatTy);
320        case LengthModifier::AsLong:
321          return ArgType::PtrTo(Ctx.DoubleTy);
322        case LengthModifier::AsLongDouble:
323          return ArgType::PtrTo(Ctx.LongDoubleTy);
324        default:
325          return ArgType::Invalid();
326      }
327
328    // Char, string and scanlist.
329    case ConversionSpecifier::cArg:
330    case ConversionSpecifier::sArg:
331    case ConversionSpecifier::ScanListArg:
332      switch (LM.getKind()) {
333        case LengthModifier::None:
334          return ArgType::PtrTo(ArgType::AnyCharTy);
335        case LengthModifier::AsLong:
336        case LengthModifier::AsWide:
337          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
338        case LengthModifier::AsAllocate:
339        case LengthModifier::AsMAllocate:
340          return ArgType::PtrTo(ArgType::CStrTy);
341        case LengthModifier::AsShort:
342          if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
343            return ArgType::PtrTo(ArgType::AnyCharTy);
344        default:
345          return ArgType::Invalid();
346      }
347    case ConversionSpecifier::CArg:
348    case ConversionSpecifier::SArg:
349      // FIXME: Mac OS X specific?
350      switch (LM.getKind()) {
351        case LengthModifier::None:
352        case LengthModifier::AsWide:
353          return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
354        case LengthModifier::AsAllocate:
355        case LengthModifier::AsMAllocate:
356          return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
357        case LengthModifier::AsShort:
358          if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
359            return ArgType::PtrTo(ArgType::AnyCharTy);
360        default:
361          return ArgType::Invalid();
362      }
363
364    // Pointer.
365    case ConversionSpecifier::pArg:
366      return ArgType::PtrTo(ArgType::CPointerTy);
367
368    // Write-back.
369    case ConversionSpecifier::nArg:
370      switch (LM.getKind()) {
371        case LengthModifier::None:
372          return ArgType::PtrTo(Ctx.IntTy);
373        case LengthModifier::AsChar:
374          return ArgType::PtrTo(Ctx.SignedCharTy);
375        case LengthModifier::AsShort:
376          return ArgType::PtrTo(Ctx.ShortTy);
377        case LengthModifier::AsLong:
378          return ArgType::PtrTo(Ctx.LongTy);
379        case LengthModifier::AsLongLong:
380        case LengthModifier::AsQuad:
381          return ArgType::PtrTo(Ctx.LongLongTy);
382        case LengthModifier::AsInt64:
383          return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
384        case LengthModifier::AsIntMax:
385          return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
386        case LengthModifier::AsSizeT:
387          return ArgType(); // FIXME: ssize_t
388        case LengthModifier::AsPtrDiff:
389          return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
390        case LengthModifier::AsLongDouble:
391          return ArgType(); // FIXME: Is this a known extension?
392        case LengthModifier::AsAllocate:
393        case LengthModifier::AsMAllocate:
394        case LengthModifier::AsInt32:
395        case LengthModifier::AsInt3264:
396        case LengthModifier::AsWide:
397          return ArgType::Invalid();
398        }
399
400    default:
401      break;
402  }
403
404  return ArgType();
405}
406
407bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
408                             const LangOptions &LangOpt,
409                             ASTContext &Ctx) {
410
411  // %n is different from other conversion specifiers; don't try to fix it.
412  if (CS.getKind() == ConversionSpecifier::nArg)
413    return false;
414
415  if (!QT->isPointerType())
416    return false;
417
418  QualType PT = QT->getPointeeType();
419
420  // If it's an enum, get its underlying type.
421  if (const EnumType *ETy = PT->getAs<EnumType>())
422    PT = ETy->getDecl()->getIntegerType();
423
424  const BuiltinType *BT = PT->getAs<BuiltinType>();
425  if (!BT)
426    return false;
427
428  // Pointer to a character.
429  if (PT->isAnyCharacterType()) {
430    CS.setKind(ConversionSpecifier::sArg);
431    if (PT->isWideCharType())
432      LM.setKind(LengthModifier::AsWideChar);
433    else
434      LM.setKind(LengthModifier::None);
435
436    // If we know the target array length, we can use it as a field width.
437    if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
438      if (CAT->getSizeModifier() == ArrayType::Normal)
439        FieldWidth = OptionalAmount(OptionalAmount::Constant,
440                                    CAT->getSize().getZExtValue() - 1,
441                                    "", 0, false);
442
443    }
444    return true;
445  }
446
447  // Figure out the length modifier.
448  switch (BT->getKind()) {
449    // no modifier
450    case BuiltinType::UInt:
451    case BuiltinType::Int:
452    case BuiltinType::Float:
453      LM.setKind(LengthModifier::None);
454      break;
455
456    // hh
457    case BuiltinType::Char_U:
458    case BuiltinType::UChar:
459    case BuiltinType::Char_S:
460    case BuiltinType::SChar:
461      LM.setKind(LengthModifier::AsChar);
462      break;
463
464    // h
465    case BuiltinType::Short:
466    case BuiltinType::UShort:
467      LM.setKind(LengthModifier::AsShort);
468      break;
469
470    // l
471    case BuiltinType::Long:
472    case BuiltinType::ULong:
473    case BuiltinType::Double:
474      LM.setKind(LengthModifier::AsLong);
475      break;
476
477    // ll
478    case BuiltinType::LongLong:
479    case BuiltinType::ULongLong:
480      LM.setKind(LengthModifier::AsLongLong);
481      break;
482
483    // L
484    case BuiltinType::LongDouble:
485      LM.setKind(LengthModifier::AsLongDouble);
486      break;
487
488    // Don't know.
489    default:
490      return false;
491  }
492
493  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
494  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
495    namedTypeToLengthModifier(PT, LM);
496
497  // If fixing the length modifier was enough, we are done.
498  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
499    const analyze_scanf::ArgType &AT = getArgType(Ctx);
500    if (AT.isValid() && AT.matchesType(Ctx, QT))
501      return true;
502  }
503
504  // Figure out the conversion specifier.
505  if (PT->isRealFloatingType())
506    CS.setKind(ConversionSpecifier::fArg);
507  else if (PT->isSignedIntegerType())
508    CS.setKind(ConversionSpecifier::dArg);
509  else if (PT->isUnsignedIntegerType())
510    CS.setKind(ConversionSpecifier::uArg);
511  else
512    llvm_unreachable("Unexpected type");
513
514  return true;
515}
516
517void ScanfSpecifier::toString(raw_ostream &os) const {
518  os << "%";
519
520  if (usesPositionalArg())
521    os << getPositionalArgIndex() << "$";
522  if (SuppressAssignment)
523    os << "*";
524
525  FieldWidth.toString(os);
526  os << LM.toString();
527  os << CS.toString();
528}
529
530bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
531                                                    const char *I,
532                                                    const char *E,
533                                                    const LangOptions &LO,
534                                                    const TargetInfo &Target) {
535
536  unsigned argIndex = 0;
537
538  // Keep looking for a format specifier until we have exhausted the string.
539  while (I != E) {
540    const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
541                                                          LO, Target);
542    // Did a fail-stop error of any kind occur when parsing the specifier?
543    // If so, don't do any more processing.
544    if (FSR.shouldStop())
545      return true;
546      // Did we exhaust the string or encounter an error that
547      // we can recover from?
548    if (!FSR.hasValue())
549      continue;
550      // We have a format specifier.  Pass it to the callback.
551    if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
552                                I - FSR.getStart())) {
553      return true;
554    }
555  }
556  assert(I == E && "Format string not exhausted");
557  return false;
558}
559