1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Scanf/printf implementation for use in *Sanitizer interceptors.
11// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
12// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
13// with a few common GNU extensions.
14//
15//===----------------------------------------------------------------------===//
16#include <stdarg.h>
17
18static const char *parse_number(const char *p, int *out) {
19  *out = internal_atoll(p);
20  while (*p >= '0' && *p <= '9')
21    ++p;
22  return p;
23}
24
25static const char *maybe_parse_param_index(const char *p, int *out) {
26  // n$
27  if (*p >= '0' && *p <= '9') {
28    int number;
29    const char *q = parse_number(p, &number);
30    CHECK(q);
31    if (*q == '$') {
32      *out = number;
33      p = q + 1;
34    }
35  }
36
37  // Otherwise, do not change p. This will be re-parsed later as the field
38  // width.
39  return p;
40}
41
42static bool char_is_one_of(char c, const char *s) {
43  return !!internal_strchr(s, c);
44}
45
46static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
47  if (char_is_one_of(*p, "jztLq")) {
48    ll[0] = *p;
49    ++p;
50  } else if (*p == 'h') {
51    ll[0] = 'h';
52    ++p;
53    if (*p == 'h') {
54      ll[1] = 'h';
55      ++p;
56    }
57  } else if (*p == 'l') {
58    ll[0] = 'l';
59    ++p;
60    if (*p == 'l') {
61      ll[1] = 'l';
62      ++p;
63    }
64  }
65  return p;
66}
67
68// Returns true if the character is an integer conversion specifier.
69static bool format_is_integer_conv(char c) {
70  return char_is_one_of(c, "diouxXn");
71}
72
73// Returns true if the character is an floating point conversion specifier.
74static bool format_is_float_conv(char c) {
75  return char_is_one_of(c, "aAeEfFgG");
76}
77
78// Returns string output character size for string-like conversions,
79// or 0 if the conversion is invalid.
80static int format_get_char_size(char convSpecifier,
81                                const char lengthModifier[2]) {
82  if (char_is_one_of(convSpecifier, "CS")) {
83    return sizeof(wchar_t);
84  }
85
86  if (char_is_one_of(convSpecifier, "cs[")) {
87    if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
88      return sizeof(wchar_t);
89    else if (lengthModifier[0] == '\0')
90      return sizeof(char);
91  }
92
93  return 0;
94}
95
// Special (non-positive) results of the *_get_value_size() helpers. Positive
// results are plain byte counts.
enum FormatStoreSize {
  // The conversion specifier is not valid.
  FSS_INVALID = 0,
  // Size is not known up front; it is the strlen() of the destination buffer.
  FSS_STRLEN = -1,
  // Size is not known up front; it is the wcslen() of the destination buffer.
  FSS_WCSLEN = -2
};
106
107// Returns the memory size of a format directive (if >0), or a value of
108// FormatStoreSize.
109static int format_get_value_size(char convSpecifier,
110                                 const char lengthModifier[2],
111                                 bool promote_float) {
112  if (format_is_integer_conv(convSpecifier)) {
113    switch (lengthModifier[0]) {
114    case 'h':
115      return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
116    case 'l':
117      return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
118    case 'q':
119      return sizeof(long long);
120    case 'L':
121      return sizeof(long long);
122    case 'j':
123      return sizeof(INTMAX_T);
124    case 'z':
125      return sizeof(SIZE_T);
126    case 't':
127      return sizeof(PTRDIFF_T);
128    case 0:
129      return sizeof(int);
130    default:
131      return FSS_INVALID;
132    }
133  }
134
135  if (format_is_float_conv(convSpecifier)) {
136    switch (lengthModifier[0]) {
137    case 'L':
138    case 'q':
139      return sizeof(long double);
140    case 'l':
141      return lengthModifier[1] == 'l' ? sizeof(long double)
142                                           : sizeof(double);
143    case 0:
144      // Printf promotes floats to doubles but scanf does not
145      return promote_float ? sizeof(double) : sizeof(float);
146    default:
147      return FSS_INVALID;
148    }
149  }
150
151  if (convSpecifier == 'p') {
152    if (lengthModifier[0] != 0)
153      return FSS_INVALID;
154    return sizeof(void *);
155  }
156
157  return FSS_INVALID;
158}
159
// One conversion directive of a scanf format string, as filled in by
// scanf_parse_next(). The parser zeroes the whole structure before parsing,
// so fields that a directive does not mention are 0 / false (argIdx is -1).
struct ScanfDirective {
  int argIdx; // argument index, or -1 if not specified ("%n$")
  int fieldWidth; // field width; stays 0 when the directive gives none
  const char *begin; // the '%' that starts the directive
  const char *end; // first character after the parsed directive
  bool suppressed; // suppress assignment ("*")
  bool allocate;   // allocate space ("m")
  char lengthModifier[2]; // length modifier characters; unused slots are 0
  char convSpecifier; // conversion specifier character
  bool maybeGnuMalloc; // "%a" followed by s, S or [...]: may be GNU "%as" etc.
};
171
172// Parse scanf format string. If a valid directive in encountered, it is
173// returned in dir. This function returns the pointer to the first
174// unprocessed character, or 0 in case of error.
175// In case of the end-of-string, a pointer to the closing \0 is returned.
176static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
177                                    ScanfDirective *dir) {
178  internal_memset(dir, 0, sizeof(*dir));
179  dir->argIdx = -1;
180
181  while (*p) {
182    if (*p != '%') {
183      ++p;
184      continue;
185    }
186    dir->begin = p;
187    ++p;
188    // %%
189    if (*p == '%') {
190      ++p;
191      continue;
192    }
193    if (*p == '\0') {
194      return 0;
195    }
196    // %n$
197    p = maybe_parse_param_index(p, &dir->argIdx);
198    CHECK(p);
199    // *
200    if (*p == '*') {
201      dir->suppressed = true;
202      ++p;
203    }
204    // Field width
205    if (*p >= '0' && *p <= '9') {
206      p = parse_number(p, &dir->fieldWidth);
207      CHECK(p);
208      if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
209        return 0;
210    }
211    // m
212    if (*p == 'm') {
213      dir->allocate = true;
214      ++p;
215    }
216    // Length modifier.
217    p = maybe_parse_length_modifier(p, dir->lengthModifier);
218    // Conversion specifier.
219    dir->convSpecifier = *p++;
220    // Consume %[...] expression.
221    if (dir->convSpecifier == '[') {
222      if (*p == '^')
223        ++p;
224      if (*p == ']')
225        ++p;
226      while (*p && *p != ']')
227        ++p;
228      if (*p == 0)
229        return 0; // unexpected end of string
230                  // Consume the closing ']'.
231      ++p;
232    }
233    // This is unfortunately ambiguous between old GNU extension
234    // of %as, %aS and %a[...] and newer POSIX %a followed by
235    // letters s, S or [.
236    if (allowGnuMalloc && dir->convSpecifier == 'a' &&
237        !dir->lengthModifier[0]) {
238      if (*p == 's' || *p == 'S') {
239        dir->maybeGnuMalloc = true;
240        ++p;
241      } else if (*p == '[') {
242        // Watch for %a[h-j%d], if % appears in the
243        // [...] range, then we need to give up, we don't know
244        // if scanf will parse it as POSIX %a [h-j %d ] or
245        // GNU allocation of string with range dh-j plus %.
246        const char *q = p + 1;
247        if (*q == '^')
248          ++q;
249        if (*q == ']')
250          ++q;
251        while (*q && *q != ']' && *q != '%')
252          ++q;
253        if (*q == 0 || *q == '%')
254          return 0;
255        p = q + 1; // Consume the closing ']'.
256        dir->maybeGnuMalloc = true;
257      }
258    }
259    dir->end = p;
260    break;
261  }
262  return p;
263}
264
265static int scanf_get_value_size(ScanfDirective *dir) {
266  if (dir->allocate) {
267    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
268      return FSS_INVALID;
269    return sizeof(char *);
270  }
271
272  if (dir->maybeGnuMalloc) {
273    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
274      return FSS_INVALID;
275    // This is ambiguous, so check the smaller size of char * (if it is
276    // a GNU extension of %as, %aS or %a[...]) and float (if it is
277    // POSIX %a followed by s, S or [ letters).
278    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
279  }
280
281  if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
282    bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
283    unsigned charSize =
284        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
285    if (charSize == 0)
286      return FSS_INVALID;
287    if (dir->fieldWidth == 0) {
288      if (!needsTerminator)
289        return charSize;
290      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
291    }
292    return (dir->fieldWidth + needsTerminator) * charSize;
293  }
294
295  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
296}
297
298// Common part of *scanf interceptors.
299// Process format string and va_list, and report all store ranges.
300// Stops when "consuming" n_inputs input items.
301static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
302                         const char *format, va_list aq) {
303  CHECK_GT(n_inputs, 0);
304  const char *p = format;
305
306  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
307
308  while (*p) {
309    ScanfDirective dir;
310    p = scanf_parse_next(p, allowGnuMalloc, &dir);
311    if (!p)
312      break;
313    if (dir.convSpecifier == 0) {
314      // This can only happen at the end of the format string.
315      CHECK_EQ(*p, 0);
316      break;
317    }
318    // Here the directive is valid. Do what it says.
319    if (dir.argIdx != -1) {
320      // Unsupported.
321      break;
322    }
323    if (dir.suppressed)
324      continue;
325    int size = scanf_get_value_size(&dir);
326    if (size == FSS_INVALID) {
327      Report("WARNING: unexpected format specifier in scanf interceptor: "
328        "%.*s\n", dir.end - dir.begin, dir.begin);
329      break;
330    }
331    void *argp = va_arg(aq, void *);
332    if (dir.convSpecifier != 'n')
333      --n_inputs;
334    if (n_inputs < 0)
335      break;
336    if (size == FSS_STRLEN) {
337      size = internal_strlen((const char *)argp) + 1;
338    } else if (size == FSS_WCSLEN) {
339      // FIXME: actually use wcslen() to calculate it.
340      size = 0;
341    }
342    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
343  }
344}
345
346#if SANITIZER_INTERCEPT_PRINTF
347
// One conversion directive of a printf format string, as filled in by
// printf_parse_next(). The parser zeroes the whole structure before parsing
// and then sets both index fields to -1.
struct PrintfDirective {
  int fieldWidth; // literal field width; stays 0 when absent or starred
  int fieldPrecision; // literal precision; stays 0 when absent or starred
  // Positional argument index, or -1 if not specified.
  // NOTE(review): the exact "n$" form this field is meant to record should be
  // confirmed against printf_parse_next.
  int argIdx; // width argument index, or -1 if not specified ("%*n$")
  int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
  const char *begin; // the '%' that starts the directive
  const char *end; // first character after the conversion specifier
  bool starredWidth; // width is given as '*'
  bool starredPrecision; // precision is given as '*'
  char lengthModifier[2]; // length modifier characters; unused slots are 0
  char convSpecifier; // conversion specifier character
};
360
361static const char *maybe_parse_number(const char *p, int *out) {
362  if (*p >= '0' && *p <= '9')
363    p = parse_number(p, out);
364  return p;
365}
366
367static const char *maybe_parse_number_or_star(const char *p, int *out,
368                                              bool *star) {
369  if (*p == '*') {
370    *star = true;
371    ++p;
372  } else {
373    *star = false;
374    p = maybe_parse_number(p, out);
375  }
376  return p;
377}
378
379// Parse printf format string. Same as scanf_parse_next.
380static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
381  internal_memset(dir, 0, sizeof(*dir));
382  dir->argIdx = -1;
383  dir->precisionIdx = -1;
384
385  while (*p) {
386    if (*p != '%') {
387      ++p;
388      continue;
389    }
390    dir->begin = p;
391    ++p;
392    // %%
393    if (*p == '%') {
394      ++p;
395      continue;
396    }
397    if (*p == '\0') {
398      return 0;
399    }
400    // %n$
401    p = maybe_parse_param_index(p, &dir->precisionIdx);
402    CHECK(p);
403    // Flags
404    while (char_is_one_of(*p, "'-+ #0")) {
405      ++p;
406    }
407    // Field width
408    p = maybe_parse_number_or_star(p, &dir->fieldWidth,
409                                   &dir->starredWidth);
410    if (!p)
411      return 0;
412    // Precision
413    if (*p == '.') {
414      ++p;
415      // Actual precision is optional (surprise!)
416      p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
417                                     &dir->starredPrecision);
418      if (!p)
419        return 0;
420      // m$
421      if (dir->starredPrecision) {
422        p = maybe_parse_param_index(p, &dir->precisionIdx);
423        CHECK(p);
424      }
425    }
426    // Length modifier.
427    p = maybe_parse_length_modifier(p, dir->lengthModifier);
428    // Conversion specifier.
429    dir->convSpecifier = *p++;
430    dir->end = p;
431    break;
432  }
433  return p;
434}
435
436static int printf_get_value_size(PrintfDirective *dir) {
437  if (dir->convSpecifier == 'm') {
438    return sizeof(char *);
439  }
440
441  if (char_is_one_of(dir->convSpecifier, "cCsS")) {
442    unsigned charSize =
443        format_get_char_size(dir->convSpecifier, dir->lengthModifier);
444    if (charSize == 0)
445      return FSS_INVALID;
446    if (char_is_one_of(dir->convSpecifier, "sS")) {
447      return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
448    }
449    return charSize;
450  }
451
452  return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
453}
454
// Advances the va_list pointed to by |aq| past one scalar (non-pointer)
// argument of |size| bytes for conversion |convSpecifier|.
// Floating-point conversions of size 8 are fetched as double and of size
// 12 or 16 as long double; other conversions of size 1, 2 or 4 are fetched as
// u32 and of size 8 as u64. For any other size a warning is reported and the
// macro executes `return;`, so it may only be used inside functions that
// return void.
// |size| is evaluated once and converted to int, so that the "%d" in the
// warnings receives an int even when the caller passes a sizeof expression.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    const int skip_arg_size_ = (int)(size);                        \
    if (format_is_float_conv(convSpecifier)) {                     \
      switch (skip_arg_size_) {                                    \
      case 8:                                                      \
        (void)va_arg(*(aq), double);                               \
        break;                                                     \
      case 12:                                                     \
      case 16:                                                     \
        (void)va_arg(*(aq), long double);                          \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %d\n", skip_arg_size_);    \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (skip_arg_size_) {                                    \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        (void)va_arg(*(aq), u32);                                  \
        break;                                                     \
      case 8:                                                      \
        (void)va_arg(*(aq), u64);                                  \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %d\n", skip_arg_size_);    \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
490
491// Common part of *printf interceptors.
492// Process format string and va_list, and report all load ranges.
493static void printf_common(void *ctx, const char *format, va_list aq) {
494  COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
495
496  const char *p = format;
497
498  while (*p) {
499    PrintfDirective dir;
500    p = printf_parse_next(p, &dir);
501    if (!p)
502      break;
503    if (dir.convSpecifier == 0) {
504      // This can only happen at the end of the format string.
505      CHECK_EQ(*p, 0);
506      break;
507    }
508    // Here the directive is valid. Do what it says.
509    if (dir.argIdx != -1 || dir.precisionIdx != -1) {
510      // Unsupported.
511      break;
512    }
513    if (dir.starredWidth) {
514      // Dynamic width
515      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
516    }
517    if (dir.starredPrecision) {
518      // Dynamic precision
519      SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
520    }
521    int size = printf_get_value_size(&dir);
522    if (size == FSS_INVALID) {
523      Report("WARNING: unexpected format specifier in printf "
524             "interceptor: %.*s\n", dir.end - dir.begin, dir.begin);
525      break;
526    }
527    if (dir.convSpecifier == 'n') {
528      void *argp = va_arg(aq, void *);
529      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
530      continue;
531    } else if (size == FSS_STRLEN) {
532      if (void *argp = va_arg(aq, void *)) {
533        if (dir.starredPrecision) {
534          // FIXME: properly support starred precision for strings.
535          size = 0;
536        } else if (dir.fieldPrecision > 0) {
537          // Won't read more than "precision" symbols.
538          size = internal_strnlen((const char *)argp, dir.fieldPrecision);
539          if (size < dir.fieldPrecision) size++;
540        } else {
541          // Whole string will be accessed.
542          size = internal_strlen((const char *)argp) + 1;
543        }
544        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
545      }
546    } else if (size == FSS_WCSLEN) {
547      if (void *argp = va_arg(aq, void *)) {
548        // FIXME: Properly support wide-character strings (via wcsrtombs).
549        size = 0;
550        COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
551      }
552    } else {
553      // Skip non-pointer args
554      SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
555    }
556  }
557}
558
559#endif  // SANITIZER_INTERCEPT_PRINTF
560