1/*	$OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
2/*-
3 * Copyright (c) 1990, 1993
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Chris Torek.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <inttypes.h>
35#include <limits.h>
36#include <locale.h>
37#include <stdarg.h>
38#include <stddef.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <wctype.h>
43#include "local.h"
44
45#define BUF 513 /* Maximum length of numeric string. */
46
47/*
48 * Flags used during conversion.
49 */
50#define LONG 0x00001       /* l: long or double */
51#define LONGDBL 0x00002    /* L: long double */
52#define SHORT 0x00004      /* h: short */
53#define SHORTSHORT 0x00008 /* hh: 8 bit integer */
54#define LLONG 0x00010      /* ll: long long (+ deprecated q: quad) */
55#define POINTER 0x00020    /* p: void * (as hex) */
56#define SIZEINT 0x00040    /* z: (signed) size_t */
57#define MAXINT 0x00080     /* j: intmax_t */
58#define PTRINT 0x00100     /* t: ptrdiff_t */
59#define NOSKIP 0x00200     /* [ or c: do not skip blanks */
60#define SUPPRESS 0x00400   /* *: suppress assignment */
61#define UNSIGNED 0x00800   /* %[oupxX] conversions */
62
63/*
64 * The following are used in numeric conversions only:
65 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point;
66 * SIGNOK, HAVESIGN, NDIGITS, PFXOK, and NZDIGITS are for integral.
67 */
68#define SIGNOK 0x01000   /* +/- is (still) legal */
69#define HAVESIGN 0x02000 /* sign detected */
70#define NDIGITS 0x04000  /* no digits detected */
71
72#define DPTOK 0x08000 /* (float) decimal point is still legal */
73#define EXPOK 0x10000 /* (float) exponent (e+3, etc) still legal */
74
75#define PFXOK 0x08000    /* 0x prefix is (still) legal */
76#define NZDIGITS 0x10000 /* no zero digits detected */
77
78/*
79 * Conversion types.
80 */
81#define CT_CHAR 0   /* %c conversion */
82#define CT_CCL 1    /* %[...] conversion */
83#define CT_STRING 2 /* %s conversion */
84#define CT_INT 3    /* integer, i.e., strtoimax or strtoumax */
85#define CT_FLOAT 4  /* floating, i.e., strtod */
86
// Scanset membership test for %[...] conversions.
//
// This interprets the set text directly (the counterpart of __sccl in
// vfscanf.c): a lookup table covering every wchar_t value would be too
// expensive, and a compressed table is not worth the trouble.
//
// `ccl` points just past the '[' in the format string. Returns true when `wc`
// is accepted by the set, taking a leading '^' (negation) into account.
// Scanning stops at the closing ']' or at the end of the string.
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
  const wchar_t* cursor = ccl;

  // A leading '^' negates the set: finding `wc` then means "not accepted".
  const bool negated = (*cursor == '^');
  if (negated) cursor++;
  const bool on_hit = !negated;

  // Directly after the optional '^', both ']' and '-' are ordinary characters.
  if (*cursor == ']' || *cursor == '-') {
    if (wc == *cursor) return on_hit;
    cursor++;
  }

  for (; *cursor != L'\0' && *cursor != ']'; cursor++) {
    // A '-' only introduces a range when something other than the closing ']'
    // (or the end of the string) follows it.
    const bool range_candidate = (*cursor == '-') && cursor[1] != L'\0' && cursor[1] != ']';
    if (range_candidate) {
      const wchar_t lo = cursor[-1];
      const wchar_t hi = cursor[1];
      if (lo <= hi) {
        if (lo <= wc && wc <= hi) return on_hit;
        // Combined with the loop increment this skips both the '-' and the
        // upper bound of the range.
        cursor++;
        continue;
      }
      // A descending "range" is not a range at all: the '-' is handled as a
      // literal character below.
    }
    if (wc == *cursor) return on_hit;
  }

  // Ran off the end of the set without finding `wc`.
  return !on_hit;
}
124
125#pragma GCC diagnostic push
126#pragma GCC diagnostic ignored "-Wframe-larger-than="
127
128/*
129 * vfwscanf
130 */
131int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) {
132  wint_t c;               /* character from format, or conversion */
133  size_t width;           /* field width, or 0 */
134  wchar_t* p;             /* points into all kinds of strings */
135  int n;                  /* handy integer */
136  int flags;              /* flags as defined above */
137  wchar_t* p0;            /* saves original value of p when necessary */
138  int nassigned;          /* number of fields assigned */
139  int nconversions;       /* number of conversions */
140  int nread;              /* number of characters consumed from fp */
141  int base;               /* base argument to strtoimax/strtouimax */
142  wchar_t buf[BUF];       /* buffer for numeric conversions */
143  const wchar_t* ccl;
144  wint_t wi;              /* handy wint_t */
145  char* mbp;              /* multibyte string pointer for %c %s %[ */
146  size_t nconv;           /* number of bytes in mb. conversion */
147  char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
148  mbstate_t mbs;
149
150  /* `basefix' is used to avoid `if' tests in the integer scanner */
151  static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
152
153  _SET_ORIENTATION(fp, 1);
154
155  nassigned = 0;
156  nconversions = 0;
157  nread = 0;
158  base = 0; /* XXX just to keep gcc happy */
159  for (;;) {
160    c = *fmt++;
161    if (c == 0) {
162      return (nassigned);
163    }
164    if (iswspace(c)) {
165      while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c))
166        ;
167      if (c != WEOF) __ungetwc(c, fp);
168      continue;
169    }
170    if (c != '%') goto literal;
171    width = 0;
172    flags = 0;
173    /*
174     * switch on the format.  continue if done;
175     * break once format type is derived.
176     */
177  again:
178    c = *fmt++;
179    switch (c) {
180      case '%':
181      literal:
182        if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure;
183        if (wi != c) {
184          __ungetwc(wi, fp);
185          goto match_failure;
186        }
187        nread++;
188        continue;
189
190      case '*':
191        flags |= SUPPRESS;
192        goto again;
193      case 'j':
194        flags |= MAXINT;
195        goto again;
196      case 'L':
197        flags |= LONGDBL;
198        goto again;
199      case 'h':
200        if (*fmt == 'h') {
201          fmt++;
202          flags |= SHORTSHORT;
203        } else {
204          flags |= SHORT;
205        }
206        goto again;
207      case 'l':
208        if (*fmt == 'l') {
209          fmt++;
210          flags |= LLONG;
211        } else {
212          flags |= LONG;
213        }
214        goto again;
215      case 'q':
216        flags |= LLONG; /* deprecated */
217        goto again;
218      case 't':
219        flags |= PTRINT;
220        goto again;
221      case 'z':
222        flags |= SIZEINT;
223        goto again;
224
225      case '0':
226      case '1':
227      case '2':
228      case '3':
229      case '4':
230      case '5':
231      case '6':
232      case '7':
233      case '8':
234      case '9':
235        width = width * 10 + c - '0';
236        goto again;
237
238      /*
239       * Conversions.
240       * Those marked `compat' are for 4.[123]BSD compatibility.
241       */
242      case 'D': /* compat */
243        flags |= LONG;
244        /* FALLTHROUGH */
245      case 'd':
246        c = CT_INT;
247        base = 10;
248        break;
249
250      case 'i':
251        c = CT_INT;
252        base = 0;
253        break;
254
255      case 'O': /* compat */
256        flags |= LONG;
257        /* FALLTHROUGH */
258      case 'o':
259        c = CT_INT;
260        flags |= UNSIGNED;
261        base = 8;
262        break;
263
264      case 'u':
265        c = CT_INT;
266        flags |= UNSIGNED;
267        base = 10;
268        break;
269
270      case 'X':
271      case 'x':
272        flags |= PFXOK; /* enable 0x prefixing */
273        c = CT_INT;
274        flags |= UNSIGNED;
275        base = 16;
276        break;
277
278      case 'e':
279      case 'E':
280      case 'f':
281      case 'F':
282      case 'g':
283      case 'G':
284      case 'a':
285      case 'A':
286        c = CT_FLOAT;
287        break;
288
289      case 's':
290        c = CT_STRING;
291        break;
292
293      case '[':
294        ccl = fmt;
295        if (*fmt == '^') fmt++;
296        if (*fmt == ']') fmt++;
297        while (*fmt != '\0' && *fmt != ']') fmt++;
298        fmt++;
299        flags |= NOSKIP;
300        c = CT_CCL;
301        break;
302
303      case 'c':
304        flags |= NOSKIP;
305        c = CT_CHAR;
306        break;
307
308      case 'p': /* pointer format is like hex */
309        flags |= POINTER | PFXOK;
310        c = CT_INT;
311        flags |= UNSIGNED;
312        base = 16;
313        break;
314
315      case 'n':
316        nconversions++;
317        if (flags & SUPPRESS) continue;
318        if (flags & SHORTSHORT)
319          *va_arg(ap, signed char*) = nread;
320        else if (flags & SHORT)
321          *va_arg(ap, short*) = nread;
322        else if (flags & LONG)
323          *va_arg(ap, long*) = nread;
324        else if (flags & SIZEINT)
325          *va_arg(ap, ssize_t*) = nread;
326        else if (flags & PTRINT)
327          *va_arg(ap, ptrdiff_t*) = nread;
328        else if (flags & LLONG)
329          *va_arg(ap, long long*) = nread;
330        else if (flags & MAXINT)
331          *va_arg(ap, intmax_t*) = nread;
332        else
333          *va_arg(ap, int*) = nread;
334        continue;
335
336      /*
337       * Disgusting backwards compatibility hacks.	XXX
338       */
339      case '\0': /* compat */
340        return (EOF);
341
342      default: /* compat */
343        if (iswupper(c)) flags |= LONG;
344        c = CT_INT;
345        base = 10;
346        break;
347    }
348
349    /*
350     * Consume leading white space, except for formats
351     * that suppress this.
352     */
353    if ((flags & NOSKIP) == 0) {
354      while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++;
355      if (wi == WEOF) goto input_failure;
356      __ungetwc(wi, fp);
357    }
358
359    /*
360     * Do the conversion.
361     */
362    switch (c) {
363      case CT_CHAR:
364        /* scan arbitrary characters (sets NOSKIP) */
365        if (width == 0) width = 1;
366        if (flags & LONG) {
367          if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*);
368          n = 0;
369          while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
370            if (!(flags & SUPPRESS)) *p++ = (wchar_t)wi;
371            n++;
372          }
373          if (n == 0) goto input_failure;
374          nread += n;
375          if (!(flags & SUPPRESS)) nassigned++;
376        } else {
377          if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
378          n = 0;
379          memset(&mbs, 0, sizeof(mbs));
380          while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
381            if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
382              nconv = wcrtomb(mbp, wi, &mbs);
383              if (nconv == (size_t)-1) goto input_failure;
384            } else {
385              nconv = wcrtomb(mbbuf, wi, &mbs);
386              if (nconv == (size_t)-1) goto input_failure;
387              if (nconv > width) {
388                __ungetwc(wi, fp);
389                break;
390              }
391              if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
392            }
393            if (!(flags & SUPPRESS)) mbp += nconv;
394            width -= nconv;
395            n++;
396          }
397          if (n == 0) goto input_failure;
398          nread += n;
399          if (!(flags & SUPPRESS)) nassigned++;
400        }
401        nconversions++;
402        break;
403
404      case CT_CCL:
405      case CT_STRING:
406        // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
407        // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
408        if (width == 0) width = (size_t)~0; // 'infinity'.
409        if ((flags & SUPPRESS) && (flags & LONG)) {
410          n = 0;
411          while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++;
412          if (wi != WEOF) __ungetwc(wi, fp);
413        } else if (flags & LONG) {
414          p0 = p = va_arg(ap, wchar_t*);
415          while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
416            *p++ = (wchar_t)wi;
417          }
418          if (wi != WEOF) __ungetwc(wi, fp);
419          n = p - p0;
420        } else {
421          if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
422          n = 0;
423          memset(&mbs, 0, sizeof(mbs));
424          while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
425            if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
426              nconv = wcrtomb(mbp, wi, &mbs);
427              if (nconv == (size_t)-1) goto input_failure;
428            } else {
429              nconv = wcrtomb(mbbuf, wi, &mbs);
430              if (nconv == (size_t)-1) goto input_failure;
431              if (nconv > width) break;
432              if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
433            }
434            if (!(flags & SUPPRESS)) mbp += nconv;
435            width -= nconv;
436            n++;
437          }
438          if (wi != WEOF) __ungetwc(wi, fp);
439        }
440        if (c == CT_CCL && n == 0) goto match_failure;
441        if (!(flags & SUPPRESS)) {
442          if (flags & LONG) {
443            *p = L'\0';
444          } else {
445            *mbp = '\0';
446          }
447          ++nassigned;
448        }
449        nread += n;
450        nconversions++;
451        break;
452
453      case CT_INT:
454        /* scan an integer as if by strtoimax/strtoumax */
455        if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
456          width = sizeof(buf) / sizeof(*buf) - 1;
457        flags |= SIGNOK | NDIGITS | NZDIGITS;
458        for (p = buf; width; width--) {
459          c = __fgetwc_unlock(fp);
460          /*
461           * Switch on the character; `goto ok'
462           * if we accept it as a part of number.
463           */
464          switch (c) {
465            /*
466             * The digit 0 is always legal, but is
467             * special.  For %i conversions, if no
468             * digits (zero or nonzero) have been
469             * scanned (only signs), we will have
470             * base==0.  In that case, we should set
471             * it to 8 and enable 0x prefixing.
472             * Also, if we have not scanned zero digits
473             * before this, do not turn off prefixing
474             * (someone else will turn it off if we
475             * have scanned any nonzero digits).
476             */
477            case '0':
478              if (base == 0) {
479                base = 8;
480                flags |= PFXOK;
481              }
482              if (flags & NZDIGITS)
483                flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
484              else
485                flags &= ~(SIGNOK | PFXOK | NDIGITS);
486              goto ok;
487
488            /* 1 through 7 always legal */
489            case '1':
490            case '2':
491            case '3':
492            case '4':
493            case '5':
494            case '6':
495            case '7':
496              base = basefix[base];
497              flags &= ~(SIGNOK | PFXOK | NDIGITS);
498              goto ok;
499
500            /* digits 8 and 9 ok iff decimal or hex */
501            case '8':
502            case '9':
503              base = basefix[base];
504              if (base <= 8) break; /* not legal here */
505              flags &= ~(SIGNOK | PFXOK | NDIGITS);
506              goto ok;
507
508            /* letters ok iff hex */
509            case 'A':
510            case 'B':
511            case 'C':
512            case 'D':
513            case 'E':
514            case 'F':
515            case 'a':
516            case 'b':
517            case 'c':
518            case 'd':
519            case 'e':
520            case 'f':
521              /* no need to fix base here */
522              if (base <= 10) break; /* not legal here */
523              flags &= ~(SIGNOK | PFXOK | NDIGITS);
524              goto ok;
525
526            /* sign ok only as first character */
527            case '+':
528            case '-':
529              if (flags & SIGNOK) {
530                flags &= ~SIGNOK;
531                flags |= HAVESIGN;
532                goto ok;
533              }
534              break;
535
536            /*
537             * x ok iff flag still set and 2nd char (or
538             * 3rd char if we have a sign).
539             */
540            case 'x':
541            case 'X':
542              if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
543                base = 16; /* if %i */
544                flags &= ~PFXOK;
545                goto ok;
546              }
547              break;
548          }
549
550          /*
551           * If we got here, c is not a legal character
552           * for a number.  Stop accumulating digits.
553           */
554          if (c != WEOF) __ungetwc(c, fp);
555          break;
556        ok:
557          /*
558           * c is legal: store it and look at the next.
559           */
560          *p++ = (wchar_t)c;
561        }
562        /*
563         * If we had only a sign, it is no good; push
564         * back the sign.  If the number ends in `x',
565         * it was [sign] '0' 'x', so push back the x
566         * and treat it as [sign] '0'.
567         */
568        if (flags & NDIGITS) {
569          if (p > buf) __ungetwc(*--p, fp);
570          goto match_failure;
571        }
572        c = p[-1];
573        if (c == 'x' || c == 'X') {
574          --p;
575          __ungetwc(c, fp);
576        }
577        if ((flags & SUPPRESS) == 0) {
578          uintmax_t res;
579
580          *p = '\0';
581          if (flags & UNSIGNED)
582            res = wcstoimax(buf, NULL, base);
583          else
584            res = wcstoumax(buf, NULL, base);
585          if (flags & POINTER)
586            *va_arg(ap, void**) = (void*)(uintptr_t)res;
587          else if (flags & MAXINT)
588            *va_arg(ap, intmax_t*) = res;
589          else if (flags & LLONG)
590            *va_arg(ap, long long*) = res;
591          else if (flags & SIZEINT)
592            *va_arg(ap, ssize_t*) = res;
593          else if (flags & PTRINT)
594            *va_arg(ap, ptrdiff_t*) = res;
595          else if (flags & LONG)
596            *va_arg(ap, long*) = res;
597          else if (flags & SHORT)
598            *va_arg(ap, short*) = res;
599          else if (flags & SHORTSHORT)
600            *va_arg(ap, signed char*) = res;
601          else
602            *va_arg(ap, int*) = res;
603          nassigned++;
604        }
605        nread += p - buf;
606        nconversions++;
607        break;
608
609      case CT_FLOAT:
610        /* scan a floating point number as if by strtod */
611        if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
612          width = sizeof(buf) / sizeof(*buf) - 1;
613        if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure;
614        if ((flags & SUPPRESS) == 0) {
615          if (flags & LONGDBL) {
616            long double res = wcstold(buf, &p);
617            *va_arg(ap, long double*) = res;
618          } else if (flags & LONG) {
619            double res = wcstod(buf, &p);
620            *va_arg(ap, double*) = res;
621          } else {
622            float res = wcstof(buf, &p);
623            *va_arg(ap, float*) = res;
624          }
625          if (p - buf != (ptrdiff_t)width) abort();
626          nassigned++;
627        }
628        nread += width;
629        nconversions++;
630        break;
631    }
632  }
633input_failure:
634  return (nconversions != 0 ? nassigned : EOF);
635match_failure:
636  return (nassigned);
637}
638#pragma GCC diagnostic pop
639