1/* $OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */ 2/*- 3 * Copyright (c) 1990, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Chris Torek. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34#include <inttypes.h> 35#include <limits.h> 36#include <locale.h> 37#include <stdarg.h> 38#include <stddef.h> 39#include <stdio.h> 40#include <stdlib.h> 41#include <string.h> 42#include <wctype.h> 43#include "local.h" 44 45#define BUF 513 /* Maximum length of numeric string. */ 46 47/* 48 * Flags used during conversion. 49 */ 50#define LONG 0x00001 /* l: long or double */ 51#define LONGDBL 0x00002 /* L: long double */ 52#define SHORT 0x00004 /* h: short */ 53#define SHORTSHORT 0x00008 /* hh: 8 bit integer */ 54#define LLONG 0x00010 /* ll: long long (+ deprecated q: quad) */ 55#define POINTER 0x00020 /* p: void * (as hex) */ 56#define SIZEINT 0x00040 /* z: (signed) size_t */ 57#define MAXINT 0x00080 /* j: intmax_t */ 58#define PTRINT 0x00100 /* t: ptrdiff_t */ 59#define NOSKIP 0x00200 /* [ or c: do not skip blanks */ 60#define SUPPRESS 0x00400 /* *: suppress assignment */ 61#define UNSIGNED 0x00800 /* %[oupxX] conversions */ 62 63/* 64 * The following are used in numeric conversions only: 65 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point; 66 * SIGNOK, HAVESIGN, NDIGITS, PFXOK, and NZDIGITS are for integral. 67 */ 68#define SIGNOK 0x01000 /* +/- is (still) legal */ 69#define HAVESIGN 0x02000 /* sign detected */ 70#define NDIGITS 0x04000 /* no digits detected */ 71 72#define DPTOK 0x08000 /* (float) decimal point is still legal */ 73#define EXPOK 0x10000 /* (float) exponent (e+3, etc) still legal */ 74 75#define PFXOK 0x08000 /* 0x prefix is (still) legal */ 76#define NZDIGITS 0x10000 /* no zero digits detected */ 77 78/* 79 * Conversion types. 80 */ 81#define CT_CHAR 0 /* %c conversion */ 82#define CT_CCL 1 /* %[...] conversion */ 83#define CT_STRING 2 /* %s conversion */ 84#define CT_INT 3 /* integer, i.e., strtoimax or strtoumax */ 85#define CT_FLOAT 4 /* floating, i.e., strtod */ 86 87// An interpretive version of __sccl from vfscanf.c --- a table of all wchar_t values would 88// be a little too expensive, and some kind of compressed version isn't worth the trouble. 89static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) { 90 // Is this a negated set? 91 bool member_result = true; 92 if (*ccl == '^') { 93 member_result = false; 94 ++ccl; 95 } 96 97 // The first character may be ']' or '-' without being special. 98 if (*ccl == '-' || *ccl == ']') { 99 // A literal match? 100 if (*ccl == wc) return member_result; 101 ++ccl; 102 } 103 104 while (*ccl && *ccl != ']') { 105 // The last character may be '-' without being special. 106 if (*ccl == '-' && ccl[1] != '\0' && ccl[1] != ']') { 107 wchar_t first = *(ccl - 1); 108 wchar_t last = *(ccl + 1); 109 if (first <= last) { 110 // In the range? 111 if (wc >= first && wc <= last) return member_result; 112 ccl += 2; 113 continue; 114 } 115 // A '-' is not considered to be part of a range if the character after 116 // is not greater than the character before, so fall through... 117 } 118 // A literal match? 119 if (*ccl == wc) return member_result; 120 ++ccl; 121 } 122 return !member_result; 123} 124 125#pragma GCC diagnostic push 126#pragma GCC diagnostic ignored "-Wframe-larger-than=" 127 128/* 129 * vfwscanf 130 */ 131int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) { 132 wint_t c; /* character from format, or conversion */ 133 size_t width; /* field width, or 0 */ 134 wchar_t* p; /* points into all kinds of strings */ 135 int n; /* handy integer */ 136 int flags; /* flags as defined above */ 137 wchar_t* p0; /* saves original value of p when necessary */ 138 int nassigned; /* number of fields assigned */ 139 int nconversions; /* number of conversions */ 140 int nread; /* number of characters consumed from fp */ 141 int base; /* base argument to strtoimax/strtouimax */ 142 wchar_t buf[BUF]; /* buffer for numeric conversions */ 143 const wchar_t* ccl; 144 wint_t wi; /* handy wint_t */ 145 char* mbp; /* multibyte string pointer for %c %s %[ */ 146 size_t nconv; /* number of bytes in mb. conversion */ 147 char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */ 148 mbstate_t mbs; 149 150 /* `basefix' is used to avoid `if' tests in the integer scanner */ 151 static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 152 153 _SET_ORIENTATION(fp, 1); 154 155 nassigned = 0; 156 nconversions = 0; 157 nread = 0; 158 base = 0; /* XXX just to keep gcc happy */ 159 for (;;) { 160 c = *fmt++; 161 if (c == 0) { 162 return (nassigned); 163 } 164 if (iswspace(c)) { 165 while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c)) 166 ; 167 if (c != WEOF) __ungetwc(c, fp); 168 continue; 169 } 170 if (c != '%') goto literal; 171 width = 0; 172 flags = 0; 173 /* 174 * switch on the format. continue if done; 175 * break once format type is derived. 176 */ 177 again: 178 c = *fmt++; 179 switch (c) { 180 case '%': 181 literal: 182 if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure; 183 if (wi != c) { 184 __ungetwc(wi, fp); 185 goto match_failure; 186 } 187 nread++; 188 continue; 189 190 case '*': 191 flags |= SUPPRESS; 192 goto again; 193 case 'j': 194 flags |= MAXINT; 195 goto again; 196 case 'L': 197 flags |= LONGDBL; 198 goto again; 199 case 'h': 200 if (*fmt == 'h') { 201 fmt++; 202 flags |= SHORTSHORT; 203 } else { 204 flags |= SHORT; 205 } 206 goto again; 207 case 'l': 208 if (*fmt == 'l') { 209 fmt++; 210 flags |= LLONG; 211 } else { 212 flags |= LONG; 213 } 214 goto again; 215 case 'q': 216 flags |= LLONG; /* deprecated */ 217 goto again; 218 case 't': 219 flags |= PTRINT; 220 goto again; 221 case 'z': 222 flags |= SIZEINT; 223 goto again; 224 225 case '0': 226 case '1': 227 case '2': 228 case '3': 229 case '4': 230 case '5': 231 case '6': 232 case '7': 233 case '8': 234 case '9': 235 width = width * 10 + c - '0'; 236 goto again; 237 238 /* 239 * Conversions. 240 * Those marked `compat' are for 4.[123]BSD compatibility. 241 */ 242 case 'D': /* compat */ 243 flags |= LONG; 244 /* FALLTHROUGH */ 245 case 'd': 246 c = CT_INT; 247 base = 10; 248 break; 249 250 case 'i': 251 c = CT_INT; 252 base = 0; 253 break; 254 255 case 'O': /* compat */ 256 flags |= LONG; 257 /* FALLTHROUGH */ 258 case 'o': 259 c = CT_INT; 260 flags |= UNSIGNED; 261 base = 8; 262 break; 263 264 case 'u': 265 c = CT_INT; 266 flags |= UNSIGNED; 267 base = 10; 268 break; 269 270 case 'X': 271 case 'x': 272 flags |= PFXOK; /* enable 0x prefixing */ 273 c = CT_INT; 274 flags |= UNSIGNED; 275 base = 16; 276 break; 277 278 case 'e': 279 case 'E': 280 case 'f': 281 case 'F': 282 case 'g': 283 case 'G': 284 case 'a': 285 case 'A': 286 c = CT_FLOAT; 287 break; 288 289 case 's': 290 c = CT_STRING; 291 break; 292 293 case '[': 294 ccl = fmt; 295 if (*fmt == '^') fmt++; 296 if (*fmt == ']') fmt++; 297 while (*fmt != '\0' && *fmt != ']') fmt++; 298 fmt++; 299 flags |= NOSKIP; 300 c = CT_CCL; 301 break; 302 303 case 'c': 304 flags |= NOSKIP; 305 c = CT_CHAR; 306 break; 307 308 case 'p': /* pointer format is like hex */ 309 flags |= POINTER | PFXOK; 310 c = CT_INT; 311 flags |= UNSIGNED; 312 base = 16; 313 break; 314 315 case 'n': 316 nconversions++; 317 if (flags & SUPPRESS) continue; 318 if (flags & SHORTSHORT) 319 *va_arg(ap, signed char*) = nread; 320 else if (flags & SHORT) 321 *va_arg(ap, short*) = nread; 322 else if (flags & LONG) 323 *va_arg(ap, long*) = nread; 324 else if (flags & SIZEINT) 325 *va_arg(ap, ssize_t*) = nread; 326 else if (flags & PTRINT) 327 *va_arg(ap, ptrdiff_t*) = nread; 328 else if (flags & LLONG) 329 *va_arg(ap, long long*) = nread; 330 else if (flags & MAXINT) 331 *va_arg(ap, intmax_t*) = nread; 332 else 333 *va_arg(ap, int*) = nread; 334 continue; 335 336 /* 337 * Disgusting backwards compatibility hacks. XXX 338 */ 339 case '\0': /* compat */ 340 return (EOF); 341 342 default: /* compat */ 343 if (iswupper(c)) flags |= LONG; 344 c = CT_INT; 345 base = 10; 346 break; 347 } 348 349 /* 350 * Consume leading white space, except for formats 351 * that suppress this. 352 */ 353 if ((flags & NOSKIP) == 0) { 354 while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++; 355 if (wi == WEOF) goto input_failure; 356 __ungetwc(wi, fp); 357 } 358 359 /* 360 * Do the conversion. 361 */ 362 switch (c) { 363 case CT_CHAR: 364 /* scan arbitrary characters (sets NOSKIP) */ 365 if (width == 0) width = 1; 366 if (flags & LONG) { 367 if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*); 368 n = 0; 369 while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) { 370 if (!(flags & SUPPRESS)) *p++ = (wchar_t)wi; 371 n++; 372 } 373 if (n == 0) goto input_failure; 374 nread += n; 375 if (!(flags & SUPPRESS)) nassigned++; 376 } else { 377 if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*); 378 n = 0; 379 memset(&mbs, 0, sizeof(mbs)); 380 while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) { 381 if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) { 382 nconv = wcrtomb(mbp, wi, &mbs); 383 if (nconv == (size_t)-1) goto input_failure; 384 } else { 385 nconv = wcrtomb(mbbuf, wi, &mbs); 386 if (nconv == (size_t)-1) goto input_failure; 387 if (nconv > width) { 388 __ungetwc(wi, fp); 389 break; 390 } 391 if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv); 392 } 393 if (!(flags & SUPPRESS)) mbp += nconv; 394 width -= nconv; 395 n++; 396 } 397 if (n == 0) goto input_failure; 398 nread += n; 399 if (!(flags & SUPPRESS)) nassigned++; 400 } 401 nconversions++; 402 break; 403 404 case CT_CCL: 405 case CT_STRING: 406 // CT_CCL: scan a (nonempty) character class (sets NOSKIP). 407 // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP. 408 if (width == 0) width = (size_t)~0; // 'infinity'. 409 if ((flags & SUPPRESS) && (flags & LONG)) { 410 n = 0; 411 while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++; 412 if (wi != WEOF) __ungetwc(wi, fp); 413 } else if (flags & LONG) { 414 p0 = p = va_arg(ap, wchar_t*); 415 while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) { 416 *p++ = (wchar_t)wi; 417 } 418 if (wi != WEOF) __ungetwc(wi, fp); 419 n = p - p0; 420 } else { 421 if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*); 422 n = 0; 423 memset(&mbs, 0, sizeof(mbs)); 424 while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) { 425 if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) { 426 nconv = wcrtomb(mbp, wi, &mbs); 427 if (nconv == (size_t)-1) goto input_failure; 428 } else { 429 nconv = wcrtomb(mbbuf, wi, &mbs); 430 if (nconv == (size_t)-1) goto input_failure; 431 if (nconv > width) break; 432 if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv); 433 } 434 if (!(flags & SUPPRESS)) mbp += nconv; 435 width -= nconv; 436 n++; 437 } 438 if (wi != WEOF) __ungetwc(wi, fp); 439 } 440 if (c == CT_CCL && n == 0) goto match_failure; 441 if (!(flags & SUPPRESS)) { 442 if (flags & LONG) { 443 *p = L'\0'; 444 } else { 445 *mbp = '\0'; 446 } 447 ++nassigned; 448 } 449 nread += n; 450 nconversions++; 451 break; 452 453 case CT_INT: 454 /* scan an integer as if by strtoimax/strtoumax */ 455 if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1) 456 width = sizeof(buf) / sizeof(*buf) - 1; 457 flags |= SIGNOK | NDIGITS | NZDIGITS; 458 for (p = buf; width; width--) { 459 c = __fgetwc_unlock(fp); 460 /* 461 * Switch on the character; `goto ok' 462 * if we accept it as a part of number. 463 */ 464 switch (c) { 465 /* 466 * The digit 0 is always legal, but is 467 * special. For %i conversions, if no 468 * digits (zero or nonzero) have been 469 * scanned (only signs), we will have 470 * base==0. In that case, we should set 471 * it to 8 and enable 0x prefixing. 472 * Also, if we have not scanned zero digits 473 * before this, do not turn off prefixing 474 * (someone else will turn it off if we 475 * have scanned any nonzero digits). 476 */ 477 case '0': 478 if (base == 0) { 479 base = 8; 480 flags |= PFXOK; 481 } 482 if (flags & NZDIGITS) 483 flags &= ~(SIGNOK | NZDIGITS | NDIGITS); 484 else 485 flags &= ~(SIGNOK | PFXOK | NDIGITS); 486 goto ok; 487 488 /* 1 through 7 always legal */ 489 case '1': 490 case '2': 491 case '3': 492 case '4': 493 case '5': 494 case '6': 495 case '7': 496 base = basefix[base]; 497 flags &= ~(SIGNOK | PFXOK | NDIGITS); 498 goto ok; 499 500 /* digits 8 and 9 ok iff decimal or hex */ 501 case '8': 502 case '9': 503 base = basefix[base]; 504 if (base <= 8) break; /* not legal here */ 505 flags &= ~(SIGNOK | PFXOK | NDIGITS); 506 goto ok; 507 508 /* letters ok iff hex */ 509 case 'A': 510 case 'B': 511 case 'C': 512 case 'D': 513 case 'E': 514 case 'F': 515 case 'a': 516 case 'b': 517 case 'c': 518 case 'd': 519 case 'e': 520 case 'f': 521 /* no need to fix base here */ 522 if (base <= 10) break; /* not legal here */ 523 flags &= ~(SIGNOK | PFXOK | NDIGITS); 524 goto ok; 525 526 /* sign ok only as first character */ 527 case '+': 528 case '-': 529 if (flags & SIGNOK) { 530 flags &= ~SIGNOK; 531 flags |= HAVESIGN; 532 goto ok; 533 } 534 break; 535 536 /* 537 * x ok iff flag still set and 2nd char (or 538 * 3rd char if we have a sign). 539 */ 540 case 'x': 541 case 'X': 542 if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) { 543 base = 16; /* if %i */ 544 flags &= ~PFXOK; 545 goto ok; 546 } 547 break; 548 } 549 550 /* 551 * If we got here, c is not a legal character 552 * for a number. Stop accumulating digits. 553 */ 554 if (c != WEOF) __ungetwc(c, fp); 555 break; 556 ok: 557 /* 558 * c is legal: store it and look at the next. 559 */ 560 *p++ = (wchar_t)c; 561 } 562 /* 563 * If we had only a sign, it is no good; push 564 * back the sign. If the number ends in `x', 565 * it was [sign] '0' 'x', so push back the x 566 * and treat it as [sign] '0'. 567 */ 568 if (flags & NDIGITS) { 569 if (p > buf) __ungetwc(*--p, fp); 570 goto match_failure; 571 } 572 c = p[-1]; 573 if (c == 'x' || c == 'X') { 574 --p; 575 __ungetwc(c, fp); 576 } 577 if ((flags & SUPPRESS) == 0) { 578 uintmax_t res; 579 580 *p = '\0'; 581 if (flags & UNSIGNED) 582 res = wcstoimax(buf, NULL, base); 583 else 584 res = wcstoumax(buf, NULL, base); 585 if (flags & POINTER) 586 *va_arg(ap, void**) = (void*)(uintptr_t)res; 587 else if (flags & MAXINT) 588 *va_arg(ap, intmax_t*) = res; 589 else if (flags & LLONG) 590 *va_arg(ap, long long*) = res; 591 else if (flags & SIZEINT) 592 *va_arg(ap, ssize_t*) = res; 593 else if (flags & PTRINT) 594 *va_arg(ap, ptrdiff_t*) = res; 595 else if (flags & LONG) 596 *va_arg(ap, long*) = res; 597 else if (flags & SHORT) 598 *va_arg(ap, short*) = res; 599 else if (flags & SHORTSHORT) 600 *va_arg(ap, signed char*) = res; 601 else 602 *va_arg(ap, int*) = res; 603 nassigned++; 604 } 605 nread += p - buf; 606 nconversions++; 607 break; 608 609 case CT_FLOAT: 610 /* scan a floating point number as if by strtod */ 611 if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1) 612 width = sizeof(buf) / sizeof(*buf) - 1; 613 if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure; 614 if ((flags & SUPPRESS) == 0) { 615 if (flags & LONGDBL) { 616 long double res = wcstold(buf, &p); 617 *va_arg(ap, long double*) = res; 618 } else if (flags & LONG) { 619 double res = wcstod(buf, &p); 620 *va_arg(ap, double*) = res; 621 } else { 622 float res = wcstof(buf, &p); 623 *va_arg(ap, float*) = res; 624 } 625 if (p - buf != (ptrdiff_t)width) abort(); 626 nassigned++; 627 } 628 nread += width; 629 nconversions++; 630 break; 631 } 632 } 633input_failure: 634 return (nconversions != 0 ? nassigned : EOF); 635match_failure: 636 return (nassigned); 637} 638#pragma GCC diagnostic pop 639