1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* 6* Copyright (C) 1998-2016, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9******************************************************************************* 10* 11* File uscnnf_p.c 12* 13* Modification History: 14* 15* Date Name Description 16* 12/02/98 stephen Creation. 17* 03/13/99 stephen Modified for new C API. 18******************************************************************************* 19*/ 20 21#include "unicode/utypes.h" 22 23#if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION 24 25#include "unicode/uchar.h" 26#include "unicode/ustring.h" 27#include "unicode/unum.h" 28#include "unicode/udat.h" 29#include "unicode/uset.h" 30#include "uscanf.h" 31#include "ufmt_cmn.h" 32#include "ufile.h" 33#include "locbund.h" 34 35#include "cmemory.h" 36#include "ustr_cnv.h" 37 38/* flag characters for u_scanf */ 39#define FLAG_ASTERISK 0x002A 40#define FLAG_PAREN 0x0028 41 42#define ISFLAG(s) (s) == FLAG_ASTERISK || \ 43 (s) == FLAG_PAREN 44 45/* special characters for u_scanf */ 46#define SPEC_DOLLARSIGN 0x0024 47 48/* unicode digits */ 49#define DIGIT_ZERO 0x0030 50#define DIGIT_ONE 0x0031 51#define DIGIT_TWO 0x0032 52#define DIGIT_THREE 0x0033 53#define DIGIT_FOUR 0x0034 54#define DIGIT_FIVE 0x0035 55#define DIGIT_SIX 0x0036 56#define DIGIT_SEVEN 0x0037 57#define DIGIT_EIGHT 0x0038 58#define DIGIT_NINE 0x0039 59 60#define ISDIGIT(s) (s) == DIGIT_ZERO || \ 61 (s) == DIGIT_ONE || \ 62 (s) == DIGIT_TWO || \ 63 (s) == DIGIT_THREE || \ 64 (s) == DIGIT_FOUR || \ 65 (s) == DIGIT_FIVE || \ 66 (s) == DIGIT_SIX || \ 67 (s) == DIGIT_SEVEN || \ 68 (s) == DIGIT_EIGHT || \ 69 (s) == DIGIT_NINE 70 71/* u_scanf modifiers */ 72#define MOD_H 0x0068 73#define MOD_LOWERL 0x006C 74#define MOD_L 0x004C 75 76#define ISMOD(s) (s) == MOD_H || \ 77 (s) == MOD_LOWERL || \ 78 (s) == MOD_L 79 80/** 81 * Struct encapsulating a single uscanf format specification. 82 */ 83typedef struct u_scanf_spec_info { 84 int32_t fWidth; /* Width */ 85 86 UChar fSpec; /* Format specification */ 87 88 UChar fPadChar; /* Padding character */ 89 90 UBool fSkipArg; /* TRUE if arg should be skipped */ 91 UBool fIsLongDouble; /* L flag */ 92 UBool fIsShort; /* h flag */ 93 UBool fIsLong; /* l flag */ 94 UBool fIsLongLong; /* ll flag */ 95 UBool fIsString; /* TRUE if this is a NULL-terminated string. */ 96} u_scanf_spec_info; 97 98 99/** 100 * Struct encapsulating a single u_scanf format specification. 101 */ 102typedef struct u_scanf_spec { 103 u_scanf_spec_info fInfo; /* Information on this spec */ 104 int32_t fArgPos; /* Position of data in arg list */ 105} u_scanf_spec; 106 107/** 108 * Parse a single u_scanf format specifier in Unicode. 109 * @param fmt A pointer to a '%' character in a u_scanf format specification. 110 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed 111 * format specifier. 112 * @return The number of characters contained in this specifier. 113 */ 114static int32_t 115u_scanf_parse_spec (const UChar *fmt, 116 u_scanf_spec *spec) 117{ 118 const UChar *s = fmt; 119 const UChar *backup; 120 u_scanf_spec_info *info = &(spec->fInfo); 121 122 /* initialize spec to default values */ 123 spec->fArgPos = -1; 124 125 info->fWidth = -1; 126 info->fSpec = 0x0000; 127 info->fPadChar = 0x0020; 128 info->fSkipArg = FALSE; 129 info->fIsLongDouble = FALSE; 130 info->fIsShort = FALSE; 131 info->fIsLong = FALSE; 132 info->fIsLongLong = FALSE; 133 info->fIsString = TRUE; 134 135 136 /* skip over the initial '%' */ 137 s++; 138 139 /* Check for positional argument */ 140 if(ISDIGIT(*s)) { 141 142 /* Save the current position */ 143 backup = s; 144 145 /* handle positional parameters */ 146 if(ISDIGIT(*s)) { 147 spec->fArgPos = (int) (*s++ - DIGIT_ZERO); 148 149 while(ISDIGIT(*s)) { 150 spec->fArgPos *= 10; 151 spec->fArgPos += (int) (*s++ - DIGIT_ZERO); 152 } 153 } 154 155 /* if there is no '$', don't read anything */ 156 if(*s != SPEC_DOLLARSIGN) { 157 spec->fArgPos = -1; 158 s = backup; 159 } 160 /* munge the '$' */ 161 else 162 s++; 163 } 164 165 /* Get any format flags */ 166 while(ISFLAG(*s)) { 167 switch(*s++) { 168 169 /* skip argument */ 170 case FLAG_ASTERISK: 171 info->fSkipArg = TRUE; 172 break; 173 174 /* pad character specified */ 175 case FLAG_PAREN: 176 177 /* first four characters are hex values for pad char */ 178 info->fPadChar = (UChar)ufmt_digitvalue(*s++); 179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 180 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 181 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 182 183 /* final character is ignored */ 184 s++; 185 186 break; 187 } 188 } 189 190 /* Get the width */ 191 if(ISDIGIT(*s)){ 192 info->fWidth = (int) (*s++ - DIGIT_ZERO); 193 194 while(ISDIGIT(*s)) { 195 info->fWidth *= 10; 196 info->fWidth += (int) (*s++ - DIGIT_ZERO); 197 } 198 } 199 200 /* Get any modifiers */ 201 if(ISMOD(*s)) { 202 switch(*s++) { 203 204 /* short */ 205 case MOD_H: 206 info->fIsShort = TRUE; 207 break; 208 209 /* long or long long */ 210 case MOD_LOWERL: 211 if(*s == MOD_LOWERL) { 212 info->fIsLongLong = TRUE; 213 /* skip over the next 'l' */ 214 s++; 215 } 216 else 217 info->fIsLong = TRUE; 218 break; 219 220 /* long double */ 221 case MOD_L: 222 info->fIsLongDouble = TRUE; 223 break; 224 } 225 } 226 227 /* finally, get the specifier letter */ 228 info->fSpec = *s++; 229 230 /* return # of characters in this specifier */ 231 return (int32_t)(s - fmt); 232} 233 234#define UP_PERCENT 0x0025 235 236 237/* ANSI style formatting */ 238/* Use US-ASCII characters only for formatting */ 239 240/* % */ 241#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} 242/* s */ 243#define UFMT_STRING {ufmt_string, u_scanf_string_handler} 244/* c */ 245#define UFMT_CHAR {ufmt_string, u_scanf_char_handler} 246/* d, i */ 247#define UFMT_INT {ufmt_int, u_scanf_integer_handler} 248/* u */ 249#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} 250/* o */ 251#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} 252/* x, X */ 253#define UFMT_HEX {ufmt_int, u_scanf_hex_handler} 254/* f */ 255#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} 256/* e, E */ 257#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} 258/* g, G */ 259#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} 260/* n */ 261#define UFMT_COUNT {ufmt_count, u_scanf_count_handler} 262/* [ */ 263#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} 264 265/* non-ANSI extensions */ 266/* Use US-ASCII characters only for formatting */ 267 268/* p */ 269#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} 270/* V */ 271#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} 272/* P */ 273#define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler} 274/* C K is old format */ 275#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} 276/* S U is old format */ 277#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} 278 279 280#define UFMT_EMPTY {ufmt_empty, NULL} 281 282/** 283 * A u_scanf handler function. 284 * A u_scanf handler is responsible for handling a single u_scanf 285 * format specification, for example 'd' or 's'. 286 * @param stream The UFILE to which to write output. 287 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing 288 * information on the format specification. 289 * @param args A pointer to the argument data 290 * @param fmt A pointer to the first character in the format string 291 * following the spec. 292 * @param fmtConsumed On output, set to the number of characters consumed 293 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width. 294 * @param argConverted The number of arguments converted and assigned, or -1 if an 295 * error occurred. 296 * @return The number of code points consumed during reading. 297 */ 298typedef int32_t (*u_scanf_handler) (UFILE *stream, 299 u_scanf_spec_info *info, 300 ufmt_args *args, 301 const UChar *fmt, 302 int32_t *fmtConsumed, 303 int32_t *argConverted); 304 305typedef struct u_scanf_info { 306 ufmt_type_info info; 307 u_scanf_handler handler; 308} u_scanf_info; 309 310#define USCANF_NUM_FMT_HANDLERS 108 311#define USCANF_SYMBOL_BUFFER_SIZE 8 312 313/* We do not use handlers for 0-0x1f */ 314#define USCANF_BASE_FMT_HANDLERS 0x20 315 316 317static int32_t 318u_scanf_skip_leading_ws(UFILE *input, 319 UChar pad) 320{ 321 UChar c; 322 int32_t count = 0; 323 UBool isNotEOF; 324 325 /* skip all leading ws in the input */ 326 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) ) 327 { 328 count++; 329 } 330 331 /* put the final character back on the input */ 332 if(isNotEOF) 333 u_fungetc(c, input); 334 335 return count; 336} 337 338/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */ 339static int32_t 340u_scanf_skip_leading_positive_sign(UFILE *input, 341 UNumberFormat *format, 342 UErrorCode *status) 343{ 344 UChar c; 345 int32_t count = 0; 346 UBool isNotEOF; 347 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; 348 int32_t symbolLen; 349 UErrorCode localStatus = U_ZERO_ERROR; 350 351 if (U_SUCCESS(*status)) { 352 symbolLen = unum_getSymbol(format, 353 UNUM_PLUS_SIGN_SYMBOL, 354 plusSymbol, 355 UPRV_LENGTHOF(plusSymbol), 356 &localStatus); 357 358 if (U_SUCCESS(localStatus)) { 359 /* skip all leading ws in the input */ 360 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) ) 361 { 362 count++; 363 } 364 365 /* put the final character back on the input */ 366 if(isNotEOF) { 367 u_fungetc(c, input); 368 } 369 } 370 } 371 372 return count; 373} 374 375static int32_t 376u_scanf_simple_percent_handler(UFILE *input, 377 u_scanf_spec_info *info, 378 ufmt_args *args, 379 const UChar *fmt, 380 int32_t *fmtConsumed, 381 int32_t *argConverted) 382{ 383 (void)info; 384 (void)args; 385 (void)fmt; 386 (void)fmtConsumed; 387 388 /* make sure the next character in the input is a percent */ 389 *argConverted = 0; 390 if(u_fgetc(input) != 0x0025) { 391 *argConverted = -1; 392 } 393 return 1; 394} 395 396static int32_t 397u_scanf_count_handler(UFILE *input, 398 u_scanf_spec_info *info, 399 ufmt_args *args, 400 const UChar *fmt, 401 int32_t *fmtConsumed, 402 int32_t *argConverted) 403{ 404 (void)input; 405 (void)fmt; 406 (void)fmtConsumed; 407 408 /* in the special case of count, the u_scanf_spec_info's width */ 409 /* will contain the # of items converted thus far */ 410 if (!info->fSkipArg) { 411 if (info->fIsShort) 412 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); 413 else if (info->fIsLongLong) 414 *(int64_t*)(args[0].ptrValue) = info->fWidth; 415 else 416 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); 417 } 418 *argConverted = 0; 419 420 /* we converted 0 args */ 421 return 0; 422} 423 424static int32_t 425u_scanf_double_handler(UFILE *input, 426 u_scanf_spec_info *info, 427 ufmt_args *args, 428 const UChar *fmt, 429 int32_t *fmtConsumed, 430 int32_t *argConverted) 431{ 432 (void)fmt; 433 (void)fmtConsumed; 434 435 int32_t len; 436 double num; 437 UNumberFormat *format; 438 int32_t parsePos = 0; 439 int32_t skipped; 440 UErrorCode status = U_ZERO_ERROR; 441 442 443 /* skip all ws in the input */ 444 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 445 446 /* fill the input's internal buffer */ 447 ufile_fill_uchar_buffer(input); 448 449 /* determine the size of the input's buffer */ 450 len = (int32_t)(input->str.fLimit - input->str.fPos); 451 452 /* truncate to the width, if specified */ 453 if(info->fWidth != -1) 454 len = ufmt_min(len, info->fWidth); 455 456 /* get the formatter */ 457 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 458 459 /* handle error */ 460 if(format == 0) 461 return 0; 462 463 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 464 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 465 466 /* parse the number */ 467 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 468 469 if (!info->fSkipArg) { 470 if (info->fIsLong) 471 *(double*)(args[0].ptrValue) = num; 472 else if (info->fIsLongDouble) 473 *(long double*)(args[0].ptrValue) = num; 474 else 475 *(float*)(args[0].ptrValue) = (float)num; 476 } 477 478 /* mask off any necessary bits */ 479 /* if(! info->fIsLong_double) 480 num &= DBL_MAX;*/ 481 482 /* update the input's position to reflect consumed data */ 483 input->str.fPos += parsePos; 484 485 /* we converted 1 arg */ 486 *argConverted = !info->fSkipArg; 487 return parsePos + skipped; 488} 489 490#define UPRINTF_SYMBOL_BUFFER_SIZE 8 491 492static int32_t 493u_scanf_scientific_handler(UFILE *input, 494 u_scanf_spec_info *info, 495 ufmt_args *args, 496 const UChar *fmt, 497 int32_t *fmtConsumed, 498 int32_t *argConverted) 499{ 500 (void)fmt; 501 (void)fmtConsumed; 502 503 int32_t len; 504 double num; 505 UNumberFormat *format; 506 int32_t parsePos = 0; 507 int32_t skipped; 508 UErrorCode status = U_ZERO_ERROR; 509 UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; 510 int32_t srcLen, expLen; 511 UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; 512 513 514 /* skip all ws in the input */ 515 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 516 517 /* fill the input's internal buffer */ 518 ufile_fill_uchar_buffer(input); 519 520 /* determine the size of the input's buffer */ 521 len = (int32_t)(input->str.fLimit - input->str.fPos); 522 523 /* truncate to the width, if specified */ 524 if(info->fWidth != -1) 525 len = ufmt_min(len, info->fWidth); 526 527 /* get the formatter */ 528 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); 529 530 /* handle error */ 531 if(format == 0) 532 return 0; 533 534 /* set the appropriate flags on the formatter */ 535 536 srcLen = unum_getSymbol(format, 537 UNUM_EXPONENTIAL_SYMBOL, 538 srcExpBuf, 539 sizeof(srcExpBuf), 540 &status); 541 542 /* Upper/lower case the e */ 543 if (info->fSpec == (UChar)0x65 /* e */) { 544 expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), 545 srcExpBuf, srcLen, 546 input->str.fBundle.fLocale, 547 &status); 548 } 549 else { 550 expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), 551 srcExpBuf, srcLen, 552 input->str.fBundle.fLocale, 553 &status); 554 } 555 556 unum_setSymbol(format, 557 UNUM_EXPONENTIAL_SYMBOL, 558 expBuf, 559 expLen, 560 &status); 561 562 563 564 565 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 566 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 567 568 /* parse the number */ 569 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 570 571 if (!info->fSkipArg) { 572 if (info->fIsLong) 573 *(double*)(args[0].ptrValue) = num; 574 else if (info->fIsLongDouble) 575 *(long double*)(args[0].ptrValue) = num; 576 else 577 *(float*)(args[0].ptrValue) = (float)num; 578 } 579 580 /* mask off any necessary bits */ 581 /* if(! info->fIsLong_double) 582 num &= DBL_MAX;*/ 583 584 /* update the input's position to reflect consumed data */ 585 input->str.fPos += parsePos; 586 587 /* we converted 1 arg */ 588 *argConverted = !info->fSkipArg; 589 return parsePos + skipped; 590} 591 592static int32_t 593u_scanf_scidbl_handler(UFILE *input, 594 u_scanf_spec_info *info, 595 ufmt_args *args, 596 const UChar *fmt, 597 int32_t *fmtConsumed, 598 int32_t *argConverted) 599{ 600 (void)fmt; 601 (void)fmtConsumed; 602 603 int32_t len; 604 double num; 605 UNumberFormat *scientificFormat, *genericFormat; 606 /*int32_t scientificResult, genericResult;*/ 607 double scientificResult, genericResult; 608 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; 609 int32_t skipped; 610 UErrorCode scientificStatus = U_ZERO_ERROR; 611 UErrorCode genericStatus = U_ZERO_ERROR; 612 613 614 /* since we can't determine by scanning the characters whether */ 615 /* a number was formatted in the 'f' or 'g' styles, parse the */ 616 /* string with both formatters, and assume whichever one */ 617 /* parsed the most is the correct formatter to use */ 618 619 620 /* skip all ws in the input */ 621 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 622 623 /* fill the input's internal buffer */ 624 ufile_fill_uchar_buffer(input); 625 626 /* determine the size of the input's buffer */ 627 len = (int32_t)(input->str.fLimit - input->str.fPos); 628 629 /* truncate to the width, if specified */ 630 if(info->fWidth != -1) 631 len = ufmt_min(len, info->fWidth); 632 633 /* get the formatters */ 634 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); 635 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 636 637 /* handle error */ 638 if(scientificFormat == 0 || genericFormat == 0) 639 return 0; 640 641 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 642 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); 643 644 /* parse the number using each format*/ 645 646 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, 647 &scientificParsePos, &scientificStatus); 648 649 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, 650 &genericParsePos, &genericStatus); 651 652 /* determine which parse made it farther */ 653 if(scientificParsePos > genericParsePos) { 654 /* stash the result in num */ 655 num = scientificResult; 656 /* update the input's position to reflect consumed data */ 657 parsePos += scientificParsePos; 658 } 659 else { 660 /* stash the result in num */ 661 num = genericResult; 662 /* update the input's position to reflect consumed data */ 663 parsePos += genericParsePos; 664 } 665 input->str.fPos += parsePos; 666 667 if (!info->fSkipArg) { 668 if (info->fIsLong) 669 *(double*)(args[0].ptrValue) = num; 670 else if (info->fIsLongDouble) 671 *(long double*)(args[0].ptrValue) = num; 672 else 673 *(float*)(args[0].ptrValue) = (float)num; 674 } 675 676 /* mask off any necessary bits */ 677 /* if(! info->fIsLong_double) 678 num &= DBL_MAX;*/ 679 680 /* we converted 1 arg */ 681 *argConverted = !info->fSkipArg; 682 return parsePos + skipped; 683} 684 685static int32_t 686u_scanf_integer_handler(UFILE *input, 687 u_scanf_spec_info *info, 688 ufmt_args *args, 689 const UChar *fmt, 690 int32_t *fmtConsumed, 691 int32_t *argConverted) 692{ 693 (void)fmt; 694 (void)fmtConsumed; 695 696 int32_t len; 697 void *num = (void*) (args[0].ptrValue); 698 UNumberFormat *format; 699 int32_t parsePos = 0; 700 int32_t skipped; 701 UErrorCode status = U_ZERO_ERROR; 702 int64_t result; 703 704 705 /* skip all ws in the input */ 706 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 707 708 /* fill the input's internal buffer */ 709 ufile_fill_uchar_buffer(input); 710 711 /* determine the size of the input's buffer */ 712 len = (int32_t)(input->str.fLimit - input->str.fPos); 713 714 /* truncate to the width, if specified */ 715 if(info->fWidth != -1) 716 len = ufmt_min(len, info->fWidth); 717 718 /* get the formatter */ 719 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 720 721 /* handle error */ 722 if(format == 0) 723 return 0; 724 725 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 726 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 727 728 /* parse the number */ 729 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); 730 731 /* mask off any necessary bits */ 732 if (!info->fSkipArg) { 733 if (info->fIsShort) 734 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 735 else if (info->fIsLongLong) 736 *(int64_t*)num = result; 737 else 738 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 739 } 740 741 /* update the input's position to reflect consumed data */ 742 input->str.fPos += parsePos; 743 744 /* we converted 1 arg */ 745 *argConverted = !info->fSkipArg; 746 return parsePos + skipped; 747} 748 749static int32_t 750u_scanf_uinteger_handler(UFILE *input, 751 u_scanf_spec_info *info, 752 ufmt_args *args, 753 const UChar *fmt, 754 int32_t *fmtConsumed, 755 int32_t *argConverted) 756{ 757 /* TODO Fix this when Numberformat handles uint64_t */ 758 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); 759} 760 761static int32_t 762u_scanf_percent_handler(UFILE *input, 763 u_scanf_spec_info *info, 764 ufmt_args *args, 765 const UChar *fmt, 766 int32_t *fmtConsumed, 767 int32_t *argConverted) 768{ 769 (void)fmt; 770 (void)fmtConsumed; 771 772 int32_t len; 773 double num; 774 UNumberFormat *format; 775 int32_t parsePos = 0; 776 UErrorCode status = U_ZERO_ERROR; 777 778 779 /* skip all ws in the input */ 780 u_scanf_skip_leading_ws(input, info->fPadChar); 781 782 /* fill the input's internal buffer */ 783 ufile_fill_uchar_buffer(input); 784 785 /* determine the size of the input's buffer */ 786 len = (int32_t)(input->str.fLimit - input->str.fPos); 787 788 /* truncate to the width, if specified */ 789 if(info->fWidth != -1) 790 len = ufmt_min(len, info->fWidth); 791 792 /* get the formatter */ 793 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); 794 795 /* handle error */ 796 if(format == 0) 797 return 0; 798 799 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 800 u_scanf_skip_leading_positive_sign(input, format, &status); 801 802 /* parse the number */ 803 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 804 805 if (!info->fSkipArg) { 806 *(double*)(args[0].ptrValue) = num; 807 } 808 809 /* mask off any necessary bits */ 810 /* if(! info->fIsLong_double) 811 num &= DBL_MAX;*/ 812 813 /* update the input's position to reflect consumed data */ 814 input->str.fPos += parsePos; 815 816 /* we converted 1 arg */ 817 *argConverted = !info->fSkipArg; 818 return parsePos; 819} 820 821static int32_t 822u_scanf_string_handler(UFILE *input, 823 u_scanf_spec_info *info, 824 ufmt_args *args, 825 const UChar *fmt, 826 int32_t *fmtConsumed, 827 int32_t *argConverted) 828{ 829 (void)fmt; 830 (void)fmtConsumed; 831 832 const UChar *source; 833 UConverter *conv; 834 char *arg = (char*)(args[0].ptrValue); 835 char *alias = arg; 836 char *limit; 837 UErrorCode status = U_ZERO_ERROR; 838 int32_t count; 839 int32_t skipped = 0; 840 UChar c; 841 UBool isNotEOF = FALSE; 842 843 /* skip all ws in the input */ 844 if (info->fIsString) { 845 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 846 } 847 848 /* get the string one character at a time, truncating to the width */ 849 count = 0; 850 851 /* open the default converter */ 852 conv = u_getDefaultConverter(&status); 853 854 if(U_FAILURE(status)) 855 return -1; 856 857 while( (info->fWidth == -1 || count < info->fWidth) 858 && (isNotEOF = ufile_getch(input, &c)) 859 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) 860 { 861 862 if (!info->fSkipArg) { 863 /* put the character from the input onto the target */ 864 source = &c; 865 /* Since we do this one character at a time, do it this way. */ 866 if (info->fWidth > 0) { 867 limit = alias + info->fWidth - count; 868 } 869 else { 870 limit = alias + ucnv_getMaxCharSize(conv); 871 } 872 873 /* convert the character to the default codepage */ 874 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, 875 NULL, TRUE, &status); 876 877 if(U_FAILURE(status)) { 878 /* clean up */ 879 u_releaseDefaultConverter(conv); 880 return -1; 881 } 882 } 883 884 /* increment the count */ 885 ++count; 886 } 887 888 /* put the final character we read back on the input */ 889 if (!info->fSkipArg) { 890 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) 891 u_fungetc(c, input); 892 893 /* add the terminator */ 894 if (info->fIsString) { 895 *alias = 0x00; 896 } 897 } 898 899 /* clean up */ 900 u_releaseDefaultConverter(conv); 901 902 /* we converted 1 arg */ 903 *argConverted = !info->fSkipArg; 904 return count + skipped; 905} 906 907static int32_t 908u_scanf_char_handler(UFILE *input, 909 u_scanf_spec_info *info, 910 ufmt_args *args, 911 const UChar *fmt, 912 int32_t *fmtConsumed, 913 int32_t *argConverted) 914{ 915 if (info->fWidth < 0) { 916 info->fWidth = 1; 917 } 918 info->fIsString = FALSE; 919 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); 920} 921 922static int32_t 923u_scanf_ustring_handler(UFILE *input, 924 u_scanf_spec_info *info, 925 ufmt_args *args, 926 const UChar *fmt, 927 int32_t *fmtConsumed, 928 int32_t *argConverted) 929{ 930 (void)fmt; 931 (void)fmtConsumed; 932 933 UChar *arg = (UChar*)(args[0].ptrValue); 934 UChar *alias = arg; 935 int32_t count; 936 int32_t skipped = 0; 937 UChar c; 938 UBool isNotEOF = FALSE; 939 940 /* skip all ws in the input */ 941 if (info->fIsString) { 942 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 943 } 944 945 /* get the string one character at a time, truncating to the width */ 946 count = 0; 947 948 while( (info->fWidth == -1 || count < info->fWidth) 949 && (isNotEOF = ufile_getch(input, &c)) 950 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) 951 { 952 953 /* put the character from the input onto the target */ 954 if (!info->fSkipArg) { 955 *alias++ = c; 956 } 957 958 /* increment the count */ 959 ++count; 960 } 961 962 /* put the final character we read back on the input */ 963 if (!info->fSkipArg) { 964 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { 965 u_fungetc(c, input); 966 } 967 968 /* add the terminator */ 969 if (info->fIsString) { 970 *alias = 0x0000; 971 } 972 } 973 974 /* we converted 1 arg */ 975 *argConverted = !info->fSkipArg; 976 return count + skipped; 977} 978 979static int32_t 980u_scanf_uchar_handler(UFILE *input, 981 u_scanf_spec_info *info, 982 ufmt_args *args, 983 const UChar *fmt, 984 int32_t *fmtConsumed, 985 int32_t *argConverted) 986{ 987 if (info->fWidth < 0) { 988 info->fWidth = 1; 989 } 990 info->fIsString = FALSE; 991 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); 992} 993 994static int32_t 995u_scanf_spellout_handler(UFILE *input, 996 u_scanf_spec_info *info, 997 ufmt_args *args, 998 const UChar *fmt, 999 int32_t *fmtConsumed, 1000 int32_t *argConverted) 1001{ 1002 (void)fmt; 1003 (void)fmtConsumed; 1004 1005 int32_t len; 1006 double num; 1007 UNumberFormat *format; 1008 int32_t parsePos = 0; 1009 int32_t skipped; 1010 UErrorCode status = U_ZERO_ERROR; 1011 1012 1013 /* skip all ws in the input */ 1014 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1015 1016 /* fill the input's internal buffer */ 1017 ufile_fill_uchar_buffer(input); 1018 1019 /* determine the size of the input's buffer */ 1020 len = (int32_t)(input->str.fLimit - input->str.fPos); 1021 1022 /* truncate to the width, if specified */ 1023 if(info->fWidth != -1) 1024 len = ufmt_min(len, info->fWidth); 1025 1026 /* get the formatter */ 1027 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); 1028 1029 /* handle error */ 1030 if(format == 0) 1031 return 0; 1032 1033 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 1034 /* This is not applicable to RBNF. */ 1035 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ 1036 1037 /* parse the number */ 1038 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 1039 1040 if (!info->fSkipArg) { 1041 *(double*)(args[0].ptrValue) = num; 1042 } 1043 1044 /* mask off any necessary bits */ 1045 /* if(! info->fIsLong_double) 1046 num &= DBL_MAX;*/ 1047 1048 /* update the input's position to reflect consumed data */ 1049 input->str.fPos += parsePos; 1050 1051 /* we converted 1 arg */ 1052 *argConverted = !info->fSkipArg; 1053 return parsePos + skipped; 1054} 1055 1056static int32_t 1057u_scanf_hex_handler(UFILE *input, 1058 u_scanf_spec_info *info, 1059 ufmt_args *args, 1060 const UChar *fmt, 1061 int32_t *fmtConsumed, 1062 int32_t *argConverted) 1063{ 1064 (void)fmt; 1065 (void)fmtConsumed; 1066 1067 int32_t len; 1068 int32_t skipped; 1069 void *num = (void*) (args[0].ptrValue); 1070 int64_t result; 1071 1072 /* skip all ws in the input */ 1073 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1074 1075 /* fill the input's internal buffer */ 1076 ufile_fill_uchar_buffer(input); 1077 1078 /* determine the size of the input's buffer */ 1079 len = (int32_t)(input->str.fLimit - input->str.fPos); 1080 1081 /* truncate to the width, if specified */ 1082 if(info->fWidth != -1) 1083 len = ufmt_min(len, info->fWidth); 1084 1085 /* check for alternate form */ 1086 if( *(input->str.fPos) == 0x0030 && 1087 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { 1088 1089 /* skip the '0' and 'x' or 'X' if present */ 1090 input->str.fPos += 2; 1091 len -= 2; 1092 } 1093 1094 /* parse the number */ 1095 result = ufmt_uto64(input->str.fPos, &len, 16); 1096 1097 /* update the input's position to reflect consumed data */ 1098 input->str.fPos += len; 1099 1100 /* mask off any necessary bits */ 1101 if (!info->fSkipArg) { 1102 if (info->fIsShort) 1103 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 1104 else if (info->fIsLongLong) 1105 *(int64_t*)num = result; 1106 else 1107 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 1108 } 1109 1110 /* we converted 1 arg */ 1111 *argConverted = !info->fSkipArg; 1112 return len + skipped; 1113} 1114 1115static int32_t 1116u_scanf_octal_handler(UFILE *input, 1117 u_scanf_spec_info *info, 1118 ufmt_args *args, 1119 const UChar *fmt, 1120 int32_t *fmtConsumed, 1121 int32_t *argConverted) 1122{ 1123 (void)fmt; 1124 (void)fmtConsumed; 1125 1126 int32_t len; 1127 int32_t skipped; 1128 void *num = (void*) (args[0].ptrValue); 1129 int64_t result; 1130 1131 /* skip all ws in the input */ 1132 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1133 1134 /* fill the input's internal buffer */ 1135 ufile_fill_uchar_buffer(input); 1136 1137 /* determine the size of the input's buffer */ 1138 len = (int32_t)(input->str.fLimit - input->str.fPos); 1139 1140 /* truncate to the width, if specified */ 1141 if(info->fWidth != -1) 1142 len = ufmt_min(len, info->fWidth); 1143 1144 /* parse the number */ 1145 result = ufmt_uto64(input->str.fPos, &len, 8); 1146 1147 /* update the input's position to reflect consumed data */ 1148 input->str.fPos += len; 1149 1150 /* mask off any necessary bits */ 1151 if (!info->fSkipArg) { 1152 if (info->fIsShort) 1153 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 1154 else if (info->fIsLongLong) 1155 *(int64_t*)num = result; 1156 else 1157 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 1158 } 1159 1160 /* we converted 1 arg */ 1161 *argConverted = !info->fSkipArg; 1162 return len + skipped; 1163} 1164 1165static int32_t 1166u_scanf_pointer_handler(UFILE *input, 1167 u_scanf_spec_info *info, 1168 ufmt_args *args, 1169 const UChar *fmt, 1170 int32_t *fmtConsumed, 1171 int32_t *argConverted) 1172{ 1173 (void)fmt; 1174 (void)fmtConsumed; 1175 1176 int32_t len; 1177 int32_t skipped; 1178 void *result; 1179 void **p = (void**)(args[0].ptrValue); 1180 1181 1182 /* skip all ws in the input */ 1183 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1184 1185 /* fill the input's internal buffer */ 1186 ufile_fill_uchar_buffer(input); 1187 1188 /* determine the size of the input's buffer */ 1189 len = (int32_t)(input->str.fLimit - input->str.fPos); 1190 1191 /* truncate to the width, if specified */ 1192 if(info->fWidth != -1) { 1193 len = ufmt_min(len, info->fWidth); 1194 } 1195 1196 /* Make sure that we don't consume too much */ 1197 if (len > (int32_t)(sizeof(void*)*2)) { 1198 len = (int32_t)(sizeof(void*)*2); 1199 } 1200 1201 /* parse the pointer - assign to temporary value */ 1202 result = ufmt_utop(input->str.fPos, &len); 1203 1204 if (!info->fSkipArg) { 1205 *p = result; 1206 } 1207 1208 /* update the input's position to reflect consumed data */ 1209 input->str.fPos += len; 1210 1211 /* we converted 1 arg */ 1212 *argConverted = !info->fSkipArg; 1213 return len + skipped; 1214} 1215 1216static int32_t 1217u_scanf_scanset_handler(UFILE *input, 1218 u_scanf_spec_info *info, 1219 ufmt_args *args, 1220 const UChar *fmt, 1221 int32_t *fmtConsumed, 1222 int32_t *argConverted) 1223{ 1224 USet *scanset; 1225 UErrorCode status = U_ZERO_ERROR; 1226 int32_t chLeft = INT32_MAX; 1227 UChar32 c; 1228 UChar *alias = (UChar*) (args[0].ptrValue); 1229 UBool isNotEOF = FALSE; 1230 UBool readCharacter = FALSE; 1231 1232 /* Create an empty set */ 1233 scanset = uset_open(0, -1); 1234 1235 /* Back up one to get the [ */ 1236 fmt--; 1237 1238 /* truncate to the width, if specified and alias the target */ 1239 if(info->fWidth >= 0) { 1240 chLeft = info->fWidth; 1241 } 1242 1243 /* parse the scanset from the fmt string */ 1244 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); 1245 1246 /* verify that the parse was successful */ 1247 if (U_SUCCESS(status)) { 1248 c=0; 1249 1250 /* grab characters one at a time and make sure they are in the scanset */ 1251 while(chLeft > 0) { 1252 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) { 1253 readCharacter = TRUE; 1254 if (!info->fSkipArg) { 1255 int32_t idx = 0; 1256 UBool isError = FALSE; 1257 1258 U16_APPEND(alias, idx, chLeft, c, isError); 1259 if (isError) { 1260 break; 1261 } 1262 alias += idx; 1263 } 1264 chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); 1265 } 1266 else { 1267 /* if the character's not in the scanset, break out */ 1268 break; 1269 } 1270 } 1271 1272 /* put the final character we read back on the input */ 1273 if(isNotEOF && chLeft > 0) { 1274 u_fungetc(c, input); 1275 } 1276 } 1277 1278 uset_close(scanset); 1279 1280 /* if we didn't match at least 1 character, fail */ 1281 if(!readCharacter) 1282 return -1; 1283 /* otherwise, add the terminator */ 1284 else if (!info->fSkipArg) { 1285 *alias = 0x00; 1286 } 1287 1288 /* we converted 1 arg */ 1289 *argConverted = !info->fSkipArg; 1290 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; 1291} 1292 1293/* Use US-ASCII characters only for formatting. Most codepages have 1294 characters 20-7F from Unicode. Using any other codepage specific 1295 characters will make it very difficult to format the string on 1296 non-Unicode machines */ 1297static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { 1298/* 0x20 */ 1299 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1300 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, 1301 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1302 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1303 1304/* 0x30 */ 1305 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1306 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1307 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1308 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1309 1310/* 0x40 */ 1311 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, 1312 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, 1313#ifdef U_USE_OBSOLETE_IO_FORMATTING 1314 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, 1315#else 1316 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1317#endif 1318 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1319 1320/* 0x50 */ 1321 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, 1322#ifdef U_USE_OBSOLETE_IO_FORMATTING 1323 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, 1324#else 1325 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, 1326#endif 1327 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, 1328 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1329 1330/* 0x60 */ 1331 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, 1332 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, 1333 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, 1334 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, 1335 1336/* 0x70 */ 1337 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, 1338 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, 1339 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1340 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1341}; 1342 1343U_CFUNC int32_t 1344u_scanf_parse(UFILE *f, 1345 const UChar *patternSpecification, 1346 va_list ap) 1347{ 1348 const UChar *alias; 1349 int32_t count, converted, argConsumed, cpConsumed; 1350 uint16_t handlerNum; 1351 1352 ufmt_args args; 1353 u_scanf_spec spec; 1354 ufmt_type_info info; 1355 u_scanf_handler handler; 1356 1357 /* alias the pattern */ 1358 alias = patternSpecification; 1359 1360 /* haven't converted anything yet */ 1361 argConsumed = 0; 1362 converted = 0; 1363 cpConsumed = 0; 1364 1365 /* iterate through the pattern */ 1366 for(;;) { 1367 1368 /* match any characters up to the next '%' */ 1369 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { 1370 alias++; 1371 } 1372 1373 /* if we aren't at a '%', or if we're at end of string, break*/ 1374 if(*alias != UP_PERCENT || *alias == 0x0000) 1375 break; 1376 1377 /* parse the specifier */ 1378 count = u_scanf_parse_spec(alias, &spec); 1379 1380 /* update the pointer in pattern */ 1381 alias += count; 1382 1383 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); 1384 if (handlerNum < USCANF_NUM_FMT_HANDLERS) { 1385 /* skip the argument, if necessary */ 1386 /* query the info function for argument information */ 1387 info = g_u_scanf_infos[ handlerNum ].info; 1388 if (info != ufmt_count && u_feof(f)) { 1389 break; 1390 } 1391 else if(spec.fInfo.fSkipArg) { 1392 args.ptrValue = NULL; 1393 } 1394 else { 1395 switch(info) { 1396 case ufmt_count: 1397 /* set the spec's width to the # of items converted */ 1398 spec.fInfo.fWidth = cpConsumed; 1399 U_FALLTHROUGH; 1400 case ufmt_char: 1401 case ufmt_uchar: 1402 case ufmt_int: 1403 case ufmt_string: 1404 case ufmt_ustring: 1405 case ufmt_pointer: 1406 case ufmt_float: 1407 case ufmt_double: 1408 args.ptrValue = va_arg(ap, void*); 1409 break; 1410 1411 default: 1412 /* else args is ignored */ 1413 args.ptrValue = NULL; 1414 break; 1415 } 1416 } 1417 1418 /* call the handler function */ 1419 handler = g_u_scanf_infos[ handlerNum ].handler; 1420 if(handler != 0) { 1421 1422 /* reset count to 1 so that += for alias works. */ 1423 count = 1; 1424 1425 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); 1426 1427 /* if the handler encountered an error condition, break */ 1428 if(argConsumed < 0) { 1429 converted = -1; 1430 break; 1431 } 1432 1433 /* add to the # of items converted */ 1434 converted += argConsumed; 1435 1436 /* update the pointer in pattern */ 1437 alias += count-1; 1438 } 1439 /* else do nothing */ 1440 } 1441 /* else do nothing */ 1442 1443 /* just ignore unknown tags */ 1444 } 1445 1446 /* return # of items converted */ 1447 return converted; 1448} 1449 1450#endif /* #if !UCONFIG_NO_FORMATTING */ 1451