uscanf_p.c revision 83a171d1a62abf406f7f44ae671823d5ec20db7d
1/* 2******************************************************************************* 3* 4* Copyright (C) 1998-2011, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* 9* File uscnnf_p.c 10* 11* Modification History: 12* 13* Date Name Description 14* 12/02/98 stephen Creation. 15* 03/13/99 stephen Modified for new C API. 16******************************************************************************* 17*/ 18 19#include "unicode/utypes.h" 20 21#if !UCONFIG_NO_FORMATTING 22 23#include "unicode/uchar.h" 24#include "unicode/ustring.h" 25#include "unicode/unum.h" 26#include "unicode/udat.h" 27#include "unicode/uset.h" 28#include "uscanf.h" 29#include "ufmt_cmn.h" 30#include "ufile.h" 31#include "locbund.h" 32 33#include "cmemory.h" 34#include "ustr_cnv.h" 35 36/* flag characters for u_scanf */ 37#define FLAG_ASTERISK 0x002A 38#define FLAG_PAREN 0x0028 39 40#define ISFLAG(s) (s) == FLAG_ASTERISK || \ 41 (s) == FLAG_PAREN 42 43/* special characters for u_scanf */ 44#define SPEC_DOLLARSIGN 0x0024 45 46/* unicode digits */ 47#define DIGIT_ZERO 0x0030 48#define DIGIT_ONE 0x0031 49#define DIGIT_TWO 0x0032 50#define DIGIT_THREE 0x0033 51#define DIGIT_FOUR 0x0034 52#define DIGIT_FIVE 0x0035 53#define DIGIT_SIX 0x0036 54#define DIGIT_SEVEN 0x0037 55#define DIGIT_EIGHT 0x0038 56#define DIGIT_NINE 0x0039 57 58#define ISDIGIT(s) (s) == DIGIT_ZERO || \ 59 (s) == DIGIT_ONE || \ 60 (s) == DIGIT_TWO || \ 61 (s) == DIGIT_THREE || \ 62 (s) == DIGIT_FOUR || \ 63 (s) == DIGIT_FIVE || \ 64 (s) == DIGIT_SIX || \ 65 (s) == DIGIT_SEVEN || \ 66 (s) == DIGIT_EIGHT || \ 67 (s) == DIGIT_NINE 68 69/* u_scanf modifiers */ 70#define MOD_H 0x0068 71#define MOD_LOWERL 0x006C 72#define MOD_L 0x004C 73 74#define ISMOD(s) (s) == MOD_H || \ 75 (s) == MOD_LOWERL || \ 76 (s) == MOD_L 77 78/** 79 * Struct encapsulating a single uscanf format specification. 80 */ 81typedef struct u_scanf_spec_info { 82 int32_t fWidth; /* Width */ 83 84 UChar fSpec; /* Format specification */ 85 86 UChar fPadChar; /* Padding character */ 87 88 UBool fSkipArg; /* TRUE if arg should be skipped */ 89 UBool fIsLongDouble; /* L flag */ 90 UBool fIsShort; /* h flag */ 91 UBool fIsLong; /* l flag */ 92 UBool fIsLongLong; /* ll flag */ 93 UBool fIsString; /* TRUE if this is a NULL-terminated string. */ 94} u_scanf_spec_info; 95 96 97/** 98 * Struct encapsulating a single u_scanf format specification. 99 */ 100typedef struct u_scanf_spec { 101 u_scanf_spec_info fInfo; /* Information on this spec */ 102 int32_t fArgPos; /* Position of data in arg list */ 103} u_scanf_spec; 104 105/** 106 * Parse a single u_scanf format specifier in Unicode. 107 * @param fmt A pointer to a '%' character in a u_scanf format specification. 108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed 109 * format specifier. 110 * @return The number of characters contained in this specifier. 111 */ 112static int32_t 113u_scanf_parse_spec (const UChar *fmt, 114 u_scanf_spec *spec) 115{ 116 const UChar *s = fmt; 117 const UChar *backup; 118 u_scanf_spec_info *info = &(spec->fInfo); 119 120 /* initialize spec to default values */ 121 spec->fArgPos = -1; 122 123 info->fWidth = -1; 124 info->fSpec = 0x0000; 125 info->fPadChar = 0x0020; 126 info->fSkipArg = FALSE; 127 info->fIsLongDouble = FALSE; 128 info->fIsShort = FALSE; 129 info->fIsLong = FALSE; 130 info->fIsLongLong = FALSE; 131 info->fIsString = TRUE; 132 133 134 /* skip over the initial '%' */ 135 s++; 136 137 /* Check for positional argument */ 138 if(ISDIGIT(*s)) { 139 140 /* Save the current position */ 141 backup = s; 142 143 /* handle positional parameters */ 144 if(ISDIGIT(*s)) { 145 spec->fArgPos = (int) (*s++ - DIGIT_ZERO); 146 147 while(ISDIGIT(*s)) { 148 spec->fArgPos *= 10; 149 spec->fArgPos += (int) (*s++ - DIGIT_ZERO); 150 } 151 } 152 153 /* if there is no '$', don't read anything */ 154 if(*s != SPEC_DOLLARSIGN) { 155 spec->fArgPos = -1; 156 s = backup; 157 } 158 /* munge the '$' */ 159 else 160 s++; 161 } 162 163 /* Get any format flags */ 164 while(ISFLAG(*s)) { 165 switch(*s++) { 166 167 /* skip argument */ 168 case FLAG_ASTERISK: 169 info->fSkipArg = TRUE; 170 break; 171 172 /* pad character specified */ 173 case FLAG_PAREN: 174 175 /* first four characters are hex values for pad char */ 176 info->fPadChar = (UChar)ufmt_digitvalue(*s++); 177 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 178 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 180 181 /* final character is ignored */ 182 s++; 183 184 break; 185 } 186 } 187 188 /* Get the width */ 189 if(ISDIGIT(*s)){ 190 info->fWidth = (int) (*s++ - DIGIT_ZERO); 191 192 while(ISDIGIT(*s)) { 193 info->fWidth *= 10; 194 info->fWidth += (int) (*s++ - DIGIT_ZERO); 195 } 196 } 197 198 /* Get any modifiers */ 199 if(ISMOD(*s)) { 200 switch(*s++) { 201 202 /* short */ 203 case MOD_H: 204 info->fIsShort = TRUE; 205 break; 206 207 /* long or long long */ 208 case MOD_LOWERL: 209 if(*s == MOD_LOWERL) { 210 info->fIsLongLong = TRUE; 211 /* skip over the next 'l' */ 212 s++; 213 } 214 else 215 info->fIsLong = TRUE; 216 break; 217 218 /* long double */ 219 case MOD_L: 220 info->fIsLongDouble = TRUE; 221 break; 222 } 223 } 224 225 /* finally, get the specifier letter */ 226 info->fSpec = *s++; 227 228 /* return # of characters in this specifier */ 229 return (int32_t)(s - fmt); 230} 231 232#define UP_PERCENT 0x0025 233 234 235/* ANSI style formatting */ 236/* Use US-ASCII characters only for formatting */ 237 238/* % */ 239#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} 240/* s */ 241#define UFMT_STRING {ufmt_string, u_scanf_string_handler} 242/* c */ 243#define UFMT_CHAR {ufmt_string, u_scanf_char_handler} 244/* d, i */ 245#define UFMT_INT {ufmt_int, u_scanf_integer_handler} 246/* u */ 247#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} 248/* o */ 249#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} 250/* x, X */ 251#define UFMT_HEX {ufmt_int, u_scanf_hex_handler} 252/* f */ 253#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} 254/* e, E */ 255#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} 256/* g, G */ 257#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} 258/* n */ 259#define UFMT_COUNT {ufmt_count, u_scanf_count_handler} 260/* [ */ 261#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} 262 263/* non-ANSI extensions */ 264/* Use US-ASCII characters only for formatting */ 265 266/* p */ 267#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} 268/* V */ 269#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} 270/* P */ 271#define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler} 272/* C K is old format */ 273#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} 274/* S U is old format */ 275#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} 276 277 278#define UFMT_EMPTY {ufmt_empty, NULL} 279 280/** 281 * A u_scanf handler function. 282 * A u_scanf handler is responsible for handling a single u_scanf 283 * format specification, for example 'd' or 's'. 284 * @param stream The UFILE to which to write output. 285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing 286 * information on the format specification. 287 * @param args A pointer to the argument data 288 * @param fmt A pointer to the first character in the format string 289 * following the spec. 290 * @param fmtConsumed On output, set to the number of characters consumed 291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width. 292 * @param argConverted The number of arguments converted and assigned, or -1 if an 293 * error occurred. 294 * @return The number of code points consumed during reading. 295 */ 296typedef int32_t (*u_scanf_handler) (UFILE *stream, 297 u_scanf_spec_info *info, 298 ufmt_args *args, 299 const UChar *fmt, 300 int32_t *fmtConsumed, 301 int32_t *argConverted); 302 303typedef struct u_scanf_info { 304 ufmt_type_info info; 305 u_scanf_handler handler; 306} u_scanf_info; 307 308#define USCANF_NUM_FMT_HANDLERS 108 309#define USCANF_SYMBOL_BUFFER_SIZE 8 310 311/* We do not use handlers for 0-0x1f */ 312#define USCANF_BASE_FMT_HANDLERS 0x20 313 314 315static int32_t 316u_scanf_skip_leading_ws(UFILE *input, 317 UChar pad) 318{ 319 UChar c; 320 int32_t count = 0; 321 UBool isNotEOF; 322 323 /* skip all leading ws in the input */ 324 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) ) 325 { 326 count++; 327 } 328 329 /* put the final character back on the input */ 330 if(isNotEOF) 331 u_fungetc(c, input); 332 333 return count; 334} 335 336/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */ 337static int32_t 338u_scanf_skip_leading_positive_sign(UFILE *input, 339 UNumberFormat *format, 340 UErrorCode *status) 341{ 342 UChar c; 343 int32_t count = 0; 344 UBool isNotEOF; 345 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; 346 int32_t symbolLen; 347 UErrorCode localStatus = U_ZERO_ERROR; 348 349 if (U_SUCCESS(*status)) { 350 symbolLen = unum_getSymbol(format, 351 UNUM_PLUS_SIGN_SYMBOL, 352 plusSymbol, 353 sizeof(plusSymbol)/sizeof(*plusSymbol), 354 &localStatus); 355 356 if (U_SUCCESS(localStatus)) { 357 /* skip all leading ws in the input */ 358 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) ) 359 { 360 count++; 361 } 362 363 /* put the final character back on the input */ 364 if(isNotEOF) { 365 u_fungetc(c, input); 366 } 367 } 368 } 369 370 return count; 371} 372 373static int32_t 374u_scanf_simple_percent_handler(UFILE *input, 375 u_scanf_spec_info *info, 376 ufmt_args *args, 377 const UChar *fmt, 378 int32_t *fmtConsumed, 379 int32_t *argConverted) 380{ 381 /* make sure the next character in the input is a percent */ 382 *argConverted = 0; 383 if(u_fgetc(input) != 0x0025) { 384 *argConverted = -1; 385 } 386 return 1; 387} 388 389static int32_t 390u_scanf_count_handler(UFILE *input, 391 u_scanf_spec_info *info, 392 ufmt_args *args, 393 const UChar *fmt, 394 int32_t *fmtConsumed, 395 int32_t *argConverted) 396{ 397 /* in the special case of count, the u_scanf_spec_info's width */ 398 /* will contain the # of items converted thus far */ 399 if (!info->fSkipArg) { 400 if (info->fIsShort) 401 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); 402 else if (info->fIsLongLong) 403 *(int64_t*)(args[0].ptrValue) = info->fWidth; 404 else 405 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); 406 } 407 *argConverted = 0; 408 409 /* we converted 0 args */ 410 return 0; 411} 412 413static int32_t 414u_scanf_double_handler(UFILE *input, 415 u_scanf_spec_info *info, 416 ufmt_args *args, 417 const UChar *fmt, 418 int32_t *fmtConsumed, 419 int32_t *argConverted) 420{ 421 int32_t len; 422 double num; 423 UNumberFormat *format; 424 int32_t parsePos = 0; 425 int32_t skipped; 426 UErrorCode status = U_ZERO_ERROR; 427 428 429 /* skip all ws in the input */ 430 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 431 432 /* fill the input's internal buffer */ 433 ufile_fill_uchar_buffer(input); 434 435 /* determine the size of the input's buffer */ 436 len = (int32_t)(input->str.fLimit - input->str.fPos); 437 438 /* truncate to the width, if specified */ 439 if(info->fWidth != -1) 440 len = ufmt_min(len, info->fWidth); 441 442 /* get the formatter */ 443 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 444 445 /* handle error */ 446 if(format == 0) 447 return 0; 448 449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 450 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 451 452 /* parse the number */ 453 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 454 455 if (!info->fSkipArg) { 456 if (info->fIsLong) 457 *(double*)(args[0].ptrValue) = num; 458 else if (info->fIsLongDouble) 459 *(long double*)(args[0].ptrValue) = num; 460 else 461 *(float*)(args[0].ptrValue) = (float)num; 462 } 463 464 /* mask off any necessary bits */ 465 /* if(! info->fIsLong_double) 466 num &= DBL_MAX;*/ 467 468 /* update the input's position to reflect consumed data */ 469 input->str.fPos += parsePos; 470 471 /* we converted 1 arg */ 472 *argConverted = !info->fSkipArg; 473 return parsePos + skipped; 474} 475 476#define UPRINTF_SYMBOL_BUFFER_SIZE 8 477 478static int32_t 479u_scanf_scientific_handler(UFILE *input, 480 u_scanf_spec_info *info, 481 ufmt_args *args, 482 const UChar *fmt, 483 int32_t *fmtConsumed, 484 int32_t *argConverted) 485{ 486 int32_t len; 487 double num; 488 UNumberFormat *format; 489 int32_t parsePos = 0; 490 int32_t skipped; 491 UErrorCode status = U_ZERO_ERROR; 492 UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; 493 int32_t srcLen, expLen; 494 UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; 495 496 497 /* skip all ws in the input */ 498 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 499 500 /* fill the input's internal buffer */ 501 ufile_fill_uchar_buffer(input); 502 503 /* determine the size of the input's buffer */ 504 len = (int32_t)(input->str.fLimit - input->str.fPos); 505 506 /* truncate to the width, if specified */ 507 if(info->fWidth != -1) 508 len = ufmt_min(len, info->fWidth); 509 510 /* get the formatter */ 511 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); 512 513 /* handle error */ 514 if(format == 0) 515 return 0; 516 517 /* set the appropriate flags on the formatter */ 518 519 srcLen = unum_getSymbol(format, 520 UNUM_EXPONENTIAL_SYMBOL, 521 srcExpBuf, 522 sizeof(srcExpBuf), 523 &status); 524 525 /* Upper/lower case the e */ 526 if (info->fSpec == (UChar)0x65 /* e */) { 527 expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), 528 srcExpBuf, srcLen, 529 input->str.fBundle.fLocale, 530 &status); 531 } 532 else { 533 expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), 534 srcExpBuf, srcLen, 535 input->str.fBundle.fLocale, 536 &status); 537 } 538 539 unum_setSymbol(format, 540 UNUM_EXPONENTIAL_SYMBOL, 541 expBuf, 542 expLen, 543 &status); 544 545 546 547 548 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 549 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 550 551 /* parse the number */ 552 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 553 554 if (!info->fSkipArg) { 555 if (info->fIsLong) 556 *(double*)(args[0].ptrValue) = num; 557 else if (info->fIsLongDouble) 558 *(long double*)(args[0].ptrValue) = num; 559 else 560 *(float*)(args[0].ptrValue) = (float)num; 561 } 562 563 /* mask off any necessary bits */ 564 /* if(! info->fIsLong_double) 565 num &= DBL_MAX;*/ 566 567 /* update the input's position to reflect consumed data */ 568 input->str.fPos += parsePos; 569 570 /* we converted 1 arg */ 571 *argConverted = !info->fSkipArg; 572 return parsePos + skipped; 573} 574 575static int32_t 576u_scanf_scidbl_handler(UFILE *input, 577 u_scanf_spec_info *info, 578 ufmt_args *args, 579 const UChar *fmt, 580 int32_t *fmtConsumed, 581 int32_t *argConverted) 582{ 583 int32_t len; 584 double num; 585 UNumberFormat *scientificFormat, *genericFormat; 586 /*int32_t scientificResult, genericResult;*/ 587 double scientificResult, genericResult; 588 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; 589 int32_t skipped; 590 UErrorCode scientificStatus = U_ZERO_ERROR; 591 UErrorCode genericStatus = U_ZERO_ERROR; 592 593 594 /* since we can't determine by scanning the characters whether */ 595 /* a number was formatted in the 'f' or 'g' styles, parse the */ 596 /* string with both formatters, and assume whichever one */ 597 /* parsed the most is the correct formatter to use */ 598 599 600 /* skip all ws in the input */ 601 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 602 603 /* fill the input's internal buffer */ 604 ufile_fill_uchar_buffer(input); 605 606 /* determine the size of the input's buffer */ 607 len = (int32_t)(input->str.fLimit - input->str.fPos); 608 609 /* truncate to the width, if specified */ 610 if(info->fWidth != -1) 611 len = ufmt_min(len, info->fWidth); 612 613 /* get the formatters */ 614 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); 615 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 616 617 /* handle error */ 618 if(scientificFormat == 0 || genericFormat == 0) 619 return 0; 620 621 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 622 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); 623 624 /* parse the number using each format*/ 625 626 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, 627 &scientificParsePos, &scientificStatus); 628 629 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, 630 &genericParsePos, &genericStatus); 631 632 /* determine which parse made it farther */ 633 if(scientificParsePos > genericParsePos) { 634 /* stash the result in num */ 635 num = scientificResult; 636 /* update the input's position to reflect consumed data */ 637 parsePos += scientificParsePos; 638 } 639 else { 640 /* stash the result in num */ 641 num = genericResult; 642 /* update the input's position to reflect consumed data */ 643 parsePos += genericParsePos; 644 } 645 input->str.fPos += parsePos; 646 647 if (!info->fSkipArg) { 648 if (info->fIsLong) 649 *(double*)(args[0].ptrValue) = num; 650 else if (info->fIsLongDouble) 651 *(long double*)(args[0].ptrValue) = num; 652 else 653 *(float*)(args[0].ptrValue) = (float)num; 654 } 655 656 /* mask off any necessary bits */ 657 /* if(! info->fIsLong_double) 658 num &= DBL_MAX;*/ 659 660 /* we converted 1 arg */ 661 *argConverted = !info->fSkipArg; 662 return parsePos + skipped; 663} 664 665static int32_t 666u_scanf_integer_handler(UFILE *input, 667 u_scanf_spec_info *info, 668 ufmt_args *args, 669 const UChar *fmt, 670 int32_t *fmtConsumed, 671 int32_t *argConverted) 672{ 673 int32_t len; 674 void *num = (void*) (args[0].ptrValue); 675 UNumberFormat *format; 676 int32_t parsePos = 0; 677 int32_t skipped; 678 UErrorCode status = U_ZERO_ERROR; 679 int64_t result; 680 681 682 /* skip all ws in the input */ 683 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 684 685 /* fill the input's internal buffer */ 686 ufile_fill_uchar_buffer(input); 687 688 /* determine the size of the input's buffer */ 689 len = (int32_t)(input->str.fLimit - input->str.fPos); 690 691 /* truncate to the width, if specified */ 692 if(info->fWidth != -1) 693 len = ufmt_min(len, info->fWidth); 694 695 /* get the formatter */ 696 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 697 698 /* handle error */ 699 if(format == 0) 700 return 0; 701 702 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 703 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 704 705 /* parse the number */ 706 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); 707 708 /* mask off any necessary bits */ 709 if (!info->fSkipArg) { 710 if (info->fIsShort) 711 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 712 else if (info->fIsLongLong) 713 *(int64_t*)num = result; 714 else 715 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 716 } 717 718 /* update the input's position to reflect consumed data */ 719 input->str.fPos += parsePos; 720 721 /* we converted 1 arg */ 722 *argConverted = !info->fSkipArg; 723 return parsePos + skipped; 724} 725 726static int32_t 727u_scanf_uinteger_handler(UFILE *input, 728 u_scanf_spec_info *info, 729 ufmt_args *args, 730 const UChar *fmt, 731 int32_t *fmtConsumed, 732 int32_t *argConverted) 733{ 734 /* TODO Fix this when Numberformat handles uint64_t */ 735 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); 736} 737 738static int32_t 739u_scanf_percent_handler(UFILE *input, 740 u_scanf_spec_info *info, 741 ufmt_args *args, 742 const UChar *fmt, 743 int32_t *fmtConsumed, 744 int32_t *argConverted) 745{ 746 int32_t len; 747 double num; 748 UNumberFormat *format; 749 int32_t parsePos = 0; 750 UErrorCode status = U_ZERO_ERROR; 751 752 753 /* skip all ws in the input */ 754 u_scanf_skip_leading_ws(input, info->fPadChar); 755 756 /* fill the input's internal buffer */ 757 ufile_fill_uchar_buffer(input); 758 759 /* determine the size of the input's buffer */ 760 len = (int32_t)(input->str.fLimit - input->str.fPos); 761 762 /* truncate to the width, if specified */ 763 if(info->fWidth != -1) 764 len = ufmt_min(len, info->fWidth); 765 766 /* get the formatter */ 767 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); 768 769 /* handle error */ 770 if(format == 0) 771 return 0; 772 773 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 774 u_scanf_skip_leading_positive_sign(input, format, &status); 775 776 /* parse the number */ 777 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 778 779 if (!info->fSkipArg) { 780 *(double*)(args[0].ptrValue) = num; 781 } 782 783 /* mask off any necessary bits */ 784 /* if(! info->fIsLong_double) 785 num &= DBL_MAX;*/ 786 787 /* update the input's position to reflect consumed data */ 788 input->str.fPos += parsePos; 789 790 /* we converted 1 arg */ 791 *argConverted = !info->fSkipArg; 792 return parsePos; 793} 794 795static int32_t 796u_scanf_string_handler(UFILE *input, 797 u_scanf_spec_info *info, 798 ufmt_args *args, 799 const UChar *fmt, 800 int32_t *fmtConsumed, 801 int32_t *argConverted) 802{ 803 const UChar *source; 804 UConverter *conv; 805 char *arg = (char*)(args[0].ptrValue); 806 char *alias = arg; 807 char *limit; 808 UErrorCode status = U_ZERO_ERROR; 809 int32_t count; 810 int32_t skipped = 0; 811 UChar c; 812 UBool isNotEOF = FALSE; 813 814 /* skip all ws in the input */ 815 if (info->fIsString) { 816 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 817 } 818 819 /* get the string one character at a time, truncating to the width */ 820 count = 0; 821 822 /* open the default converter */ 823 conv = u_getDefaultConverter(&status); 824 825 if(U_FAILURE(status)) 826 return -1; 827 828 while( (info->fWidth == -1 || count < info->fWidth) 829 && (isNotEOF = ufile_getch(input, &c)) 830 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) 831 { 832 833 if (!info->fSkipArg) { 834 /* put the character from the input onto the target */ 835 source = &c; 836 /* Since we do this one character at a time, do it this way. */ 837 if (info->fWidth > 0) { 838 limit = alias + info->fWidth - count; 839 } 840 else { 841 limit = alias + ucnv_getMaxCharSize(conv); 842 } 843 844 /* convert the character to the default codepage */ 845 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, 846 NULL, TRUE, &status); 847 848 if(U_FAILURE(status)) { 849 /* clean up */ 850 u_releaseDefaultConverter(conv); 851 return -1; 852 } 853 } 854 855 /* increment the count */ 856 ++count; 857 } 858 859 /* put the final character we read back on the input */ 860 if (!info->fSkipArg) { 861 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) 862 u_fungetc(c, input); 863 864 /* add the terminator */ 865 if (info->fIsString) { 866 *alias = 0x00; 867 } 868 } 869 870 /* clean up */ 871 u_releaseDefaultConverter(conv); 872 873 /* we converted 1 arg */ 874 *argConverted = !info->fSkipArg; 875 return count + skipped; 876} 877 878static int32_t 879u_scanf_char_handler(UFILE *input, 880 u_scanf_spec_info *info, 881 ufmt_args *args, 882 const UChar *fmt, 883 int32_t *fmtConsumed, 884 int32_t *argConverted) 885{ 886 if (info->fWidth < 0) { 887 info->fWidth = 1; 888 } 889 info->fIsString = FALSE; 890 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); 891} 892 893static int32_t 894u_scanf_ustring_handler(UFILE *input, 895 u_scanf_spec_info *info, 896 ufmt_args *args, 897 const UChar *fmt, 898 int32_t *fmtConsumed, 899 int32_t *argConverted) 900{ 901 UChar *arg = (UChar*)(args[0].ptrValue); 902 UChar *alias = arg; 903 int32_t count; 904 int32_t skipped = 0; 905 UChar c; 906 UBool isNotEOF = FALSE; 907 908 /* skip all ws in the input */ 909 if (info->fIsString) { 910 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 911 } 912 913 /* get the string one character at a time, truncating to the width */ 914 count = 0; 915 916 while( (info->fWidth == -1 || count < info->fWidth) 917 && (isNotEOF = ufile_getch(input, &c)) 918 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) 919 { 920 921 /* put the character from the input onto the target */ 922 if (!info->fSkipArg) { 923 *alias++ = c; 924 } 925 926 /* increment the count */ 927 ++count; 928 } 929 930 /* put the final character we read back on the input */ 931 if (!info->fSkipArg) { 932 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { 933 u_fungetc(c, input); 934 } 935 936 /* add the terminator */ 937 if (info->fIsString) { 938 *alias = 0x0000; 939 } 940 } 941 942 /* we converted 1 arg */ 943 *argConverted = !info->fSkipArg; 944 return count + skipped; 945} 946 947static int32_t 948u_scanf_uchar_handler(UFILE *input, 949 u_scanf_spec_info *info, 950 ufmt_args *args, 951 const UChar *fmt, 952 int32_t *fmtConsumed, 953 int32_t *argConverted) 954{ 955 if (info->fWidth < 0) { 956 info->fWidth = 1; 957 } 958 info->fIsString = FALSE; 959 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); 960} 961 962static int32_t 963u_scanf_spellout_handler(UFILE *input, 964 u_scanf_spec_info *info, 965 ufmt_args *args, 966 const UChar *fmt, 967 int32_t *fmtConsumed, 968 int32_t *argConverted) 969{ 970 int32_t len; 971 double num; 972 UNumberFormat *format; 973 int32_t parsePos = 0; 974 int32_t skipped; 975 UErrorCode status = U_ZERO_ERROR; 976 977 978 /* skip all ws in the input */ 979 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 980 981 /* fill the input's internal buffer */ 982 ufile_fill_uchar_buffer(input); 983 984 /* determine the size of the input's buffer */ 985 len = (int32_t)(input->str.fLimit - input->str.fPos); 986 987 /* truncate to the width, if specified */ 988 if(info->fWidth != -1) 989 len = ufmt_min(len, info->fWidth); 990 991 /* get the formatter */ 992 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); 993 994 /* handle error */ 995 if(format == 0) 996 return 0; 997 998 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 999 /* This is not applicable to RBNF. */ 1000 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ 1001 1002 /* parse the number */ 1003 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 1004 1005 if (!info->fSkipArg) { 1006 *(double*)(args[0].ptrValue) = num; 1007 } 1008 1009 /* mask off any necessary bits */ 1010 /* if(! info->fIsLong_double) 1011 num &= DBL_MAX;*/ 1012 1013 /* update the input's position to reflect consumed data */ 1014 input->str.fPos += parsePos; 1015 1016 /* we converted 1 arg */ 1017 *argConverted = !info->fSkipArg; 1018 return parsePos + skipped; 1019} 1020 1021static int32_t 1022u_scanf_hex_handler(UFILE *input, 1023 u_scanf_spec_info *info, 1024 ufmt_args *args, 1025 const UChar *fmt, 1026 int32_t *fmtConsumed, 1027 int32_t *argConverted) 1028{ 1029 int32_t len; 1030 int32_t skipped; 1031 void *num = (void*) (args[0].ptrValue); 1032 int64_t result; 1033 1034 /* skip all ws in the input */ 1035 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1036 1037 /* fill the input's internal buffer */ 1038 ufile_fill_uchar_buffer(input); 1039 1040 /* determine the size of the input's buffer */ 1041 len = (int32_t)(input->str.fLimit - input->str.fPos); 1042 1043 /* truncate to the width, if specified */ 1044 if(info->fWidth != -1) 1045 len = ufmt_min(len, info->fWidth); 1046 1047 /* check for alternate form */ 1048 if( *(input->str.fPos) == 0x0030 && 1049 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { 1050 1051 /* skip the '0' and 'x' or 'X' if present */ 1052 input->str.fPos += 2; 1053 len -= 2; 1054 } 1055 1056 /* parse the number */ 1057 result = ufmt_uto64(input->str.fPos, &len, 16); 1058 1059 /* update the input's position to reflect consumed data */ 1060 input->str.fPos += len; 1061 1062 /* mask off any necessary bits */ 1063 if (!info->fSkipArg) { 1064 if (info->fIsShort) 1065 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 1066 else if (info->fIsLongLong) 1067 *(int64_t*)num = result; 1068 else 1069 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 1070 } 1071 1072 /* we converted 1 arg */ 1073 *argConverted = !info->fSkipArg; 1074 return len + skipped; 1075} 1076 1077static int32_t 1078u_scanf_octal_handler(UFILE *input, 1079 u_scanf_spec_info *info, 1080 ufmt_args *args, 1081 const UChar *fmt, 1082 int32_t *fmtConsumed, 1083 int32_t *argConverted) 1084{ 1085 int32_t len; 1086 int32_t skipped; 1087 void *num = (void*) (args[0].ptrValue); 1088 int64_t result; 1089 1090 /* skip all ws in the input */ 1091 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1092 1093 /* fill the input's internal buffer */ 1094 ufile_fill_uchar_buffer(input); 1095 1096 /* determine the size of the input's buffer */ 1097 len = (int32_t)(input->str.fLimit - input->str.fPos); 1098 1099 /* truncate to the width, if specified */ 1100 if(info->fWidth != -1) 1101 len = ufmt_min(len, info->fWidth); 1102 1103 /* parse the number */ 1104 result = ufmt_uto64(input->str.fPos, &len, 8); 1105 1106 /* update the input's position to reflect consumed data */ 1107 input->str.fPos += len; 1108 1109 /* mask off any necessary bits */ 1110 if (!info->fSkipArg) { 1111 if (info->fIsShort) 1112 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 1113 else if (info->fIsLongLong) 1114 *(int64_t*)num = result; 1115 else 1116 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 1117 } 1118 1119 /* we converted 1 arg */ 1120 *argConverted = !info->fSkipArg; 1121 return len + skipped; 1122} 1123 1124static int32_t 1125u_scanf_pointer_handler(UFILE *input, 1126 u_scanf_spec_info *info, 1127 ufmt_args *args, 1128 const UChar *fmt, 1129 int32_t *fmtConsumed, 1130 int32_t *argConverted) 1131{ 1132 int32_t len; 1133 int32_t skipped; 1134 void *result; 1135 void **p = (void**)(args[0].ptrValue); 1136 1137 1138 /* skip all ws in the input */ 1139 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1140 1141 /* fill the input's internal buffer */ 1142 ufile_fill_uchar_buffer(input); 1143 1144 /* determine the size of the input's buffer */ 1145 len = (int32_t)(input->str.fLimit - input->str.fPos); 1146 1147 /* truncate to the width, if specified */ 1148 if(info->fWidth != -1) { 1149 len = ufmt_min(len, info->fWidth); 1150 } 1151 1152 /* Make sure that we don't consume too much */ 1153 if (len > (int32_t)(sizeof(void*)*2)) { 1154 len = (int32_t)(sizeof(void*)*2); 1155 } 1156 1157 /* parse the pointer - assign to temporary value */ 1158 result = ufmt_utop(input->str.fPos, &len); 1159 1160 if (!info->fSkipArg) { 1161 *p = result; 1162 } 1163 1164 /* update the input's position to reflect consumed data */ 1165 input->str.fPos += len; 1166 1167 /* we converted 1 arg */ 1168 *argConverted = !info->fSkipArg; 1169 return len + skipped; 1170} 1171 1172static int32_t 1173u_scanf_scanset_handler(UFILE *input, 1174 u_scanf_spec_info *info, 1175 ufmt_args *args, 1176 const UChar *fmt, 1177 int32_t *fmtConsumed, 1178 int32_t *argConverted) 1179{ 1180 USet *scanset; 1181 UErrorCode status = U_ZERO_ERROR; 1182 int32_t chLeft = INT32_MAX; 1183 UChar32 c; 1184 UChar *alias = (UChar*) (args[0].ptrValue); 1185 UBool isNotEOF = FALSE; 1186 UBool readCharacter = FALSE; 1187 1188 /* Create an empty set */ 1189 scanset = uset_open(0, -1); 1190 1191 /* Back up one to get the [ */ 1192 fmt--; 1193 1194 /* truncate to the width, if specified and alias the target */ 1195 if(info->fWidth >= 0) { 1196 chLeft = info->fWidth; 1197 } 1198 1199 /* parse the scanset from the fmt string */ 1200 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); 1201 1202 /* verify that the parse was successful */ 1203 if (U_SUCCESS(status)) { 1204 c=0; 1205 1206 /* grab characters one at a time and make sure they are in the scanset */ 1207 while(chLeft > 0) { 1208 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) { 1209 readCharacter = TRUE; 1210 if (!info->fSkipArg) { 1211 int32_t idx = 0; 1212 UBool isError = FALSE; 1213 1214 U16_APPEND(alias, idx, chLeft, c, isError); 1215 if (isError) { 1216 break; 1217 } 1218 alias += idx; 1219 } 1220 chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); 1221 } 1222 else { 1223 /* if the character's not in the scanset, break out */ 1224 break; 1225 } 1226 } 1227 1228 /* put the final character we read back on the input */ 1229 if(isNotEOF && chLeft > 0) { 1230 u_fungetc(c, input); 1231 } 1232 } 1233 1234 uset_close(scanset); 1235 1236 /* if we didn't match at least 1 character, fail */ 1237 if(!readCharacter) 1238 return -1; 1239 /* otherwise, add the terminator */ 1240 else if (!info->fSkipArg) { 1241 *alias = 0x00; 1242 } 1243 1244 /* we converted 1 arg */ 1245 *argConverted = !info->fSkipArg; 1246 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; 1247} 1248 1249/* Use US-ASCII characters only for formatting. Most codepages have 1250 characters 20-7F from Unicode. Using any other codepage specific 1251 characters will make it very difficult to format the string on 1252 non-Unicode machines */ 1253static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { 1254/* 0x20 */ 1255 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1256 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, 1257 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1258 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1259 1260/* 0x30 */ 1261 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1262 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1263 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1264 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1265 1266/* 0x40 */ 1267 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, 1268 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, 1269#ifdef U_USE_OBSOLETE_IO_FORMATTING 1270 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, 1271#else 1272 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1273#endif 1274 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1275 1276/* 0x50 */ 1277 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, 1278#ifdef U_USE_OBSOLETE_IO_FORMATTING 1279 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, 1280#else 1281 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, 1282#endif 1283 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, 1284 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1285 1286/* 0x60 */ 1287 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, 1288 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, 1289 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, 1290 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, 1291 1292/* 0x70 */ 1293 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, 1294 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, 1295 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1296 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1297}; 1298 1299U_CFUNC int32_t 1300u_scanf_parse(UFILE *f, 1301 const UChar *patternSpecification, 1302 va_list ap) 1303{ 1304 const UChar *alias; 1305 int32_t count, converted, argConsumed, cpConsumed; 1306 uint16_t handlerNum; 1307 1308 ufmt_args args; 1309 u_scanf_spec spec; 1310 ufmt_type_info info; 1311 u_scanf_handler handler; 1312 1313 /* alias the pattern */ 1314 alias = patternSpecification; 1315 1316 /* haven't converted anything yet */ 1317 argConsumed = 0; 1318 converted = 0; 1319 cpConsumed = 0; 1320 1321 /* iterate through the pattern */ 1322 for(;;) { 1323 1324 /* match any characters up to the next '%' */ 1325 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { 1326 alias++; 1327 } 1328 1329 /* if we aren't at a '%', or if we're at end of string, break*/ 1330 if(*alias != UP_PERCENT || *alias == 0x0000) 1331 break; 1332 1333 /* parse the specifier */ 1334 count = u_scanf_parse_spec(alias, &spec); 1335 1336 /* update the pointer in pattern */ 1337 alias += count; 1338 1339 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); 1340 if (handlerNum < USCANF_NUM_FMT_HANDLERS) { 1341 /* skip the argument, if necessary */ 1342 /* query the info function for argument information */ 1343 info = g_u_scanf_infos[ handlerNum ].info; 1344 if (info != ufmt_count && u_feof(f)) { 1345 break; 1346 } 1347 else if(spec.fInfo.fSkipArg) { 1348 args.ptrValue = NULL; 1349 } 1350 else { 1351 switch(info) { 1352 case ufmt_count: 1353 /* set the spec's width to the # of items converted */ 1354 spec.fInfo.fWidth = cpConsumed; 1355 /* fall through to next case */ 1356 case ufmt_char: 1357 case ufmt_uchar: 1358 case ufmt_int: 1359 case ufmt_string: 1360 case ufmt_ustring: 1361 case ufmt_pointer: 1362 case ufmt_float: 1363 case ufmt_double: 1364 args.ptrValue = va_arg(ap, void*); 1365 break; 1366 1367 default: 1368 /* else args is ignored */ 1369 args.ptrValue = NULL; 1370 break; 1371 } 1372 } 1373 1374 /* call the handler function */ 1375 handler = g_u_scanf_infos[ handlerNum ].handler; 1376 if(handler != 0) { 1377 1378 /* reset count to 1 so that += for alias works. */ 1379 count = 1; 1380 1381 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); 1382 1383 /* if the handler encountered an error condition, break */ 1384 if(argConsumed < 0) { 1385 converted = -1; 1386 break; 1387 } 1388 1389 /* add to the # of items converted */ 1390 converted += argConsumed; 1391 1392 /* update the pointer in pattern */ 1393 alias += count-1; 1394 } 1395 /* else do nothing */ 1396 } 1397 /* else do nothing */ 1398 1399 /* just ignore unknown tags */ 1400 } 1401 1402 /* return # of items converted */ 1403 return converted; 1404} 1405 1406#endif /* #if !UCONFIG_NO_FORMATTING */ 1407