uscanf_p.c revision 51cfa1a9a96cad34675a6415fe86dfdf3f525bb6
1/* 2******************************************************************************* 3* 4* Copyright (C) 1998-2006, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* 9* File uscnnf_p.c 10* 11* Modification History: 12* 13* Date Name Description 14* 12/02/98 stephen Creation. 15* 03/13/99 stephen Modified for new C API. 16******************************************************************************* 17*/ 18 19#include "unicode/utypes.h" 20 21#if !UCONFIG_NO_FORMATTING 22 23#include "unicode/uchar.h" 24#include "unicode/ustring.h" 25#include "unicode/unum.h" 26#include "unicode/udat.h" 27#include "unicode/uset.h" 28#include "uscanf.h" 29#include "ufmt_cmn.h" 30#include "ufile.h" 31#include "locbund.h" 32 33#include "cmemory.h" 34#include "ustr_cnv.h" 35 36/* flag characters for u_scanf */ 37#define FLAG_ASTERISK 0x002A 38#define FLAG_PAREN 0x0028 39 40#define ISFLAG(s) (s) == FLAG_ASTERISK || \ 41 (s) == FLAG_PAREN 42 43/* special characters for u_scanf */ 44#define SPEC_DOLLARSIGN 0x0024 45 46/* unicode digits */ 47#define DIGIT_ZERO 0x0030 48#define DIGIT_ONE 0x0031 49#define DIGIT_TWO 0x0032 50#define DIGIT_THREE 0x0033 51#define DIGIT_FOUR 0x0034 52#define DIGIT_FIVE 0x0035 53#define DIGIT_SIX 0x0036 54#define DIGIT_SEVEN 0x0037 55#define DIGIT_EIGHT 0x0038 56#define DIGIT_NINE 0x0039 57 58#define ISDIGIT(s) (s) == DIGIT_ZERO || \ 59 (s) == DIGIT_ONE || \ 60 (s) == DIGIT_TWO || \ 61 (s) == DIGIT_THREE || \ 62 (s) == DIGIT_FOUR || \ 63 (s) == DIGIT_FIVE || \ 64 (s) == DIGIT_SIX || \ 65 (s) == DIGIT_SEVEN || \ 66 (s) == DIGIT_EIGHT || \ 67 (s) == DIGIT_NINE 68 69/* u_scanf modifiers */ 70#define MOD_H 0x0068 71#define MOD_LOWERL 0x006C 72#define MOD_L 0x004C 73 74#define ISMOD(s) (s) == MOD_H || \ 75 (s) == MOD_LOWERL || \ 76 (s) == MOD_L 77 78/** 79 * Struct encapsulating a single uscanf format specification. 80 */ 81typedef struct u_scanf_spec_info { 82 int32_t fWidth; /* Width */ 83 84 UChar fSpec; /* Format specification */ 85 86 UChar fPadChar; /* Padding character */ 87 88 UBool fSkipArg; /* TRUE if arg should be skipped */ 89 UBool fIsLongDouble; /* L flag */ 90 UBool fIsShort; /* h flag */ 91 UBool fIsLong; /* l flag */ 92 UBool fIsLongLong; /* ll flag */ 93 UBool fIsString; /* TRUE if this is a NULL-terminated string. */ 94} u_scanf_spec_info; 95 96 97/** 98 * Struct encapsulating a single u_scanf format specification. 99 */ 100typedef struct u_scanf_spec { 101 u_scanf_spec_info fInfo; /* Information on this spec */ 102 int32_t fArgPos; /* Position of data in arg list */ 103} u_scanf_spec; 104 105/** 106 * Parse a single u_scanf format specifier in Unicode. 107 * @param fmt A pointer to a '%' character in a u_scanf format specification. 108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed 109 * format specifier. 110 * @return The number of characters contained in this specifier. 111 */ 112static int32_t 113u_scanf_parse_spec (const UChar *fmt, 114 u_scanf_spec *spec) 115{ 116 const UChar *s = fmt; 117 const UChar *backup; 118 u_scanf_spec_info *info = &(spec->fInfo); 119 120 /* initialize spec to default values */ 121 spec->fArgPos = -1; 122 123 info->fWidth = -1; 124 info->fSpec = 0x0000; 125 info->fPadChar = 0x0020; 126 info->fSkipArg = FALSE; 127 info->fIsLongDouble = FALSE; 128 info->fIsShort = FALSE; 129 info->fIsLong = FALSE; 130 info->fIsLongLong = FALSE; 131 info->fIsString = TRUE; 132 133 134 /* skip over the initial '%' */ 135 s++; 136 137 /* Check for positional argument */ 138 if(ISDIGIT(*s)) { 139 140 /* Save the current position */ 141 backup = s; 142 143 /* handle positional parameters */ 144 if(ISDIGIT(*s)) { 145 spec->fArgPos = (int) (*s++ - DIGIT_ZERO); 146 147 while(ISDIGIT(*s)) { 148 spec->fArgPos *= 10; 149 spec->fArgPos += (int) (*s++ - DIGIT_ZERO); 150 } 151 } 152 153 /* if there is no '$', don't read anything */ 154 if(*s != SPEC_DOLLARSIGN) { 155 spec->fArgPos = -1; 156 s = backup; 157 } 158 /* munge the '$' */ 159 else 160 s++; 161 } 162 163 /* Get any format flags */ 164 while(ISFLAG(*s)) { 165 switch(*s++) { 166 167 /* skip argument */ 168 case FLAG_ASTERISK: 169 info->fSkipArg = TRUE; 170 break; 171 172 /* pad character specified */ 173 case FLAG_PAREN: 174 175 /* first four characters are hex values for pad char */ 176 info->fPadChar = (UChar)ufmt_digitvalue(*s++); 177 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 178 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 180 181 /* final character is ignored */ 182 s++; 183 184 break; 185 } 186 } 187 188 /* Get the width */ 189 if(ISDIGIT(*s)){ 190 info->fWidth = (int) (*s++ - DIGIT_ZERO); 191 192 while(ISDIGIT(*s)) { 193 info->fWidth *= 10; 194 info->fWidth += (int) (*s++ - DIGIT_ZERO); 195 } 196 } 197 198 /* Get any modifiers */ 199 if(ISMOD(*s)) { 200 switch(*s++) { 201 202 /* short */ 203 case MOD_H: 204 info->fIsShort = TRUE; 205 break; 206 207 /* long or long long */ 208 case MOD_LOWERL: 209 if(*s == MOD_LOWERL) { 210 info->fIsLongLong = TRUE; 211 /* skip over the next 'l' */ 212 s++; 213 } 214 else 215 info->fIsLong = TRUE; 216 break; 217 218 /* long double */ 219 case MOD_L: 220 info->fIsLongDouble = TRUE; 221 break; 222 } 223 } 224 225 /* finally, get the specifier letter */ 226 info->fSpec = *s++; 227 228 /* return # of characters in this specifier */ 229 return (int32_t)(s - fmt); 230} 231 232#define UP_PERCENT 0x0025 233 234 235/* ANSI style formatting */ 236/* Use US-ASCII characters only for formatting */ 237 238/* % */ 239#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} 240/* s */ 241#define UFMT_STRING {ufmt_string, u_scanf_string_handler} 242/* c */ 243#define UFMT_CHAR {ufmt_string, u_scanf_char_handler} 244/* d, i */ 245#define UFMT_INT {ufmt_int, u_scanf_integer_handler} 246/* u */ 247#define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} 248/* o */ 249#define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} 250/* x, X */ 251#define UFMT_HEX {ufmt_int, u_scanf_hex_handler} 252/* f */ 253#define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} 254/* e, E */ 255#define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} 256/* g, G */ 257#define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} 258/* n */ 259#define UFMT_COUNT {ufmt_count, u_scanf_count_handler} 260/* [ */ 261#define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} 262 263/* non-ANSI extensions */ 264/* Use US-ASCII characters only for formatting */ 265 266/* p */ 267#define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} 268/* V */ 269#define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} 270/* P */ 271#define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler} 272/* C K is old format */ 273#define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} 274/* S U is old format */ 275#define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} 276 277 278#define UFMT_EMPTY {ufmt_empty, NULL} 279 280/** 281 * A u_scanf handler function. 282 * A u_scanf handler is responsible for handling a single u_scanf 283 * format specification, for example 'd' or 's'. 284 * @param stream The UFILE to which to write output. 285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing 286 * information on the format specification. 287 * @param args A pointer to the argument data 288 * @param fmt A pointer to the first character in the format string 289 * following the spec. 290 * @param fmtConsumed On output, set to the number of characters consumed 291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width. 292 * @param argConverted The number of arguments converted and assigned, or -1 if an 293 * error occurred. 294 * @return The number of code points consumed during reading. 295 */ 296typedef int32_t (*u_scanf_handler) (UFILE *stream, 297 u_scanf_spec_info *info, 298 ufmt_args *args, 299 const UChar *fmt, 300 int32_t *fmtConsumed, 301 int32_t *argConverted); 302 303typedef struct u_scanf_info { 304 ufmt_type_info info; 305 u_scanf_handler handler; 306} u_scanf_info; 307 308#define USCANF_NUM_FMT_HANDLERS 108 309#define USCANF_SYMBOL_BUFFER_SIZE 8 310 311/* We do not use handlers for 0-0x1f */ 312#define USCANF_BASE_FMT_HANDLERS 0x20 313 314 315static int32_t 316u_scanf_skip_leading_ws(UFILE *input, 317 UChar pad) 318{ 319 UChar c; 320 int32_t count = 0; 321 UBool isNotEOF; 322 323 /* skip all leading ws in the input */ 324 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) ) 325 { 326 count++; 327 } 328 329 /* put the final character back on the input */ 330 if(isNotEOF) 331 u_fungetc(c, input); 332 333 return count; 334} 335 336/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */ 337static int32_t 338u_scanf_skip_leading_positive_sign(UFILE *input, 339 UNumberFormat *format, 340 UErrorCode *status) 341{ 342 UChar c; 343 int32_t count = 0; 344 UBool isNotEOF; 345 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; 346 int32_t symbolLen; 347 UErrorCode localStatus = U_ZERO_ERROR; 348 349 if (U_SUCCESS(*status)) { 350 symbolLen = unum_getSymbol(format, 351 UNUM_PLUS_SIGN_SYMBOL, 352 plusSymbol, 353 sizeof(plusSymbol)/sizeof(*plusSymbol), 354 &localStatus); 355 356 if (U_SUCCESS(localStatus)) { 357 /* skip all leading ws in the input */ 358 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) ) 359 { 360 count++; 361 } 362 363 /* put the final character back on the input */ 364 if(isNotEOF) { 365 u_fungetc(c, input); 366 } 367 } 368 } 369 370 return count; 371} 372 373static int32_t 374u_scanf_simple_percent_handler(UFILE *input, 375 u_scanf_spec_info *info, 376 ufmt_args *args, 377 const UChar *fmt, 378 int32_t *fmtConsumed, 379 int32_t *argConverted) 380{ 381 /* make sure the next character in the input is a percent */ 382 *argConverted = 0; 383 if(u_fgetc(input) != 0x0025) { 384 *argConverted = -1; 385 } 386 return 1; 387} 388 389static int32_t 390u_scanf_count_handler(UFILE *input, 391 u_scanf_spec_info *info, 392 ufmt_args *args, 393 const UChar *fmt, 394 int32_t *fmtConsumed, 395 int32_t *argConverted) 396{ 397 /* in the special case of count, the u_scanf_spec_info's width */ 398 /* will contain the # of items converted thus far */ 399 if (!info->fSkipArg) { 400 if (info->fIsShort) 401 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); 402 else if (info->fIsLongLong) 403 *(int64_t*)(args[0].ptrValue) = info->fWidth; 404 else 405 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); 406 } 407 *argConverted = 0; 408 409 /* we converted 0 args */ 410 return 0; 411} 412 413static int32_t 414u_scanf_double_handler(UFILE *input, 415 u_scanf_spec_info *info, 416 ufmt_args *args, 417 const UChar *fmt, 418 int32_t *fmtConsumed, 419 int32_t *argConverted) 420{ 421 int32_t len; 422 double num; 423 UNumberFormat *format; 424 int32_t parsePos = 0; 425 int32_t skipped; 426 UErrorCode status = U_ZERO_ERROR; 427 428 429 /* skip all ws in the input */ 430 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 431 432 /* fill the input's internal buffer */ 433 ufile_fill_uchar_buffer(input); 434 435 /* determine the size of the input's buffer */ 436 len = (int32_t)(input->str.fLimit - input->str.fPos); 437 438 /* truncate to the width, if specified */ 439 if(info->fWidth != -1) 440 len = ufmt_min(len, info->fWidth); 441 442 /* get the formatter */ 443 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 444 445 /* handle error */ 446 if(format == 0) 447 return 0; 448 449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 450 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 451 452 /* parse the number */ 453 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 454 455 if (!info->fSkipArg) { 456 if (info->fIsLong) 457 *(double*)(args[0].ptrValue) = num; 458 else if (info->fIsLongDouble) 459 *(long double*)(args[0].ptrValue) = num; 460 else 461 *(float*)(args[0].ptrValue) = (float)num; 462 } 463 464 /* mask off any necessary bits */ 465 /* if(! info->fIsLong_double) 466 num &= DBL_MAX;*/ 467 468 /* update the input's position to reflect consumed data */ 469 input->str.fPos += parsePos; 470 471 /* we converted 1 arg */ 472 *argConverted = !info->fSkipArg; 473 return parsePos + skipped; 474} 475 476static int32_t 477u_scanf_scientific_handler(UFILE *input, 478 u_scanf_spec_info *info, 479 ufmt_args *args, 480 const UChar *fmt, 481 int32_t *fmtConsumed, 482 int32_t *argConverted) 483{ 484 int32_t len; 485 double num; 486 UNumberFormat *format; 487 int32_t parsePos = 0; 488 int32_t skipped; 489 UErrorCode status = U_ZERO_ERROR; 490 491 492 /* skip all ws in the input */ 493 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 494 495 /* fill the input's internal buffer */ 496 ufile_fill_uchar_buffer(input); 497 498 /* determine the size of the input's buffer */ 499 len = (int32_t)(input->str.fLimit - input->str.fPos); 500 501 /* truncate to the width, if specified */ 502 if(info->fWidth != -1) 503 len = ufmt_min(len, info->fWidth); 504 505 /* get the formatter */ 506 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); 507 508 /* handle error */ 509 if(format == 0) 510 return 0; 511 512 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 513 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 514 515 /* parse the number */ 516 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 517 518 if (!info->fSkipArg) { 519 if (info->fIsLong) 520 *(double*)(args[0].ptrValue) = num; 521 else if (info->fIsLongDouble) 522 *(long double*)(args[0].ptrValue) = num; 523 else 524 *(float*)(args[0].ptrValue) = (float)num; 525 } 526 527 /* mask off any necessary bits */ 528 /* if(! info->fIsLong_double) 529 num &= DBL_MAX;*/ 530 531 /* update the input's position to reflect consumed data */ 532 input->str.fPos += parsePos; 533 534 /* we converted 1 arg */ 535 *argConverted = !info->fSkipArg; 536 return parsePos + skipped; 537} 538 539static int32_t 540u_scanf_scidbl_handler(UFILE *input, 541 u_scanf_spec_info *info, 542 ufmt_args *args, 543 const UChar *fmt, 544 int32_t *fmtConsumed, 545 int32_t *argConverted) 546{ 547 int32_t len; 548 double num; 549 UNumberFormat *scientificFormat, *genericFormat; 550 /*int32_t scientificResult, genericResult;*/ 551 double scientificResult, genericResult; 552 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; 553 int32_t skipped; 554 UErrorCode scientificStatus = U_ZERO_ERROR; 555 UErrorCode genericStatus = U_ZERO_ERROR; 556 557 558 /* since we can't determine by scanning the characters whether */ 559 /* a number was formatted in the 'f' or 'g' styles, parse the */ 560 /* string with both formatters, and assume whichever one */ 561 /* parsed the most is the correct formatter to use */ 562 563 564 /* skip all ws in the input */ 565 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 566 567 /* fill the input's internal buffer */ 568 ufile_fill_uchar_buffer(input); 569 570 /* determine the size of the input's buffer */ 571 len = (int32_t)(input->str.fLimit - input->str.fPos); 572 573 /* truncate to the width, if specified */ 574 if(info->fWidth != -1) 575 len = ufmt_min(len, info->fWidth); 576 577 /* get the formatters */ 578 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); 579 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 580 581 /* handle error */ 582 if(scientificFormat == 0 || genericFormat == 0) 583 return 0; 584 585 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 586 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); 587 588 /* parse the number using each format*/ 589 590 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, 591 &scientificParsePos, &scientificStatus); 592 593 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, 594 &genericParsePos, &genericStatus); 595 596 /* determine which parse made it farther */ 597 if(scientificParsePos > genericParsePos) { 598 /* stash the result in num */ 599 num = scientificResult; 600 /* update the input's position to reflect consumed data */ 601 parsePos += scientificParsePos; 602 } 603 else { 604 /* stash the result in num */ 605 num = genericResult; 606 /* update the input's position to reflect consumed data */ 607 parsePos += genericParsePos; 608 } 609 input->str.fPos += parsePos; 610 611 if (!info->fSkipArg) { 612 if (info->fIsLong) 613 *(double*)(args[0].ptrValue) = num; 614 else if (info->fIsLongDouble) 615 *(long double*)(args[0].ptrValue) = num; 616 else 617 *(float*)(args[0].ptrValue) = (float)num; 618 } 619 620 /* mask off any necessary bits */ 621 /* if(! info->fIsLong_double) 622 num &= DBL_MAX;*/ 623 624 /* we converted 1 arg */ 625 *argConverted = !info->fSkipArg; 626 return parsePos + skipped; 627} 628 629static int32_t 630u_scanf_integer_handler(UFILE *input, 631 u_scanf_spec_info *info, 632 ufmt_args *args, 633 const UChar *fmt, 634 int32_t *fmtConsumed, 635 int32_t *argConverted) 636{ 637 int32_t len; 638 void *num = (void*) (args[0].ptrValue); 639 UNumberFormat *format; 640 int32_t parsePos = 0; 641 int32_t skipped; 642 UErrorCode status = U_ZERO_ERROR; 643 int64_t result; 644 645 646 /* skip all ws in the input */ 647 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 648 649 /* fill the input's internal buffer */ 650 ufile_fill_uchar_buffer(input); 651 652 /* determine the size of the input's buffer */ 653 len = (int32_t)(input->str.fLimit - input->str.fPos); 654 655 /* truncate to the width, if specified */ 656 if(info->fWidth != -1) 657 len = ufmt_min(len, info->fWidth); 658 659 /* get the formatter */ 660 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 661 662 /* handle error */ 663 if(format == 0) 664 return 0; 665 666 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 667 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 668 669 /* parse the number */ 670 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); 671 672 /* mask off any necessary bits */ 673 if (!info->fSkipArg) { 674 if (info->fIsShort) 675 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 676 else if (info->fIsLongLong) 677 *(int64_t*)num = result; 678 else 679 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 680 } 681 682 /* update the input's position to reflect consumed data */ 683 input->str.fPos += parsePos; 684 685 /* we converted 1 arg */ 686 *argConverted = !info->fSkipArg; 687 return parsePos + skipped; 688} 689 690static int32_t 691u_scanf_uinteger_handler(UFILE *input, 692 u_scanf_spec_info *info, 693 ufmt_args *args, 694 const UChar *fmt, 695 int32_t *fmtConsumed, 696 int32_t *argConverted) 697{ 698 /* TODO Fix this when Numberformat handles uint64_t */ 699 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); 700} 701 702static int32_t 703u_scanf_percent_handler(UFILE *input, 704 u_scanf_spec_info *info, 705 ufmt_args *args, 706 const UChar *fmt, 707 int32_t *fmtConsumed, 708 int32_t *argConverted) 709{ 710 int32_t len; 711 double num; 712 UNumberFormat *format; 713 int32_t parsePos = 0; 714 int32_t skipped; 715 UErrorCode status = U_ZERO_ERROR; 716 717 718 /* skip all ws in the input */ 719 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 720 721 /* fill the input's internal buffer */ 722 ufile_fill_uchar_buffer(input); 723 724 /* determine the size of the input's buffer */ 725 len = (int32_t)(input->str.fLimit - input->str.fPos); 726 727 /* truncate to the width, if specified */ 728 if(info->fWidth != -1) 729 len = ufmt_min(len, info->fWidth); 730 731 /* get the formatter */ 732 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); 733 734 /* handle error */ 735 if(format == 0) 736 return 0; 737 738 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 739 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 740 741 /* parse the number */ 742 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 743 744 if (!info->fSkipArg) { 745 *(double*)(args[0].ptrValue) = num; 746 } 747 748 /* mask off any necessary bits */ 749 /* if(! info->fIsLong_double) 750 num &= DBL_MAX;*/ 751 752 /* update the input's position to reflect consumed data */ 753 input->str.fPos += parsePos; 754 755 /* we converted 1 arg */ 756 *argConverted = !info->fSkipArg; 757 return parsePos; 758} 759 760static int32_t 761u_scanf_string_handler(UFILE *input, 762 u_scanf_spec_info *info, 763 ufmt_args *args, 764 const UChar *fmt, 765 int32_t *fmtConsumed, 766 int32_t *argConverted) 767{ 768 const UChar *source; 769 UConverter *conv; 770 char *arg = (char*)(args[0].ptrValue); 771 char *alias = arg; 772 char *limit; 773 UErrorCode status = U_ZERO_ERROR; 774 int32_t count; 775 int32_t skipped = 0; 776 UChar c; 777 UBool isNotEOF = FALSE; 778 779 /* skip all ws in the input */ 780 if (info->fIsString) { 781 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 782 } 783 784 /* get the string one character at a time, truncating to the width */ 785 count = 0; 786 787 /* open the default converter */ 788 conv = u_getDefaultConverter(&status); 789 790 if(U_FAILURE(status)) 791 return -1; 792 793 while( (info->fWidth == -1 || count < info->fWidth) 794 && (isNotEOF = ufile_getch(input, &c)) 795 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) 796 { 797 798 if (!info->fSkipArg) { 799 /* put the character from the input onto the target */ 800 source = &c; 801 /* Since we do this one character at a time, do it this way. */ 802 if (info->fWidth > 0) { 803 limit = alias + info->fWidth - count; 804 } 805 else { 806 limit = alias + ucnv_getMaxCharSize(conv); 807 } 808 809 /* convert the character to the default codepage */ 810 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, 811 NULL, TRUE, &status); 812 813 if(U_FAILURE(status)) { 814 /* clean up */ 815 u_releaseDefaultConverter(conv); 816 return -1; 817 } 818 } 819 820 /* increment the count */ 821 ++count; 822 } 823 824 /* put the final character we read back on the input */ 825 if (!info->fSkipArg) { 826 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) 827 u_fungetc(c, input); 828 829 /* add the terminator */ 830 if (info->fIsString) { 831 *alias = 0x00; 832 } 833 } 834 835 /* clean up */ 836 u_releaseDefaultConverter(conv); 837 838 /* we converted 1 arg */ 839 *argConverted = !info->fSkipArg; 840 return count + skipped; 841} 842 843static int32_t 844u_scanf_char_handler(UFILE *input, 845 u_scanf_spec_info *info, 846 ufmt_args *args, 847 const UChar *fmt, 848 int32_t *fmtConsumed, 849 int32_t *argConverted) 850{ 851 if (info->fWidth < 0) { 852 info->fWidth = 1; 853 } 854 info->fIsString = FALSE; 855 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); 856} 857 858static int32_t 859u_scanf_ustring_handler(UFILE *input, 860 u_scanf_spec_info *info, 861 ufmt_args *args, 862 const UChar *fmt, 863 int32_t *fmtConsumed, 864 int32_t *argConverted) 865{ 866 UChar *arg = (UChar*)(args[0].ptrValue); 867 UChar *alias = arg; 868 int32_t count; 869 int32_t skipped = 0; 870 UChar c; 871 UBool isNotEOF = FALSE; 872 873 /* skip all ws in the input */ 874 if (info->fIsString) { 875 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 876 } 877 878 /* get the string one character at a time, truncating to the width */ 879 count = 0; 880 881 while( (info->fWidth == -1 || count < info->fWidth) 882 && (isNotEOF = ufile_getch(input, &c)) 883 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) 884 { 885 886 /* put the character from the input onto the target */ 887 if (!info->fSkipArg) { 888 *alias++ = c; 889 } 890 891 /* increment the count */ 892 ++count; 893 } 894 895 /* put the final character we read back on the input */ 896 if (!info->fSkipArg) { 897 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { 898 u_fungetc(c, input); 899 } 900 901 /* add the terminator */ 902 if (info->fIsString) { 903 *alias = 0x0000; 904 } 905 } 906 907 /* we converted 1 arg */ 908 *argConverted = !info->fSkipArg; 909 return count + skipped; 910} 911 912static int32_t 913u_scanf_uchar_handler(UFILE *input, 914 u_scanf_spec_info *info, 915 ufmt_args *args, 916 const UChar *fmt, 917 int32_t *fmtConsumed, 918 int32_t *argConverted) 919{ 920 if (info->fWidth < 0) { 921 info->fWidth = 1; 922 } 923 info->fIsString = FALSE; 924 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); 925} 926 927static int32_t 928u_scanf_spellout_handler(UFILE *input, 929 u_scanf_spec_info *info, 930 ufmt_args *args, 931 const UChar *fmt, 932 int32_t *fmtConsumed, 933 int32_t *argConverted) 934{ 935 int32_t len; 936 double num; 937 UNumberFormat *format; 938 int32_t parsePos = 0; 939 int32_t skipped; 940 UErrorCode status = U_ZERO_ERROR; 941 942 943 /* skip all ws in the input */ 944 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 945 946 /* fill the input's internal buffer */ 947 ufile_fill_uchar_buffer(input); 948 949 /* determine the size of the input's buffer */ 950 len = (int32_t)(input->str.fLimit - input->str.fPos); 951 952 /* truncate to the width, if specified */ 953 if(info->fWidth != -1) 954 len = ufmt_min(len, info->fWidth); 955 956 /* get the formatter */ 957 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); 958 959 /* handle error */ 960 if(format == 0) 961 return 0; 962 963 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 964 /* This is not applicable to RBNF. */ 965 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ 966 967 /* parse the number */ 968 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 969 970 if (!info->fSkipArg) { 971 *(double*)(args[0].ptrValue) = num; 972 } 973 974 /* mask off any necessary bits */ 975 /* if(! info->fIsLong_double) 976 num &= DBL_MAX;*/ 977 978 /* update the input's position to reflect consumed data */ 979 input->str.fPos += parsePos; 980 981 /* we converted 1 arg */ 982 *argConverted = !info->fSkipArg; 983 return parsePos + skipped; 984} 985 986static int32_t 987u_scanf_hex_handler(UFILE *input, 988 u_scanf_spec_info *info, 989 ufmt_args *args, 990 const UChar *fmt, 991 int32_t *fmtConsumed, 992 int32_t *argConverted) 993{ 994 int32_t len; 995 int32_t skipped; 996 void *num = (void*) (args[0].ptrValue); 997 int64_t result; 998 999 /* skip all ws in the input */ 1000 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1001 1002 /* fill the input's internal buffer */ 1003 ufile_fill_uchar_buffer(input); 1004 1005 /* determine the size of the input's buffer */ 1006 len = (int32_t)(input->str.fLimit - input->str.fPos); 1007 1008 /* truncate to the width, if specified */ 1009 if(info->fWidth != -1) 1010 len = ufmt_min(len, info->fWidth); 1011 1012 /* check for alternate form */ 1013 if( *(input->str.fPos) == 0x0030 && 1014 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { 1015 1016 /* skip the '0' and 'x' or 'X' if present */ 1017 input->str.fPos += 2; 1018 len -= 2; 1019 } 1020 1021 /* parse the number */ 1022 result = ufmt_uto64(input->str.fPos, &len, 16); 1023 1024 /* update the input's position to reflect consumed data */ 1025 input->str.fPos += len; 1026 1027 /* mask off any necessary bits */ 1028 if (!info->fSkipArg) { 1029 if (info->fIsShort) 1030 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 1031 else if (info->fIsLongLong) 1032 *(int64_t*)num = result; 1033 else 1034 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 1035 } 1036 1037 /* we converted 1 arg */ 1038 *argConverted = !info->fSkipArg; 1039 return len + skipped; 1040} 1041 1042static int32_t 1043u_scanf_octal_handler(UFILE *input, 1044 u_scanf_spec_info *info, 1045 ufmt_args *args, 1046 const UChar *fmt, 1047 int32_t *fmtConsumed, 1048 int32_t *argConverted) 1049{ 1050 int32_t len; 1051 int32_t skipped; 1052 void *num = (void*) (args[0].ptrValue); 1053 int64_t result; 1054 1055 /* skip all ws in the input */ 1056 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1057 1058 /* fill the input's internal buffer */ 1059 ufile_fill_uchar_buffer(input); 1060 1061 /* determine the size of the input's buffer */ 1062 len = (int32_t)(input->str.fLimit - input->str.fPos); 1063 1064 /* truncate to the width, if specified */ 1065 if(info->fWidth != -1) 1066 len = ufmt_min(len, info->fWidth); 1067 1068 /* parse the number */ 1069 result = ufmt_uto64(input->str.fPos, &len, 8); 1070 1071 /* update the input's position to reflect consumed data */ 1072 input->str.fPos += len; 1073 1074 /* mask off any necessary bits */ 1075 if (!info->fSkipArg) { 1076 if (info->fIsShort) 1077 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 1078 else if (info->fIsLongLong) 1079 *(int64_t*)num = result; 1080 else 1081 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 1082 } 1083 1084 /* we converted 1 arg */ 1085 *argConverted = !info->fSkipArg; 1086 return len + skipped; 1087} 1088 1089static int32_t 1090u_scanf_pointer_handler(UFILE *input, 1091 u_scanf_spec_info *info, 1092 ufmt_args *args, 1093 const UChar *fmt, 1094 int32_t *fmtConsumed, 1095 int32_t *argConverted) 1096{ 1097 int32_t len; 1098 int32_t skipped; 1099 void *result; 1100 void **p = (void**)(args[0].ptrValue); 1101 1102 1103 /* skip all ws in the input */ 1104 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1105 1106 /* fill the input's internal buffer */ 1107 ufile_fill_uchar_buffer(input); 1108 1109 /* determine the size of the input's buffer */ 1110 len = (int32_t)(input->str.fLimit - input->str.fPos); 1111 1112 /* truncate to the width, if specified */ 1113 if(info->fWidth != -1) { 1114 len = ufmt_min(len, info->fWidth); 1115 } 1116 1117 /* Make sure that we don't consume too much */ 1118 if (len > (int32_t)(sizeof(void*)*2)) { 1119 len = (int32_t)(sizeof(void*)*2); 1120 } 1121 1122 /* parse the pointer - assign to temporary value */ 1123 result = ufmt_utop(input->str.fPos, &len); 1124 1125 if (!info->fSkipArg) { 1126 *p = result; 1127 } 1128 1129 /* update the input's position to reflect consumed data */ 1130 input->str.fPos += len; 1131 1132 /* we converted 1 arg */ 1133 *argConverted = !info->fSkipArg; 1134 return len + skipped; 1135} 1136 1137static int32_t 1138u_scanf_scanset_handler(UFILE *input, 1139 u_scanf_spec_info *info, 1140 ufmt_args *args, 1141 const UChar *fmt, 1142 int32_t *fmtConsumed, 1143 int32_t *argConverted) 1144{ 1145 USet *scanset; 1146 UErrorCode status = U_ZERO_ERROR; 1147 int32_t chLeft = INT32_MAX; 1148 UChar32 c; 1149 UChar *alias = (UChar*) (args[0].ptrValue); 1150 UBool isNotEOF = FALSE; 1151 UBool readCharacter = FALSE; 1152 1153 /* Create an empty set */ 1154 scanset = uset_open(0, -1); 1155 1156 /* Back up one to get the [ */ 1157 fmt--; 1158 1159 /* truncate to the width, if specified and alias the target */ 1160 if(info->fWidth >= 0) { 1161 chLeft = info->fWidth; 1162 } 1163 1164 /* parse the scanset from the fmt string */ 1165 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); 1166 1167 /* verify that the parse was successful */ 1168 if (U_SUCCESS(status)) { 1169 c=0; 1170 1171 /* grab characters one at a time and make sure they are in the scanset */ 1172 while(chLeft > 0) { 1173 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) { 1174 readCharacter = TRUE; 1175 if (!info->fSkipArg) { 1176 int32_t idx = 0; 1177 UBool isError = FALSE; 1178 1179 U16_APPEND(alias, idx, chLeft, c, isError); 1180 if (isError) { 1181 break; 1182 } 1183 alias += idx; 1184 } 1185 chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); 1186 } 1187 else { 1188 /* if the character's not in the scanset, break out */ 1189 break; 1190 } 1191 } 1192 1193 /* put the final character we read back on the input */ 1194 if(isNotEOF && chLeft > 0) { 1195 u_fungetc(c, input); 1196 } 1197 } 1198 1199 uset_close(scanset); 1200 1201 /* if we didn't match at least 1 character, fail */ 1202 if(!readCharacter) 1203 return -1; 1204 /* otherwise, add the terminator */ 1205 else if (!info->fSkipArg) { 1206 *alias = 0x00; 1207 } 1208 1209 /* we converted 1 arg */ 1210 *argConverted = !info->fSkipArg; 1211 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; 1212} 1213 1214/* Use US-ASCII characters only for formatting. Most codepages have 1215 characters 20-7F from Unicode. Using any other codepage specific 1216 characters will make it very difficult to format the string on 1217 non-Unicode machines */ 1218static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { 1219/* 0x20 */ 1220 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1221 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, 1222 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1223 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1224 1225/* 0x30 */ 1226 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1227 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1228 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1229 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1230 1231/* 0x40 */ 1232 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, 1233 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, 1234#ifdef U_USE_OBSOLETE_IO_FORMATTING 1235 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, 1236#else 1237 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1238#endif 1239 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1240 1241/* 0x50 */ 1242 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, 1243#ifdef U_USE_OBSOLETE_IO_FORMATTING 1244 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, 1245#else 1246 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, 1247#endif 1248 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, 1249 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1250 1251/* 0x60 */ 1252 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, 1253 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, 1254 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, 1255 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, 1256 1257/* 0x70 */ 1258 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, 1259 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, 1260 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1261 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1262}; 1263 1264U_CFUNC int32_t 1265u_scanf_parse(UFILE *f, 1266 const UChar *patternSpecification, 1267 va_list ap) 1268{ 1269 const UChar *alias; 1270 int32_t count, converted, argConsumed, cpConsumed; 1271 uint16_t handlerNum; 1272 1273 ufmt_args args; 1274 u_scanf_spec spec; 1275 ufmt_type_info info; 1276 u_scanf_handler handler; 1277 1278 /* alias the pattern */ 1279 alias = patternSpecification; 1280 1281 /* haven't converted anything yet */ 1282 argConsumed = 0; 1283 converted = 0; 1284 cpConsumed = 0; 1285 1286 /* iterate through the pattern */ 1287 for(;;) { 1288 1289 /* match any characters up to the next '%' */ 1290 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { 1291 alias++; 1292 } 1293 1294 /* if we aren't at a '%', or if we're at end of string, break*/ 1295 if(*alias != UP_PERCENT || *alias == 0x0000) 1296 break; 1297 1298 /* parse the specifier */ 1299 count = u_scanf_parse_spec(alias, &spec); 1300 1301 /* update the pointer in pattern */ 1302 alias += count; 1303 1304 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); 1305 if (handlerNum < USCANF_NUM_FMT_HANDLERS) { 1306 /* skip the argument, if necessary */ 1307 /* query the info function for argument information */ 1308 info = g_u_scanf_infos[ handlerNum ].info; 1309 if (info != ufmt_count && u_feof(f)) { 1310 break; 1311 } 1312 else if(spec.fInfo.fSkipArg) { 1313 args.ptrValue = NULL; 1314 } 1315 else { 1316 switch(info) { 1317 case ufmt_count: 1318 /* set the spec's width to the # of items converted */ 1319 spec.fInfo.fWidth = cpConsumed; 1320 /* fall through to next case */ 1321 case ufmt_char: 1322 case ufmt_uchar: 1323 case ufmt_int: 1324 case ufmt_string: 1325 case ufmt_ustring: 1326 case ufmt_pointer: 1327 case ufmt_float: 1328 case ufmt_double: 1329 args.ptrValue = va_arg(ap, void*); 1330 break; 1331 1332 default: 1333 /* else args is ignored */ 1334 args.ptrValue = NULL; 1335 break; 1336 } 1337 } 1338 1339 /* call the handler function */ 1340 handler = g_u_scanf_infos[ handlerNum ].handler; 1341 if(handler != 0) { 1342 1343 /* reset count to 1 so that += for alias works. */ 1344 count = 1; 1345 1346 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); 1347 1348 /* if the handler encountered an error condition, break */ 1349 if(argConsumed < 0) { 1350 converted = -1; 1351 break; 1352 } 1353 1354 /* add to the # of items converted */ 1355 converted += argConsumed; 1356 1357 /* update the pointer in pattern */ 1358 alias += count-1; 1359 } 1360 /* else do nothing */ 1361 } 1362 /* else do nothing */ 1363 1364 /* just ignore unknown tags */ 1365 } 1366 1367 /* return # of items converted */ 1368 return converted; 1369} 1370 1371#endif /* #if !UCONFIG_NO_FORMATTING */ 1372