1/*
2*******************************************************************************
3*
4*   Copyright (C) 1998-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*
9* File uscnnf_p.c
10*
11* Modification History:
12*
13*   Date        Name        Description
14*   12/02/98    stephen        Creation.
15*   03/13/99    stephen     Modified for new C API.
16*******************************************************************************
17*/
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_FORMATTING
22
23#include "unicode/uchar.h"
24#include "unicode/ustring.h"
25#include "unicode/unum.h"
26#include "unicode/udat.h"
27#include "unicode/uset.h"
28#include "uscanf.h"
29#include "ufmt_cmn.h"
30#include "ufile.h"
31#include "locbund.h"
32
33#include "cmemory.h"
34#include "ustr_cnv.h"
35
/* flag characters for u_scanf */
#define FLAG_ASTERISK 0x002A
#define FLAG_PAREN 0x0028

/*
 * Non-zero if s is one of the u_scanf flag characters ('*' or '(').
 * The whole expansion is parenthesized so that the macro composes correctly
 * with other operators (!, &&, ...). The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define ISFLAG(s)    ((s) == FLAG_ASTERISK || \
            (s) == FLAG_PAREN)

/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024
45
/* unicode digits */
#define DIGIT_ZERO 0x0030
#define DIGIT_ONE 0x0031
#define DIGIT_TWO 0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR 0x0034
#define DIGIT_FIVE 0x0035
#define DIGIT_SIX 0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE 0x0039

/*
 * Non-zero if s is one of the ASCII digits DIGIT_ZERO..DIGIT_NINE.
 * The ten digit constants are contiguous, so a range check is equivalent to
 * comparing against each of them. The whole expansion is parenthesized so
 * that the macro composes correctly with other operators (!, &&, ...).
 * The argument is evaluated twice, so it must not have side effects.
 */
#define ISDIGIT(s)    ((s) >= DIGIT_ZERO && \
            (s) <= DIGIT_NINE)
68
/* u_scanf modifiers */
#define MOD_H 0x0068
#define MOD_LOWERL 0x006C
#define MOD_L 0x004C

/*
 * Non-zero if s is one of the u_scanf size modifiers ('h', 'l' or 'L').
 * The whole expansion is parenthesized so that the macro composes correctly
 * with other operators (!, &&, ...). The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define ISMOD(s)    ((s) == MOD_H || \
            (s) == MOD_LOWERL || \
            (s) == MOD_L)
77
/**
 * Struct encapsulating a single uscanf format specification.
 * It is filled in by u_scanf_parse_spec and handed to the conversion
 * handlers, some of which adjust it (the character handlers force a width
 * and clear fIsString).
 */
typedef struct u_scanf_spec_info {
    int32_t fWidth;         /* Width; -1 when the specifier gives none.
                               The count handler instead expects the number
                               of items converted so far here.  */

    UChar   fSpec;          /* Format specification  */

    UChar   fPadChar;       /* Padding character; defaults to U+0020 and is
                               overridden by the '(' flag  */

    UBool   fSkipArg;       /* TRUE if arg should be skipped */
    UBool   fIsLongDouble;  /* L flag  */
    UBool   fIsShort;       /* h flag  */
    UBool   fIsLong;        /* l flag  */
    UBool   fIsLongLong;    /* ll flag  */
    UBool   fIsString;      /* TRUE if this is a NULL-terminated string. */
} u_scanf_spec_info;
95
96
/**
 * Struct encapsulating a single u_scanf format specification.
 * Pairs the parsed specifier details with the positional-argument index
 * taken from an "n$" prefix.
 */
typedef struct u_scanf_spec {
    u_scanf_spec_info    fInfo;        /* Information on this spec */
    int32_t        fArgPos;    /* Position of data in arg list; -1 if the
                                  spec has no "n$" positional prefix */
} u_scanf_spec;
104
105/**
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
109 * format specifier.
110 * @return The number of characters contained in this specifier.
111 */
112static int32_t
113u_scanf_parse_spec (const UChar     *fmt,
114            u_scanf_spec    *spec)
115{
116    const UChar *s = fmt;
117    const UChar *backup;
118    u_scanf_spec_info *info = &(spec->fInfo);
119
120    /* initialize spec to default values */
121    spec->fArgPos             = -1;
122
123    info->fWidth        = -1;
124    info->fSpec         = 0x0000;
125    info->fPadChar      = 0x0020;
126    info->fSkipArg      = FALSE;
127    info->fIsLongDouble = FALSE;
128    info->fIsShort      = FALSE;
129    info->fIsLong       = FALSE;
130    info->fIsLongLong   = FALSE;
131    info->fIsString     = TRUE;
132
133
134    /* skip over the initial '%' */
135    s++;
136
137    /* Check for positional argument */
138    if(ISDIGIT(*s)) {
139
140        /* Save the current position */
141        backup = s;
142
143        /* handle positional parameters */
144        if(ISDIGIT(*s)) {
145            spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
146
147            while(ISDIGIT(*s)) {
148                spec->fArgPos *= 10;
149                spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
150            }
151        }
152
153        /* if there is no '$', don't read anything */
154        if(*s != SPEC_DOLLARSIGN) {
155            spec->fArgPos = -1;
156            s = backup;
157        }
158        /* munge the '$' */
159        else
160            s++;
161    }
162
163    /* Get any format flags */
164    while(ISFLAG(*s)) {
165        switch(*s++) {
166
167            /* skip argument */
168        case FLAG_ASTERISK:
169            info->fSkipArg = TRUE;
170            break;
171
172            /* pad character specified */
173        case FLAG_PAREN:
174
175            /* first four characters are hex values for pad char */
176            info->fPadChar = (UChar)ufmt_digitvalue(*s++);
177            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
178            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
179            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180
181            /* final character is ignored */
182            s++;
183
184            break;
185        }
186    }
187
188    /* Get the width */
189    if(ISDIGIT(*s)){
190        info->fWidth = (int) (*s++ - DIGIT_ZERO);
191
192        while(ISDIGIT(*s)) {
193            info->fWidth *= 10;
194            info->fWidth += (int) (*s++ - DIGIT_ZERO);
195        }
196    }
197
198    /* Get any modifiers */
199    if(ISMOD(*s)) {
200        switch(*s++) {
201
202            /* short */
203        case MOD_H:
204            info->fIsShort = TRUE;
205            break;
206
207            /* long or long long */
208        case MOD_LOWERL:
209            if(*s == MOD_LOWERL) {
210                info->fIsLongLong = TRUE;
211                /* skip over the next 'l' */
212                s++;
213            }
214            else
215                info->fIsLong = TRUE;
216            break;
217
218            /* long double */
219        case MOD_L:
220            info->fIsLongDouble = TRUE;
221            break;
222        }
223    }
224
225    /* finally, get the specifier letter */
226    info->fSpec = *s++;
227
228    /* return # of characters in this specifier */
229    return (int32_t)(s - fmt);
230}
231
/* '%' */
#define UP_PERCENT 0x0025


/*
 * Handler-table entry initializers.
 * Each UFMT_* macro below expands to a brace-enclosed pair: a ufmt_* value
 * followed by the u_scanf_*_handler function for that conversion (NULL for
 * UFMT_EMPTY), in the member order of u_scanf_info.
 * NOTE(review): presumably these populate the table of format handlers
 * indexed by specifier character -- confirm against the table definition.
 * The comment above each macro names the specifier character(s) it serves.
 */

/* ANSI style formatting */
/* Use US-ASCII characters only for formatting */

/* % */
#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
/* s */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}
/* c */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}
/* d, i */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}
/* u */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}
/* o */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}
/* x, X */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}
/* f */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}
/* e, E */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}
/* g, G */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}
/* n */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}
/* [ */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}

/* non-ANSI extensions */
/* Use US-ASCII characters only for formatting */

/* p */
#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}
/* V */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}
/* P */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}
/* C  K is old format */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}
/* S  U is old format */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}


/* entry for a character that has no handler */
#define UFMT_EMPTY {ufmt_empty, NULL}
279
/**
 * A u_scanf handler function.
 * A u_scanf handler is responsible for handling a single u_scanf
 * format specification, for example 'd' or 's'.
 * @param stream The UFILE from which to read input.
 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
 * information on the format specification.
 * @param args A pointer to the argument data. Handlers store their result
 * through <TT>args[0].ptrValue</TT> unless <TT>info->fSkipArg</TT> is set.
 * @param fmt A pointer to the first character in the format string
 * following the spec.
 * @param fmtConsumed On output, set to the number of characters consumed
 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
 * @param argConverted On output, the number of arguments converted and
 * assigned, or -1 if an error occurred.
 * @return The number of input characters (UChars) consumed during reading.
 */
typedef int32_t (*u_scanf_handler) (UFILE   *stream,
                   u_scanf_spec_info  *info,
                   ufmt_args                *args,
                   const UChar              *fmt,
                   int32_t                  *fmtConsumed,
                   int32_t                  *argConverted);
302
/**
 * One handler entry: the kind of argument a conversion takes together with
 * the function that performs the conversion. The UFMT_* macros expand to
 * initializers for this struct.
 */
typedef struct u_scanf_info {
    ufmt_type_info info;        /* argument kind (a ufmt_* value) */
    u_scanf_handler handler;    /* conversion function; NULL in UFMT_EMPTY */
} u_scanf_info;

/* NOTE(review): presumably the number of entries in the handler table -- confirm */
#define USCANF_NUM_FMT_HANDLERS 108
/* capacity, in UChars, of the buffer used to fetch a number-format symbol */
#define USCANF_SYMBOL_BUFFER_SIZE 8

/* We do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS 0x20
313
314
315static int32_t
316u_scanf_skip_leading_ws(UFILE   *input,
317                        UChar   pad)
318{
319    UChar   c;
320    int32_t count = 0;
321    UBool isNotEOF;
322
323    /* skip all leading ws in the input */
324    while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
325    {
326        count++;
327    }
328
329    /* put the final character back on the input */
330    if(isNotEOF)
331        u_fungetc(c, input);
332
333    return count;
334}
335
336/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
337static int32_t
338u_scanf_skip_leading_positive_sign(UFILE   *input,
339                                   UNumberFormat *format,
340                                   UErrorCode *status)
341{
342    UChar   c;
343    int32_t count = 0;
344    UBool isNotEOF;
345    UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
346    int32_t symbolLen;
347    UErrorCode localStatus = U_ZERO_ERROR;
348
349    if (U_SUCCESS(*status)) {
350        symbolLen = unum_getSymbol(format,
351            UNUM_PLUS_SIGN_SYMBOL,
352            plusSymbol,
353            sizeof(plusSymbol)/sizeof(*plusSymbol),
354            &localStatus);
355
356        if (U_SUCCESS(localStatus)) {
357            /* skip all leading ws in the input */
358            while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
359            {
360                count++;
361            }
362
363            /* put the final character back on the input */
364            if(isNotEOF) {
365                u_fungetc(c, input);
366            }
367        }
368    }
369
370    return count;
371}
372
373static int32_t
374u_scanf_simple_percent_handler(UFILE        *input,
375                               u_scanf_spec_info *info,
376                               ufmt_args    *args,
377                               const UChar  *fmt,
378                               int32_t      *fmtConsumed,
379                               int32_t      *argConverted)
380{
381    /* make sure the next character in the input is a percent */
382    *argConverted = 0;
383    if(u_fgetc(input) != 0x0025) {
384        *argConverted = -1;
385    }
386    return 1;
387}
388
389static int32_t
390u_scanf_count_handler(UFILE         *input,
391                      u_scanf_spec_info *info,
392                      ufmt_args     *args,
393                      const UChar   *fmt,
394                      int32_t       *fmtConsumed,
395                      int32_t       *argConverted)
396{
397    /* in the special case of count, the u_scanf_spec_info's width */
398    /* will contain the # of items converted thus far */
399    if (!info->fSkipArg) {
400        if (info->fIsShort)
401            *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
402        else if (info->fIsLongLong)
403            *(int64_t*)(args[0].ptrValue) = info->fWidth;
404        else
405            *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
406    }
407    *argConverted = 0;
408
409    /* we converted 0 args */
410    return 0;
411}
412
413static int32_t
414u_scanf_double_handler(UFILE        *input,
415                       u_scanf_spec_info *info,
416                       ufmt_args    *args,
417                       const UChar  *fmt,
418                       int32_t      *fmtConsumed,
419                       int32_t      *argConverted)
420{
421    int32_t         len;
422    double          num;
423    UNumberFormat   *format;
424    int32_t         parsePos    = 0;
425    int32_t         skipped;
426    UErrorCode      status      = U_ZERO_ERROR;
427
428
429    /* skip all ws in the input */
430    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
431
432    /* fill the input's internal buffer */
433    ufile_fill_uchar_buffer(input);
434
435    /* determine the size of the input's buffer */
436    len = (int32_t)(input->str.fLimit - input->str.fPos);
437
438    /* truncate to the width, if specified */
439    if(info->fWidth != -1)
440        len = ufmt_min(len, info->fWidth);
441
442    /* get the formatter */
443    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
444
445    /* handle error */
446    if(format == 0)
447        return 0;
448
449    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
450    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
451
452    /* parse the number */
453    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
454
455    if (!info->fSkipArg) {
456        if (info->fIsLong)
457            *(double*)(args[0].ptrValue) = num;
458        else if (info->fIsLongDouble)
459            *(long double*)(args[0].ptrValue) = num;
460        else
461            *(float*)(args[0].ptrValue) = (float)num;
462    }
463
464    /* mask off any necessary bits */
465    /*  if(! info->fIsLong_double)
466    num &= DBL_MAX;*/
467
468    /* update the input's position to reflect consumed data */
469    input->str.fPos += parsePos;
470
471    /* we converted 1 arg */
472    *argConverted = !info->fSkipArg;
473    return parsePos + skipped;
474}
475
476#define UPRINTF_SYMBOL_BUFFER_SIZE 8
477
478static int32_t
479u_scanf_scientific_handler(UFILE        *input,
480                           u_scanf_spec_info *info,
481                           ufmt_args    *args,
482                           const UChar  *fmt,
483                           int32_t      *fmtConsumed,
484                           int32_t      *argConverted)
485{
486    int32_t         len;
487    double          num;
488    UNumberFormat   *format;
489    int32_t         parsePos    = 0;
490    int32_t         skipped;
491    UErrorCode      status      = U_ZERO_ERROR;
492    UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
493    int32_t srcLen, expLen;
494    UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
495
496
497    /* skip all ws in the input */
498    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
499
500    /* fill the input's internal buffer */
501    ufile_fill_uchar_buffer(input);
502
503    /* determine the size of the input's buffer */
504    len = (int32_t)(input->str.fLimit - input->str.fPos);
505
506    /* truncate to the width, if specified */
507    if(info->fWidth != -1)
508        len = ufmt_min(len, info->fWidth);
509
510    /* get the formatter */
511    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
512
513    /* handle error */
514    if(format == 0)
515        return 0;
516
517    /* set the appropriate flags on the formatter */
518
519    srcLen = unum_getSymbol(format,
520        UNUM_EXPONENTIAL_SYMBOL,
521        srcExpBuf,
522        sizeof(srcExpBuf),
523        &status);
524
525    /* Upper/lower case the e */
526    if (info->fSpec == (UChar)0x65 /* e */) {
527        expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf),
528            srcExpBuf, srcLen,
529            input->str.fBundle.fLocale,
530            &status);
531    }
532    else {
533        expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf),
534            srcExpBuf, srcLen,
535            input->str.fBundle.fLocale,
536            &status);
537    }
538
539    unum_setSymbol(format,
540        UNUM_EXPONENTIAL_SYMBOL,
541        expBuf,
542        expLen,
543        &status);
544
545
546
547
548    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
549    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
550
551    /* parse the number */
552    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
553
554    if (!info->fSkipArg) {
555        if (info->fIsLong)
556            *(double*)(args[0].ptrValue) = num;
557        else if (info->fIsLongDouble)
558            *(long double*)(args[0].ptrValue) = num;
559        else
560            *(float*)(args[0].ptrValue) = (float)num;
561    }
562
563    /* mask off any necessary bits */
564    /*  if(! info->fIsLong_double)
565    num &= DBL_MAX;*/
566
567    /* update the input's position to reflect consumed data */
568    input->str.fPos += parsePos;
569
570    /* we converted 1 arg */
571    *argConverted = !info->fSkipArg;
572    return parsePos + skipped;
573}
574
575static int32_t
576u_scanf_scidbl_handler(UFILE        *input,
577                       u_scanf_spec_info *info,
578                       ufmt_args    *args,
579                       const UChar  *fmt,
580                       int32_t      *fmtConsumed,
581                       int32_t      *argConverted)
582{
583    int32_t       len;
584    double        num;
585    UNumberFormat *scientificFormat, *genericFormat;
586    /*int32_t       scientificResult, genericResult;*/
587    double        scientificResult, genericResult;
588    int32_t       scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
589    int32_t       skipped;
590    UErrorCode    scientificStatus = U_ZERO_ERROR;
591    UErrorCode    genericStatus = U_ZERO_ERROR;
592
593
594    /* since we can't determine by scanning the characters whether */
595    /* a number was formatted in the 'f' or 'g' styles, parse the */
596    /* string with both formatters, and assume whichever one */
597    /* parsed the most is the correct formatter to use */
598
599
600    /* skip all ws in the input */
601    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
602
603    /* fill the input's internal buffer */
604    ufile_fill_uchar_buffer(input);
605
606    /* determine the size of the input's buffer */
607    len = (int32_t)(input->str.fLimit - input->str.fPos);
608
609    /* truncate to the width, if specified */
610    if(info->fWidth != -1)
611        len = ufmt_min(len, info->fWidth);
612
613    /* get the formatters */
614    scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
615    genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
616
617    /* handle error */
618    if(scientificFormat == 0 || genericFormat == 0)
619        return 0;
620
621    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
622    skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
623
624    /* parse the number using each format*/
625
626    scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
627        &scientificParsePos, &scientificStatus);
628
629    genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
630        &genericParsePos, &genericStatus);
631
632    /* determine which parse made it farther */
633    if(scientificParsePos > genericParsePos) {
634        /* stash the result in num */
635        num = scientificResult;
636        /* update the input's position to reflect consumed data */
637        parsePos += scientificParsePos;
638    }
639    else {
640        /* stash the result in num */
641        num = genericResult;
642        /* update the input's position to reflect consumed data */
643        parsePos += genericParsePos;
644    }
645    input->str.fPos += parsePos;
646
647    if (!info->fSkipArg) {
648        if (info->fIsLong)
649            *(double*)(args[0].ptrValue) = num;
650        else if (info->fIsLongDouble)
651            *(long double*)(args[0].ptrValue) = num;
652        else
653            *(float*)(args[0].ptrValue) = (float)num;
654    }
655
656    /* mask off any necessary bits */
657    /*  if(! info->fIsLong_double)
658    num &= DBL_MAX;*/
659
660    /* we converted 1 arg */
661    *argConverted = !info->fSkipArg;
662    return parsePos + skipped;
663}
664
665static int32_t
666u_scanf_integer_handler(UFILE       *input,
667                        u_scanf_spec_info *info,
668                        ufmt_args   *args,
669                        const UChar *fmt,
670                        int32_t     *fmtConsumed,
671                        int32_t     *argConverted)
672{
673    int32_t         len;
674    void            *num        = (void*) (args[0].ptrValue);
675    UNumberFormat   *format;
676    int32_t         parsePos    = 0;
677    int32_t         skipped;
678    UErrorCode      status      = U_ZERO_ERROR;
679    int64_t         result;
680
681
682    /* skip all ws in the input */
683    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
684
685    /* fill the input's internal buffer */
686    ufile_fill_uchar_buffer(input);
687
688    /* determine the size of the input's buffer */
689    len = (int32_t)(input->str.fLimit - input->str.fPos);
690
691    /* truncate to the width, if specified */
692    if(info->fWidth != -1)
693        len = ufmt_min(len, info->fWidth);
694
695    /* get the formatter */
696    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
697
698    /* handle error */
699    if(format == 0)
700        return 0;
701
702    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
703    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
704
705    /* parse the number */
706    result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
707
708    /* mask off any necessary bits */
709    if (!info->fSkipArg) {
710        if (info->fIsShort)
711            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
712        else if (info->fIsLongLong)
713            *(int64_t*)num = result;
714        else
715            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
716    }
717
718    /* update the input's position to reflect consumed data */
719    input->str.fPos += parsePos;
720
721    /* we converted 1 arg */
722    *argConverted = !info->fSkipArg;
723    return parsePos + skipped;
724}
725
726static int32_t
727u_scanf_uinteger_handler(UFILE          *input,
728                         u_scanf_spec_info *info,
729                         ufmt_args      *args,
730                         const UChar    *fmt,
731                         int32_t        *fmtConsumed,
732                         int32_t        *argConverted)
733{
734    /* TODO Fix this when Numberformat handles uint64_t */
735    return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
736}
737
738static int32_t
739u_scanf_percent_handler(UFILE       *input,
740                        u_scanf_spec_info *info,
741                        ufmt_args   *args,
742                        const UChar *fmt,
743                        int32_t     *fmtConsumed,
744                        int32_t     *argConverted)
745{
746    int32_t         len;
747    double          num;
748    UNumberFormat   *format;
749    int32_t         parsePos    = 0;
750    int32_t         skipped;
751    UErrorCode      status      = U_ZERO_ERROR;
752
753
754    /* skip all ws in the input */
755    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
756
757    /* fill the input's internal buffer */
758    ufile_fill_uchar_buffer(input);
759
760    /* determine the size of the input's buffer */
761    len = (int32_t)(input->str.fLimit - input->str.fPos);
762
763    /* truncate to the width, if specified */
764    if(info->fWidth != -1)
765        len = ufmt_min(len, info->fWidth);
766
767    /* get the formatter */
768    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
769
770    /* handle error */
771    if(format == 0)
772        return 0;
773
774    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
775    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
776
777    /* parse the number */
778    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
779
780    if (!info->fSkipArg) {
781        *(double*)(args[0].ptrValue) = num;
782    }
783
784    /* mask off any necessary bits */
785    /*  if(! info->fIsLong_double)
786    num &= DBL_MAX;*/
787
788    /* update the input's position to reflect consumed data */
789    input->str.fPos += parsePos;
790
791    /* we converted 1 arg */
792    *argConverted = !info->fSkipArg;
793    return parsePos;
794}
795
796static int32_t
797u_scanf_string_handler(UFILE        *input,
798                       u_scanf_spec_info *info,
799                       ufmt_args    *args,
800                       const UChar  *fmt,
801                       int32_t      *fmtConsumed,
802                       int32_t      *argConverted)
803{
804    const UChar *source;
805    UConverter  *conv;
806    char        *arg    = (char*)(args[0].ptrValue);
807    char        *alias  = arg;
808    char        *limit;
809    UErrorCode  status  = U_ZERO_ERROR;
810    int32_t     count;
811    int32_t     skipped = 0;
812    UChar       c;
813    UBool       isNotEOF = FALSE;
814
815    /* skip all ws in the input */
816    if (info->fIsString) {
817        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
818    }
819
820    /* get the string one character at a time, truncating to the width */
821    count = 0;
822
823    /* open the default converter */
824    conv = u_getDefaultConverter(&status);
825
826    if(U_FAILURE(status))
827        return -1;
828
829    while( (info->fWidth == -1 || count < info->fWidth)
830        && (isNotEOF = ufile_getch(input, &c))
831        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
832    {
833
834        if (!info->fSkipArg) {
835            /* put the character from the input onto the target */
836            source = &c;
837            /* Since we do this one character at a time, do it this way. */
838            if (info->fWidth > 0) {
839                limit = alias + info->fWidth - count;
840            }
841            else {
842                limit = alias + ucnv_getMaxCharSize(conv);
843            }
844
845            /* convert the character to the default codepage */
846            ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
847                NULL, TRUE, &status);
848
849            if(U_FAILURE(status)) {
850                /* clean up */
851                u_releaseDefaultConverter(conv);
852                return -1;
853            }
854        }
855
856        /* increment the count */
857        ++count;
858    }
859
860    /* put the final character we read back on the input */
861    if (!info->fSkipArg) {
862        if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
863            u_fungetc(c, input);
864
865        /* add the terminator */
866        if (info->fIsString) {
867            *alias = 0x00;
868        }
869    }
870
871    /* clean up */
872    u_releaseDefaultConverter(conv);
873
874    /* we converted 1 arg */
875    *argConverted = !info->fSkipArg;
876    return count + skipped;
877}
878
879static int32_t
880u_scanf_char_handler(UFILE          *input,
881                     u_scanf_spec_info *info,
882                     ufmt_args      *args,
883                     const UChar    *fmt,
884                     int32_t        *fmtConsumed,
885                     int32_t        *argConverted)
886{
887    if (info->fWidth < 0) {
888        info->fWidth = 1;
889    }
890    info->fIsString = FALSE;
891    return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
892}
893
894static int32_t
895u_scanf_ustring_handler(UFILE       *input,
896                        u_scanf_spec_info *info,
897                        ufmt_args   *args,
898                        const UChar *fmt,
899                        int32_t     *fmtConsumed,
900                        int32_t     *argConverted)
901{
902    UChar   *arg     = (UChar*)(args[0].ptrValue);
903    UChar   *alias     = arg;
904    int32_t count;
905    int32_t skipped = 0;
906    UChar   c;
907    UBool   isNotEOF = FALSE;
908
909    /* skip all ws in the input */
910    if (info->fIsString) {
911        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
912    }
913
914    /* get the string one character at a time, truncating to the width */
915    count = 0;
916
917    while( (info->fWidth == -1 || count < info->fWidth)
918        && (isNotEOF = ufile_getch(input, &c))
919        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
920    {
921
922        /* put the character from the input onto the target */
923        if (!info->fSkipArg) {
924            *alias++ = c;
925        }
926
927        /* increment the count */
928        ++count;
929    }
930
931    /* put the final character we read back on the input */
932    if (!info->fSkipArg) {
933        if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
934            u_fungetc(c, input);
935        }
936
937        /* add the terminator */
938        if (info->fIsString) {
939            *alias = 0x0000;
940        }
941    }
942
943    /* we converted 1 arg */
944    *argConverted = !info->fSkipArg;
945    return count + skipped;
946}
947
948static int32_t
949u_scanf_uchar_handler(UFILE         *input,
950                      u_scanf_spec_info *info,
951                      ufmt_args     *args,
952                      const UChar   *fmt,
953                      int32_t       *fmtConsumed,
954                      int32_t       *argConverted)
955{
956    if (info->fWidth < 0) {
957        info->fWidth = 1;
958    }
959    info->fIsString = FALSE;
960    return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
961}
962
963static int32_t
964u_scanf_spellout_handler(UFILE          *input,
965                         u_scanf_spec_info *info,
966                         ufmt_args      *args,
967                         const UChar    *fmt,
968                         int32_t        *fmtConsumed,
969                         int32_t        *argConverted)
970{
971    int32_t         len;
972    double          num;
973    UNumberFormat   *format;
974    int32_t         parsePos    = 0;
975    int32_t         skipped;
976    UErrorCode      status      = U_ZERO_ERROR;
977
978
979    /* skip all ws in the input */
980    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
981
982    /* fill the input's internal buffer */
983    ufile_fill_uchar_buffer(input);
984
985    /* determine the size of the input's buffer */
986    len = (int32_t)(input->str.fLimit - input->str.fPos);
987
988    /* truncate to the width, if specified */
989    if(info->fWidth != -1)
990        len = ufmt_min(len, info->fWidth);
991
992    /* get the formatter */
993    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
994
995    /* handle error */
996    if(format == 0)
997        return 0;
998
999    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
1000    /* This is not applicable to RBNF. */
1001    /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1002
1003    /* parse the number */
1004    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
1005
1006    if (!info->fSkipArg) {
1007        *(double*)(args[0].ptrValue) = num;
1008    }
1009
1010    /* mask off any necessary bits */
1011    /*  if(! info->fIsLong_double)
1012    num &= DBL_MAX;*/
1013
1014    /* update the input's position to reflect consumed data */
1015    input->str.fPos += parsePos;
1016
1017    /* we converted 1 arg */
1018    *argConverted = !info->fSkipArg;
1019    return parsePos + skipped;
1020}
1021
1022static int32_t
1023u_scanf_hex_handler(UFILE       *input,
1024                    u_scanf_spec_info *info,
1025                    ufmt_args   *args,
1026                    const UChar *fmt,
1027                    int32_t     *fmtConsumed,
1028                    int32_t     *argConverted)
1029{
1030    int32_t     len;
1031    int32_t     skipped;
1032    void        *num    = (void*) (args[0].ptrValue);
1033    int64_t     result;
1034
1035    /* skip all ws in the input */
1036    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1037
1038    /* fill the input's internal buffer */
1039    ufile_fill_uchar_buffer(input);
1040
1041    /* determine the size of the input's buffer */
1042    len = (int32_t)(input->str.fLimit - input->str.fPos);
1043
1044    /* truncate to the width, if specified */
1045    if(info->fWidth != -1)
1046        len = ufmt_min(len, info->fWidth);
1047
1048    /* check for alternate form */
1049    if( *(input->str.fPos) == 0x0030 &&
1050        (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
1051
1052        /* skip the '0' and 'x' or 'X' if present */
1053        input->str.fPos += 2;
1054        len -= 2;
1055    }
1056
1057    /* parse the number */
1058    result = ufmt_uto64(input->str.fPos, &len, 16);
1059
1060    /* update the input's position to reflect consumed data */
1061    input->str.fPos += len;
1062
1063    /* mask off any necessary bits */
1064    if (!info->fSkipArg) {
1065        if (info->fIsShort)
1066            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1067        else if (info->fIsLongLong)
1068            *(int64_t*)num = result;
1069        else
1070            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1071    }
1072
1073    /* we converted 1 arg */
1074    *argConverted = !info->fSkipArg;
1075    return len + skipped;
1076}
1077
1078static int32_t
1079u_scanf_octal_handler(UFILE         *input,
1080                      u_scanf_spec_info *info,
1081                      ufmt_args     *args,
1082                      const UChar   *fmt,
1083                      int32_t       *fmtConsumed,
1084                      int32_t       *argConverted)
1085{
1086    int32_t     len;
1087    int32_t     skipped;
1088    void        *num         = (void*) (args[0].ptrValue);
1089    int64_t     result;
1090
1091    /* skip all ws in the input */
1092    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1093
1094    /* fill the input's internal buffer */
1095    ufile_fill_uchar_buffer(input);
1096
1097    /* determine the size of the input's buffer */
1098    len = (int32_t)(input->str.fLimit - input->str.fPos);
1099
1100    /* truncate to the width, if specified */
1101    if(info->fWidth != -1)
1102        len = ufmt_min(len, info->fWidth);
1103
1104    /* parse the number */
1105    result = ufmt_uto64(input->str.fPos, &len, 8);
1106
1107    /* update the input's position to reflect consumed data */
1108    input->str.fPos += len;
1109
1110    /* mask off any necessary bits */
1111    if (!info->fSkipArg) {
1112        if (info->fIsShort)
1113            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1114        else if (info->fIsLongLong)
1115            *(int64_t*)num = result;
1116        else
1117            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1118    }
1119
1120    /* we converted 1 arg */
1121    *argConverted = !info->fSkipArg;
1122    return len + skipped;
1123}
1124
1125static int32_t
1126u_scanf_pointer_handler(UFILE       *input,
1127                        u_scanf_spec_info *info,
1128                        ufmt_args   *args,
1129                        const UChar *fmt,
1130                        int32_t     *fmtConsumed,
1131                        int32_t     *argConverted)
1132{
1133    int32_t len;
1134    int32_t skipped;
1135    void    *result;
1136    void    **p     = (void**)(args[0].ptrValue);
1137
1138
1139    /* skip all ws in the input */
1140    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1141
1142    /* fill the input's internal buffer */
1143    ufile_fill_uchar_buffer(input);
1144
1145    /* determine the size of the input's buffer */
1146    len = (int32_t)(input->str.fLimit - input->str.fPos);
1147
1148    /* truncate to the width, if specified */
1149    if(info->fWidth != -1) {
1150        len = ufmt_min(len, info->fWidth);
1151    }
1152
1153    /* Make sure that we don't consume too much */
1154    if (len > (int32_t)(sizeof(void*)*2)) {
1155        len = (int32_t)(sizeof(void*)*2);
1156    }
1157
1158    /* parse the pointer - assign to temporary value */
1159    result = ufmt_utop(input->str.fPos, &len);
1160
1161    if (!info->fSkipArg) {
1162        *p = result;
1163    }
1164
1165    /* update the input's position to reflect consumed data */
1166    input->str.fPos += len;
1167
1168    /* we converted 1 arg */
1169    *argConverted = !info->fSkipArg;
1170    return len + skipped;
1171}
1172
1173static int32_t
1174u_scanf_scanset_handler(UFILE       *input,
1175                        u_scanf_spec_info *info,
1176                        ufmt_args   *args,
1177                        const UChar *fmt,
1178                        int32_t     *fmtConsumed,
1179                        int32_t     *argConverted)
1180{
1181    USet        *scanset;
1182    UErrorCode  status = U_ZERO_ERROR;
1183    int32_t     chLeft = INT32_MAX;
1184    UChar32     c;
1185    UChar       *alias = (UChar*) (args[0].ptrValue);
1186    UBool       isNotEOF = FALSE;
1187    UBool       readCharacter = FALSE;
1188
1189    /* Create an empty set */
1190    scanset = uset_open(0, -1);
1191
1192    /* Back up one to get the [ */
1193    fmt--;
1194
1195    /* truncate to the width, if specified and alias the target */
1196    if(info->fWidth >= 0) {
1197        chLeft = info->fWidth;
1198    }
1199
1200    /* parse the scanset from the fmt string */
1201    *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1202
1203    /* verify that the parse was successful */
1204    if (U_SUCCESS(status)) {
1205        c=0;
1206
1207        /* grab characters one at a time and make sure they are in the scanset */
1208        while(chLeft > 0) {
1209            if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
1210                readCharacter = TRUE;
1211                if (!info->fSkipArg) {
1212                    int32_t idx = 0;
1213                    UBool isError = FALSE;
1214
1215                    U16_APPEND(alias, idx, chLeft, c, isError);
1216                    if (isError) {
1217                        break;
1218                    }
1219                    alias += idx;
1220                }
1221                chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1222            }
1223            else {
1224                /* if the character's not in the scanset, break out */
1225                break;
1226            }
1227        }
1228
1229        /* put the final character we read back on the input */
1230        if(isNotEOF && chLeft > 0) {
1231            u_fungetc(c, input);
1232        }
1233    }
1234
1235    uset_close(scanset);
1236
1237    /* if we didn't match at least 1 character, fail */
1238    if(!readCharacter)
1239        return -1;
1240    /* otherwise, add the terminator */
1241    else if (!info->fSkipArg) {
1242        *alias = 0x00;
1243    }
1244
1245    /* we converted 1 arg */
1246    *argConverted = !info->fSkipArg;
1247    return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1248}
1249
1250/* Use US-ASCII characters only for formatting. Most codepages have
1251 characters 20-7F from Unicode. Using any other codepage specific
1252 characters will make it very difficult to format the string on
1253 non-Unicode machines */
1254static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1255/* 0x20 */
1256    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1257    UFMT_EMPTY,         UFMT_SIMPLE_PERCENT,UFMT_EMPTY,         UFMT_EMPTY,
1258    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1259    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1260
1261/* 0x30 */
1262    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1263    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1264    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1265    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1266
1267/* 0x40 */
1268    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR,
1269    UFMT_EMPTY,         UFMT_SCIENTIFIC,    UFMT_EMPTY,         UFMT_SCIDBL,
1270#ifdef U_USE_OBSOLETE_IO_FORMATTING
1271    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR/*deprecated*/,
1272#else
1273    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1274#endif
1275    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1276
1277/* 0x50 */
1278    UFMT_PERCENT,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_USTRING,
1279#ifdef U_USE_OBSOLETE_IO_FORMATTING
1280    UFMT_EMPTY,         UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT,      UFMT_EMPTY,
1281#else
1282    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SPELLOUT,      UFMT_EMPTY,
1283#endif
1284    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SCANSET,
1285    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1286
1287/* 0x60 */
1288    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_CHAR,
1289    UFMT_INT,           UFMT_SCIENTIFIC,    UFMT_DOUBLE,        UFMT_SCIDBL,
1290    UFMT_EMPTY,         UFMT_INT,           UFMT_EMPTY,         UFMT_EMPTY,
1291    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_COUNT,         UFMT_OCTAL,
1292
1293/* 0x70 */
1294    UFMT_POINTER,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_STRING,
1295    UFMT_EMPTY,         UFMT_UINT,          UFMT_EMPTY,         UFMT_EMPTY,
1296    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1297    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1298};
1299
1300U_CFUNC int32_t
1301u_scanf_parse(UFILE     *f,
1302            const UChar *patternSpecification,
1303            va_list     ap)
1304{
1305    const UChar     *alias;
1306    int32_t         count, converted, argConsumed, cpConsumed;
1307    uint16_t        handlerNum;
1308
1309    ufmt_args       args;
1310    u_scanf_spec    spec;
1311    ufmt_type_info  info;
1312    u_scanf_handler handler;
1313
1314    /* alias the pattern */
1315    alias = patternSpecification;
1316
1317    /* haven't converted anything yet */
1318    argConsumed = 0;
1319    converted = 0;
1320    cpConsumed = 0;
1321
1322    /* iterate through the pattern */
1323    for(;;) {
1324
1325        /* match any characters up to the next '%' */
1326        while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1327            alias++;
1328        }
1329
1330        /* if we aren't at a '%', or if we're at end of string, break*/
1331        if(*alias != UP_PERCENT || *alias == 0x0000)
1332            break;
1333
1334        /* parse the specifier */
1335        count = u_scanf_parse_spec(alias, &spec);
1336
1337        /* update the pointer in pattern */
1338        alias += count;
1339
1340        handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1341        if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1342            /* skip the argument, if necessary */
1343            /* query the info function for argument information */
1344            info = g_u_scanf_infos[ handlerNum ].info;
1345            if (info != ufmt_count && u_feof(f)) {
1346                break;
1347            }
1348            else if(spec.fInfo.fSkipArg) {
1349                args.ptrValue = NULL;
1350            }
1351            else {
1352                switch(info) {
1353                case ufmt_count:
1354                    /* set the spec's width to the # of items converted */
1355                    spec.fInfo.fWidth = cpConsumed;
1356                    /* fall through to next case */
1357                case ufmt_char:
1358                case ufmt_uchar:
1359                case ufmt_int:
1360                case ufmt_string:
1361                case ufmt_ustring:
1362                case ufmt_pointer:
1363                case ufmt_float:
1364                case ufmt_double:
1365                    args.ptrValue = va_arg(ap, void*);
1366                    break;
1367
1368                default:
1369                    /* else args is ignored */
1370                    args.ptrValue = NULL;
1371                    break;
1372                }
1373            }
1374
1375            /* call the handler function */
1376            handler = g_u_scanf_infos[ handlerNum ].handler;
1377            if(handler != 0) {
1378
1379                /* reset count to 1 so that += for alias works. */
1380                count = 1;
1381
1382                cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1383
1384                /* if the handler encountered an error condition, break */
1385                if(argConsumed < 0) {
1386                    converted = -1;
1387                    break;
1388                }
1389
1390                /* add to the # of items converted */
1391                converted += argConsumed;
1392
1393                /* update the pointer in pattern */
1394                alias += count-1;
1395            }
1396            /* else do nothing */
1397        }
1398        /* else do nothing */
1399
1400        /* just ignore unknown tags */
1401    }
1402
1403    /* return # of items converted */
1404    return converted;
1405}
1406
1407#endif /* #if !UCONFIG_NO_FORMATTING */
1408