uscanf_p.c revision 83a171d1a62abf406f7f44ae671823d5ec20db7d
1/*
2*******************************************************************************
3*
4*   Copyright (C) 1998-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*
* File uscanf_p.c
10*
11* Modification History:
12*
13*   Date        Name        Description
14*   12/02/98    stephen        Creation.
15*   03/13/99    stephen     Modified for new C API.
16*******************************************************************************
17*/
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_FORMATTING
22
23#include "unicode/uchar.h"
24#include "unicode/ustring.h"
25#include "unicode/unum.h"
26#include "unicode/udat.h"
27#include "unicode/uset.h"
28#include "uscanf.h"
29#include "ufmt_cmn.h"
30#include "ufile.h"
31#include "locbund.h"
32
33#include "cmemory.h"
34#include "ustr_cnv.h"
35
/* flag characters for u_scanf */
#define FLAG_ASTERISK 0x002A    /* '*': skip (do not assign) the argument */
#define FLAG_PAREN 0x0028       /* '(': a pad character specification follows */

/*
 * Non-zero if s is one of the u_scanf flag characters.
 * The whole expansion is parenthesized so the macro composes correctly with
 * surrounding operators (for example !ISFLAG(c) or ISFLAG(c) && other).
 * s is evaluated more than once; do not pass an expression with side effects.
 */
#define ISFLAG(s)    ((s) == FLAG_ASTERISK || \
            (s) == FLAG_PAREN)
42
43/* special characters for u_scanf */
44#define SPEC_DOLLARSIGN 0x0024
45
/* unicode digits */
#define DIGIT_ZERO 0x0030
#define DIGIT_ONE 0x0031
#define DIGIT_TWO 0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR 0x0034
#define DIGIT_FIVE 0x0035
#define DIGIT_SIX 0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE 0x0039

/*
 * Non-zero if s is one of the ten digits U+0030..U+0039.
 * The digit code points are contiguous, so a range check is equivalent to
 * comparing against each one. The whole expansion is parenthesized so the
 * macro composes correctly with surrounding operators (for example
 * !ISDIGIT(c)). s is evaluated twice; do not pass an expression with side
 * effects.
 */
#define ISDIGIT(s)    ((s) >= DIGIT_ZERO && \
            (s) <= DIGIT_NINE)
68
/* u_scanf modifiers */
#define MOD_H 0x0068        /* 'h': short */
#define MOD_LOWERL 0x006C   /* 'l': long, or long long when doubled */
#define MOD_L 0x004C        /* 'L': long double */

/*
 * Non-zero if s is one of the u_scanf size modifier characters.
 * The whole expansion is parenthesized so the macro composes correctly with
 * surrounding operators (for example !ISMOD(c)).
 * s is evaluated more than once; do not pass an expression with side effects.
 */
#define ISMOD(s)    ((s) == MOD_H || \
            (s) == MOD_LOWERL || \
            (s) == MOD_L)
77
/**
 * Struct encapsulating a single u_scanf format specification.
 * It is filled in by u_scanf_parse_spec and handed to the conversion
 * handlers, some of which adjust fWidth and fIsString before delegating.
 */
typedef struct u_scanf_spec_info {
    int32_t fWidth;         /* Field width, or -1 if none was given  */

    UChar   fSpec;          /* Format specification (conversion character)  */

    UChar   fPadChar;       /* Padding character; defaults to U+0020  */

    UBool   fSkipArg;       /* TRUE if arg should be skipped ('*' flag) */
    UBool   fIsLongDouble;  /* L flag  */
    UBool   fIsShort;       /* h flag  */
    UBool   fIsLong;        /* l flag  */
    UBool   fIsLongLong;    /* ll flag  */
    UBool   fIsString;      /* TRUE if this is a NUL-terminated string. */
} u_scanf_spec_info;
95
96
/**
 * Struct pairing a parsed u_scanf format specification with the
 * position of its argument.
 */
typedef struct u_scanf_spec {
    u_scanf_spec_info    fInfo;        /* Information on this spec */
    int32_t        fArgPos;    /* Position of data in arg list, or -1 when no "n$" prefix was given */
} u_scanf_spec;
104
105/**
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
109 * format specifier.
110 * @return The number of characters contained in this specifier.
111 */
112static int32_t
113u_scanf_parse_spec (const UChar     *fmt,
114            u_scanf_spec    *spec)
115{
116    const UChar *s = fmt;
117    const UChar *backup;
118    u_scanf_spec_info *info = &(spec->fInfo);
119
120    /* initialize spec to default values */
121    spec->fArgPos             = -1;
122
123    info->fWidth        = -1;
124    info->fSpec         = 0x0000;
125    info->fPadChar      = 0x0020;
126    info->fSkipArg      = FALSE;
127    info->fIsLongDouble = FALSE;
128    info->fIsShort      = FALSE;
129    info->fIsLong       = FALSE;
130    info->fIsLongLong   = FALSE;
131    info->fIsString     = TRUE;
132
133
134    /* skip over the initial '%' */
135    s++;
136
137    /* Check for positional argument */
138    if(ISDIGIT(*s)) {
139
140        /* Save the current position */
141        backup = s;
142
143        /* handle positional parameters */
144        if(ISDIGIT(*s)) {
145            spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
146
147            while(ISDIGIT(*s)) {
148                spec->fArgPos *= 10;
149                spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
150            }
151        }
152
153        /* if there is no '$', don't read anything */
154        if(*s != SPEC_DOLLARSIGN) {
155            spec->fArgPos = -1;
156            s = backup;
157        }
158        /* munge the '$' */
159        else
160            s++;
161    }
162
163    /* Get any format flags */
164    while(ISFLAG(*s)) {
165        switch(*s++) {
166
167            /* skip argument */
168        case FLAG_ASTERISK:
169            info->fSkipArg = TRUE;
170            break;
171
172            /* pad character specified */
173        case FLAG_PAREN:
174
175            /* first four characters are hex values for pad char */
176            info->fPadChar = (UChar)ufmt_digitvalue(*s++);
177            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
178            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
179            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180
181            /* final character is ignored */
182            s++;
183
184            break;
185        }
186    }
187
188    /* Get the width */
189    if(ISDIGIT(*s)){
190        info->fWidth = (int) (*s++ - DIGIT_ZERO);
191
192        while(ISDIGIT(*s)) {
193            info->fWidth *= 10;
194            info->fWidth += (int) (*s++ - DIGIT_ZERO);
195        }
196    }
197
198    /* Get any modifiers */
199    if(ISMOD(*s)) {
200        switch(*s++) {
201
202            /* short */
203        case MOD_H:
204            info->fIsShort = TRUE;
205            break;
206
207            /* long or long long */
208        case MOD_LOWERL:
209            if(*s == MOD_LOWERL) {
210                info->fIsLongLong = TRUE;
211                /* skip over the next 'l' */
212                s++;
213            }
214            else
215                info->fIsLong = TRUE;
216            break;
217
218            /* long double */
219        case MOD_L:
220            info->fIsLongDouble = TRUE;
221            break;
222        }
223    }
224
225    /* finally, get the specifier letter */
226    info->fSpec = *s++;
227
228    /* return # of characters in this specifier */
229    return (int32_t)(s - fmt);
230}
231
/* U+0025 PERCENT SIGN */
#define UP_PERCENT 0x0025


/*
 * Each UFMT_* macro below expands to a brace-enclosed pair
 * {argument type tag, handler function}, in the same order as the fields
 * of u_scanf_info (info, handler). The short comment above each macro names
 * the conversion character(s) it is meant for; the table that uses these
 * macros is not part of this section.
 */

/* ANSI style formatting */
/* Use US-ASCII characters only for formatting */

/* % */
#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
/* s */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}
/* c */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}
/* d, i */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}
/* u */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}
/* o */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}
/* x, X */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}
/* f */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}
/* e, E */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}
/* g, G */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}
/* n */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}
/* [ */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}

/* non-ANSI extensions */
/* Use US-ASCII characters only for formatting */

/* p */
#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}
/* V */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}
/* P */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}
/* C  K is old format */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}
/* S  U is old format */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}


/* Entry with no handler (NULL handler pointer) */
#define UFMT_EMPTY {ufmt_empty, NULL}
279
/**
 * A u_scanf handler function.
 * A u_scanf handler is responsible for handling a single u_scanf
 * format specification, for example 'd' or 's'.
 * @param stream The UFILE from which to read input.
 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
 * information on the format specification.
 * @param args A pointer to the argument data
 * @param fmt A pointer to the first character in the format string
 * following the spec.
 * @param fmtConsumed On output, set to the number of characters consumed
 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
 * @param argConverted On output, the number of arguments converted and
 * assigned, or -1 if an error occurred.
 * @return The number of code points consumed during reading.
 */
typedef int32_t (*u_scanf_handler) (UFILE   *stream,
                   u_scanf_spec_info  *info,
                   ufmt_args                *args,
                   const UChar              *fmt,
                   int32_t                  *fmtConsumed,
                   int32_t                  *argConverted);
302
/**
 * Pairs the type of argument a conversion expects with the handler
 * function that performs the conversion.
 */
typedef struct u_scanf_info {
    ufmt_type_info info;        /* kind of argument the conversion takes */
    u_scanf_handler handler;    /* function that reads and converts the input */
} u_scanf_info;
307
/* Number of entries in the format handler table (the table itself is defined outside this section) */
#define USCANF_NUM_FMT_HANDLERS 108
/* Capacity, in UChars, of the local buffers that receive number format symbols */
#define USCANF_SYMBOL_BUFFER_SIZE 8

/* We do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS 0x20
313
314
315static int32_t
316u_scanf_skip_leading_ws(UFILE   *input,
317                        UChar   pad)
318{
319    UChar   c;
320    int32_t count = 0;
321    UBool isNotEOF;
322
323    /* skip all leading ws in the input */
324    while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
325    {
326        count++;
327    }
328
329    /* put the final character back on the input */
330    if(isNotEOF)
331        u_fungetc(c, input);
332
333    return count;
334}
335
336/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
337static int32_t
338u_scanf_skip_leading_positive_sign(UFILE   *input,
339                                   UNumberFormat *format,
340                                   UErrorCode *status)
341{
342    UChar   c;
343    int32_t count = 0;
344    UBool isNotEOF;
345    UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
346    int32_t symbolLen;
347    UErrorCode localStatus = U_ZERO_ERROR;
348
349    if (U_SUCCESS(*status)) {
350        symbolLen = unum_getSymbol(format,
351            UNUM_PLUS_SIGN_SYMBOL,
352            plusSymbol,
353            sizeof(plusSymbol)/sizeof(*plusSymbol),
354            &localStatus);
355
356        if (U_SUCCESS(localStatus)) {
357            /* skip all leading ws in the input */
358            while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
359            {
360                count++;
361            }
362
363            /* put the final character back on the input */
364            if(isNotEOF) {
365                u_fungetc(c, input);
366            }
367        }
368    }
369
370    return count;
371}
372
373static int32_t
374u_scanf_simple_percent_handler(UFILE        *input,
375                               u_scanf_spec_info *info,
376                               ufmt_args    *args,
377                               const UChar  *fmt,
378                               int32_t      *fmtConsumed,
379                               int32_t      *argConverted)
380{
381    /* make sure the next character in the input is a percent */
382    *argConverted = 0;
383    if(u_fgetc(input) != 0x0025) {
384        *argConverted = -1;
385    }
386    return 1;
387}
388
389static int32_t
390u_scanf_count_handler(UFILE         *input,
391                      u_scanf_spec_info *info,
392                      ufmt_args     *args,
393                      const UChar   *fmt,
394                      int32_t       *fmtConsumed,
395                      int32_t       *argConverted)
396{
397    /* in the special case of count, the u_scanf_spec_info's width */
398    /* will contain the # of items converted thus far */
399    if (!info->fSkipArg) {
400        if (info->fIsShort)
401            *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
402        else if (info->fIsLongLong)
403            *(int64_t*)(args[0].ptrValue) = info->fWidth;
404        else
405            *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
406    }
407    *argConverted = 0;
408
409    /* we converted 0 args */
410    return 0;
411}
412
413static int32_t
414u_scanf_double_handler(UFILE        *input,
415                       u_scanf_spec_info *info,
416                       ufmt_args    *args,
417                       const UChar  *fmt,
418                       int32_t      *fmtConsumed,
419                       int32_t      *argConverted)
420{
421    int32_t         len;
422    double          num;
423    UNumberFormat   *format;
424    int32_t         parsePos    = 0;
425    int32_t         skipped;
426    UErrorCode      status      = U_ZERO_ERROR;
427
428
429    /* skip all ws in the input */
430    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
431
432    /* fill the input's internal buffer */
433    ufile_fill_uchar_buffer(input);
434
435    /* determine the size of the input's buffer */
436    len = (int32_t)(input->str.fLimit - input->str.fPos);
437
438    /* truncate to the width, if specified */
439    if(info->fWidth != -1)
440        len = ufmt_min(len, info->fWidth);
441
442    /* get the formatter */
443    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
444
445    /* handle error */
446    if(format == 0)
447        return 0;
448
449    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
450    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
451
452    /* parse the number */
453    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
454
455    if (!info->fSkipArg) {
456        if (info->fIsLong)
457            *(double*)(args[0].ptrValue) = num;
458        else if (info->fIsLongDouble)
459            *(long double*)(args[0].ptrValue) = num;
460        else
461            *(float*)(args[0].ptrValue) = (float)num;
462    }
463
464    /* mask off any necessary bits */
465    /*  if(! info->fIsLong_double)
466    num &= DBL_MAX;*/
467
468    /* update the input's position to reflect consumed data */
469    input->str.fPos += parsePos;
470
471    /* we converted 1 arg */
472    *argConverted = !info->fSkipArg;
473    return parsePos + skipped;
474}
475
476#define UPRINTF_SYMBOL_BUFFER_SIZE 8
477
478static int32_t
479u_scanf_scientific_handler(UFILE        *input,
480                           u_scanf_spec_info *info,
481                           ufmt_args    *args,
482                           const UChar  *fmt,
483                           int32_t      *fmtConsumed,
484                           int32_t      *argConverted)
485{
486    int32_t         len;
487    double          num;
488    UNumberFormat   *format;
489    int32_t         parsePos    = 0;
490    int32_t         skipped;
491    UErrorCode      status      = U_ZERO_ERROR;
492    UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
493    int32_t srcLen, expLen;
494    UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
495
496
497    /* skip all ws in the input */
498    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
499
500    /* fill the input's internal buffer */
501    ufile_fill_uchar_buffer(input);
502
503    /* determine the size of the input's buffer */
504    len = (int32_t)(input->str.fLimit - input->str.fPos);
505
506    /* truncate to the width, if specified */
507    if(info->fWidth != -1)
508        len = ufmt_min(len, info->fWidth);
509
510    /* get the formatter */
511    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
512
513    /* handle error */
514    if(format == 0)
515        return 0;
516
517    /* set the appropriate flags on the formatter */
518
519    srcLen = unum_getSymbol(format,
520        UNUM_EXPONENTIAL_SYMBOL,
521        srcExpBuf,
522        sizeof(srcExpBuf),
523        &status);
524
525    /* Upper/lower case the e */
526    if (info->fSpec == (UChar)0x65 /* e */) {
527        expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf),
528            srcExpBuf, srcLen,
529            input->str.fBundle.fLocale,
530            &status);
531    }
532    else {
533        expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf),
534            srcExpBuf, srcLen,
535            input->str.fBundle.fLocale,
536            &status);
537    }
538
539    unum_setSymbol(format,
540        UNUM_EXPONENTIAL_SYMBOL,
541        expBuf,
542        expLen,
543        &status);
544
545
546
547
548    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
549    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
550
551    /* parse the number */
552    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
553
554    if (!info->fSkipArg) {
555        if (info->fIsLong)
556            *(double*)(args[0].ptrValue) = num;
557        else if (info->fIsLongDouble)
558            *(long double*)(args[0].ptrValue) = num;
559        else
560            *(float*)(args[0].ptrValue) = (float)num;
561    }
562
563    /* mask off any necessary bits */
564    /*  if(! info->fIsLong_double)
565    num &= DBL_MAX;*/
566
567    /* update the input's position to reflect consumed data */
568    input->str.fPos += parsePos;
569
570    /* we converted 1 arg */
571    *argConverted = !info->fSkipArg;
572    return parsePos + skipped;
573}
574
575static int32_t
576u_scanf_scidbl_handler(UFILE        *input,
577                       u_scanf_spec_info *info,
578                       ufmt_args    *args,
579                       const UChar  *fmt,
580                       int32_t      *fmtConsumed,
581                       int32_t      *argConverted)
582{
583    int32_t       len;
584    double        num;
585    UNumberFormat *scientificFormat, *genericFormat;
586    /*int32_t       scientificResult, genericResult;*/
587    double        scientificResult, genericResult;
588    int32_t       scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
589    int32_t       skipped;
590    UErrorCode    scientificStatus = U_ZERO_ERROR;
591    UErrorCode    genericStatus = U_ZERO_ERROR;
592
593
594    /* since we can't determine by scanning the characters whether */
595    /* a number was formatted in the 'f' or 'g' styles, parse the */
596    /* string with both formatters, and assume whichever one */
597    /* parsed the most is the correct formatter to use */
598
599
600    /* skip all ws in the input */
601    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
602
603    /* fill the input's internal buffer */
604    ufile_fill_uchar_buffer(input);
605
606    /* determine the size of the input's buffer */
607    len = (int32_t)(input->str.fLimit - input->str.fPos);
608
609    /* truncate to the width, if specified */
610    if(info->fWidth != -1)
611        len = ufmt_min(len, info->fWidth);
612
613    /* get the formatters */
614    scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
615    genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
616
617    /* handle error */
618    if(scientificFormat == 0 || genericFormat == 0)
619        return 0;
620
621    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
622    skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
623
624    /* parse the number using each format*/
625
626    scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
627        &scientificParsePos, &scientificStatus);
628
629    genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
630        &genericParsePos, &genericStatus);
631
632    /* determine which parse made it farther */
633    if(scientificParsePos > genericParsePos) {
634        /* stash the result in num */
635        num = scientificResult;
636        /* update the input's position to reflect consumed data */
637        parsePos += scientificParsePos;
638    }
639    else {
640        /* stash the result in num */
641        num = genericResult;
642        /* update the input's position to reflect consumed data */
643        parsePos += genericParsePos;
644    }
645    input->str.fPos += parsePos;
646
647    if (!info->fSkipArg) {
648        if (info->fIsLong)
649            *(double*)(args[0].ptrValue) = num;
650        else if (info->fIsLongDouble)
651            *(long double*)(args[0].ptrValue) = num;
652        else
653            *(float*)(args[0].ptrValue) = (float)num;
654    }
655
656    /* mask off any necessary bits */
657    /*  if(! info->fIsLong_double)
658    num &= DBL_MAX;*/
659
660    /* we converted 1 arg */
661    *argConverted = !info->fSkipArg;
662    return parsePos + skipped;
663}
664
665static int32_t
666u_scanf_integer_handler(UFILE       *input,
667                        u_scanf_spec_info *info,
668                        ufmt_args   *args,
669                        const UChar *fmt,
670                        int32_t     *fmtConsumed,
671                        int32_t     *argConverted)
672{
673    int32_t         len;
674    void            *num        = (void*) (args[0].ptrValue);
675    UNumberFormat   *format;
676    int32_t         parsePos    = 0;
677    int32_t         skipped;
678    UErrorCode      status      = U_ZERO_ERROR;
679    int64_t         result;
680
681
682    /* skip all ws in the input */
683    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
684
685    /* fill the input's internal buffer */
686    ufile_fill_uchar_buffer(input);
687
688    /* determine the size of the input's buffer */
689    len = (int32_t)(input->str.fLimit - input->str.fPos);
690
691    /* truncate to the width, if specified */
692    if(info->fWidth != -1)
693        len = ufmt_min(len, info->fWidth);
694
695    /* get the formatter */
696    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
697
698    /* handle error */
699    if(format == 0)
700        return 0;
701
702    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
703    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
704
705    /* parse the number */
706    result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
707
708    /* mask off any necessary bits */
709    if (!info->fSkipArg) {
710        if (info->fIsShort)
711            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
712        else if (info->fIsLongLong)
713            *(int64_t*)num = result;
714        else
715            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
716    }
717
718    /* update the input's position to reflect consumed data */
719    input->str.fPos += parsePos;
720
721    /* we converted 1 arg */
722    *argConverted = !info->fSkipArg;
723    return parsePos + skipped;
724}
725
726static int32_t
727u_scanf_uinteger_handler(UFILE          *input,
728                         u_scanf_spec_info *info,
729                         ufmt_args      *args,
730                         const UChar    *fmt,
731                         int32_t        *fmtConsumed,
732                         int32_t        *argConverted)
733{
734    /* TODO Fix this when Numberformat handles uint64_t */
735    return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
736}
737
738static int32_t
739u_scanf_percent_handler(UFILE       *input,
740                        u_scanf_spec_info *info,
741                        ufmt_args   *args,
742                        const UChar *fmt,
743                        int32_t     *fmtConsumed,
744                        int32_t     *argConverted)
745{
746    int32_t         len;
747    double          num;
748    UNumberFormat   *format;
749    int32_t         parsePos    = 0;
750    UErrorCode      status      = U_ZERO_ERROR;
751
752
753    /* skip all ws in the input */
754    u_scanf_skip_leading_ws(input, info->fPadChar);
755
756    /* fill the input's internal buffer */
757    ufile_fill_uchar_buffer(input);
758
759    /* determine the size of the input's buffer */
760    len = (int32_t)(input->str.fLimit - input->str.fPos);
761
762    /* truncate to the width, if specified */
763    if(info->fWidth != -1)
764        len = ufmt_min(len, info->fWidth);
765
766    /* get the formatter */
767    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
768
769    /* handle error */
770    if(format == 0)
771        return 0;
772
773    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
774    u_scanf_skip_leading_positive_sign(input, format, &status);
775
776    /* parse the number */
777    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
778
779    if (!info->fSkipArg) {
780        *(double*)(args[0].ptrValue) = num;
781    }
782
783    /* mask off any necessary bits */
784    /*  if(! info->fIsLong_double)
785    num &= DBL_MAX;*/
786
787    /* update the input's position to reflect consumed data */
788    input->str.fPos += parsePos;
789
790    /* we converted 1 arg */
791    *argConverted = !info->fSkipArg;
792    return parsePos;
793}
794
795static int32_t
796u_scanf_string_handler(UFILE        *input,
797                       u_scanf_spec_info *info,
798                       ufmt_args    *args,
799                       const UChar  *fmt,
800                       int32_t      *fmtConsumed,
801                       int32_t      *argConverted)
802{
803    const UChar *source;
804    UConverter  *conv;
805    char        *arg    = (char*)(args[0].ptrValue);
806    char        *alias  = arg;
807    char        *limit;
808    UErrorCode  status  = U_ZERO_ERROR;
809    int32_t     count;
810    int32_t     skipped = 0;
811    UChar       c;
812    UBool       isNotEOF = FALSE;
813
814    /* skip all ws in the input */
815    if (info->fIsString) {
816        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
817    }
818
819    /* get the string one character at a time, truncating to the width */
820    count = 0;
821
822    /* open the default converter */
823    conv = u_getDefaultConverter(&status);
824
825    if(U_FAILURE(status))
826        return -1;
827
828    while( (info->fWidth == -1 || count < info->fWidth)
829        && (isNotEOF = ufile_getch(input, &c))
830        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
831    {
832
833        if (!info->fSkipArg) {
834            /* put the character from the input onto the target */
835            source = &c;
836            /* Since we do this one character at a time, do it this way. */
837            if (info->fWidth > 0) {
838                limit = alias + info->fWidth - count;
839            }
840            else {
841                limit = alias + ucnv_getMaxCharSize(conv);
842            }
843
844            /* convert the character to the default codepage */
845            ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
846                NULL, TRUE, &status);
847
848            if(U_FAILURE(status)) {
849                /* clean up */
850                u_releaseDefaultConverter(conv);
851                return -1;
852            }
853        }
854
855        /* increment the count */
856        ++count;
857    }
858
859    /* put the final character we read back on the input */
860    if (!info->fSkipArg) {
861        if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
862            u_fungetc(c, input);
863
864        /* add the terminator */
865        if (info->fIsString) {
866            *alias = 0x00;
867        }
868    }
869
870    /* clean up */
871    u_releaseDefaultConverter(conv);
872
873    /* we converted 1 arg */
874    *argConverted = !info->fSkipArg;
875    return count + skipped;
876}
877
878static int32_t
879u_scanf_char_handler(UFILE          *input,
880                     u_scanf_spec_info *info,
881                     ufmt_args      *args,
882                     const UChar    *fmt,
883                     int32_t        *fmtConsumed,
884                     int32_t        *argConverted)
885{
886    if (info->fWidth < 0) {
887        info->fWidth = 1;
888    }
889    info->fIsString = FALSE;
890    return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
891}
892
893static int32_t
894u_scanf_ustring_handler(UFILE       *input,
895                        u_scanf_spec_info *info,
896                        ufmt_args   *args,
897                        const UChar *fmt,
898                        int32_t     *fmtConsumed,
899                        int32_t     *argConverted)
900{
901    UChar   *arg     = (UChar*)(args[0].ptrValue);
902    UChar   *alias     = arg;
903    int32_t count;
904    int32_t skipped = 0;
905    UChar   c;
906    UBool   isNotEOF = FALSE;
907
908    /* skip all ws in the input */
909    if (info->fIsString) {
910        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
911    }
912
913    /* get the string one character at a time, truncating to the width */
914    count = 0;
915
916    while( (info->fWidth == -1 || count < info->fWidth)
917        && (isNotEOF = ufile_getch(input, &c))
918        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
919    {
920
921        /* put the character from the input onto the target */
922        if (!info->fSkipArg) {
923            *alias++ = c;
924        }
925
926        /* increment the count */
927        ++count;
928    }
929
930    /* put the final character we read back on the input */
931    if (!info->fSkipArg) {
932        if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
933            u_fungetc(c, input);
934        }
935
936        /* add the terminator */
937        if (info->fIsString) {
938            *alias = 0x0000;
939        }
940    }
941
942    /* we converted 1 arg */
943    *argConverted = !info->fSkipArg;
944    return count + skipped;
945}
946
947static int32_t
948u_scanf_uchar_handler(UFILE         *input,
949                      u_scanf_spec_info *info,
950                      ufmt_args     *args,
951                      const UChar   *fmt,
952                      int32_t       *fmtConsumed,
953                      int32_t       *argConverted)
954{
955    if (info->fWidth < 0) {
956        info->fWidth = 1;
957    }
958    info->fIsString = FALSE;
959    return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
960}
961
962static int32_t
963u_scanf_spellout_handler(UFILE          *input,
964                         u_scanf_spec_info *info,
965                         ufmt_args      *args,
966                         const UChar    *fmt,
967                         int32_t        *fmtConsumed,
968                         int32_t        *argConverted)
969{
970    int32_t         len;
971    double          num;
972    UNumberFormat   *format;
973    int32_t         parsePos    = 0;
974    int32_t         skipped;
975    UErrorCode      status      = U_ZERO_ERROR;
976
977
978    /* skip all ws in the input */
979    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
980
981    /* fill the input's internal buffer */
982    ufile_fill_uchar_buffer(input);
983
984    /* determine the size of the input's buffer */
985    len = (int32_t)(input->str.fLimit - input->str.fPos);
986
987    /* truncate to the width, if specified */
988    if(info->fWidth != -1)
989        len = ufmt_min(len, info->fWidth);
990
991    /* get the formatter */
992    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
993
994    /* handle error */
995    if(format == 0)
996        return 0;
997
998    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
999    /* This is not applicable to RBNF. */
1000    /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
1001
1002    /* parse the number */
1003    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
1004
1005    if (!info->fSkipArg) {
1006        *(double*)(args[0].ptrValue) = num;
1007    }
1008
1009    /* mask off any necessary bits */
1010    /*  if(! info->fIsLong_double)
1011    num &= DBL_MAX;*/
1012
1013    /* update the input's position to reflect consumed data */
1014    input->str.fPos += parsePos;
1015
1016    /* we converted 1 arg */
1017    *argConverted = !info->fSkipArg;
1018    return parsePos + skipped;
1019}
1020
1021static int32_t
1022u_scanf_hex_handler(UFILE       *input,
1023                    u_scanf_spec_info *info,
1024                    ufmt_args   *args,
1025                    const UChar *fmt,
1026                    int32_t     *fmtConsumed,
1027                    int32_t     *argConverted)
1028{
1029    int32_t     len;
1030    int32_t     skipped;
1031    void        *num    = (void*) (args[0].ptrValue);
1032    int64_t     result;
1033
1034    /* skip all ws in the input */
1035    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1036
1037    /* fill the input's internal buffer */
1038    ufile_fill_uchar_buffer(input);
1039
1040    /* determine the size of the input's buffer */
1041    len = (int32_t)(input->str.fLimit - input->str.fPos);
1042
1043    /* truncate to the width, if specified */
1044    if(info->fWidth != -1)
1045        len = ufmt_min(len, info->fWidth);
1046
1047    /* check for alternate form */
1048    if( *(input->str.fPos) == 0x0030 &&
1049        (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
1050
1051        /* skip the '0' and 'x' or 'X' if present */
1052        input->str.fPos += 2;
1053        len -= 2;
1054    }
1055
1056    /* parse the number */
1057    result = ufmt_uto64(input->str.fPos, &len, 16);
1058
1059    /* update the input's position to reflect consumed data */
1060    input->str.fPos += len;
1061
1062    /* mask off any necessary bits */
1063    if (!info->fSkipArg) {
1064        if (info->fIsShort)
1065            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1066        else if (info->fIsLongLong)
1067            *(int64_t*)num = result;
1068        else
1069            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1070    }
1071
1072    /* we converted 1 arg */
1073    *argConverted = !info->fSkipArg;
1074    return len + skipped;
1075}
1076
1077static int32_t
1078u_scanf_octal_handler(UFILE         *input,
1079                      u_scanf_spec_info *info,
1080                      ufmt_args     *args,
1081                      const UChar   *fmt,
1082                      int32_t       *fmtConsumed,
1083                      int32_t       *argConverted)
1084{
1085    int32_t     len;
1086    int32_t     skipped;
1087    void        *num         = (void*) (args[0].ptrValue);
1088    int64_t     result;
1089
1090    /* skip all ws in the input */
1091    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1092
1093    /* fill the input's internal buffer */
1094    ufile_fill_uchar_buffer(input);
1095
1096    /* determine the size of the input's buffer */
1097    len = (int32_t)(input->str.fLimit - input->str.fPos);
1098
1099    /* truncate to the width, if specified */
1100    if(info->fWidth != -1)
1101        len = ufmt_min(len, info->fWidth);
1102
1103    /* parse the number */
1104    result = ufmt_uto64(input->str.fPos, &len, 8);
1105
1106    /* update the input's position to reflect consumed data */
1107    input->str.fPos += len;
1108
1109    /* mask off any necessary bits */
1110    if (!info->fSkipArg) {
1111        if (info->fIsShort)
1112            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1113        else if (info->fIsLongLong)
1114            *(int64_t*)num = result;
1115        else
1116            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1117    }
1118
1119    /* we converted 1 arg */
1120    *argConverted = !info->fSkipArg;
1121    return len + skipped;
1122}
1123
1124static int32_t
1125u_scanf_pointer_handler(UFILE       *input,
1126                        u_scanf_spec_info *info,
1127                        ufmt_args   *args,
1128                        const UChar *fmt,
1129                        int32_t     *fmtConsumed,
1130                        int32_t     *argConverted)
1131{
1132    int32_t len;
1133    int32_t skipped;
1134    void    *result;
1135    void    **p     = (void**)(args[0].ptrValue);
1136
1137
1138    /* skip all ws in the input */
1139    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1140
1141    /* fill the input's internal buffer */
1142    ufile_fill_uchar_buffer(input);
1143
1144    /* determine the size of the input's buffer */
1145    len = (int32_t)(input->str.fLimit - input->str.fPos);
1146
1147    /* truncate to the width, if specified */
1148    if(info->fWidth != -1) {
1149        len = ufmt_min(len, info->fWidth);
1150    }
1151
1152    /* Make sure that we don't consume too much */
1153    if (len > (int32_t)(sizeof(void*)*2)) {
1154        len = (int32_t)(sizeof(void*)*2);
1155    }
1156
1157    /* parse the pointer - assign to temporary value */
1158    result = ufmt_utop(input->str.fPos, &len);
1159
1160    if (!info->fSkipArg) {
1161        *p = result;
1162    }
1163
1164    /* update the input's position to reflect consumed data */
1165    input->str.fPos += len;
1166
1167    /* we converted 1 arg */
1168    *argConverted = !info->fSkipArg;
1169    return len + skipped;
1170}
1171
1172static int32_t
1173u_scanf_scanset_handler(UFILE       *input,
1174                        u_scanf_spec_info *info,
1175                        ufmt_args   *args,
1176                        const UChar *fmt,
1177                        int32_t     *fmtConsumed,
1178                        int32_t     *argConverted)
1179{
1180    USet        *scanset;
1181    UErrorCode  status = U_ZERO_ERROR;
1182    int32_t     chLeft = INT32_MAX;
1183    UChar32     c;
1184    UChar       *alias = (UChar*) (args[0].ptrValue);
1185    UBool       isNotEOF = FALSE;
1186    UBool       readCharacter = FALSE;
1187
1188    /* Create an empty set */
1189    scanset = uset_open(0, -1);
1190
1191    /* Back up one to get the [ */
1192    fmt--;
1193
1194    /* truncate to the width, if specified and alias the target */
1195    if(info->fWidth >= 0) {
1196        chLeft = info->fWidth;
1197    }
1198
1199    /* parse the scanset from the fmt string */
1200    *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1201
1202    /* verify that the parse was successful */
1203    if (U_SUCCESS(status)) {
1204        c=0;
1205
1206        /* grab characters one at a time and make sure they are in the scanset */
1207        while(chLeft > 0) {
1208            if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
1209                readCharacter = TRUE;
1210                if (!info->fSkipArg) {
1211                    int32_t idx = 0;
1212                    UBool isError = FALSE;
1213
1214                    U16_APPEND(alias, idx, chLeft, c, isError);
1215                    if (isError) {
1216                        break;
1217                    }
1218                    alias += idx;
1219                }
1220                chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1221            }
1222            else {
1223                /* if the character's not in the scanset, break out */
1224                break;
1225            }
1226        }
1227
1228        /* put the final character we read back on the input */
1229        if(isNotEOF && chLeft > 0) {
1230            u_fungetc(c, input);
1231        }
1232    }
1233
1234    uset_close(scanset);
1235
1236    /* if we didn't match at least 1 character, fail */
1237    if(!readCharacter)
1238        return -1;
1239    /* otherwise, add the terminator */
1240    else if (!info->fSkipArg) {
1241        *alias = 0x00;
1242    }
1243
1244    /* we converted 1 arg */
1245    *argConverted = !info->fSkipArg;
1246    return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1247}
1248
1249/* Use US-ASCII characters only for formatting. Most codepages have
1250 characters 20-7F from Unicode. Using any other codepage specific
1251 characters will make it very difficult to format the string on
1252 non-Unicode machines */
1253static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1254/* 0x20 */
1255    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1256    UFMT_EMPTY,         UFMT_SIMPLE_PERCENT,UFMT_EMPTY,         UFMT_EMPTY,
1257    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1258    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1259
1260/* 0x30 */
1261    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1262    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1263    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1264    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1265
1266/* 0x40 */
1267    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR,
1268    UFMT_EMPTY,         UFMT_SCIENTIFIC,    UFMT_EMPTY,         UFMT_SCIDBL,
1269#ifdef U_USE_OBSOLETE_IO_FORMATTING
1270    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR/*deprecated*/,
1271#else
1272    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1273#endif
1274    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1275
1276/* 0x50 */
1277    UFMT_PERCENT,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_USTRING,
1278#ifdef U_USE_OBSOLETE_IO_FORMATTING
1279    UFMT_EMPTY,         UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT,      UFMT_EMPTY,
1280#else
1281    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SPELLOUT,      UFMT_EMPTY,
1282#endif
1283    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SCANSET,
1284    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1285
1286/* 0x60 */
1287    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_CHAR,
1288    UFMT_INT,           UFMT_SCIENTIFIC,    UFMT_DOUBLE,        UFMT_SCIDBL,
1289    UFMT_EMPTY,         UFMT_INT,           UFMT_EMPTY,         UFMT_EMPTY,
1290    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_COUNT,         UFMT_OCTAL,
1291
1292/* 0x70 */
1293    UFMT_POINTER,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_STRING,
1294    UFMT_EMPTY,         UFMT_UINT,          UFMT_EMPTY,         UFMT_EMPTY,
1295    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1296    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1297};
1298
1299U_CFUNC int32_t
1300u_scanf_parse(UFILE     *f,
1301            const UChar *patternSpecification,
1302            va_list     ap)
1303{
1304    const UChar     *alias;
1305    int32_t         count, converted, argConsumed, cpConsumed;
1306    uint16_t        handlerNum;
1307
1308    ufmt_args       args;
1309    u_scanf_spec    spec;
1310    ufmt_type_info  info;
1311    u_scanf_handler handler;
1312
1313    /* alias the pattern */
1314    alias = patternSpecification;
1315
1316    /* haven't converted anything yet */
1317    argConsumed = 0;
1318    converted = 0;
1319    cpConsumed = 0;
1320
1321    /* iterate through the pattern */
1322    for(;;) {
1323
1324        /* match any characters up to the next '%' */
1325        while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1326            alias++;
1327        }
1328
1329        /* if we aren't at a '%', or if we're at end of string, break*/
1330        if(*alias != UP_PERCENT || *alias == 0x0000)
1331            break;
1332
1333        /* parse the specifier */
1334        count = u_scanf_parse_spec(alias, &spec);
1335
1336        /* update the pointer in pattern */
1337        alias += count;
1338
1339        handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1340        if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1341            /* skip the argument, if necessary */
1342            /* query the info function for argument information */
1343            info = g_u_scanf_infos[ handlerNum ].info;
1344            if (info != ufmt_count && u_feof(f)) {
1345                break;
1346            }
1347            else if(spec.fInfo.fSkipArg) {
1348                args.ptrValue = NULL;
1349            }
1350            else {
1351                switch(info) {
1352                case ufmt_count:
1353                    /* set the spec's width to the # of items converted */
1354                    spec.fInfo.fWidth = cpConsumed;
1355                    /* fall through to next case */
1356                case ufmt_char:
1357                case ufmt_uchar:
1358                case ufmt_int:
1359                case ufmt_string:
1360                case ufmt_ustring:
1361                case ufmt_pointer:
1362                case ufmt_float:
1363                case ufmt_double:
1364                    args.ptrValue = va_arg(ap, void*);
1365                    break;
1366
1367                default:
1368                    /* else args is ignored */
1369                    args.ptrValue = NULL;
1370                    break;
1371                }
1372            }
1373
1374            /* call the handler function */
1375            handler = g_u_scanf_infos[ handlerNum ].handler;
1376            if(handler != 0) {
1377
1378                /* reset count to 1 so that += for alias works. */
1379                count = 1;
1380
1381                cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1382
1383                /* if the handler encountered an error condition, break */
1384                if(argConsumed < 0) {
1385                    converted = -1;
1386                    break;
1387                }
1388
1389                /* add to the # of items converted */
1390                converted += argConsumed;
1391
1392                /* update the pointer in pattern */
1393                alias += count-1;
1394            }
1395            /* else do nothing */
1396        }
1397        /* else do nothing */
1398
1399        /* just ignore unknown tags */
1400    }
1401
1402    /* return # of items converted */
1403    return converted;
1404}
1405
1406#endif /* #if !UCONFIG_NO_FORMATTING */
1407