uscanf_p.c revision 51cfa1a9a96cad34675a6415fe86dfdf3f525bb6
1/*
2*******************************************************************************
3*
4*   Copyright (C) 1998-2006, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*
* File uscanf_p.c
10*
11* Modification History:
12*
13*   Date        Name        Description
14*   12/02/98    stephen        Creation.
15*   03/13/99    stephen     Modified for new C API.
16*******************************************************************************
17*/
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_FORMATTING
22
23#include "unicode/uchar.h"
24#include "unicode/ustring.h"
25#include "unicode/unum.h"
26#include "unicode/udat.h"
27#include "unicode/uset.h"
28#include "uscanf.h"
29#include "ufmt_cmn.h"
30#include "ufile.h"
31#include "locbund.h"
32
33#include "cmemory.h"
34#include "ustr_cnv.h"
35
/* flag characters for u_scanf */
#define FLAG_ASTERISK 0x002A    /* '*': parse the field but do not store it */
#define FLAG_PAREN 0x0028       /* '(': introduces an explicit pad character */

/* Nonzero when s is one of the flag characters above.  The expansion is */
/* wrapped in parentheses so that it behaves as a single operand when it */
/* is combined with !, && or ?: at the point of use. */
#define ISFLAG(s)    ((s) == FLAG_ASTERISK || \
            (s) == FLAG_PAREN)
42
/* special characters for u_scanf */
/* '$' (U+0024): follows the digits of a positional argument index */
#define SPEC_DOLLARSIGN 0x0024
45
/* unicode digits (the ASCII decimal digits U+0030..U+0039) */
#define DIGIT_ZERO 0x0030
#define DIGIT_ONE 0x0031
#define DIGIT_TWO 0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR 0x0034
#define DIGIT_FIVE 0x0035
#define DIGIT_SIX 0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE 0x0039

/* Nonzero when s is one of DIGIT_ZERO..DIGIT_NINE.  The ten constants are */
/* contiguous, so a range test is equivalent to comparing against each one. */
/* The expansion is wrapped in parentheses so that it behaves as a single */
/* operand when combined with !, && or ?: at the point of use. */
#define ISDIGIT(s)    ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)
68
/* u_scanf modifiers */
#define MOD_H 0x0068        /* 'h': short */
#define MOD_LOWERL 0x006C   /* 'l': long; doubled ("ll") means long long */
#define MOD_L 0x004C        /* 'L': long double */

/* Nonzero when s is one of the size modifier characters above.  The */
/* expansion is wrapped in parentheses so that it behaves as a single */
/* operand when combined with !, && or ?: at the point of use. */
#define ISMOD(s)    ((s) == MOD_H || \
            (s) == MOD_LOWERL || \
            (s) == MOD_L)
77
/**
 * Struct encapsulating a single uscanf format specification.
 * It is filled in by u_scanf_parse_spec and handed to the conversion
 * handlers, some of which adjust it before delegating (the character
 * handlers set fWidth and clear fIsString).
 */
typedef struct u_scanf_spec_info {
    int32_t fWidth;         /* Maximum field width; -1 when the spec gives none */

    UChar   fSpec;          /* Format specification letter, e.g. 'd' or 's' */

    UChar   fPadChar;       /* Padding character (default U+0020); skipped like whitespace */

    UBool   fSkipArg;       /* TRUE if arg should be skipped ('*' flag) */
    UBool   fIsLongDouble;  /* L flag  */
    UBool   fIsShort;       /* h flag  */
    UBool   fIsLong;        /* l flag  */
    UBool   fIsLongLong;    /* ll flag  */
    UBool   fIsString;      /* TRUE (the default) for a NUL-terminated string; */
                            /* FALSE for character reads, which skip no */
                            /* whitespace and write no terminator */
} u_scanf_spec_info;
95
96
/**
 * Struct encapsulating a single u_scanf format specification:
 * the parsed details plus the optional positional argument index.
 */
typedef struct u_scanf_spec {
    u_scanf_spec_info    fInfo;        /* Information on this spec */
    int32_t        fArgPos;    /* Position of data in arg list; -1 when no "n$" prefix was given */
} u_scanf_spec;
104
105/**
106 * Parse a single u_scanf format specifier in Unicode.
107 * @param fmt A pointer to a '%' character in a u_scanf format specification.
108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
109 * format specifier.
110 * @return The number of characters contained in this specifier.
111 */
112static int32_t
113u_scanf_parse_spec (const UChar     *fmt,
114            u_scanf_spec    *spec)
115{
116    const UChar *s = fmt;
117    const UChar *backup;
118    u_scanf_spec_info *info = &(spec->fInfo);
119
120    /* initialize spec to default values */
121    spec->fArgPos             = -1;
122
123    info->fWidth        = -1;
124    info->fSpec         = 0x0000;
125    info->fPadChar      = 0x0020;
126    info->fSkipArg      = FALSE;
127    info->fIsLongDouble = FALSE;
128    info->fIsShort      = FALSE;
129    info->fIsLong       = FALSE;
130    info->fIsLongLong   = FALSE;
131    info->fIsString     = TRUE;
132
133
134    /* skip over the initial '%' */
135    s++;
136
137    /* Check for positional argument */
138    if(ISDIGIT(*s)) {
139
140        /* Save the current position */
141        backup = s;
142
143        /* handle positional parameters */
144        if(ISDIGIT(*s)) {
145            spec->fArgPos = (int) (*s++ - DIGIT_ZERO);
146
147            while(ISDIGIT(*s)) {
148                spec->fArgPos *= 10;
149                spec->fArgPos += (int) (*s++ - DIGIT_ZERO);
150            }
151        }
152
153        /* if there is no '$', don't read anything */
154        if(*s != SPEC_DOLLARSIGN) {
155            spec->fArgPos = -1;
156            s = backup;
157        }
158        /* munge the '$' */
159        else
160            s++;
161    }
162
163    /* Get any format flags */
164    while(ISFLAG(*s)) {
165        switch(*s++) {
166
167            /* skip argument */
168        case FLAG_ASTERISK:
169            info->fSkipArg = TRUE;
170            break;
171
172            /* pad character specified */
173        case FLAG_PAREN:
174
175            /* first four characters are hex values for pad char */
176            info->fPadChar = (UChar)ufmt_digitvalue(*s++);
177            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
178            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
179            info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++));
180
181            /* final character is ignored */
182            s++;
183
184            break;
185        }
186    }
187
188    /* Get the width */
189    if(ISDIGIT(*s)){
190        info->fWidth = (int) (*s++ - DIGIT_ZERO);
191
192        while(ISDIGIT(*s)) {
193            info->fWidth *= 10;
194            info->fWidth += (int) (*s++ - DIGIT_ZERO);
195        }
196    }
197
198    /* Get any modifiers */
199    if(ISMOD(*s)) {
200        switch(*s++) {
201
202            /* short */
203        case MOD_H:
204            info->fIsShort = TRUE;
205            break;
206
207            /* long or long long */
208        case MOD_LOWERL:
209            if(*s == MOD_LOWERL) {
210                info->fIsLongLong = TRUE;
211                /* skip over the next 'l' */
212                s++;
213            }
214            else
215                info->fIsLong = TRUE;
216            break;
217
218            /* long double */
219        case MOD_L:
220            info->fIsLongDouble = TRUE;
221            break;
222        }
223    }
224
225    /* finally, get the specifier letter */
226    info->fSpec = *s++;
227
228    /* return # of characters in this specifier */
229    return (int32_t)(s - fmt);
230}
231
/* '%' (U+0025) */
#define UP_PERCENT 0x0025


/*
 * Each UFMT_* macro below expands to a brace-enclosed pair: an argument
 * type tag (ufmt_*) followed by the handler function for that conversion.
 * The comment above each macro names the format letter(s) it stands for.
 * NOTE(review): presumably these initialize u_scanf_info entries of a
 * handler table defined outside this view -- confirm against that table.
 */

/* ANSI style formatting */
/* Use US-ASCII characters only for formatting */

/* % */
#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler}
/* s */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}
/* c */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}
/* d, i */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}
/* u */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}
/* o */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}
/* x, X */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}
/* f */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}
/* e, E */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}
/* g, G */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}
/* n */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}
/* [ */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}

/* non-ANSI extensions */
/* Use US-ASCII characters only for formatting */

/* p */
#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}
/* V */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}
/* P */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}
/* C  K is old format */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}
/* S  U is old format */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}


/* Entry with no handler (NULL) */
#define UFMT_EMPTY {ufmt_empty, NULL}
279
/**
 * A u_scanf handler function.
 * A u_scanf handler is responsible for handling a single u_scanf
 * format specification, for example 'd' or 's'.
 * @param stream The UFILE from which to read input.
 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
 * information on the format specification.
 * @param args A pointer to the argument data; handlers that store a
 * result write through <TT>args[0].ptrValue</TT>.
 * @param fmt A pointer to the first character in the format string
 * following the spec.
 * @param fmtConsumed On output, set to the number of characters consumed
 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
 * @param argConverted On output, the number of arguments converted and
 * assigned, or -1 if an error occurred.
 * @return The number of code points consumed during reading.
 */
typedef int32_t (*u_scanf_handler) (UFILE   *stream,
                   u_scanf_spec_info  *info,
                   ufmt_args                *args,
                   const UChar              *fmt,
                   int32_t                  *fmtConsumed,
                   int32_t                  *argConverted);
302
/* Pairs the argument type expected by a conversion with the handler */
/* function that performs it. */
typedef struct u_scanf_info {
    ufmt_type_info info;        /* type of the argument the handler consumes */
    u_scanf_handler handler;    /* conversion function for this entry */
} u_scanf_info;
307
/* NOTE(review): presumably the number of entries in the handler table, */
/* which is defined outside this view -- confirm. */
#define USCANF_NUM_FMT_HANDLERS 108
/* Capacity, in UChars, of the buffer that receives the locale's plus sign */
#define USCANF_SYMBOL_BUFFER_SIZE 8

/* We do not use handlers for 0-0x1f */
#define USCANF_BASE_FMT_HANDLERS 0x20
313
314
315static int32_t
316u_scanf_skip_leading_ws(UFILE   *input,
317                        UChar   pad)
318{
319    UChar   c;
320    int32_t count = 0;
321    UBool isNotEOF;
322
323    /* skip all leading ws in the input */
324    while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) )
325    {
326        count++;
327    }
328
329    /* put the final character back on the input */
330    if(isNotEOF)
331        u_fungetc(c, input);
332
333    return count;
334}
335
336/* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
337static int32_t
338u_scanf_skip_leading_positive_sign(UFILE   *input,
339                                   UNumberFormat *format,
340                                   UErrorCode *status)
341{
342    UChar   c;
343    int32_t count = 0;
344    UBool isNotEOF;
345    UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
346    int32_t symbolLen;
347    UErrorCode localStatus = U_ZERO_ERROR;
348
349    if (U_SUCCESS(*status)) {
350        symbolLen = unum_getSymbol(format,
351            UNUM_PLUS_SIGN_SYMBOL,
352            plusSymbol,
353            sizeof(plusSymbol)/sizeof(*plusSymbol),
354            &localStatus);
355
356        if (U_SUCCESS(localStatus)) {
357            /* skip all leading ws in the input */
358            while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) )
359            {
360                count++;
361            }
362
363            /* put the final character back on the input */
364            if(isNotEOF) {
365                u_fungetc(c, input);
366            }
367        }
368    }
369
370    return count;
371}
372
373static int32_t
374u_scanf_simple_percent_handler(UFILE        *input,
375                               u_scanf_spec_info *info,
376                               ufmt_args    *args,
377                               const UChar  *fmt,
378                               int32_t      *fmtConsumed,
379                               int32_t      *argConverted)
380{
381    /* make sure the next character in the input is a percent */
382    *argConverted = 0;
383    if(u_fgetc(input) != 0x0025) {
384        *argConverted = -1;
385    }
386    return 1;
387}
388
389static int32_t
390u_scanf_count_handler(UFILE         *input,
391                      u_scanf_spec_info *info,
392                      ufmt_args     *args,
393                      const UChar   *fmt,
394                      int32_t       *fmtConsumed,
395                      int32_t       *argConverted)
396{
397    /* in the special case of count, the u_scanf_spec_info's width */
398    /* will contain the # of items converted thus far */
399    if (!info->fSkipArg) {
400        if (info->fIsShort)
401            *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth);
402        else if (info->fIsLongLong)
403            *(int64_t*)(args[0].ptrValue) = info->fWidth;
404        else
405            *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth);
406    }
407    *argConverted = 0;
408
409    /* we converted 0 args */
410    return 0;
411}
412
413static int32_t
414u_scanf_double_handler(UFILE        *input,
415                       u_scanf_spec_info *info,
416                       ufmt_args    *args,
417                       const UChar  *fmt,
418                       int32_t      *fmtConsumed,
419                       int32_t      *argConverted)
420{
421    int32_t         len;
422    double          num;
423    UNumberFormat   *format;
424    int32_t         parsePos    = 0;
425    int32_t         skipped;
426    UErrorCode      status      = U_ZERO_ERROR;
427
428
429    /* skip all ws in the input */
430    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
431
432    /* fill the input's internal buffer */
433    ufile_fill_uchar_buffer(input);
434
435    /* determine the size of the input's buffer */
436    len = (int32_t)(input->str.fLimit - input->str.fPos);
437
438    /* truncate to the width, if specified */
439    if(info->fWidth != -1)
440        len = ufmt_min(len, info->fWidth);
441
442    /* get the formatter */
443    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
444
445    /* handle error */
446    if(format == 0)
447        return 0;
448
449    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
450    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
451
452    /* parse the number */
453    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
454
455    if (!info->fSkipArg) {
456        if (info->fIsLong)
457            *(double*)(args[0].ptrValue) = num;
458        else if (info->fIsLongDouble)
459            *(long double*)(args[0].ptrValue) = num;
460        else
461            *(float*)(args[0].ptrValue) = (float)num;
462    }
463
464    /* mask off any necessary bits */
465    /*  if(! info->fIsLong_double)
466    num &= DBL_MAX;*/
467
468    /* update the input's position to reflect consumed data */
469    input->str.fPos += parsePos;
470
471    /* we converted 1 arg */
472    *argConverted = !info->fSkipArg;
473    return parsePos + skipped;
474}
475
476static int32_t
477u_scanf_scientific_handler(UFILE        *input,
478                           u_scanf_spec_info *info,
479                           ufmt_args    *args,
480                           const UChar  *fmt,
481                           int32_t      *fmtConsumed,
482                           int32_t      *argConverted)
483{
484    int32_t         len;
485    double          num;
486    UNumberFormat   *format;
487    int32_t         parsePos    = 0;
488    int32_t         skipped;
489    UErrorCode      status      = U_ZERO_ERROR;
490
491
492    /* skip all ws in the input */
493    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
494
495    /* fill the input's internal buffer */
496    ufile_fill_uchar_buffer(input);
497
498    /* determine the size of the input's buffer */
499    len = (int32_t)(input->str.fLimit - input->str.fPos);
500
501    /* truncate to the width, if specified */
502    if(info->fWidth != -1)
503        len = ufmt_min(len, info->fWidth);
504
505    /* get the formatter */
506    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
507
508    /* handle error */
509    if(format == 0)
510        return 0;
511
512    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
513    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
514
515    /* parse the number */
516    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
517
518    if (!info->fSkipArg) {
519        if (info->fIsLong)
520            *(double*)(args[0].ptrValue) = num;
521        else if (info->fIsLongDouble)
522            *(long double*)(args[0].ptrValue) = num;
523        else
524            *(float*)(args[0].ptrValue) = (float)num;
525    }
526
527    /* mask off any necessary bits */
528    /*  if(! info->fIsLong_double)
529    num &= DBL_MAX;*/
530
531    /* update the input's position to reflect consumed data */
532    input->str.fPos += parsePos;
533
534    /* we converted 1 arg */
535    *argConverted = !info->fSkipArg;
536    return parsePos + skipped;
537}
538
539static int32_t
540u_scanf_scidbl_handler(UFILE        *input,
541                       u_scanf_spec_info *info,
542                       ufmt_args    *args,
543                       const UChar  *fmt,
544                       int32_t      *fmtConsumed,
545                       int32_t      *argConverted)
546{
547    int32_t       len;
548    double        num;
549    UNumberFormat *scientificFormat, *genericFormat;
550    /*int32_t       scientificResult, genericResult;*/
551    double        scientificResult, genericResult;
552    int32_t       scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
553    int32_t       skipped;
554    UErrorCode    scientificStatus = U_ZERO_ERROR;
555    UErrorCode    genericStatus = U_ZERO_ERROR;
556
557
558    /* since we can't determine by scanning the characters whether */
559    /* a number was formatted in the 'f' or 'g' styles, parse the */
560    /* string with both formatters, and assume whichever one */
561    /* parsed the most is the correct formatter to use */
562
563
564    /* skip all ws in the input */
565    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
566
567    /* fill the input's internal buffer */
568    ufile_fill_uchar_buffer(input);
569
570    /* determine the size of the input's buffer */
571    len = (int32_t)(input->str.fLimit - input->str.fPos);
572
573    /* truncate to the width, if specified */
574    if(info->fWidth != -1)
575        len = ufmt_min(len, info->fWidth);
576
577    /* get the formatters */
578    scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
579    genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
580
581    /* handle error */
582    if(scientificFormat == 0 || genericFormat == 0)
583        return 0;
584
585    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
586    skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
587
588    /* parse the number using each format*/
589
590    scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
591        &scientificParsePos, &scientificStatus);
592
593    genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
594        &genericParsePos, &genericStatus);
595
596    /* determine which parse made it farther */
597    if(scientificParsePos > genericParsePos) {
598        /* stash the result in num */
599        num = scientificResult;
600        /* update the input's position to reflect consumed data */
601        parsePos += scientificParsePos;
602    }
603    else {
604        /* stash the result in num */
605        num = genericResult;
606        /* update the input's position to reflect consumed data */
607        parsePos += genericParsePos;
608    }
609    input->str.fPos += parsePos;
610
611    if (!info->fSkipArg) {
612        if (info->fIsLong)
613            *(double*)(args[0].ptrValue) = num;
614        else if (info->fIsLongDouble)
615            *(long double*)(args[0].ptrValue) = num;
616        else
617            *(float*)(args[0].ptrValue) = (float)num;
618    }
619
620    /* mask off any necessary bits */
621    /*  if(! info->fIsLong_double)
622    num &= DBL_MAX;*/
623
624    /* we converted 1 arg */
625    *argConverted = !info->fSkipArg;
626    return parsePos + skipped;
627}
628
629static int32_t
630u_scanf_integer_handler(UFILE       *input,
631                        u_scanf_spec_info *info,
632                        ufmt_args   *args,
633                        const UChar *fmt,
634                        int32_t     *fmtConsumed,
635                        int32_t     *argConverted)
636{
637    int32_t         len;
638    void            *num        = (void*) (args[0].ptrValue);
639    UNumberFormat   *format;
640    int32_t         parsePos    = 0;
641    int32_t         skipped;
642    UErrorCode      status      = U_ZERO_ERROR;
643    int64_t         result;
644
645
646    /* skip all ws in the input */
647    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
648
649    /* fill the input's internal buffer */
650    ufile_fill_uchar_buffer(input);
651
652    /* determine the size of the input's buffer */
653    len = (int32_t)(input->str.fLimit - input->str.fPos);
654
655    /* truncate to the width, if specified */
656    if(info->fWidth != -1)
657        len = ufmt_min(len, info->fWidth);
658
659    /* get the formatter */
660    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
661
662    /* handle error */
663    if(format == 0)
664        return 0;
665
666    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
667    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
668
669    /* parse the number */
670    result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
671
672    /* mask off any necessary bits */
673    if (!info->fSkipArg) {
674        if (info->fIsShort)
675            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
676        else if (info->fIsLongLong)
677            *(int64_t*)num = result;
678        else
679            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
680    }
681
682    /* update the input's position to reflect consumed data */
683    input->str.fPos += parsePos;
684
685    /* we converted 1 arg */
686    *argConverted = !info->fSkipArg;
687    return parsePos + skipped;
688}
689
690static int32_t
691u_scanf_uinteger_handler(UFILE          *input,
692                         u_scanf_spec_info *info,
693                         ufmt_args      *args,
694                         const UChar    *fmt,
695                         int32_t        *fmtConsumed,
696                         int32_t        *argConverted)
697{
698    /* TODO Fix this when Numberformat handles uint64_t */
699    return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
700}
701
702static int32_t
703u_scanf_percent_handler(UFILE       *input,
704                        u_scanf_spec_info *info,
705                        ufmt_args   *args,
706                        const UChar *fmt,
707                        int32_t     *fmtConsumed,
708                        int32_t     *argConverted)
709{
710    int32_t         len;
711    double          num;
712    UNumberFormat   *format;
713    int32_t         parsePos    = 0;
714    int32_t         skipped;
715    UErrorCode      status      = U_ZERO_ERROR;
716
717
718    /* skip all ws in the input */
719    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
720
721    /* fill the input's internal buffer */
722    ufile_fill_uchar_buffer(input);
723
724    /* determine the size of the input's buffer */
725    len = (int32_t)(input->str.fLimit - input->str.fPos);
726
727    /* truncate to the width, if specified */
728    if(info->fWidth != -1)
729        len = ufmt_min(len, info->fWidth);
730
731    /* get the formatter */
732    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
733
734    /* handle error */
735    if(format == 0)
736        return 0;
737
738    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
739    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
740
741    /* parse the number */
742    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
743
744    if (!info->fSkipArg) {
745        *(double*)(args[0].ptrValue) = num;
746    }
747
748    /* mask off any necessary bits */
749    /*  if(! info->fIsLong_double)
750    num &= DBL_MAX;*/
751
752    /* update the input's position to reflect consumed data */
753    input->str.fPos += parsePos;
754
755    /* we converted 1 arg */
756    *argConverted = !info->fSkipArg;
757    return parsePos;
758}
759
760static int32_t
761u_scanf_string_handler(UFILE        *input,
762                       u_scanf_spec_info *info,
763                       ufmt_args    *args,
764                       const UChar  *fmt,
765                       int32_t      *fmtConsumed,
766                       int32_t      *argConverted)
767{
768    const UChar *source;
769    UConverter  *conv;
770    char        *arg    = (char*)(args[0].ptrValue);
771    char        *alias  = arg;
772    char        *limit;
773    UErrorCode  status  = U_ZERO_ERROR;
774    int32_t     count;
775    int32_t     skipped = 0;
776    UChar       c;
777    UBool       isNotEOF = FALSE;
778
779    /* skip all ws in the input */
780    if (info->fIsString) {
781        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
782    }
783
784    /* get the string one character at a time, truncating to the width */
785    count = 0;
786
787    /* open the default converter */
788    conv = u_getDefaultConverter(&status);
789
790    if(U_FAILURE(status))
791        return -1;
792
793    while( (info->fWidth == -1 || count < info->fWidth)
794        && (isNotEOF = ufile_getch(input, &c))
795        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
796    {
797
798        if (!info->fSkipArg) {
799            /* put the character from the input onto the target */
800            source = &c;
801            /* Since we do this one character at a time, do it this way. */
802            if (info->fWidth > 0) {
803                limit = alias + info->fWidth - count;
804            }
805            else {
806                limit = alias + ucnv_getMaxCharSize(conv);
807            }
808
809            /* convert the character to the default codepage */
810            ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
811                NULL, TRUE, &status);
812
813            if(U_FAILURE(status)) {
814                /* clean up */
815                u_releaseDefaultConverter(conv);
816                return -1;
817            }
818        }
819
820        /* increment the count */
821        ++count;
822    }
823
824    /* put the final character we read back on the input */
825    if (!info->fSkipArg) {
826        if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
827            u_fungetc(c, input);
828
829        /* add the terminator */
830        if (info->fIsString) {
831            *alias = 0x00;
832        }
833    }
834
835    /* clean up */
836    u_releaseDefaultConverter(conv);
837
838    /* we converted 1 arg */
839    *argConverted = !info->fSkipArg;
840    return count + skipped;
841}
842
843static int32_t
844u_scanf_char_handler(UFILE          *input,
845                     u_scanf_spec_info *info,
846                     ufmt_args      *args,
847                     const UChar    *fmt,
848                     int32_t        *fmtConsumed,
849                     int32_t        *argConverted)
850{
851    if (info->fWidth < 0) {
852        info->fWidth = 1;
853    }
854    info->fIsString = FALSE;
855    return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
856}
857
858static int32_t
859u_scanf_ustring_handler(UFILE       *input,
860                        u_scanf_spec_info *info,
861                        ufmt_args   *args,
862                        const UChar *fmt,
863                        int32_t     *fmtConsumed,
864                        int32_t     *argConverted)
865{
866    UChar   *arg     = (UChar*)(args[0].ptrValue);
867    UChar   *alias     = arg;
868    int32_t count;
869    int32_t skipped = 0;
870    UChar   c;
871    UBool   isNotEOF = FALSE;
872
873    /* skip all ws in the input */
874    if (info->fIsString) {
875        skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
876    }
877
878    /* get the string one character at a time, truncating to the width */
879    count = 0;
880
881    while( (info->fWidth == -1 || count < info->fWidth)
882        && (isNotEOF = ufile_getch(input, &c))
883        && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
884    {
885
886        /* put the character from the input onto the target */
887        if (!info->fSkipArg) {
888            *alias++ = c;
889        }
890
891        /* increment the count */
892        ++count;
893    }
894
895    /* put the final character we read back on the input */
896    if (!info->fSkipArg) {
897        if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
898            u_fungetc(c, input);
899        }
900
901        /* add the terminator */
902        if (info->fIsString) {
903            *alias = 0x0000;
904        }
905    }
906
907    /* we converted 1 arg */
908    *argConverted = !info->fSkipArg;
909    return count + skipped;
910}
911
912static int32_t
913u_scanf_uchar_handler(UFILE         *input,
914                      u_scanf_spec_info *info,
915                      ufmt_args     *args,
916                      const UChar   *fmt,
917                      int32_t       *fmtConsumed,
918                      int32_t       *argConverted)
919{
920    if (info->fWidth < 0) {
921        info->fWidth = 1;
922    }
923    info->fIsString = FALSE;
924    return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
925}
926
927static int32_t
928u_scanf_spellout_handler(UFILE          *input,
929                         u_scanf_spec_info *info,
930                         ufmt_args      *args,
931                         const UChar    *fmt,
932                         int32_t        *fmtConsumed,
933                         int32_t        *argConverted)
934{
935    int32_t         len;
936    double          num;
937    UNumberFormat   *format;
938    int32_t         parsePos    = 0;
939    int32_t         skipped;
940    UErrorCode      status      = U_ZERO_ERROR;
941
942
943    /* skip all ws in the input */
944    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
945
946    /* fill the input's internal buffer */
947    ufile_fill_uchar_buffer(input);
948
949    /* determine the size of the input's buffer */
950    len = (int32_t)(input->str.fLimit - input->str.fPos);
951
952    /* truncate to the width, if specified */
953    if(info->fWidth != -1)
954        len = ufmt_min(len, info->fWidth);
955
956    /* get the formatter */
957    format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
958
959    /* handle error */
960    if(format == 0)
961        return 0;
962
963    /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
964    /* This is not applicable to RBNF. */
965    /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
966
967    /* parse the number */
968    num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
969
970    if (!info->fSkipArg) {
971        *(double*)(args[0].ptrValue) = num;
972    }
973
974    /* mask off any necessary bits */
975    /*  if(! info->fIsLong_double)
976    num &= DBL_MAX;*/
977
978    /* update the input's position to reflect consumed data */
979    input->str.fPos += parsePos;
980
981    /* we converted 1 arg */
982    *argConverted = !info->fSkipArg;
983    return parsePos + skipped;
984}
985
986static int32_t
987u_scanf_hex_handler(UFILE       *input,
988                    u_scanf_spec_info *info,
989                    ufmt_args   *args,
990                    const UChar *fmt,
991                    int32_t     *fmtConsumed,
992                    int32_t     *argConverted)
993{
994    int32_t     len;
995    int32_t     skipped;
996    void        *num    = (void*) (args[0].ptrValue);
997    int64_t     result;
998
999    /* skip all ws in the input */
1000    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1001
1002    /* fill the input's internal buffer */
1003    ufile_fill_uchar_buffer(input);
1004
1005    /* determine the size of the input's buffer */
1006    len = (int32_t)(input->str.fLimit - input->str.fPos);
1007
1008    /* truncate to the width, if specified */
1009    if(info->fWidth != -1)
1010        len = ufmt_min(len, info->fWidth);
1011
1012    /* check for alternate form */
1013    if( *(input->str.fPos) == 0x0030 &&
1014        (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
1015
1016        /* skip the '0' and 'x' or 'X' if present */
1017        input->str.fPos += 2;
1018        len -= 2;
1019    }
1020
1021    /* parse the number */
1022    result = ufmt_uto64(input->str.fPos, &len, 16);
1023
1024    /* update the input's position to reflect consumed data */
1025    input->str.fPos += len;
1026
1027    /* mask off any necessary bits */
1028    if (!info->fSkipArg) {
1029        if (info->fIsShort)
1030            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1031        else if (info->fIsLongLong)
1032            *(int64_t*)num = result;
1033        else
1034            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1035    }
1036
1037    /* we converted 1 arg */
1038    *argConverted = !info->fSkipArg;
1039    return len + skipped;
1040}
1041
1042static int32_t
1043u_scanf_octal_handler(UFILE         *input,
1044                      u_scanf_spec_info *info,
1045                      ufmt_args     *args,
1046                      const UChar   *fmt,
1047                      int32_t       *fmtConsumed,
1048                      int32_t       *argConverted)
1049{
1050    int32_t     len;
1051    int32_t     skipped;
1052    void        *num         = (void*) (args[0].ptrValue);
1053    int64_t     result;
1054
1055    /* skip all ws in the input */
1056    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1057
1058    /* fill the input's internal buffer */
1059    ufile_fill_uchar_buffer(input);
1060
1061    /* determine the size of the input's buffer */
1062    len = (int32_t)(input->str.fLimit - input->str.fPos);
1063
1064    /* truncate to the width, if specified */
1065    if(info->fWidth != -1)
1066        len = ufmt_min(len, info->fWidth);
1067
1068    /* parse the number */
1069    result = ufmt_uto64(input->str.fPos, &len, 8);
1070
1071    /* update the input's position to reflect consumed data */
1072    input->str.fPos += len;
1073
1074    /* mask off any necessary bits */
1075    if (!info->fSkipArg) {
1076        if (info->fIsShort)
1077            *(int16_t*)num = (int16_t)(UINT16_MAX & result);
1078        else if (info->fIsLongLong)
1079            *(int64_t*)num = result;
1080        else
1081            *(int32_t*)num = (int32_t)(UINT32_MAX & result);
1082    }
1083
1084    /* we converted 1 arg */
1085    *argConverted = !info->fSkipArg;
1086    return len + skipped;
1087}
1088
1089static int32_t
1090u_scanf_pointer_handler(UFILE       *input,
1091                        u_scanf_spec_info *info,
1092                        ufmt_args   *args,
1093                        const UChar *fmt,
1094                        int32_t     *fmtConsumed,
1095                        int32_t     *argConverted)
1096{
1097    int32_t len;
1098    int32_t skipped;
1099    void    *result;
1100    void    **p     = (void**)(args[0].ptrValue);
1101
1102
1103    /* skip all ws in the input */
1104    skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
1105
1106    /* fill the input's internal buffer */
1107    ufile_fill_uchar_buffer(input);
1108
1109    /* determine the size of the input's buffer */
1110    len = (int32_t)(input->str.fLimit - input->str.fPos);
1111
1112    /* truncate to the width, if specified */
1113    if(info->fWidth != -1) {
1114        len = ufmt_min(len, info->fWidth);
1115    }
1116
1117    /* Make sure that we don't consume too much */
1118    if (len > (int32_t)(sizeof(void*)*2)) {
1119        len = (int32_t)(sizeof(void*)*2);
1120    }
1121
1122    /* parse the pointer - assign to temporary value */
1123    result = ufmt_utop(input->str.fPos, &len);
1124
1125    if (!info->fSkipArg) {
1126        *p = result;
1127    }
1128
1129    /* update the input's position to reflect consumed data */
1130    input->str.fPos += len;
1131
1132    /* we converted 1 arg */
1133    *argConverted = !info->fSkipArg;
1134    return len + skipped;
1135}
1136
1137static int32_t
1138u_scanf_scanset_handler(UFILE       *input,
1139                        u_scanf_spec_info *info,
1140                        ufmt_args   *args,
1141                        const UChar *fmt,
1142                        int32_t     *fmtConsumed,
1143                        int32_t     *argConverted)
1144{
1145    USet        *scanset;
1146    UErrorCode  status = U_ZERO_ERROR;
1147    int32_t     chLeft = INT32_MAX;
1148    UChar32     c;
1149    UChar       *alias = (UChar*) (args[0].ptrValue);
1150    UBool       isNotEOF = FALSE;
1151    UBool       readCharacter = FALSE;
1152
1153    /* Create an empty set */
1154    scanset = uset_open(0, -1);
1155
1156    /* Back up one to get the [ */
1157    fmt--;
1158
1159    /* truncate to the width, if specified and alias the target */
1160    if(info->fWidth >= 0) {
1161        chLeft = info->fWidth;
1162    }
1163
1164    /* parse the scanset from the fmt string */
1165    *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
1166
1167    /* verify that the parse was successful */
1168    if (U_SUCCESS(status)) {
1169        c=0;
1170
1171        /* grab characters one at a time and make sure they are in the scanset */
1172        while(chLeft > 0) {
1173            if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) {
1174                readCharacter = TRUE;
1175                if (!info->fSkipArg) {
1176                    int32_t idx = 0;
1177                    UBool isError = FALSE;
1178
1179                    U16_APPEND(alias, idx, chLeft, c, isError);
1180                    if (isError) {
1181                        break;
1182                    }
1183                    alias += idx;
1184                }
1185                chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
1186            }
1187            else {
1188                /* if the character's not in the scanset, break out */
1189                break;
1190            }
1191        }
1192
1193        /* put the final character we read back on the input */
1194        if(isNotEOF && chLeft > 0) {
1195            u_fungetc(c, input);
1196        }
1197    }
1198
1199    uset_close(scanset);
1200
1201    /* if we didn't match at least 1 character, fail */
1202    if(!readCharacter)
1203        return -1;
1204    /* otherwise, add the terminator */
1205    else if (!info->fSkipArg) {
1206        *alias = 0x00;
1207    }
1208
1209    /* we converted 1 arg */
1210    *argConverted = !info->fSkipArg;
1211    return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
1212}
1213
1214/* Use US-ASCII characters only for formatting. Most codepages have
1215 characters 20-7F from Unicode. Using any other codepage specific
1216 characters will make it very difficult to format the string on
1217 non-Unicode machines */
1218static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
1219/* 0x20 */
1220    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1221    UFMT_EMPTY,         UFMT_SIMPLE_PERCENT,UFMT_EMPTY,         UFMT_EMPTY,
1222    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1223    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1224
1225/* 0x30 */
1226    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1227    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1228    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1229    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1230
1231/* 0x40 */
1232    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR,
1233    UFMT_EMPTY,         UFMT_SCIENTIFIC,    UFMT_EMPTY,         UFMT_SCIDBL,
1234#ifdef U_USE_OBSOLETE_IO_FORMATTING
1235    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_UCHAR/*deprecated*/,
1236#else
1237    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1238#endif
1239    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1240
1241/* 0x50 */
1242    UFMT_PERCENT,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_USTRING,
1243#ifdef U_USE_OBSOLETE_IO_FORMATTING
1244    UFMT_EMPTY,         UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT,      UFMT_EMPTY,
1245#else
1246    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SPELLOUT,      UFMT_EMPTY,
1247#endif
1248    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_SCANSET,
1249    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1250
1251/* 0x60 */
1252    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_CHAR,
1253    UFMT_INT,           UFMT_SCIENTIFIC,    UFMT_DOUBLE,        UFMT_SCIDBL,
1254    UFMT_EMPTY,         UFMT_INT,           UFMT_EMPTY,         UFMT_EMPTY,
1255    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_COUNT,         UFMT_OCTAL,
1256
1257/* 0x70 */
1258    UFMT_POINTER,       UFMT_EMPTY,         UFMT_EMPTY,         UFMT_STRING,
1259    UFMT_EMPTY,         UFMT_UINT,          UFMT_EMPTY,         UFMT_EMPTY,
1260    UFMT_HEX,           UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1261    UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,         UFMT_EMPTY,
1262};
1263
1264U_CFUNC int32_t
1265u_scanf_parse(UFILE     *f,
1266            const UChar *patternSpecification,
1267            va_list     ap)
1268{
1269    const UChar     *alias;
1270    int32_t         count, converted, argConsumed, cpConsumed;
1271    uint16_t        handlerNum;
1272
1273    ufmt_args       args;
1274    u_scanf_spec    spec;
1275    ufmt_type_info  info;
1276    u_scanf_handler handler;
1277
1278    /* alias the pattern */
1279    alias = patternSpecification;
1280
1281    /* haven't converted anything yet */
1282    argConsumed = 0;
1283    converted = 0;
1284    cpConsumed = 0;
1285
1286    /* iterate through the pattern */
1287    for(;;) {
1288
1289        /* match any characters up to the next '%' */
1290        while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
1291            alias++;
1292        }
1293
1294        /* if we aren't at a '%', or if we're at end of string, break*/
1295        if(*alias != UP_PERCENT || *alias == 0x0000)
1296            break;
1297
1298        /* parse the specifier */
1299        count = u_scanf_parse_spec(alias, &spec);
1300
1301        /* update the pointer in pattern */
1302        alias += count;
1303
1304        handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
1305        if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
1306            /* skip the argument, if necessary */
1307            /* query the info function for argument information */
1308            info = g_u_scanf_infos[ handlerNum ].info;
1309            if (info != ufmt_count && u_feof(f)) {
1310                break;
1311            }
1312            else if(spec.fInfo.fSkipArg) {
1313                args.ptrValue = NULL;
1314            }
1315            else {
1316                switch(info) {
1317                case ufmt_count:
1318                    /* set the spec's width to the # of items converted */
1319                    spec.fInfo.fWidth = cpConsumed;
1320                    /* fall through to next case */
1321                case ufmt_char:
1322                case ufmt_uchar:
1323                case ufmt_int:
1324                case ufmt_string:
1325                case ufmt_ustring:
1326                case ufmt_pointer:
1327                case ufmt_float:
1328                case ufmt_double:
1329                    args.ptrValue = va_arg(ap, void*);
1330                    break;
1331
1332                default:
1333                    /* else args is ignored */
1334                    args.ptrValue = NULL;
1335                    break;
1336                }
1337            }
1338
1339            /* call the handler function */
1340            handler = g_u_scanf_infos[ handlerNum ].handler;
1341            if(handler != 0) {
1342
1343                /* reset count to 1 so that += for alias works. */
1344                count = 1;
1345
1346                cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
1347
1348                /* if the handler encountered an error condition, break */
1349                if(argConsumed < 0) {
1350                    converted = -1;
1351                    break;
1352                }
1353
1354                /* add to the # of items converted */
1355                converted += argConsumed;
1356
1357                /* update the pointer in pattern */
1358                alias += count-1;
1359            }
1360            /* else do nothing */
1361        }
1362        /* else do nothing */
1363
1364        /* just ignore unknown tags */
1365    }
1366
1367    /* return # of items converted */
1368    return converted;
1369}
1370
1371#endif /* #if !UCONFIG_NO_FORMATTING */
1372