1/*
2*******************************************************************************
3*
4*   Copyright (C) 1998-2008, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*
9* File parse.c
10*
11* Modification History:
12*
13*   Date          Name          Description
14*   05/26/99     stephen       Creation.
15*   02/25/00     weiv          Overhaul to write udata
16*   5/10/01      Ram           removed ustdio dependency
17*   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten
18*******************************************************************************
19*/
20
21#include "ucol_imp.h"
22#include "parse.h"
23#include "errmsg.h"
24#include "uhash.h"
25#include "cmemory.h"
26#include "cstring.h"
27#include "uinvchar.h"
28#include "read.h"
29#include "ustr.h"
30#include "reslist.h"
31#include "rbt_pars.h"
32#include "unicode/ustring.h"
33#include "unicode/putil.h"
34#include <stdio.h>
35
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Character codes compared against input characters by the readers below
   (parseUCARules uses most of them). */
#define CR               0x000D  /* carriage return */
#define LF               0x000A  /* line feed */
#define SPACE            0x0020  /* space */
#define TAB              0x0009  /* horizontal tab */
#define ESCAPE           0x005C  /* backslash */
#define HASH             0x0023  /* number sign, starts a comment in rule files */
#define QUOTE            0x0027  /* apostrophe, toggles quoted mode in rule files */
#define ZERO             0x0030  /* digit zero */
#define STARTCOMMAND     0x005B  /* left square bracket, opens a rule command */
#define ENDCOMMAND       0x005D  /* right square bracket, closes a rule command */
#define OPENSQBRACKET    0x005B  /* same value as STARTCOMMAND */
#define CLOSESQBRACKET   0x005D  /* same value as ENDCOMMAND */
51
/* Function type with the same signature as the per-type parsers in this file
   (parseString, parseAlias, parseTable, parseUCARules, ...): the resource's tag,
   the line on which it started, an optional comment and the in/out error code;
   the return value is the parsed resource. */
typedef struct SResource *
ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
54
/* One slot of the token lookahead buffer; all four fields are filled in by a
   single getNextToken call. */
struct Lookahead
{
     enum   ETokenType type;      /* kind of token held in this slot */
     struct UString    value;     /* token text */
     struct UString    comment;   /* comment text delivered together with the token */
     uint32_t          line;      /* line number reported for the token */
};
62
/* keep in sync with token defines in read.h */
/* Printable token names for diagnostics; indexed directly by enum ETokenType
   values (see expect and the "unexpected token" messages). */
const char *tokenNames[TOK_TOKEN_COUNT] =
{
     "string",             /* A string token, such as "MonthNames" */
     "'{'",                 /* An opening brace character */
     "'}'",                 /* A closing brace character */
     "','",                 /* A comma */
     "':'",                 /* A colon */

     "<end of file>",     /* End of the file has been reached successfully */
     "<end of line>"      /* NOTE(review): confirm which token type in read.h this last entry names */
};
75
/* Just to store "TRUE" */
/* NUL-terminated UChar string "TRUE"; addCollation compares the "Override" value against it. */
static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};

/* Circular token buffer: MAX_LOOKAHEAD lookahead slots plus one more slot. */
static struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
/* Index into lookahead[] of the token that the next getToken call returns. */
static uint32_t          lookaheadPosition;
/* Input that getNextToken reads from; set by initLookahead. */
static UCHARBUF         *buffer;

/* Bundle handed as first argument to every *_open call in this file. */
static struct SRBRoot *bundle;
/* Directory prefixed to the file names read by parseUCARules and parseTransliterator;
   inputdirLength is used to index the last character of inputdir. */
static const char     *inputdir;
static uint32_t        inputdirLength;
/* Directory prefixed to the file name checked by parseDependency;
   outputdirLength is used to index the last character of outputdir. */
static const char     *outputdir;
static uint32_t        outputdirLength;

/* When TRUE, addCollation builds a "%%CollationBin" member from the "Sequence" rules. */
static UBool gMakeBinaryCollation = TRUE;
/* When TRUE, parseUCARules returns res_none() and addCollation does not add "Sequence". */
static UBool gOmitCollationRules  = FALSE;

/* Forward declaration: the table and collation parsers below call parseResource
   for each member (its definition is outside this excerpt). */
static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status);
93
94/* The nature of the lookahead buffer:
95   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
96   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
97   When getToken is called, the current pointer is moved to the next slot and the
98   old slot is filled with the next token from the reader by calling getNextToken.
99   The token values are stored in the slot, which means that token values don't
100   survive a call to getToken, ie.
101
102   UString *value;
103
104   getToken(&value, NULL, status);
105   getToken(NULL,   NULL, status);       bad - value is now a different string
106*/
107static void
108initLookahead(UCHARBUF *buf, UErrorCode *status)
109{
110    static uint32_t initTypeStrings = 0;
111    uint32_t i;
112
113    if (!initTypeStrings)
114    {
115        initTypeStrings = 1;
116    }
117
118    lookaheadPosition   = 0;
119    buffer              = buf;
120
121    resetLineNumber();
122
123    for (i = 0; i < MAX_LOOKAHEAD; i++)
124    {
125        lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
126        if (U_FAILURE(*status))
127        {
128            return;
129        }
130    }
131
132    *status = U_ZERO_ERROR;
133}
134
135static void
136cleanupLookahead()
137{
138    uint32_t i;
139    for (i = 0; i < MAX_LOOKAHEAD; i++)
140    {
141        ustr_deinit(&lookahead[i].value);
142        ustr_deinit(&lookahead[i].comment);
143    }
144
145}
146
147static enum ETokenType
148getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
149{
150    enum ETokenType result;
151    uint32_t          i;
152
153    result = lookahead[lookaheadPosition].type;
154
155    if (tokenValue != NULL)
156    {
157        *tokenValue = &lookahead[lookaheadPosition].value;
158    }
159
160    if (linenumber != NULL)
161    {
162        *linenumber = lookahead[lookaheadPosition].line;
163    }
164
165    if (comment != NULL)
166    {
167        ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
168    }
169
170    i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
171    lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
172    ustr_setlen(&lookahead[i].comment, 0, status);
173    ustr_setlen(&lookahead[i].value, 0, status);
174    lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
175
176    /* printf("getToken, returning %s\n", tokenNames[result]); */
177
178    return result;
179}
180
181static enum ETokenType
182peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
183{
184    uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
185
186    if (U_FAILURE(*status))
187    {
188        return TOK_ERROR;
189    }
190
191    if (lookaheadCount >= MAX_LOOKAHEAD)
192    {
193        *status = U_INTERNAL_PROGRAM_ERROR;
194        return TOK_ERROR;
195    }
196
197    if (tokenValue != NULL)
198    {
199        *tokenValue = &lookahead[i].value;
200    }
201
202    if (linenumber != NULL)
203    {
204        *linenumber = lookahead[i].line;
205    }
206
207    if(comment != NULL){
208        ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
209    }
210
211    return lookahead[i].type;
212}
213
214static void
215expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
216{
217    uint32_t        line;
218
219    enum ETokenType token = getToken(tokenValue, comment, &line, status);
220
221    if (linenumber != NULL)
222    {
223        *linenumber = line;
224    }
225
226    if (U_FAILURE(*status))
227    {
228        return;
229    }
230
231    if (token != expectedToken)
232    {
233        *status = U_INVALID_FORMAT_ERROR;
234        error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
235    }
236    else
237    {
238        *status = U_ZERO_ERROR;
239    }
240}
241
242static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status)
243{
244    struct UString *tokenValue;
245    char           *result;
246    uint32_t        count;
247
248    expect(TOK_STRING, &tokenValue, comment, line, status);
249
250    if (U_FAILURE(*status))
251    {
252        return NULL;
253    }
254
255    count = u_strlen(tokenValue->fChars);
256    if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
257        *status = U_INVALID_FORMAT_ERROR;
258        error(*line, "invariant characters required for table keys, binary data, etc.");
259        return NULL;
260    }
261
262    result = uprv_malloc(count+1);
263
264    if (result == NULL)
265    {
266        *status = U_MEMORY_ALLOCATION_ERROR;
267        return NULL;
268    }
269
270    u_UCharsToChars(tokenValue->fChars, result, count+1);
271    return result;
272}
273
274static struct SResource *
275parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
276{
277    struct SResource *result = NULL;
278    struct UString   *tokenValue;
279    FileStream       *file          = NULL;
280    char              filename[256] = { '\0' };
281    char              cs[128]       = { '\0' };
282    uint32_t          line;
283    int               len=0;
284    UBool quoted = FALSE;
285    UCHARBUF *ucbuf=NULL;
286    UChar32   c     = 0;
287    const char* cp  = NULL;
288    UChar *pTarget     = NULL;
289    UChar *target      = NULL;
290    UChar *targetLimit = NULL;
291    int32_t size = 0;
292
293    expect(TOK_STRING, &tokenValue, NULL, &line, status);
294
295    if(isVerbose()){
296        printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
297    }
298
299    if (U_FAILURE(*status))
300    {
301        return NULL;
302    }
303    /* make the filename including the directory */
304    if (inputdir != NULL)
305    {
306        uprv_strcat(filename, inputdir);
307
308        if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
309        {
310            uprv_strcat(filename, U_FILE_SEP_STRING);
311        }
312    }
313
314    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
315
316    expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
317
318    if (U_FAILURE(*status))
319    {
320        return NULL;
321    }
322    uprv_strcat(filename, cs);
323
324    if(gOmitCollationRules) {
325        return res_none();
326    }
327
328    ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
329
330    if (U_FAILURE(*status)) {
331        error(line, "An error occured while opening the input file %s\n", filename);
332        return NULL;
333    }
334
335    /* We allocate more space than actually required
336    * since the actual size needed for storing UChars
337    * is not known in UTF-8 byte stream
338    */
339    size        = ucbuf_size(ucbuf) + 1;
340    pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
341    uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
342    target      = pTarget;
343    targetLimit = pTarget+size;
344
345    /* read the rules into the buffer */
346    while (target < targetLimit)
347    {
348        c = ucbuf_getc(ucbuf, status);
349        if(c == QUOTE) {
350            quoted = (UBool)!quoted;
351        }
352        /* weiv (06/26/2002): adding the following:
353         * - preserving spaces in commands [...]
354         * - # comments until the end of line
355         */
356        if (c == STARTCOMMAND && !quoted)
357        {
358            /* preserve commands
359             * closing bracket will be handled by the
360             * append at the end of the loop
361             */
362            while(c != ENDCOMMAND) {
363                U_APPEND_CHAR32(c, target,len);
364                c = ucbuf_getc(ucbuf, status);
365            }
366        }
367        else if (c == HASH && !quoted) {
368            /* skip comments */
369            while(c != CR && c != LF) {
370                c = ucbuf_getc(ucbuf, status);
371            }
372            continue;
373        }
374        else if (c == ESCAPE)
375        {
376            c = unescape(ucbuf, status);
377
378            if (c == U_ERR)
379            {
380                uprv_free(pTarget);
381                T_FileStream_close(file);
382                return NULL;
383            }
384        }
385        else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
386        {
387            /* ignore spaces carriage returns
388            * and line feed unless in the form \uXXXX
389            */
390            continue;
391        }
392
393        /* Append UChar * after dissembling if c > 0xffff*/
394        if (c != U_EOF)
395        {
396            U_APPEND_CHAR32(c, target,len);
397        }
398        else
399        {
400            break;
401        }
402    }
403
404    /* terminate the string */
405    if(target < targetLimit){
406        *target = 0x0000;
407    }
408
409    result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
410
411
412    ucbuf_close(ucbuf);
413    uprv_free(pTarget);
414    T_FileStream_close(file);
415
416    return result;
417}
418
419static struct SResource *
420parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
421{
422    struct SResource *result = NULL;
423    struct UString   *tokenValue;
424    FileStream       *file          = NULL;
425    char              filename[256] = { '\0' };
426    char              cs[128]       = { '\0' };
427    uint32_t          line;
428    UCHARBUF *ucbuf=NULL;
429    const char* cp  = NULL;
430    UChar *pTarget     = NULL;
431    const UChar *pSource     = NULL;
432    int32_t size = 0;
433
434    expect(TOK_STRING, &tokenValue, NULL, &line, status);
435
436    if(isVerbose()){
437        printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
438    }
439
440    if (U_FAILURE(*status))
441    {
442        return NULL;
443    }
444    /* make the filename including the directory */
445    if (inputdir != NULL)
446    {
447        uprv_strcat(filename, inputdir);
448
449        if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
450        {
451            uprv_strcat(filename, U_FILE_SEP_STRING);
452        }
453    }
454
455    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
456
457    expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
458
459    if (U_FAILURE(*status))
460    {
461        return NULL;
462    }
463    uprv_strcat(filename, cs);
464
465
466    ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
467
468    if (U_FAILURE(*status)) {
469        error(line, "An error occured while opening the input file %s\n", filename);
470        return NULL;
471    }
472
473    /* We allocate more space than actually required
474    * since the actual size needed for storing UChars
475    * is not known in UTF-8 byte stream
476    */
477    pSource = ucbuf_getBuffer(ucbuf, &size, status);
478    pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
479    uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
480
481#if !UCONFIG_NO_TRANSLITERATION
482    size = utrans_stripRules(pSource, size, pTarget, status);
483#else
484    size = 0;
485    fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
486#endif
487    result = string_open(bundle, tag, pTarget, size, NULL, status);
488
489    ucbuf_close(ucbuf);
490    uprv_free(pTarget);
491    T_FileStream_close(file);
492
493    return result;
494}
495static struct SResource* dependencyArray = NULL;
496
497static struct SResource *
498parseDependency(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
499{
500    struct SResource *result = NULL;
501    struct SResource *elem = NULL;
502    struct UString   *tokenValue;
503    uint32_t          line;
504    char              filename[256] = { '\0' };
505    char              cs[128]       = { '\0' };
506
507    expect(TOK_STRING, &tokenValue, NULL, &line, status);
508
509    if(isVerbose()){
510        printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
511    }
512
513    if (U_FAILURE(*status))
514    {
515        return NULL;
516    }
517    /* make the filename including the directory */
518    if (outputdir != NULL)
519    {
520        uprv_strcat(filename, outputdir);
521
522        if (outputdir[outputdirLength - 1] != U_FILE_SEP_CHAR)
523        {
524            uprv_strcat(filename, U_FILE_SEP_STRING);
525        }
526    }
527
528    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
529
530    if (U_FAILURE(*status))
531    {
532        return NULL;
533    }
534    uprv_strcat(filename, cs);
535    if(!T_FileStream_file_exists(filename)){
536        if(isStrict()){
537            error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
538        }else{
539            warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
540        }
541    }
542    if(dependencyArray==NULL){
543        dependencyArray = array_open(bundle, "%%DEPENDENCY", NULL, status);
544    }
545    if(tag!=NULL){
546        result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
547    }
548    elem = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
549
550    array_add(dependencyArray, elem, status);
551
552    if (U_FAILURE(*status))
553    {
554        return NULL;
555    }
556    expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
557    return result;
558}
559static struct SResource *
560parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
561{
562    struct UString   *tokenValue;
563    struct SResource *result = NULL;
564
565/*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
566    {
567        return parseUCARules(tag, startline, status);
568    }*/
569    if(isVerbose()){
570        printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
571    }
572    expect(TOK_STRING, &tokenValue, NULL, NULL, status);
573
574    if (U_SUCCESS(*status))
575    {
576        /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
577        doesn't survive expect either) */
578
579        result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
580        if(U_SUCCESS(*status) && result) {
581            expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
582
583            if (U_FAILURE(*status))
584            {
585                res_close(result);
586                return NULL;
587            }
588        }
589    }
590
591    return result;
592}
593
594static struct SResource *
595parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
596{
597    struct UString   *tokenValue;
598    struct SResource *result  = NULL;
599
600    expect(TOK_STRING, &tokenValue, NULL, NULL, status);
601
602    if(isVerbose()){
603        printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
604    }
605
606    if (U_SUCCESS(*status))
607    {
608        /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
609        doesn't survive expect either) */
610
611        result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
612
613        expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
614
615        if (U_FAILURE(*status))
616        {
617            res_close(result);
618            return NULL;
619        }
620    }
621
622    return result;
623}
624
625static struct SResource *
626addCollation(struct SResource  *result, uint32_t startline, UErrorCode *status)
627{
628    struct SResource  *member = NULL;
629    struct UString    *tokenValue;
630    struct UString     comment;
631    enum   ETokenType  token;
632    char               subtag[1024];
633    UVersionInfo       version;
634    UBool              override = FALSE;
635    uint32_t           line;
636    /* '{' . (name resource)* '}' */
637    version[0]=0; version[1]=0; version[2]=0; version[3]=0;
638
639    for (;;)
640    {
641        ustr_init(&comment);
642        token = getToken(&tokenValue, &comment, &line, status);
643
644        if (token == TOK_CLOSE_BRACE)
645        {
646            return result;
647        }
648
649        if (token != TOK_STRING)
650        {
651            res_close(result);
652            *status = U_INVALID_FORMAT_ERROR;
653
654            if (token == TOK_EOF)
655            {
656                error(startline, "unterminated table");
657            }
658            else
659            {
660                error(line, "Unexpected token %s", tokenNames[token]);
661            }
662
663            return NULL;
664        }
665
666        u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
667
668        if (U_FAILURE(*status))
669        {
670            res_close(result);
671            return NULL;
672        }
673
674        member = parseResource(subtag, NULL, status);
675
676        if (U_FAILURE(*status))
677        {
678            res_close(result);
679            return NULL;
680        }
681
682        if (uprv_strcmp(subtag, "Version") == 0)
683        {
684            char     ver[40];
685            int32_t length = member->u.fString.fLength;
686
687            if (length >= (int32_t) sizeof(ver))
688            {
689                length = (int32_t) sizeof(ver) - 1;
690            }
691
692            u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
693            u_versionFromString(version, ver);
694
695            table_add(result, member, line, status);
696
697        }
698        else if (uprv_strcmp(subtag, "Override") == 0)
699        {
700            override = FALSE;
701
702            if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
703            {
704                override = TRUE;
705            }
706            table_add(result, member, line, status);
707
708        }
709        else if(uprv_strcmp(subtag, "%%CollationBin")==0)
710        {
711            /* discard duplicate %%CollationBin if any*/
712        }
713        else if (uprv_strcmp(subtag, "Sequence") == 0)
714        {
715#if UCONFIG_NO_COLLATION
716            warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
717#else
718            /* in order to achieve smaller data files, we can direct genrb */
719            /* to omit collation rules */
720            if(!gOmitCollationRules) {
721              /* first we add the "Sequence", so that we always have rules */
722              table_add(result, member, line, status);
723            }
724            if(gMakeBinaryCollation) {
725                UErrorCode intStatus = U_ZERO_ERROR;
726
727                /* do the collation elements */
728                int32_t     len   = 0;
729                uint8_t   *data  = NULL;
730                UCollator *coll  = NULL;
731                UParseError parseError;
732                /* add sequence */
733                /*table_add(result, member, line, status);*/
734
735                coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
736                    UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
737
738                if (U_SUCCESS(intStatus) && coll != NULL)
739                {
740                    len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
741                    data = (uint8_t *)uprv_malloc(len);
742                    intStatus = U_ZERO_ERROR;
743                    len = ucol_cloneBinary(coll, data, len, &intStatus);
744                    /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
745
746                    /* tailoring rules version */
747                    /* This is wrong! */
748                    /*coll->dataInfo.dataVersion[1] = version[0];*/
749                    /* Copy tailoring version. Builder version already */
750                    /* set in ucol_openRules */
751                    ((UCATableHeader *)data)->version[1] = version[0];
752                    ((UCATableHeader *)data)->version[2] = version[1];
753                    ((UCATableHeader *)data)->version[3] = version[2];
754
755                    if (U_SUCCESS(intStatus) && data != NULL)
756                    {
757                        member = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
758                        /*table_add(bundle->fRoot, member, line, status);*/
759                        table_add(result, member, line, status);
760                        uprv_free(data);
761                    }
762                    else
763                    {
764                        warning(line, "could not obtain rules from collator");
765                        if(isStrict()){
766                            *status = U_INVALID_FORMAT_ERROR;
767                            return NULL;
768                        }
769                    }
770
771                    ucol_close(coll);
772                }
773                else
774                {
775                    warning(line, "%%Collation could not be constructed from CollationElements - check context!");
776                    if(isStrict()){
777                        *status = intStatus;
778                        return NULL;
779                    }
780                }
781            } else {
782                if(isVerbose()) {
783                    printf("Not building Collation binary\n");
784                }
785            }
786#endif
787        }
788
789        /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
790
791        /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
792
793        if (U_FAILURE(*status))
794        {
795            res_close(result);
796            return NULL;
797        }
798    }
799
800    /* not reached */
801    /* A compiler warning will appear if all paths don't contain a return statement. */
802/*    *status = U_INTERNAL_PROGRAM_ERROR;
803    return NULL;*/
804}
805
806static struct SResource *
807parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
808{
809    struct SResource  *result = NULL;
810    struct SResource  *member = NULL;
811    struct SResource  *collationRes = NULL;
812    struct UString    *tokenValue;
813    struct UString     comment;
814    enum   ETokenType  token;
815    char               subtag[1024], typeKeyword[1024];
816    uint32_t           line;
817
818    result = table_open(bundle, tag, NULL, status);
819
820    if (result == NULL || U_FAILURE(*status))
821    {
822        return NULL;
823    }
824    if(isVerbose()){
825        printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
826    }
827    if(!newCollation) {
828        return addCollation(result, startline, status);
829    }
830    else {
831        for(;;) {
832            ustr_init(&comment);
833            token = getToken(&tokenValue, &comment, &line, status);
834
835            if (token == TOK_CLOSE_BRACE)
836            {
837                return result;
838            }
839
840            if (token != TOK_STRING)
841            {
842                res_close(result);
843                *status = U_INVALID_FORMAT_ERROR;
844
845                if (token == TOK_EOF)
846                {
847                    error(startline, "unterminated table");
848                }
849                else
850                {
851                    error(line, "Unexpected token %s", tokenNames[token]);
852                }
853
854                return NULL;
855            }
856
857            u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
858
859            if (U_FAILURE(*status))
860            {
861                res_close(result);
862                return NULL;
863            }
864
865            if (uprv_strcmp(subtag, "default") == 0)
866            {
867                member = parseResource(subtag, NULL, status);
868
869                if (U_FAILURE(*status))
870                {
871                    res_close(result);
872                    return NULL;
873                }
874
875                table_add(result, member, line, status);
876            }
877            else
878            {
879                token = peekToken(0, &tokenValue, &line, &comment, status);
880                /* this probably needs to be refactored or recursively use the parser */
881                /* first we assume that our collation table won't have the explicit type */
882                /* then, we cannot handle aliases */
883                if(token == TOK_OPEN_BRACE) {
884                    token = getToken(&tokenValue, &comment, &line, status);
885                    collationRes = table_open(bundle, subtag, NULL, status);
886                    table_add(result, addCollation(collationRes, startline, status), startline, status);
887                } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
888                    /* we could have a table too */
889                    token = peekToken(1, &tokenValue, &line, &comment, status);
890                    u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
891                    if(uprv_strcmp(typeKeyword, "alias") == 0) {
892                        member = parseResource(subtag, NULL, status);
893
894                        if (U_FAILURE(*status))
895                        {
896                            res_close(result);
897                            return NULL;
898                        }
899
900                        table_add(result, member, line, status);
901                    } else {
902                        res_close(result);
903                        *status = U_INVALID_FORMAT_ERROR;
904                        return NULL;
905                    }
906                } else {
907                    res_close(result);
908                    *status = U_INVALID_FORMAT_ERROR;
909                    return NULL;
910                }
911            }
912
913            /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
914
915            /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
916
917            if (U_FAILURE(*status))
918            {
919                res_close(result);
920                return NULL;
921            }
922        }
923    }
924}
925
926/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
927   if this weren't special-cased, wouldn't be set until the entire file had been processed. */
928static struct SResource *
929realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
930{
931    struct SResource  *member = NULL;
932    struct UString    *tokenValue=NULL;
933    struct UString    comment;
934    enum   ETokenType token;
935    char              subtag[1024];
936    uint32_t          line;
937    UBool             readToken = FALSE;
938
939    /* '{' . (name resource)* '}' */
940    if(isVerbose()){
941        printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
942    }
943    for (;;)
944    {
945        ustr_init(&comment);
946        token = getToken(&tokenValue, &comment, &line, status);
947
948        if (token == TOK_CLOSE_BRACE)
949        {
950            if (!readToken) {
951                warning(startline, "Encountered empty table");
952            }
953            return table;
954        }
955
956        if (token != TOK_STRING)
957        {
958            *status = U_INVALID_FORMAT_ERROR;
959
960            if (token == TOK_EOF)
961            {
962                error(startline, "unterminated table");
963            }
964            else
965            {
966                error(line, "unexpected token %s", tokenNames[token]);
967            }
968
969            return NULL;
970        }
971
972        if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
973            u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
974        } else {
975            *status = U_INVALID_FORMAT_ERROR;
976            error(line, "invariant characters required for table keys");
977            return NULL;
978        }
979
980        if (U_FAILURE(*status))
981        {
982            error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
983            return NULL;
984        }
985
986        member = parseResource(subtag, &comment, status);
987
988        if (member == NULL || U_FAILURE(*status))
989        {
990            error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
991            return NULL;
992        }
993
994        table_add(table, member, line, status);
995
996        if (U_FAILURE(*status))
997        {
998            error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
999            return NULL;
1000        }
1001        readToken = TRUE;
1002        ustr_deinit(&comment);
1003    }
1004
1005    /* not reached */
1006    /* A compiler warning will appear if all paths don't contain a return statement. */
1007/*     *status = U_INTERNAL_PROGRAM_ERROR;
1008     return NULL;*/
1009}
1010
1011static struct SResource *
1012parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1013{
1014    struct SResource *result;
1015
1016    if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1017    {
1018        return parseCollationElements(tag, startline, FALSE, status);
1019    }
1020    if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1021    {
1022        return parseCollationElements(tag, startline, TRUE, status);
1023    }
1024    if(isVerbose()){
1025        printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1026    }
1027
1028    result = table_open(bundle, tag, comment, status);
1029
1030    if (result == NULL || U_FAILURE(*status))
1031    {
1032        return NULL;
1033    }
1034
1035    return realParseTable(result, tag, startline,  status);
1036}
1037
1038static struct SResource *
1039parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1040{
1041    struct SResource  *result = NULL;
1042    struct SResource  *member = NULL;
1043    struct UString    *tokenValue;
1044    struct UString    memberComments;
1045    enum   ETokenType token;
1046    UBool             readToken = FALSE;
1047
1048    result = array_open(bundle, tag, comment, status);
1049
1050    if (result == NULL || U_FAILURE(*status))
1051    {
1052        return NULL;
1053    }
1054    if(isVerbose()){
1055        printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1056    }
1057
1058    ustr_init(&memberComments);
1059
1060    /* '{' . resource [','] '}' */
1061    for (;;)
1062    {
1063        /* reset length */
1064        ustr_setlen(&memberComments, 0, status);
1065
1066        /* check for end of array, but don't consume next token unless it really is the end */
1067        token = peekToken(0, &tokenValue, NULL, &memberComments, status);
1068
1069
1070        if (token == TOK_CLOSE_BRACE)
1071        {
1072            getToken(NULL, NULL, NULL, status);
1073            if (!readToken) {
1074                warning(startline, "Encountered empty array");
1075            }
1076            break;
1077        }
1078
1079        if (token == TOK_EOF)
1080        {
1081            res_close(result);
1082            *status = U_INVALID_FORMAT_ERROR;
1083            error(startline, "unterminated array");
1084            return NULL;
1085        }
1086
1087        /* string arrays are a special case */
1088        if (token == TOK_STRING)
1089        {
1090            getToken(&tokenValue, &memberComments, NULL, status);
1091            member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1092        }
1093        else
1094        {
1095            member = parseResource(NULL, &memberComments, status);
1096        }
1097
1098        if (member == NULL || U_FAILURE(*status))
1099        {
1100            res_close(result);
1101            return NULL;
1102        }
1103
1104        array_add(result, member, status);
1105
1106        if (U_FAILURE(*status))
1107        {
1108            res_close(result);
1109            return NULL;
1110        }
1111
1112        /* eat optional comma if present */
1113        token = peekToken(0, NULL, NULL, NULL, status);
1114
1115        if (token == TOK_COMMA)
1116        {
1117            getToken(NULL, NULL, NULL, status);
1118        }
1119
1120        if (U_FAILURE(*status))
1121        {
1122            res_close(result);
1123            return NULL;
1124        }
1125        readToken = TRUE;
1126    }
1127
1128    ustr_deinit(&memberComments);
1129    return result;
1130}
1131
1132static struct SResource *
1133parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1134{
1135    struct SResource  *result = NULL;
1136    enum   ETokenType  token;
1137    char              *string;
1138    int32_t            value;
1139    UBool              readToken = FALSE;
1140    char              *stopstring;
1141    uint32_t           len;
1142    struct UString     memberComments;
1143
1144    result = intvector_open(bundle, tag, comment, status);
1145
1146    if (result == NULL || U_FAILURE(*status))
1147    {
1148        return NULL;
1149    }
1150
1151    if(isVerbose()){
1152        printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1153    }
1154    ustr_init(&memberComments);
1155    /* '{' . string [','] '}' */
1156    for (;;)
1157    {
1158        ustr_setlen(&memberComments, 0, status);
1159
1160        /* check for end of array, but don't consume next token unless it really is the end */
1161        token = peekToken(0, NULL, NULL,&memberComments, status);
1162
1163        if (token == TOK_CLOSE_BRACE)
1164        {
1165            /* it's the end, consume the close brace */
1166            getToken(NULL, NULL, NULL, status);
1167            if (!readToken) {
1168                warning(startline, "Encountered empty int vector");
1169            }
1170            ustr_deinit(&memberComments);
1171            return result;
1172        }
1173
1174        string = getInvariantString(NULL, NULL, status);
1175
1176        if (U_FAILURE(*status))
1177        {
1178            res_close(result);
1179            return NULL;
1180        }
1181
1182        /* For handling illegal char in the Intvector */
1183        value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1184        len=(uint32_t)(stopstring-string);
1185
1186        if(len==uprv_strlen(string))
1187        {
1188            intvector_add(result, value, status);
1189            uprv_free(string);
1190            token = peekToken(0, NULL, NULL, NULL, status);
1191        }
1192        else
1193        {
1194            uprv_free(string);
1195            *status=U_INVALID_CHAR_FOUND;
1196        }
1197
1198        if (U_FAILURE(*status))
1199        {
1200            res_close(result);
1201            return NULL;
1202        }
1203
1204        /* the comma is optional (even though it is required to prevent the reader from concatenating
1205        consecutive entries) so that a missing comma on the last entry isn't an error */
1206        if (token == TOK_COMMA)
1207        {
1208            getToken(NULL, NULL, NULL, status);
1209        }
1210        readToken = TRUE;
1211    }
1212
1213    /* not reached */
1214    /* A compiler warning will appear if all paths don't contain a return statement. */
1215/*    intvector_close(result, status);
1216    *status = U_INTERNAL_PROGRAM_ERROR;
1217    return NULL;*/
1218}
1219
1220static struct SResource *
1221parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1222{
1223    struct SResource *result = NULL;
1224    uint8_t          *value;
1225    char             *string;
1226    char              toConv[3] = {'\0', '\0', '\0'};
1227    uint32_t          count;
1228    uint32_t          i;
1229    uint32_t          line;
1230    char             *stopstring;
1231    uint32_t          len;
1232
1233    string = getInvariantString(&line, NULL, status);
1234
1235    if (string == NULL || U_FAILURE(*status))
1236    {
1237        return NULL;
1238    }
1239
1240    expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1241
1242    if (U_FAILURE(*status))
1243    {
1244        uprv_free(string);
1245        return NULL;
1246    }
1247
1248    if(isVerbose()){
1249        printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1250    }
1251
1252    count = (uint32_t)uprv_strlen(string);
1253    if (count > 0){
1254        if((count % 2)==0){
1255            value = uprv_malloc(sizeof(uint8_t) * count);
1256
1257            if (value == NULL)
1258            {
1259                uprv_free(string);
1260                *status = U_MEMORY_ALLOCATION_ERROR;
1261                return NULL;
1262            }
1263
1264            for (i = 0; i < count; i += 2)
1265            {
1266                toConv[0] = string[i];
1267                toConv[1] = string[i + 1];
1268
1269                value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1270                len=(uint32_t)(stopstring-toConv);
1271
1272                if(len!=uprv_strlen(toConv))
1273                {
1274                    uprv_free(string);
1275                    *status=U_INVALID_CHAR_FOUND;
1276                    return NULL;
1277                }
1278            }
1279
1280            result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status);
1281
1282            uprv_free(value);
1283        }
1284        else
1285        {
1286            *status = U_INVALID_CHAR_FOUND;
1287            uprv_free(string);
1288            error(line, "Encountered invalid binary string");
1289            return NULL;
1290        }
1291    }
1292    else
1293    {
1294        result = bin_open(bundle, tag, 0, NULL, "",comment,status);
1295        warning(startline, "Encountered empty binary tag");
1296    }
1297    uprv_free(string);
1298
1299    return result;
1300}
1301
1302static struct SResource *
1303parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1304{
1305    struct SResource *result = NULL;
1306    int32_t           value;
1307    char             *string;
1308    char             *stopstring;
1309    uint32_t          len;
1310
1311    string = getInvariantString(NULL, NULL, status);
1312
1313    if (string == NULL || U_FAILURE(*status))
1314    {
1315        return NULL;
1316    }
1317
1318    expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1319
1320    if (U_FAILURE(*status))
1321    {
1322        uprv_free(string);
1323        return NULL;
1324    }
1325
1326    if(isVerbose()){
1327        printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1328    }
1329
1330    if (uprv_strlen(string) <= 0)
1331    {
1332        warning(startline, "Encountered empty integer. Default value is 0.");
1333    }
1334
1335    /* Allow integer support for hexdecimal, octal digit and decimal*/
1336    /* and handle illegal char in the integer*/
1337    value = uprv_strtoul(string, &stopstring, 0);
1338    len=(uint32_t)(stopstring-string);
1339    if(len==uprv_strlen(string))
1340    {
1341        result = int_open(bundle, tag, value, comment, status);
1342    }
1343    else
1344    {
1345        *status=U_INVALID_CHAR_FOUND;
1346    }
1347    uprv_free(string);
1348
1349    return result;
1350}
1351
1352static struct SResource *
1353parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1354{
1355    struct SResource *result;
1356    FileStream       *file;
1357    int32_t           len;
1358    uint8_t          *data;
1359    char             *filename;
1360    uint32_t          line;
1361    char     *fullname = NULL;
1362    int32_t numRead = 0;
1363    filename = getInvariantString(&line, NULL, status);
1364
1365    if (U_FAILURE(*status))
1366    {
1367        return NULL;
1368    }
1369
1370    expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1371
1372    if (U_FAILURE(*status))
1373    {
1374        uprv_free(filename);
1375        return NULL;
1376    }
1377
1378    if(isVerbose()){
1379        printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1380    }
1381
1382    /* Open the input file for reading */
1383    if (inputdir == NULL)
1384    {
1385#if 1
1386        /*
1387         * Always save file file name, even if there's
1388         * no input directory specified. MIGHT BREAK SOMETHING
1389         */
1390        int32_t filenameLength = uprv_strlen(filename);
1391
1392        fullname = (char *) uprv_malloc(filenameLength + 1);
1393        uprv_strcpy(fullname, filename);
1394#endif
1395
1396        file = T_FileStream_open(filename, "rb");
1397    }
1398    else
1399    {
1400
1401        int32_t  count     = (int32_t)uprv_strlen(filename);
1402
1403        if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1404        {
1405            fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1406
1407            /* test for NULL */
1408            if(fullname == NULL)
1409            {
1410                *status = U_MEMORY_ALLOCATION_ERROR;
1411                return NULL;
1412            }
1413
1414            uprv_strcpy(fullname, inputdir);
1415
1416            fullname[inputdirLength]      = U_FILE_SEP_CHAR;
1417            fullname[inputdirLength + 1] = '\0';
1418
1419            uprv_strcat(fullname, filename);
1420        }
1421        else
1422        {
1423            fullname = (char *) uprv_malloc(inputdirLength + count + 1);
1424
1425            /* test for NULL */
1426            if(fullname == NULL)
1427            {
1428                *status = U_MEMORY_ALLOCATION_ERROR;
1429                return NULL;
1430            }
1431
1432            uprv_strcpy(fullname, inputdir);
1433            uprv_strcat(fullname, filename);
1434        }
1435
1436        file = T_FileStream_open(fullname, "rb");
1437
1438    }
1439
1440    if (file == NULL)
1441    {
1442        error(line, "couldn't open input file %s", filename);
1443        *status = U_FILE_ACCESS_ERROR;
1444        return NULL;
1445    }
1446
1447    len  = T_FileStream_size(file);
1448    data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1449    /* test for NULL */
1450    if(data == NULL)
1451    {
1452        *status = U_MEMORY_ALLOCATION_ERROR;
1453        T_FileStream_close (file);
1454        return NULL;
1455    }
1456
1457    numRead = T_FileStream_read  (file, data, len);
1458    T_FileStream_close (file);
1459
1460    result = bin_open(bundle, tag, len, data, fullname, comment, status);
1461
1462    uprv_free(data);
1463    uprv_free(filename);
1464    uprv_free(fullname);
1465
1466    return result;
1467}
1468
1469static struct SResource *
1470parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1471{
1472    struct SResource *result;
1473    int32_t           len=0;
1474    char             *filename;
1475    uint32_t          line;
1476    UChar *pTarget     = NULL;
1477
1478    UCHARBUF *ucbuf;
1479    char     *fullname = NULL;
1480    int32_t  count     = 0;
1481    const char* cp = NULL;
1482    const UChar* uBuffer = NULL;
1483
1484    filename = getInvariantString(&line, NULL, status);
1485    count     = (int32_t)uprv_strlen(filename);
1486
1487    if (U_FAILURE(*status))
1488    {
1489        return NULL;
1490    }
1491
1492    expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1493
1494    if (U_FAILURE(*status))
1495    {
1496        uprv_free(filename);
1497        return NULL;
1498    }
1499
1500    if(isVerbose()){
1501        printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1502    }
1503
1504    fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1505    /* test for NULL */
1506    if(fullname == NULL)
1507    {
1508        *status = U_MEMORY_ALLOCATION_ERROR;
1509        uprv_free(filename);
1510        return NULL;
1511    }
1512
1513    if(inputdir!=NULL){
1514        if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1515        {
1516
1517            uprv_strcpy(fullname, inputdir);
1518
1519            fullname[inputdirLength]      = U_FILE_SEP_CHAR;
1520            fullname[inputdirLength + 1] = '\0';
1521
1522            uprv_strcat(fullname, filename);
1523        }
1524        else
1525        {
1526            uprv_strcpy(fullname, inputdir);
1527            uprv_strcat(fullname, filename);
1528        }
1529    }else{
1530        uprv_strcpy(fullname,filename);
1531    }
1532
1533    ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1534
1535    if (U_FAILURE(*status)) {
1536        error(line, "couldn't open input file %s\n", filename);
1537        return NULL;
1538    }
1539
1540    uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1541    result = string_open(bundle, tag, uBuffer, len, comment, status);
1542
1543    uprv_free(pTarget);
1544
1545    uprv_free(filename);
1546    uprv_free(fullname);
1547
1548    return result;
1549}
1550
1551
1552
1553
1554
/*
 * UChar spellings of the resource type names that can follow ':' in a
 * resource declaration.  Each U_STRING_DECL names the constant, gives its
 * text and the number of characters in that text; initParser() runs the
 * matching U_STRING_INIT for every entry.  "bin" and "int" are accepted by
 * parseResourceType() as aliases for "binary" and "integer".
 */
U_STRING_DECL(k_type_string,    "string",    6);
U_STRING_DECL(k_type_binary,    "binary",    6);
U_STRING_DECL(k_type_bin,       "bin",       3);
U_STRING_DECL(k_type_table,     "table",     5);
U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
U_STRING_DECL(k_type_int,       "int",       3);
U_STRING_DECL(k_type_integer,   "integer",   7);
U_STRING_DECL(k_type_array,     "array",     5);
U_STRING_DECL(k_type_alias,     "alias",     5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import,    "import",    6);
U_STRING_DECL(k_type_include,   "include",   7);
U_STRING_DECL(k_type_reserved,  "reserved",  8);

/* Various non-standard processing plugins that create one or more special resources. */
U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
1574
/*
 * Resource types known to the parser.  The values are used as indexes into
 * gResourceTypes[], so the enumerators must stay in the same order as the
 * entries of that table.
 */
typedef enum EResourceType
{
    RT_UNKNOWN,                 /* no type determined (yet); also returned by parseResourceType() on failure */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,       /* only accepted for the top-level table, see parse() and parseResource() */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker for the type-name search in parseResourceType() */
} EResourceType;
1594
/*
 * Per-type information, indexed by EResourceType: the type name as a char
 * string and as a UChar string, plus the function that parses a resource of
 * that type.  parseResourceType() compares a type token against nameUChars of
 * the entries after "Unknown" and before "reserved"; parseResource() calls
 * parseFunction and reports an internal error when it is NULL.
 */
static struct {
    const char *nameChars;   /* only used for debugging */
    const UChar *nameUChars;
    ParseResourceFunction *parseFunction;
} gResourceTypes[] = {
    {"Unknown", NULL, NULL},
    {"string", k_type_string, parseString},
    {"binary", k_type_binary, parseBinary},
    {"table", k_type_table, parseTable},
    {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
    {"integer", k_type_integer, parseInteger},
    {"array", k_type_array, parseArray},
    {"alias", k_type_alias, parseAlias},
    {"intvector", k_type_intvector, parseIntVector},
    {"import", k_type_import, parseImport},
    {"include", k_type_include, parseInclude},
    {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
    {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
    {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
    {"process(dependency)", k_type_plugin_dependency, parseDependency},
    {"reserved", NULL, NULL}
};
1617
1618void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
1619{
1620    uint32_t i;
1621
1622    U_STRING_INIT(k_type_string,    "string",    6);
1623    U_STRING_INIT(k_type_binary,    "binary",    6);
1624    U_STRING_INIT(k_type_bin,       "bin",       3);
1625    U_STRING_INIT(k_type_table,     "table",     5);
1626    U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
1627    U_STRING_INIT(k_type_int,       "int",       3);
1628    U_STRING_INIT(k_type_integer,   "integer",   7);
1629    U_STRING_INIT(k_type_array,     "array",     5);
1630    U_STRING_INIT(k_type_alias,     "alias",     5);
1631    U_STRING_INIT(k_type_intvector, "intvector", 9);
1632    U_STRING_INIT(k_type_import,    "import",    6);
1633    U_STRING_INIT(k_type_reserved,  "reserved",  8);
1634    U_STRING_INIT(k_type_include,   "include",   7);
1635
1636    U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1637    U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
1638    U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
1639    U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
1640
1641    for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1642    {
1643        ustr_init(&lookahead[i].value);
1644    }
1645    gMakeBinaryCollation = !omitBinaryCollation;
1646    gOmitCollationRules = omitCollationRules;
1647}
1648
1649static U_INLINE UBool isTable(enum EResourceType type) {
1650    return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1651}
1652
1653static enum EResourceType
1654parseResourceType(UErrorCode *status)
1655{
1656    struct UString        *tokenValue;
1657    struct UString        comment;
1658    enum   EResourceType  result = RT_UNKNOWN;
1659    uint32_t              line=0;
1660    ustr_init(&comment);
1661    expect(TOK_STRING, &tokenValue, &comment, &line, status);
1662
1663    if (U_FAILURE(*status))
1664    {
1665        return RT_UNKNOWN;
1666    }
1667
1668    *status = U_ZERO_ERROR;
1669
1670    /* Search for normal types */
1671    result=RT_UNKNOWN;
1672    while (++result < RT_RESERVED) {
1673        if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1674            break;
1675        }
1676    }
1677    /* Now search for the aliases */
1678    if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1679        result = RT_INTEGER;
1680    }
1681    else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1682        result = RT_BINARY;
1683    }
1684    else if (result == RT_RESERVED) {
1685        char tokenBuffer[1024];
1686        u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1687        tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1688        *status = U_INVALID_FORMAT_ERROR;
1689        error(line, "unknown resource type '%s'", tokenBuffer);
1690    }
1691
1692    return result;
1693}
1694
1695/* parse a non-top-level resource */
1696static struct SResource *
1697parseResource(char *tag, const struct UString *comment, UErrorCode *status)
1698{
1699    enum   ETokenType      token;
1700    enum   EResourceType  resType = RT_UNKNOWN;
1701    ParseResourceFunction *parseFunction = NULL;
1702    struct UString        *tokenValue;
1703    uint32_t                 startline;
1704    uint32_t                 line;
1705
1706    token = getToken(&tokenValue, NULL, &startline, status);
1707
1708    if(isVerbose()){
1709        printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1710    }
1711
1712    /* name . [ ':' type ] '{' resource '}' */
1713    /* This function parses from the colon onwards.  If the colon is present, parse the
1714    type then try to parse a resource of that type.  If there is no explicit type,
1715    work it out using the lookahead tokens. */
1716    switch (token)
1717    {
1718    case TOK_EOF:
1719        *status = U_INVALID_FORMAT_ERROR;
1720        error(startline, "Unexpected EOF encountered");
1721        return NULL;
1722
1723    case TOK_ERROR:
1724        *status = U_INVALID_FORMAT_ERROR;
1725        return NULL;
1726
1727    case TOK_COLON:
1728        resType = parseResourceType(status);
1729        expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1730
1731        if (U_FAILURE(*status))
1732        {
1733            return NULL;
1734        }
1735
1736        break;
1737
1738    case TOK_OPEN_BRACE:
1739        break;
1740
1741    default:
1742        *status = U_INVALID_FORMAT_ERROR;
1743        error(startline, "syntax error while reading a resource, expected '{' or ':'");
1744        return NULL;
1745    }
1746
1747    if (resType == RT_UNKNOWN)
1748    {
1749        /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
1750        We could have any of the following:
1751        { {         => array (nested)
1752        { :/}       => array
1753        { string ,  => string array
1754
1755        { string {  => table
1756
1757        { string :/{    => table
1758        { string }      => string
1759        */
1760
1761        token = peekToken(0, NULL, &line, NULL,status);
1762
1763        if (U_FAILURE(*status))
1764        {
1765            return NULL;
1766        }
1767
1768        if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1769        {
1770            resType = RT_ARRAY;
1771        }
1772        else if (token == TOK_STRING)
1773        {
1774            token = peekToken(1, NULL, &line, NULL, status);
1775
1776            if (U_FAILURE(*status))
1777            {
1778                return NULL;
1779            }
1780
1781            switch (token)
1782            {
1783            case TOK_COMMA:         resType = RT_ARRAY;  break;
1784            case TOK_OPEN_BRACE:    resType = RT_TABLE;  break;
1785            case TOK_CLOSE_BRACE:   resType = RT_STRING; break;
1786            case TOK_COLON:         resType = RT_TABLE;  break;
1787            default:
1788                *status = U_INVALID_FORMAT_ERROR;
1789                error(line, "Unexpected token after string, expected ',', '{' or '}'");
1790                return NULL;
1791            }
1792        }
1793        else
1794        {
1795            *status = U_INVALID_FORMAT_ERROR;
1796            error(line, "Unexpected token after '{'");
1797            return NULL;
1798        }
1799
1800        /* printf("Type guessed as %s\n", resourceNames[resType]); */
1801    } else if(resType == RT_TABLE_NO_FALLBACK) {
1802        *status = U_INVALID_FORMAT_ERROR;
1803        error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1804        return NULL;
1805    }
1806
1807    /* We should now know what we need to parse next, so call the appropriate parser
1808    function and return. */
1809    parseFunction = gResourceTypes[resType].parseFunction;
1810    if (parseFunction != NULL) {
1811        return parseFunction(tag, startline, comment, status);
1812    }
1813    else {
1814        *status = U_INTERNAL_PROGRAM_ERROR;
1815        error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
1816    }
1817
1818    return NULL;
1819}
1820
1821/* parse the top-level resource */
1822struct SRBRoot *
1823parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
1824{
1825    struct UString    *tokenValue;
1826    struct UString    comment;
1827    uint32_t           line;
1828    enum EResourceType bundleType;
1829    enum ETokenType    token;
1830
1831    initLookahead(buf, status);
1832
1833    inputdir       = inputDir;
1834    inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0;
1835    outputdir       = outputDir;
1836    outputdirLength = (outputdir != NULL) ? (uint32_t)uprv_strlen(outputdir) : 0;
1837
1838    ustr_init(&comment);
1839    expect(TOK_STRING, &tokenValue, &comment, NULL, status);
1840
1841    bundle = bundle_open(&comment, status);
1842
1843    if (bundle == NULL || U_FAILURE(*status))
1844    {
1845        return NULL;
1846    }
1847
1848
1849    bundle_setlocale(bundle, tokenValue->fChars, status);
1850    /* The following code is to make Empty bundle work no matter with :table specifer or not */
1851    token = getToken(NULL, NULL, &line, status);
1852    if(token==TOK_COLON) {
1853        *status=U_ZERO_ERROR;
1854        bundleType=parseResourceType(status);
1855
1856        if(isTable(bundleType))
1857        {
1858            expect(TOK_OPEN_BRACE, NULL, NULL, &line, status);
1859        }
1860        else
1861        {
1862            *status=U_PARSE_ERROR;
1863            error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
1864        }
1865    }
1866    else
1867    {
1868        /* not a colon */
1869        if(token==TOK_OPEN_BRACE)
1870        {
1871            *status=U_ZERO_ERROR;
1872            bundleType=RT_TABLE;
1873        }
1874        else
1875        {
1876            /* neither colon nor open brace */
1877            *status=U_PARSE_ERROR;
1878            bundleType=RT_UNKNOWN;
1879            error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
1880        }
1881    }
1882
1883    if (U_FAILURE(*status))
1884    {
1885        bundle_close(bundle, status);
1886        return NULL;
1887    }
1888
1889    if(bundleType==RT_TABLE_NO_FALLBACK) {
1890        /*
1891         * Parse a top-level table with the table(nofallback) declaration.
1892         * This is the same as a regular table, but also sets the
1893         * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
1894         */
1895        bundle->noFallback=TRUE;
1896    }
1897    /* top-level tables need not handle special table names like "collations" */
1898    realParseTable(bundle->fRoot, NULL, line, status);
1899
1900    if(dependencyArray!=NULL){
1901        table_add(bundle->fRoot, dependencyArray, 0, status);
1902        dependencyArray = NULL;
1903    }
1904    if (U_FAILURE(*status))
1905    {
1906        bundle_close(bundle, status);
1907        res_close(dependencyArray);
1908        return NULL;
1909    }
1910
1911    if (getToken(NULL, NULL, &line, status) != TOK_EOF)
1912    {
1913        warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
1914        if(isStrict()){
1915            *status = U_INVALID_FORMAT_ERROR;
1916            return NULL;
1917        }
1918    }
1919
1920    cleanupLookahead();
1921    ustr_deinit(&comment);
1922    return bundle;
1923}
1924
1925