1/*
2*******************************************************************************
3* Copyright (C) 2007-2010, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*
7* File PLURRULE.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*******************************************************************************
13*/
14
15
16#include "unicode/uniset.h"
17#include "unicode/utypes.h"
18#include "unicode/ures.h"
19#include "unicode/plurrule.h"
20#include "cmemory.h"
21#include "cstring.h"
22#include "hash.h"
23#include "mutex.h"
24#include "plurrule_impl.h"
25#include "putilimp.h"
26#include "ucln_in.h"
27#include "ustrfmt.h"
28#include "locutil.h"
29
30/*
31// TODO(claireho): remove stdio
32#include "stdio.h"
33*/
34
35#if !UCONFIG_NO_FORMATTING
36
37U_NAMESPACE_BEGIN
38
39
/**
 * Number of elements in a statically sized array, as an int32_t.
 * Only meaningful for true arrays (not pointers). The argument and the whole
 * expansion are parenthesized so the macro composes safely inside larger
 * expressions.
 */
#define ARRAY_SIZE(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
41
42static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
43static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
44static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
45static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
46static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
47static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
48static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
49static const UChar PK_IN[]={LOW_I,LOW_N,0};
50static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
51static const UChar PK_IS[]={LOW_I,LOW_S,0};
52static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
53static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
54static const UChar PK_OR[]={LOW_O,LOW_R,0};
55static const UChar PK_VAR_N[]={LOW_N,0};
56static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
57
58UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
59UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
60
61PluralRules::PluralRules(UErrorCode& status)
62:   UObject(),
63    mRules(NULL)
64{
65    if (U_FAILURE(status)) {
66        return;
67    }
68    mParser = new RuleParser();
69    if (mParser==NULL) {
70        status = U_MEMORY_ALLOCATION_ERROR;
71    }
72}
73
74PluralRules::PluralRules(const PluralRules& other)
75: UObject(other),
76    mRules(NULL),
77    mParser(new RuleParser())
78{
79    *this=other;
80}
81
82PluralRules::~PluralRules() {
83    delete mRules;
84    delete mParser;
85}
86
87PluralRules*
88PluralRules::clone() const {
89    return new PluralRules(*this);
90}
91
92PluralRules&
93PluralRules::operator=(const PluralRules& other) {
94    if (this != &other) {
95        delete mRules;
96        if (other.mRules==NULL) {
97            mRules = NULL;
98        }
99        else {
100            mRules = new RuleChain(*other.mRules);
101        }
102        delete mParser;
103        mParser = new RuleParser();
104    }
105
106    return *this;
107}
108
109PluralRules* U_EXPORT2
110PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
111    RuleChain   rules;
112
113    if (U_FAILURE(status)) {
114        return NULL;
115    }
116    PluralRules *newRules = new PluralRules(status);
117    if ( (newRules != NULL)&& U_SUCCESS(status) ) {
118        newRules->parseDescription((UnicodeString &)description, rules, status);
119        if (U_SUCCESS(status)) {
120            newRules->addRules(rules);
121        }
122    }
123    if (U_FAILURE(status)) {
124        delete newRules;
125        return NULL;
126    }
127    else {
128        return newRules;
129    }
130}
131
132PluralRules* U_EXPORT2
133PluralRules::createDefaultRules(UErrorCode& status) {
134    return createRules(PLURAL_DEFAULT_RULE, status);
135}
136
137PluralRules* U_EXPORT2
138PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
139    RuleChain   rChain;
140    if (U_FAILURE(status)) {
141        return NULL;
142    }
143    PluralRules *newObj = new PluralRules(status);
144    if (newObj==NULL || U_FAILURE(status)) {
145        return NULL;
146    }
147    UnicodeString locRule = newObj->getRuleFromResource(locale, status);
148    if ((locRule.length() != 0) && U_SUCCESS(status)) {
149        newObj->parseDescription(locRule, rChain, status);
150        if (U_SUCCESS(status)) {
151            newObj->addRules(rChain);
152        }
153    }
154    if (U_FAILURE(status)||(locRule.length() == 0)) {
155        // use default plural rule
156        status = U_ZERO_ERROR;
157        UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
158        newObj->parseDescription(defRule, rChain, status);
159        newObj->addRules(rChain);
160    }
161
162    return newObj;
163}
164
165UnicodeString
166PluralRules::select(int32_t number) const {
167    if (mRules == NULL) {
168        return PLURAL_DEFAULT_RULE;
169    }
170    else {
171        return mRules->select(number);
172    }
173}
174
175UnicodeString
176PluralRules::select(double number) const {
177    if (mRules == NULL) {
178        return PLURAL_DEFAULT_RULE;
179    }
180    else {
181        return mRules->select(number);
182    }
183}
184
185StringEnumeration*
186PluralRules::getKeywords(UErrorCode& status) const {
187    if (U_FAILURE(status))  return NULL;
188    StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
189    if (U_FAILURE(status))  return NULL;
190
191    return nameEnumerator;
192}
193
194
195UBool
196PluralRules::isKeyword(const UnicodeString& keyword) const {
197    if ( keyword == PLURAL_KEYWORD_OTHER ) {
198        return true;
199    }
200    else {
201        if (mRules==NULL) {
202            return false;
203        }
204        else {
205            return mRules->isKeyword(keyword);
206        }
207    }
208}
209
210UnicodeString
211PluralRules::getKeywordOther() const {
212    return PLURAL_KEYWORD_OTHER;
213}
214
215UBool
216PluralRules::operator==(const PluralRules& other) const  {
217    int32_t limit;
218    UBool sameList = TRUE;
219    const UnicodeString *ptrKeyword;
220    UErrorCode status= U_ZERO_ERROR;
221
222    if ( this == &other ) {
223        return TRUE;
224    }
225    StringEnumeration* myKeywordList = getKeywords(status);
226    if (U_FAILURE(status)) {
227        return FALSE;
228    }
229    StringEnumeration* otherKeywordList =other.getKeywords(status);
230    if (U_FAILURE(status)) {
231        return FALSE;
232    }
233
234    if (myKeywordList->count(status)!=otherKeywordList->count(status) ||
235        U_FAILURE(status)) {
236        sameList = FALSE;
237    }
238    else {
239        myKeywordList->reset(status);
240        if (U_FAILURE(status)) {
241            return FALSE;
242        }
243        while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
244            if (U_FAILURE(status) || !other.isKeyword(*ptrKeyword)) {
245                sameList = FALSE;
246            }
247        }
248        otherKeywordList->reset(status);
249        if (U_FAILURE(status)) {
250            return FALSE;
251        }
252        while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
253            if (U_FAILURE(status)) {
254                return FALSE;
255            }
256            if (!this->isKeyword(*ptrKeyword))  {
257                sameList = FALSE;
258            }
259        }
260        delete myKeywordList;
261        delete otherKeywordList;
262        if (!sameList) {
263            return FALSE;
264        }
265    }
266
267    if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
268        return FALSE;
269    }
270    UnicodeString myKeyword, otherKeyword;
271    for (int32_t i=0; i<limit; ++i) {
272        myKeyword = this->select(i);
273        otherKeyword = other.select(i);
274        if (myKeyword!=otherKeyword) {
275            return FALSE;
276        }
277    }
278    return TRUE;
279}
280
281void
282PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
283{
284    int32_t ruleIndex=0;
285    UnicodeString token;
286    tokenType type;
287    tokenType prevType=none;
288    RuleChain *ruleChain=NULL;
289    AndConstraint *curAndConstraint=NULL;
290    OrConstraint *orNode=NULL;
291    RuleChain *lastChain=NULL;
292
293    if (U_FAILURE(status)) {
294        return;
295    }
296    UnicodeString ruleData = data.toLower();
297    while (ruleIndex< ruleData.length()) {
298        mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
299        if (U_FAILURE(status)) {
300            return;
301        }
302        mParser->checkSyntax(prevType, type, status);
303        if (U_FAILURE(status)) {
304            return;
305        }
306        switch (type) {
307        case tAnd:
308            curAndConstraint = curAndConstraint->add();
309            break;
310        case tOr:
311            lastChain = &rules;
312            while (lastChain->next !=NULL) {
313                lastChain = lastChain->next;
314            }
315            orNode=lastChain->ruleHeader;
316            while (orNode->next != NULL) {
317                orNode = orNode->next;
318            }
319            orNode->next= new OrConstraint();
320            orNode=orNode->next;
321            orNode->next=NULL;
322            curAndConstraint = orNode->add();
323            break;
324        case tIs:
325            curAndConstraint->rangeHigh=-1;
326            break;
327        case tNot:
328            curAndConstraint->notIn=TRUE;
329            break;
330        case tIn:
331            curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
332            curAndConstraint->integerOnly = TRUE;
333            break;
334        case tWithin:
335            curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
336            break;
337        case tNumber:
338            if ( (curAndConstraint->op==AndConstraint::MOD)&&
339                 (curAndConstraint->opNum == -1 ) ) {
340                curAndConstraint->opNum=getNumberValue(token);
341            }
342            else {
343                if (curAndConstraint->rangeLow == -1) {
344                    curAndConstraint->rangeLow=getNumberValue(token);
345                }
346                else {
347                    curAndConstraint->rangeHigh=getNumberValue(token);
348                }
349            }
350            break;
351        case tMod:
352            curAndConstraint->op=AndConstraint::MOD;
353            break;
354        case tKeyword:
355            if (ruleChain==NULL) {
356                ruleChain = &rules;
357            }
358            else {
359                while (ruleChain->next!=NULL){
360                    ruleChain=ruleChain->next;
361                }
362                ruleChain=ruleChain->next=new RuleChain();
363            }
364            orNode = ruleChain->ruleHeader = new OrConstraint();
365            curAndConstraint = orNode->add();
366            ruleChain->keyword = token;
367            break;
368        default:
369            break;
370        }
371        prevType=type;
372    }
373}
374
375int32_t
376PluralRules::getNumberValue(const UnicodeString& token) const {
377    int32_t i;
378    char digits[128];
379
380    i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
381    digits[i]='\0';
382
383    return((int32_t)atoi(digits));
384}
385
386
387void
388PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
389    int32_t i=*curIndex;
390
391    localeName.remove();
392    while (i< localeData.length()) {
393       if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
394           break;
395       }
396       i++;
397    }
398
399    while (i< localeData.length()) {
400       if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
401           break;
402       }
403       localeName+=localeData.charAt(i++);
404    }
405    *curIndex=i;
406}
407
408
409int32_t
410PluralRules::getRepeatLimit() const {
411    if (mRules!=NULL) {
412        return mRules->getRepeatLimit();
413    }
414    else {
415        return 0;
416    }
417}
418
419
420void
421PluralRules::addRules(RuleChain& rules) {
422    RuleChain *newRule = new RuleChain(rules);
423    this->mRules=newRule;
424    newRule->setRepeatLimit();
425}
426
427UnicodeString
428PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
429    UnicodeString emptyStr;
430
431    if (U_FAILURE(errCode)) {
432        return emptyStr;
433    }
434    UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
435    if(U_FAILURE(errCode)) {
436        /* total failure, not even root could be opened */
437        return emptyStr;
438    }
439    UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
440    if(U_FAILURE(errCode)) {
441        ures_close(rb);
442        return emptyStr;
443    }
444    int32_t resLen=0;
445    const char *curLocaleName=locale.getName();
446    const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
447
448    if (s == NULL) {
449        // Check parent locales.
450        UErrorCode status = U_ZERO_ERROR;
451        char parentLocaleName[ULOC_FULLNAME_CAPACITY];
452        const char *curLocaleName=locale.getName();
453        int32_t localeNameLen=0;
454        uprv_strcpy(parentLocaleName, curLocaleName);
455
456        while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
457                                       ULOC_FULLNAME_CAPACITY, &status)) > 0) {
458            resLen=0;
459            s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
460            if (s != NULL) {
461                errCode = U_ZERO_ERROR;
462                break;
463            }
464            status = U_ZERO_ERROR;
465        }
466    }
467    if (s==NULL) {
468        ures_close(locRes);
469        ures_close(rb);
470        return emptyStr;
471    }
472
473    char setKey[256];
474    UChar result[256];
475    u_UCharsToChars(s, setKey, resLen + 1);
476    // printf("\n PluralRule: %s\n", setKey);
477
478
479    UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
480    if(U_FAILURE(errCode)) {
481        ures_close(locRes);
482        ures_close(rb);
483        return emptyStr;
484    }
485    resLen=0;
486    UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
487    if (U_FAILURE(errCode)) {
488        ures_close(ruleRes);
489        ures_close(locRes);
490        ures_close(rb);
491        return emptyStr;
492    }
493
494    int32_t numberKeys = ures_getSize(setRes);
495    char *key=NULL;
496    int32_t len=0;
497    for(int32_t i=0; i<numberKeys; ++i) {
498        int32_t keyLen;
499        resLen=0;
500        s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
501        keyLen = (int32_t)uprv_strlen(key);
502        u_charsToUChars(key, result+len, keyLen);
503        len += keyLen;
504        result[len++]=COLON;
505        uprv_memcpy(result+len, s, resLen*sizeof(UChar));
506        len += resLen;
507        result[len++]=SEMI_COLON;
508    }
509    result[len++]=0;
510    u_UCharsToChars(result, setKey, len);
511    // printf(" Rule: %s\n", setKey);
512
513    ures_close(setRes);
514    ures_close(ruleRes);
515    ures_close(locRes);
516    ures_close(rb);
517    return UnicodeString(result);
518
519}
520
521AndConstraint::AndConstraint() {
522    op = AndConstraint::NONE;
523    opNum=-1;
524    rangeLow=-1;
525    rangeHigh=-1;
526    notIn=FALSE;
527    integerOnly=FALSE;
528    next=NULL;
529}
530
531
532AndConstraint::AndConstraint(const AndConstraint& other) {
533    this->op = other.op;
534    this->opNum=other.opNum;
535    this->rangeLow=other.rangeLow;
536    this->rangeHigh=other.rangeHigh;
537    this->integerOnly=other.integerOnly;
538    this->notIn=other.notIn;
539    if (other.next==NULL) {
540        this->next=NULL;
541    }
542    else {
543        this->next = new AndConstraint(*other.next);
544    }
545}
546
547AndConstraint::~AndConstraint() {
548    if (next!=NULL) {
549        delete next;
550    }
551}
552
553
554UBool
555AndConstraint::isFulfilled(double number) {
556    UBool result=TRUE;
557    double value=number;
558
559    if ( op == MOD ) {
560        value = (int32_t)value % opNum;
561    }
562    if ( rangeHigh == -1 ) {
563        if ( rangeLow == -1 ) {
564            result = TRUE; // empty rule
565        }
566        else {
567            if ( value == rangeLow ) {
568                result = TRUE;
569            }
570            else {
571                result = FALSE;
572            }
573        }
574    }
575    else {
576        if ((rangeLow <= value) && (value <= rangeHigh)) {
577            if (integerOnly) {
578                if ( value != (int32_t)value) {
579                    result = FALSE;
580                }
581                else {
582                    result = TRUE;
583                }
584            }
585            else {
586                result = TRUE;
587            }
588        }
589        else {
590            result = FALSE;
591        }
592    }
593    if (notIn) {
594        return !result;
595    }
596    else {
597        return result;
598    }
599}
600
601int32_t
602AndConstraint::updateRepeatLimit(int32_t maxLimit) {
603
604    if ( op == MOD ) {
605        return uprv_max(opNum, maxLimit);
606    }
607    else {
608        if ( rangeHigh == -1 ) {
609            return uprv_max(rangeLow, maxLimit);
610        }
611        else{
612            return uprv_max(rangeHigh, maxLimit);
613        }
614    }
615}
616
617
618AndConstraint*
619AndConstraint::add()
620{
621    this->next = new AndConstraint();
622    return this->next;
623}
624
625OrConstraint::OrConstraint() {
626    childNode=NULL;
627    next=NULL;
628}
629
630OrConstraint::OrConstraint(const OrConstraint& other) {
631    if ( other.childNode == NULL ) {
632        this->childNode = NULL;
633    }
634    else {
635        this->childNode = new AndConstraint(*(other.childNode));
636    }
637    if (other.next == NULL ) {
638        this->next = NULL;
639    }
640    else {
641        this->next = new OrConstraint(*(other.next));
642    }
643}
644
645OrConstraint::~OrConstraint() {
646    if (childNode!=NULL) {
647        delete childNode;
648    }
649    if (next!=NULL) {
650        delete next;
651    }
652}
653
654AndConstraint*
655OrConstraint::add()
656{
657    OrConstraint *curOrConstraint=this;
658    {
659        while (curOrConstraint->next!=NULL) {
660            curOrConstraint = curOrConstraint->next;
661        }
662        curOrConstraint->next = NULL;
663        curOrConstraint->childNode = new AndConstraint();
664    }
665    return curOrConstraint->childNode;
666}
667
668UBool
669OrConstraint::isFulfilled(double number) {
670    OrConstraint* orRule=this;
671    UBool result=FALSE;
672
673    while (orRule!=NULL && !result) {
674        result=TRUE;
675        AndConstraint* andRule = orRule->childNode;
676        while (andRule!=NULL && result) {
677            result = andRule->isFulfilled(number);
678            andRule=andRule->next;
679        }
680        orRule = orRule->next;
681    }
682
683    return result;
684}
685
686
687RuleChain::RuleChain() {
688    ruleHeader=NULL;
689    next = NULL;
690    repeatLimit=0;
691}
692
693RuleChain::RuleChain(const RuleChain& other) {
694    this->repeatLimit = other.repeatLimit;
695    this->keyword=other.keyword;
696    if (other.ruleHeader != NULL) {
697        this->ruleHeader = new OrConstraint(*(other.ruleHeader));
698    }
699    else {
700        this->ruleHeader = NULL;
701    }
702    if (other.next != NULL ) {
703        this->next = new RuleChain(*other.next);
704    }
705    else
706    {
707        this->next = NULL;
708    }
709}
710
711RuleChain::~RuleChain() {
712    if (next != NULL) {
713        delete next;
714    }
715    if ( ruleHeader != NULL ) {
716        delete ruleHeader;
717    }
718}
719
720UnicodeString
721RuleChain::select(double number) const {
722
723   if ( ruleHeader != NULL ) {
724       if (ruleHeader->isFulfilled(number)) {
725           return keyword;
726       }
727   }
728   if ( next != NULL ) {
729       return next->select(number);
730   }
731   else {
732       return PLURAL_KEYWORD_OTHER;
733   }
734
735}
736
737void
738RuleChain::dumpRules(UnicodeString& result) {
739    UChar digitString[16];
740
741    if ( ruleHeader != NULL ) {
742        result +=  keyword;
743        OrConstraint* orRule=ruleHeader;
744        while ( orRule != NULL ) {
745            AndConstraint* andRule=orRule->childNode;
746            while ( andRule != NULL ) {
747                if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
748                    result += UNICODE_STRING_SIMPLE(" n is ");
749                    if (andRule->notIn) {
750                        result += UNICODE_STRING_SIMPLE("not ");
751                    }
752                    uprv_itou(digitString,16, andRule->rangeLow,10,0);
753                    result += UnicodeString(digitString);
754                }
755                else {
756                    if (andRule->op==AndConstraint::MOD) {
757                        result += UNICODE_STRING_SIMPLE("  n mod ");
758                        uprv_itou(digitString,16, andRule->opNum,10,0);
759                        result += UnicodeString(digitString);
760                    }
761                    else {
762                        result += UNICODE_STRING_SIMPLE("  n ");
763                    }
764                    if (andRule->rangeHigh==-1) {
765                        if (andRule->notIn) {
766                            result += UNICODE_STRING_SIMPLE(" is not ");
767                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
768                            result += UnicodeString(digitString);
769                        }
770                        else {
771                            result += UNICODE_STRING_SIMPLE(" is ");
772                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
773                            result += UnicodeString(digitString);
774                        }
775                    }
776                    else {
777                        if (andRule->notIn) {
778                            if ( andRule->integerOnly ) {
779                                result += UNICODE_STRING_SIMPLE("  not in ");
780                            }
781                            else {
782                                result += UNICODE_STRING_SIMPLE("  not within ");
783                            }
784                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
785                            result += UnicodeString(digitString);
786                            result += UNICODE_STRING_SIMPLE(" .. ");
787                            uprv_itou(digitString,16, andRule->rangeHigh,10,0);
788                            result += UnicodeString(digitString);
789                        }
790                        else {
791                            if ( andRule->integerOnly ) {
792                                result += UNICODE_STRING_SIMPLE(" in ");
793                            }
794                            else {
795                                result += UNICODE_STRING_SIMPLE(" within ");
796                            }
797                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
798                            result += UnicodeString(digitString);
799                            result += UNICODE_STRING_SIMPLE(" .. ");
800                            uprv_itou(digitString,16, andRule->rangeHigh,10,0);
801                        }
802                    }
803                }
804                if ( (andRule=andRule->next) != NULL) {
805                    result += PK_AND;
806                }
807            }
808            if ( (orRule = orRule->next) != NULL ) {
809                result += PK_OR;
810            }
811        }
812    }
813    if ( next != NULL ) {
814        next->dumpRules(result);
815    }
816}
817
818int32_t
819RuleChain::getRepeatLimit () {
820    return repeatLimit;
821}
822
823void
824RuleChain::setRepeatLimit () {
825    int32_t limit=0;
826
827    if ( next != NULL ) {
828        next->setRepeatLimit();
829        limit = next->repeatLimit;
830    }
831
832    if ( ruleHeader != NULL ) {
833        OrConstraint* orRule=ruleHeader;
834        while ( orRule != NULL ) {
835            AndConstraint* andRule=orRule->childNode;
836            while ( andRule != NULL ) {
837                limit = andRule->updateRepeatLimit(limit);
838                andRule = andRule->next;
839            }
840            orRule = orRule->next;
841        }
842    }
843    repeatLimit = limit;
844}
845
846UErrorCode
847RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
848    if ( arraySize < capacityOfKeywords-1 ) {
849        keywords[arraySize++]=keyword;
850    }
851    else {
852        return U_BUFFER_OVERFLOW_ERROR;
853    }
854
855    if ( next != NULL ) {
856        return next->getKeywords(capacityOfKeywords, keywords, arraySize);
857    }
858    else {
859        return U_ZERO_ERROR;
860    }
861}
862
863UBool
864RuleChain::isKeyword(const UnicodeString& keywordParam) const {
865    if ( keyword == keywordParam ) {
866        return TRUE;
867    }
868
869    if ( next != NULL ) {
870        return next->isKeyword(keywordParam);
871    }
872    else {
873        return FALSE;
874    }
875}
876
877
878RuleParser::RuleParser() {
879    UErrorCode err=U_ZERO_ERROR;
880    const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
881    const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
882    idStartFilter = new UnicodeSet(idStart, err);
883    idContinueFilter = new UnicodeSet(idContinue, err);
884}
885
886RuleParser::~RuleParser() {
887    delete idStartFilter;
888    delete idContinueFilter;
889}
890
891void
892RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
893{
894    if (U_FAILURE(status)) {
895        return;
896    }
897    switch(prevType) {
898    case none:
899    case tSemiColon:
900        if (curType!=tKeyword) {
901            status = U_UNEXPECTED_TOKEN;
902        }
903        break;
904    case tVariableN :
905        if (curType != tIs && curType != tMod && curType != tIn &&
906            curType != tNot && curType != tWithin) {
907            status = U_UNEXPECTED_TOKEN;
908        }
909        break;
910    case tZero:
911    case tOne:
912    case tTwo:
913    case tFew:
914    case tMany:
915    case tOther:
916    case tKeyword:
917        if (curType != tColon) {
918            status = U_UNEXPECTED_TOKEN;
919        }
920        break;
921    case tColon :
922        if (curType != tVariableN) {
923            status = U_UNEXPECTED_TOKEN;
924        }
925        break;
926    case tIs:
927        if ( curType != tNumber && curType != tNot) {
928            status = U_UNEXPECTED_TOKEN;
929        }
930        break;
931    case tNot:
932        if (curType != tNumber && curType != tIn && curType != tWithin) {
933            status = U_UNEXPECTED_TOKEN;
934        }
935        break;
936    case tMod:
937    case tDot:
938    case tIn:
939    case tWithin:
940    case tAnd:
941    case tOr:
942        if (curType != tNumber && curType != tVariableN) {
943            status = U_UNEXPECTED_TOKEN;
944        }
945        break;
946    case tNumber:
947        if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
948            curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
949        {
950            status = U_UNEXPECTED_TOKEN;
951        }
952        break;
953    default:
954        status = U_UNEXPECTED_TOKEN;
955        break;
956    }
957}
958
959void
960RuleParser::getNextToken(const UnicodeString& ruleData,
961                         int32_t *ruleIndex,
962                         UnicodeString& token,
963                         tokenType& type,
964                         UErrorCode &status)
965{
966    int32_t curIndex= *ruleIndex;
967    UChar ch;
968    tokenType prevType=none;
969
970    if (U_FAILURE(status)) {
971        return;
972    }
973    while (curIndex<ruleData.length()) {
974        ch = ruleData.charAt(curIndex);
975        if ( !inRange(ch, type) ) {
976            status = U_ILLEGAL_CHARACTER;
977            return;
978        }
979        switch (type) {
980        case tSpace:
981            if ( *ruleIndex != curIndex ) { // letter
982                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
983                *ruleIndex=curIndex;
984                type=prevType;
985                getKeyType(token, type, status);
986                return;
987            }
988            else {
989                *ruleIndex=*ruleIndex+1;
990            }
991            break; // consective space
992        case tColon:
993        case tSemiColon:
994            if ( *ruleIndex != curIndex ) {
995                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
996                *ruleIndex=curIndex;
997                type=prevType;
998                getKeyType(token, type, status);
999                return;
1000            }
1001            else {
1002                *ruleIndex=curIndex+1;
1003                return;
1004            }
1005        case tLetter:
1006             if ((type==prevType)||(prevType==none)) {
1007                prevType=type;
1008                break;
1009             }
1010             break;
1011        case tNumber:
1012             if ((type==prevType)||(prevType==none)) {
1013                prevType=type;
1014                break;
1015             }
1016             else {
1017                *ruleIndex=curIndex+1;
1018                return;
1019             }
1020         case tDot:
1021             if (prevType==none) {  // first dot
1022                prevType=type;
1023                continue;
1024             }
1025             else {
1026                 if ( *ruleIndex != curIndex ) {
1027                    token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1028                    *ruleIndex=curIndex;  // letter
1029                    type=prevType;
1030                    getKeyType(token, type, status);
1031                    return;
1032                 }
1033                 else {  // two consective dots
1034                    *ruleIndex=curIndex+2;
1035                    return;
1036                 }
1037             }
1038             break;
1039         default:
1040             status = U_UNEXPECTED_TOKEN;
1041             return;
1042        }
1043        curIndex++;
1044    }
1045    if ( curIndex>=ruleData.length() ) {
1046        if ( (type == tLetter)||(type == tNumber) ) {
1047            token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1048            getKeyType(token, type, status);
1049            if (U_FAILURE(status)) {
1050                return;
1051            }
1052        }
1053        *ruleIndex = ruleData.length();
1054    }
1055}
1056
1057UBool
1058RuleParser::inRange(UChar ch, tokenType& type) {
1059    if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1060        // we assume all characters are in lower case already.
1061        return FALSE;
1062    }
1063    if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1064        type = tLetter;
1065        return TRUE;
1066    }
1067    if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1068        type = tNumber;
1069        return TRUE;
1070    }
1071    switch (ch) {
1072    case COLON:
1073        type = tColon;
1074        return TRUE;
1075    case SPACE:
1076        type = tSpace;
1077        return TRUE;
1078    case SEMI_COLON:
1079        type = tSemiColon;
1080        return TRUE;
1081    case DOT:
1082        type = tDot;
1083        return TRUE;
1084    default :
1085        type = none;
1086        return FALSE;
1087    }
1088}
1089
1090
1091void
1092RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1093{
1094    if (U_FAILURE(status)) {
1095        return;
1096    }
1097    if ( keyType==tNumber) {
1098    }
1099    else if (token==PK_VAR_N) {
1100        keyType = tVariableN;
1101    }
1102    else if (token==PK_IS) {
1103        keyType = tIs;
1104    }
1105    else if (token==PK_AND) {
1106        keyType = tAnd;
1107    }
1108    else if (token==PK_IN) {
1109        keyType = tIn;
1110    }
1111    else if (token==PK_WITHIN) {
1112        keyType = tWithin;
1113    }
1114    else if (token==PK_NOT) {
1115        keyType = tNot;
1116    }
1117    else if (token==PK_MOD) {
1118        keyType = tMod;
1119    }
1120    else if (token==PK_OR) {
1121        keyType = tOr;
1122    }
1123    else if ( isValidKeyword(token) ) {
1124        keyType = tKeyword;
1125    }
1126    else {
1127        status = U_UNEXPECTED_TOKEN;
1128    }
1129}
1130
1131UBool
1132RuleParser::isValidKeyword(const UnicodeString& token) {
1133    if ( token.length()==0 ) {
1134        return FALSE;
1135    }
1136    if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
1137        int32_t i;
1138        for (i=1; i< token.length(); i++) {
1139            if (idContinueFilter->contains(token.charAt(i))== FALSE) {
1140                return FALSE;
1141            }
1142        }
1143        return TRUE;
1144    }
1145    else {
1146        return FALSE;
1147    }
1148}
1149
1150PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
1151fKeywordNames(status)
1152{
1153    RuleChain *node=header;
1154    UBool  addKeywordOther=true;
1155
1156    if (U_FAILURE(status)) {
1157        return;
1158    }
1159    pos=0;
1160    fKeywordNames.removeAllElements();
1161    while(node!=NULL) {
1162        fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1163        if (U_FAILURE(status)) {
1164            return;
1165        }
1166        if (node->keyword == PLURAL_KEYWORD_OTHER) {
1167            addKeywordOther= false;
1168        }
1169        node=node->next;
1170    }
1171
1172    if (addKeywordOther) {
1173        fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1174        if (U_FAILURE(status)) {
1175            return;
1176        }
1177    }
1178}
1179
1180const UnicodeString*
1181PluralKeywordEnumeration::snext(UErrorCode& status) {
1182    if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1183        return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1184    }
1185    return NULL;
1186}
1187
1188void
1189PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1190    pos=0;
1191}
1192
1193int32_t
1194PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1195       return fKeywordNames.size();
1196}
1197
1198PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1199    UnicodeString *s;
1200    for (int32_t i=0; i<fKeywordNames.size(); ++i) {
1201        if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) {
1202            delete s;
1203        }
1204    }
1205}
1206
1207U_NAMESPACE_END
1208
1209
1210#endif /* #if !UCONFIG_NO_FORMATTING */
1211
1212//eof
1213