plurrule.cpp revision b0ac937921a2c196d8b9da665135bf6ba01a1ccf
1/*
2*******************************************************************************
3* Copyright (C) 2007-2009, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*
7* File PLURRULE.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*******************************************************************************
13*/
14
15
16#include "unicode/uniset.h"
17#include "unicode/utypes.h"
18#include "unicode/ures.h"
19#include "unicode/plurrule.h"
20#include "cmemory.h"
21#include "cstring.h"
22#include "hash.h"
23#include "mutex.h"
24#include "plurrule_impl.h"
25#include "putilimp.h"
26#include "ucln_in.h"
27#include "ustrfmt.h"
28#include "locutil.h"
29
30/*
31// TODO(claireho): remove stdio
32#include "stdio.h"
33*/
34
35#if !UCONFIG_NO_FORMATTING
36
37U_NAMESPACE_BEGIN
38
39
// Number of elements in a fixed-size array, as an int32_t.
// Only meaningful for true arrays (not pointers). The argument and the whole
// expansion are parenthesized so that any array-valued expression can be
// passed and the result can be used inside larger expressions.
#define ARRAY_SIZE(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
41
42static const UChar PLURAL_KEYWORD_ZERO[] = {LOW_Z,LOW_E,LOW_R,LOW_O, 0};
43static const UChar PLURAL_KEYWORD_ONE[]={LOW_O,LOW_N,LOW_E,0};
44static const UChar PLURAL_KEYWORD_TWO[]={LOW_T,LOW_W,LOW_O,0};
45static const UChar PLURAL_KEYWORD_FEW[]={LOW_F,LOW_E,LOW_W,0};
46static const UChar PLURAL_KEYWORD_MANY[]={LOW_M,LOW_A,LOW_N,LOW_Y,0};
47static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
48static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
49static const UChar PK_IN[]={LOW_I,LOW_N,0};
50static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
51static const UChar PK_IS[]={LOW_I,LOW_S,0};
52static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
53static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
54static const UChar PK_OR[]={LOW_O,LOW_R,0};
55static const UChar PK_VAR_N[]={LOW_N,0};
56static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
57
58UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
59UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
60
61PluralRules::PluralRules(UErrorCode& status)
62:   UObject(),
63    mRules(NULL)
64{
65    if (U_FAILURE(status)) {
66        return;
67    }
68    mParser = new RuleParser();
69    if (mParser==NULL) {
70        status = U_MEMORY_ALLOCATION_ERROR;
71    }
72}
73
74PluralRules::PluralRules(const PluralRules& other)
75: UObject(other),
76    mRules(NULL),
77    mParser(new RuleParser())
78{
79    *this=other;
80}
81
82PluralRules::~PluralRules() {
83    delete mRules;
84    delete mParser;
85}
86
87PluralRules*
88PluralRules::clone() const {
89    return new PluralRules(*this);
90}
91
92PluralRules&
93PluralRules::operator=(const PluralRules& other) {
94    if (this != &other) {
95        delete mRules;
96        if (other.mRules==NULL) {
97            mRules = NULL;
98        }
99        else {
100            mRules = new RuleChain(*other.mRules);
101        }
102        delete mParser;
103        mParser = new RuleParser();
104    }
105
106    return *this;
107}
108
109PluralRules* U_EXPORT2
110PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
111    RuleChain   rules;
112
113    if (U_FAILURE(status)) {
114        return NULL;
115    }
116    PluralRules *newRules = new PluralRules(status);
117    if ( (newRules != NULL)&& U_SUCCESS(status) ) {
118        newRules->parseDescription((UnicodeString &)description, rules, status);
119        if (U_SUCCESS(status)) {
120            newRules->addRules(rules);
121        }
122    }
123    if (U_FAILURE(status)) {
124        delete newRules;
125        return NULL;
126    }
127    else {
128        return newRules;
129    }
130}
131
132PluralRules* U_EXPORT2
133PluralRules::createDefaultRules(UErrorCode& status) {
134    return createRules(PLURAL_DEFAULT_RULE, status);
135}
136
137PluralRules* U_EXPORT2
138PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
139    RuleChain   rChain;
140    if (U_FAILURE(status)) {
141        return NULL;
142    }
143    PluralRules *newObj = new PluralRules(status);
144    if (newObj==NULL || U_FAILURE(status)) {
145        return NULL;
146    }
147    UnicodeString locRule = newObj->getRuleFromResource(locale, status);
148    if ((locRule.length() != 0) && U_SUCCESS(status)) {
149        newObj->parseDescription(locRule, rChain, status);
150        if (U_SUCCESS(status)) {
151            newObj->addRules(rChain);
152        }
153    }
154    if (U_FAILURE(status)||(locRule.length() == 0)) {
155        // use default plural rule
156        status = U_ZERO_ERROR;
157        UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
158        newObj->parseDescription(defRule, rChain, status);
159        newObj->addRules(rChain);
160    }
161
162    return newObj;
163}
164
165UnicodeString
166PluralRules::select(int32_t number) const {
167    if (mRules == NULL) {
168        return PLURAL_DEFAULT_RULE;
169    }
170    else {
171        return mRules->select(number);
172    }
173}
174
175UnicodeString
176PluralRules::select(double number) const {
177    if (mRules == NULL) {
178        return PLURAL_DEFAULT_RULE;
179    }
180    else {
181        return mRules->select(number);
182    }
183}
184
185StringEnumeration*
186PluralRules::getKeywords(UErrorCode& status) const {
187    if (U_FAILURE(status))  return NULL;
188    StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
189    if (U_FAILURE(status))  return NULL;
190
191    return nameEnumerator;
192}
193
194
195UBool
196PluralRules::isKeyword(const UnicodeString& keyword) const {
197    if ( keyword == PLURAL_KEYWORD_OTHER ) {
198        return true;
199    }
200    else {
201        if (mRules==NULL) {
202            return false;
203        }
204        else {
205            return mRules->isKeyword(keyword);
206        }
207    }
208}
209
210UnicodeString
211PluralRules::getKeywordOther() const {
212    return PLURAL_KEYWORD_OTHER;
213}
214
215UBool
216PluralRules::operator==(const PluralRules& other) const  {
217    int32_t limit;
218    UBool sameList = TRUE;
219    const UnicodeString *ptrKeyword;
220    UErrorCode status= U_ZERO_ERROR;
221
222    if ( this == &other ) {
223        return TRUE;
224    }
225    StringEnumeration* myKeywordList = getKeywords(status);
226    if (U_FAILURE(status)) {
227        return FALSE;
228    }
229    StringEnumeration* otherKeywordList =other.getKeywords(status);
230    if (U_FAILURE(status)) {
231        return FALSE;
232    }
233
234    if (myKeywordList->count(status)!=otherKeywordList->count(status) ||
235        U_FAILURE(status)) {
236        sameList = FALSE;
237    }
238    else {
239        myKeywordList->reset(status);
240        if (U_FAILURE(status)) {
241            return FALSE;
242        }
243        while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) {
244            if (U_FAILURE(status) || !other.isKeyword(*ptrKeyword)) {
245                sameList = FALSE;
246            }
247        }
248        otherKeywordList->reset(status);
249        if (U_FAILURE(status)) {
250            return FALSE;
251        }
252        while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) {
253            if (U_FAILURE(status)) {
254                return FALSE;
255            }
256            if (!this->isKeyword(*ptrKeyword))  {
257                sameList = FALSE;
258            }
259        }
260        delete myKeywordList;
261        delete otherKeywordList;
262        if (!sameList) {
263            return FALSE;
264        }
265    }
266
267    if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
268        return FALSE;
269    }
270    UnicodeString myKeyword, otherKeyword;
271    for (int32_t i=0; i<limit; ++i) {
272        myKeyword = this->select(i);
273        otherKeyword = other.select(i);
274        if (myKeyword!=otherKeyword) {
275            return FALSE;
276        }
277    }
278    return TRUE;
279}
280
281void
282PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
283{
284    int32_t ruleIndex=0;
285    UnicodeString token;
286    tokenType type;
287    tokenType prevType=none;
288    RuleChain *ruleChain=NULL;
289    AndConstraint *curAndConstraint=NULL;
290    OrConstraint *orNode=NULL;
291    RuleChain *lastChain=NULL;
292
293    if (U_FAILURE(status)) {
294        return;
295    }
296    UnicodeString ruleData = data.toLower();
297    while (ruleIndex< ruleData.length()) {
298        mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
299        if (U_FAILURE(status)) {
300            return;
301        }
302        mParser->checkSyntax(prevType, type, status);
303        if (U_FAILURE(status)) {
304            return;
305        }
306        switch (type) {
307        case tAnd:
308            curAndConstraint = curAndConstraint->add();
309            break;
310        case tOr:
311            lastChain = &rules;
312            while (lastChain->next !=NULL) {
313                lastChain = lastChain->next;
314            }
315            orNode=lastChain->ruleHeader;
316            while (orNode->next != NULL) {
317                orNode = orNode->next;
318            }
319            orNode->next= new OrConstraint();
320            orNode=orNode->next;
321            orNode->next=NULL;
322            curAndConstraint = orNode->add();
323            break;
324        case tIs:
325            curAndConstraint->rangeHigh=-1;
326            break;
327        case tNot:
328            curAndConstraint->notIn=TRUE;
329            break;
330        case tIn:
331            curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
332            curAndConstraint->integerOnly = TRUE;
333            break;
334        case tWithin:
335            curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
336            break;
337        case tNumber:
338            if ( (curAndConstraint->op==AndConstraint::MOD)&&
339                 (curAndConstraint->opNum == -1 ) ) {
340                curAndConstraint->opNum=getNumberValue(token);
341            }
342            else {
343                if (curAndConstraint->rangeLow == -1) {
344                    curAndConstraint->rangeLow=getNumberValue(token);
345                }
346                else {
347                    curAndConstraint->rangeHigh=getNumberValue(token);
348                }
349            }
350            break;
351        case tMod:
352            curAndConstraint->op=AndConstraint::MOD;
353            break;
354        case tKeyword:
355            if (ruleChain==NULL) {
356                ruleChain = &rules;
357            }
358            else {
359                while (ruleChain->next!=NULL){
360                    ruleChain=ruleChain->next;
361                }
362                ruleChain=ruleChain->next=new RuleChain();
363            }
364            orNode = ruleChain->ruleHeader = new OrConstraint();
365            curAndConstraint = orNode->add();
366            ruleChain->keyword = token;
367            break;
368        default:
369            break;
370        }
371        prevType=type;
372    }
373}
374
375int32_t
376PluralRules::getNumberValue(const UnicodeString& token) const {
377    int32_t i;
378    char digits[128];
379
380    i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
381    digits[i]='\0';
382
383    return((int32_t)atoi(digits));
384}
385
386
387void
388PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
389    int32_t i=*curIndex;
390
391    localeName.remove();
392    while (i< localeData.length()) {
393       if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
394           break;
395       }
396       i++;
397    }
398
399    while (i< localeData.length()) {
400       if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
401           break;
402       }
403       localeName+=localeData.charAt(i++);
404    }
405    *curIndex=i;
406}
407
408
409int32_t
410PluralRules::getRepeatLimit() const {
411    if (mRules!=NULL) {
412        return mRules->getRepeatLimit();
413    }
414    else {
415        return 0;
416    }
417}
418
419
420void
421PluralRules::addRules(RuleChain& rules) {
422    RuleChain *newRule = new RuleChain(rules);
423    this->mRules=newRule;
424    newRule->setRepeatLimit();
425}
426
427UnicodeString
428PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
429    UnicodeString emptyStr;
430
431    if (U_FAILURE(errCode)) {
432        return emptyStr;
433    }
434    UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
435    if(U_FAILURE(errCode)) {
436        /* total failure, not even root could be opened */
437        return emptyStr;
438    }
439    UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
440    if(U_FAILURE(errCode)) {
441        ures_close(rb);
442        return emptyStr;
443    }
444    int32_t resLen=0;
445    const char *curLocaleName=locale.getName();
446    const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
447
448    if (s == NULL) {
449        // Check parent locales.
450        UErrorCode status = U_ZERO_ERROR;
451        char parentLocaleName[ULOC_FULLNAME_CAPACITY];
452        const char *curLocaleName=locale.getName();
453        int32_t localeNameLen=0;
454        uprv_strcpy(parentLocaleName, curLocaleName);
455
456        while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
457                                       ULOC_FULLNAME_CAPACITY, &status)) > 0) {
458            resLen=0;
459            s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
460            if (s != NULL) {
461                errCode = U_ZERO_ERROR;
462                break;
463            }
464            status = U_ZERO_ERROR;
465        }
466    }
467    if (s==NULL) {
468        ures_close(locRes);
469        ures_close(rb);
470        return emptyStr;
471    }
472
473    char setKey[256];
474    UChar result[256];
475    u_UCharsToChars(s, setKey, resLen + 1);
476    // printf("\n PluralRule: %s\n", setKey);
477
478
479    UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
480    if(U_FAILURE(errCode)) {
481        ures_close(locRes);
482        ures_close(rb);
483        return emptyStr;
484    }
485    resLen=0;
486    UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
487    if (U_FAILURE(errCode)) {
488        ures_close(ruleRes);
489        ures_close(locRes);
490        ures_close(rb);
491        return emptyStr;
492    }
493
494    int32_t numberKeys = ures_getSize(setRes);
495    char *key=NULL;
496    int32_t len=0;
497    for(int32_t i=0; i<numberKeys; ++i) {
498        int32_t keyLen;
499        resLen=0;
500        s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
501        keyLen = uprv_strlen(key);
502        u_charsToUChars(key, result+len, keyLen);
503        len += keyLen;
504        result[len++]=COLON;
505        uprv_memcpy(result+len, s, resLen*sizeof(UChar));
506        len += resLen;
507        result[len++]=SEMI_COLON;
508    }
509    result[len++]=0;
510    u_UCharsToChars(result, setKey, len);
511    // printf(" Rule: %s\n", setKey);
512
513    ures_close(setRes);
514    ures_close(ruleRes);
515    ures_close(locRes);
516    ures_close(rb);
517    return UnicodeString(result);
518
519}
520
521AndConstraint::AndConstraint() {
522    op = AndConstraint::NONE;
523    opNum=-1;
524    rangeLow=-1;
525    rangeHigh=-1;
526    notIn=FALSE;
527    integerOnly=FALSE;
528    next=NULL;
529}
530
531
532AndConstraint::AndConstraint(const AndConstraint& other) {
533    this->op = other.op;
534    this->opNum=other.opNum;
535    this->rangeLow=other.rangeLow;
536    this->rangeHigh=other.rangeHigh;
537    this->integerOnly=other.integerOnly;
538    this->notIn=other.notIn;
539    if (other.next==NULL) {
540        this->next=NULL;
541    }
542    else {
543        this->next = new AndConstraint(*other.next);
544    }
545}
546
547AndConstraint::~AndConstraint() {
548    if (next!=NULL) {
549        delete next;
550    }
551}
552
553
554UBool
555AndConstraint::isFulfilled(double number) {
556    UBool result=TRUE;
557    double value=number;
558
559    if ( op == MOD ) {
560        value = (int32_t)value % opNum;
561    }
562    if ( rangeHigh == -1 ) {
563        if ( rangeLow == -1 ) {
564            result = TRUE; // empty rule
565        }
566        else {
567            if ( value == rangeLow ) {
568                result = TRUE;
569            }
570            else {
571                result = FALSE;
572            }
573        }
574    }
575    else {
576        if ((rangeLow <= value) && (value <= rangeHigh)) {
577            if (integerOnly) {
578                if ( value != (int32_t)value) {
579                    result = FALSE;
580                }
581                else {
582                    result = TRUE;
583                }
584            }
585            else {
586                result = TRUE;
587            }
588        }
589        else {
590            result = FALSE;
591        }
592    }
593    if (notIn) {
594        return !result;
595    }
596    else {
597        return result;
598    }
599}
600
601int32_t
602AndConstraint::updateRepeatLimit(int32_t maxLimit) {
603
604    if ( op == MOD ) {
605        return uprv_max(opNum, maxLimit);
606    }
607    else {
608        if ( rangeHigh == -1 ) {
609            return(rangeLow>maxLimit? rangeLow : maxLimit);
610            return uprv_max(rangeLow, maxLimit);
611        }
612        else{
613            return uprv_max(rangeHigh, maxLimit);
614        }
615    }
616}
617
618
619AndConstraint*
620AndConstraint::add()
621{
622    this->next = new AndConstraint();
623    return this->next;
624}
625
626OrConstraint::OrConstraint() {
627    childNode=NULL;
628    next=NULL;
629}
630
631OrConstraint::OrConstraint(const OrConstraint& other) {
632    if ( other.childNode == NULL ) {
633        this->childNode = NULL;
634    }
635    else {
636        this->childNode = new AndConstraint(*(other.childNode));
637    }
638    if (other.next == NULL ) {
639        this->next = NULL;
640    }
641    else {
642        this->next = new OrConstraint(*(other.next));
643    }
644}
645
646OrConstraint::~OrConstraint() {
647    if (childNode!=NULL) {
648        delete childNode;
649    }
650    if (next!=NULL) {
651        delete next;
652    }
653}
654
655AndConstraint*
656OrConstraint::add()
657{
658    OrConstraint *curOrConstraint=this;
659    {
660        while (curOrConstraint->next!=NULL) {
661            curOrConstraint = curOrConstraint->next;
662        }
663        curOrConstraint->next = NULL;
664        curOrConstraint->childNode = new AndConstraint();
665    }
666    return curOrConstraint->childNode;
667}
668
669UBool
670OrConstraint::isFulfilled(double number) {
671    OrConstraint* orRule=this;
672    UBool result=FALSE;
673
674    while (orRule!=NULL && !result) {
675        result=TRUE;
676        AndConstraint* andRule = orRule->childNode;
677        while (andRule!=NULL && result) {
678            result = andRule->isFulfilled(number);
679            andRule=andRule->next;
680        }
681        orRule = orRule->next;
682    }
683
684    return result;
685}
686
687
688RuleChain::RuleChain() {
689    ruleHeader=NULL;
690    next = NULL;
691    repeatLimit=0;
692}
693
694RuleChain::RuleChain(const RuleChain& other) {
695    this->repeatLimit = other.repeatLimit;
696    this->keyword=other.keyword;
697    if (other.ruleHeader != NULL) {
698        this->ruleHeader = new OrConstraint(*(other.ruleHeader));
699    }
700    else {
701        this->ruleHeader = NULL;
702    }
703    if (other.next != NULL ) {
704        this->next = new RuleChain(*other.next);
705    }
706    else
707    {
708        this->next = NULL;
709    }
710}
711
712RuleChain::~RuleChain() {
713    if (next != NULL) {
714        delete next;
715    }
716    if ( ruleHeader != NULL ) {
717        delete ruleHeader;
718    }
719}
720
721UnicodeString
722RuleChain::select(double number) const {
723
724   if ( ruleHeader != NULL ) {
725       if (ruleHeader->isFulfilled(number)) {
726           return keyword;
727       }
728   }
729   if ( next != NULL ) {
730       return next->select(number);
731   }
732   else {
733       return PLURAL_KEYWORD_OTHER;
734   }
735
736}
737
738void
739RuleChain::dumpRules(UnicodeString& result) {
740    UChar digitString[16];
741
742    if ( ruleHeader != NULL ) {
743        result +=  keyword;
744        OrConstraint* orRule=ruleHeader;
745        while ( orRule != NULL ) {
746            AndConstraint* andRule=orRule->childNode;
747            while ( andRule != NULL ) {
748                if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
749                    result += UNICODE_STRING_SIMPLE(" n is ");
750                    if (andRule->notIn) {
751                        result += UNICODE_STRING_SIMPLE("not ");
752                    }
753                    uprv_itou(digitString,16, andRule->rangeLow,10,0);
754                    result += UnicodeString(digitString);
755                }
756                else {
757                    if (andRule->op==AndConstraint::MOD) {
758                        result += UNICODE_STRING_SIMPLE("  n mod ");
759                        uprv_itou(digitString,16, andRule->opNum,10,0);
760                        result += UnicodeString(digitString);
761                    }
762                    else {
763                        result += UNICODE_STRING_SIMPLE("  n ");
764                    }
765                    if (andRule->rangeHigh==-1) {
766                        if (andRule->notIn) {
767                            result += UNICODE_STRING_SIMPLE(" is not ");
768                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
769                            result += UnicodeString(digitString);
770                        }
771                        else {
772                            result += UNICODE_STRING_SIMPLE(" is ");
773                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
774                            result += UnicodeString(digitString);
775                        }
776                    }
777                    else {
778                        if (andRule->notIn) {
779                            if ( andRule->integerOnly ) {
780                                result += UNICODE_STRING_SIMPLE("  not in ");
781                            }
782                            else {
783                                result += UNICODE_STRING_SIMPLE("  not within ");
784                            }
785                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
786                            result += UnicodeString(digitString);
787                            result += UNICODE_STRING_SIMPLE(" .. ");
788                            uprv_itou(digitString,16, andRule->rangeHigh,10,0);
789                            result += UnicodeString(digitString);
790                        }
791                        else {
792                            if ( andRule->integerOnly ) {
793                                result += UNICODE_STRING_SIMPLE(" in ");
794                            }
795                            else {
796                                result += UNICODE_STRING_SIMPLE(" within ");
797                            }
798                            uprv_itou(digitString,16, andRule->rangeLow,10,0);
799                            result += UnicodeString(digitString);
800                            result += UNICODE_STRING_SIMPLE(" .. ");
801                            uprv_itou(digitString,16, andRule->rangeHigh,10,0);
802                        }
803                    }
804                }
805                if ( (andRule=andRule->next) != NULL) {
806                    result += PK_AND;
807                }
808            }
809            if ( (orRule = orRule->next) != NULL ) {
810                result += PK_OR;
811            }
812        }
813    }
814    if ( next != NULL ) {
815        next->dumpRules(result);
816    }
817}
818
819int32_t
820RuleChain::getRepeatLimit () {
821    return repeatLimit;
822}
823
824void
825RuleChain::setRepeatLimit () {
826    int32_t limit=0;
827
828    if ( next != NULL ) {
829        next->setRepeatLimit();
830        limit = next->repeatLimit;
831    }
832
833    if ( ruleHeader != NULL ) {
834        OrConstraint* orRule=ruleHeader;
835        while ( orRule != NULL ) {
836            AndConstraint* andRule=orRule->childNode;
837            while ( andRule != NULL ) {
838                limit = andRule->updateRepeatLimit(limit);
839                andRule = andRule->next;
840            }
841            orRule = orRule->next;
842        }
843    }
844    repeatLimit = limit;
845}
846
847UErrorCode
848RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
849    if ( arraySize < capacityOfKeywords-1 ) {
850        keywords[arraySize++]=keyword;
851    }
852    else {
853        return U_BUFFER_OVERFLOW_ERROR;
854    }
855
856    if ( next != NULL ) {
857        return next->getKeywords(capacityOfKeywords, keywords, arraySize);
858    }
859    else {
860        return U_ZERO_ERROR;
861    }
862}
863
864UBool
865RuleChain::isKeyword(const UnicodeString& keywordParam) const {
866    if ( keyword == keywordParam ) {
867        return TRUE;
868    }
869
870    if ( next != NULL ) {
871        return next->isKeyword(keywordParam);
872    }
873    else {
874        return FALSE;
875    }
876}
877
878
879RuleParser::RuleParser() {
880    UErrorCode err=U_ZERO_ERROR;
881    const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
882    const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
883    idStartFilter = new UnicodeSet(idStart, err);
884    idContinueFilter = new UnicodeSet(idContinue, err);
885}
886
887RuleParser::~RuleParser() {
888    delete idStartFilter;
889    delete idContinueFilter;
890}
891
892void
893RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
894{
895    if (U_FAILURE(status)) {
896        return;
897    }
898    switch(prevType) {
899    case none:
900    case tSemiColon:
901        if (curType!=tKeyword) {
902            status = U_UNEXPECTED_TOKEN;
903        }
904        break;
905    case tVariableN :
906        if (curType != tIs && curType != tMod && curType != tIn &&
907            curType != tNot && curType != tWithin) {
908            status = U_UNEXPECTED_TOKEN;
909        }
910        break;
911    case tZero:
912    case tOne:
913    case tTwo:
914    case tFew:
915    case tMany:
916    case tOther:
917    case tKeyword:
918        if (curType != tColon) {
919            status = U_UNEXPECTED_TOKEN;
920        }
921        break;
922    case tColon :
923        if (curType != tVariableN) {
924            status = U_UNEXPECTED_TOKEN;
925        }
926        break;
927    case tIs:
928        if ( curType != tNumber && curType != tNot) {
929            status = U_UNEXPECTED_TOKEN;
930        }
931        break;
932    case tNot:
933        if (curType != tNumber && curType != tIn && curType != tWithin) {
934            status = U_UNEXPECTED_TOKEN;
935        }
936        break;
937    case tMod:
938    case tDot:
939    case tIn:
940    case tWithin:
941    case tAnd:
942    case tOr:
943        if (curType != tNumber && curType != tVariableN) {
944            status = U_UNEXPECTED_TOKEN;
945        }
946        break;
947    case tNumber:
948        if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
949            curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
950        {
951            status = U_UNEXPECTED_TOKEN;
952        }
953        break;
954    default:
955        status = U_UNEXPECTED_TOKEN;
956        break;
957    }
958}
959
960void
961RuleParser::getNextToken(const UnicodeString& ruleData,
962                         int32_t *ruleIndex,
963                         UnicodeString& token,
964                         tokenType& type,
965                         UErrorCode &status)
966{
967    int32_t curIndex= *ruleIndex;
968    UChar ch;
969    tokenType prevType=none;
970
971    if (U_FAILURE(status)) {
972        return;
973    }
974    while (curIndex<ruleData.length()) {
975        ch = ruleData.charAt(curIndex);
976        if ( !inRange(ch, type) ) {
977            status = U_ILLEGAL_CHARACTER;
978            return;
979        }
980        switch (type) {
981        case tSpace:
982            if ( *ruleIndex != curIndex ) { // letter
983                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
984                *ruleIndex=curIndex;
985                type=prevType;
986                getKeyType(token, type, status);
987                return;
988            }
989            else {
990                *ruleIndex=*ruleIndex+1;
991            }
992            break; // consective space
993        case tColon:
994        case tSemiColon:
995            if ( *ruleIndex != curIndex ) {
996                token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
997                *ruleIndex=curIndex;
998                type=prevType;
999                getKeyType(token, type, status);
1000                return;
1001            }
1002            else {
1003                *ruleIndex=curIndex+1;
1004                return;
1005            }
1006        case tLetter:
1007             if ((type==prevType)||(prevType==none)) {
1008                prevType=type;
1009                break;
1010             }
1011             break;
1012        case tNumber:
1013             if ((type==prevType)||(prevType==none)) {
1014                prevType=type;
1015                break;
1016             }
1017             else {
1018                *ruleIndex=curIndex+1;
1019                return;
1020             }
1021         case tDot:
1022             if (prevType==none) {  // first dot
1023                prevType=type;
1024                continue;
1025             }
1026             else {
1027                 if ( *ruleIndex != curIndex ) {
1028                    token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1029                    *ruleIndex=curIndex;  // letter
1030                    type=prevType;
1031                    getKeyType(token, type, status);
1032                    return;
1033                 }
1034                 else {  // two consective dots
1035                    *ruleIndex=curIndex+2;
1036                    return;
1037                 }
1038             }
1039             break;
1040         default:
1041             status = U_UNEXPECTED_TOKEN;
1042             return;
1043        }
1044        curIndex++;
1045    }
1046    if ( curIndex>=ruleData.length() ) {
1047        if ( (type == tLetter)||(type == tNumber) ) {
1048            token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1049            getKeyType(token, type, status);
1050            if (U_FAILURE(status)) {
1051                return;
1052            }
1053        }
1054        *ruleIndex = ruleData.length();
1055    }
1056}
1057
1058UBool
1059RuleParser::inRange(UChar ch, tokenType& type) {
1060    if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1061        // we assume all characters are in lower case already.
1062        return FALSE;
1063    }
1064    if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1065        type = tLetter;
1066        return TRUE;
1067    }
1068    if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1069        type = tNumber;
1070        return TRUE;
1071    }
1072    switch (ch) {
1073    case COLON:
1074        type = tColon;
1075        return TRUE;
1076    case SPACE:
1077        type = tSpace;
1078        return TRUE;
1079    case SEMI_COLON:
1080        type = tSemiColon;
1081        return TRUE;
1082    case DOT:
1083        type = tDot;
1084        return TRUE;
1085    default :
1086        type = none;
1087        return FALSE;
1088    }
1089}
1090
1091
1092void
1093RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1094{
1095    if (U_FAILURE(status)) {
1096        return;
1097    }
1098    if ( keyType==tNumber) {
1099    }
1100    else if (token==PK_VAR_N) {
1101        keyType = tVariableN;
1102    }
1103    else if (token==PK_IS) {
1104        keyType = tIs;
1105    }
1106    else if (token==PK_AND) {
1107        keyType = tAnd;
1108    }
1109    else if (token==PK_IN) {
1110        keyType = tIn;
1111    }
1112    else if (token==PK_WITHIN) {
1113        keyType = tWithin;
1114    }
1115    else if (token==PK_NOT) {
1116        keyType = tNot;
1117    }
1118    else if (token==PK_MOD) {
1119        keyType = tMod;
1120    }
1121    else if (token==PK_OR) {
1122        keyType = tOr;
1123    }
1124    else if ( isValidKeyword(token) ) {
1125        keyType = tKeyword;
1126    }
1127    else {
1128        status = U_UNEXPECTED_TOKEN;
1129    }
1130}
1131
1132UBool
1133RuleParser::isValidKeyword(const UnicodeString& token) {
1134    if ( token.length()==0 ) {
1135        return FALSE;
1136    }
1137    if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
1138        int32_t i;
1139        for (i=1; i< token.length(); i++) {
1140            if (idContinueFilter->contains(token.charAt(i))== FALSE) {
1141                return FALSE;
1142            }
1143        }
1144        return TRUE;
1145    }
1146    else {
1147        return FALSE;
1148    }
1149}
1150
1151PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
1152fKeywordNames(status)
1153{
1154    RuleChain *node=header;
1155    UBool  addKeywordOther=true;
1156
1157    if (U_FAILURE(status)) {
1158        return;
1159    }
1160    pos=0;
1161    fKeywordNames.removeAllElements();
1162    while(node!=NULL) {
1163        fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1164        if (U_FAILURE(status)) {
1165            return;
1166        }
1167        if (node->keyword == PLURAL_KEYWORD_OTHER) {
1168            addKeywordOther= false;
1169        }
1170        node=node->next;
1171    }
1172
1173    if (addKeywordOther) {
1174        fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1175        if (U_FAILURE(status)) {
1176            return;
1177        }
1178    }
1179}
1180
1181const UnicodeString*
1182PluralKeywordEnumeration::snext(UErrorCode& status) {
1183    if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1184        return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1185    }
1186    return NULL;
1187}
1188
1189void
1190PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1191    pos=0;
1192}
1193
1194int32_t
1195PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1196       return fKeywordNames.size();
1197}
1198
1199PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1200    UnicodeString *s;
1201    for (int32_t i=0; i<fKeywordNames.size(); ++i) {
1202        if ((s=(UnicodeString *)fKeywordNames.elementAt(i))!=NULL) {
1203            delete s;
1204        }
1205    }
1206}
1207
1208U_NAMESPACE_END
1209
1210
1211#endif /* #if !UCONFIG_NO_FORMATTING */
1212
1213//eof
1214