1/*
2*******************************************************************************
3* Copyright (C) 1997-2009, International Business Machines Corporation
4* and others. All Rights Reserved.
5*******************************************************************************
6*/
7
8#include "unicode/rbnf.h"
9
10#if U_HAVE_RBNF
11
12#include "unicode/normlzr.h"
13#include "unicode/tblcoll.h"
14#include "unicode/uchar.h"
15#include "unicode/ucol.h"
16#include "unicode/uloc.h"
17#include "unicode/unum.h"
18#include "unicode/ures.h"
19#include "unicode/ustring.h"
20#include "unicode/utf16.h"
21#include "unicode/udata.h"
22#include "nfrs.h"
23
24#include "cmemory.h"
25#include "cstring.h"
26#include "../common/util.h"
27#include "uresimp.h"
28
29// debugging
30// #define DEBUG
31
32#ifdef DEBUG
33#include "stdio.h"
34#endif
35
// Name of the resource tree that holds the RBNF rules; passed to ures_open()
// by the tag-based constructor further down.
#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"

// Prefix that marks a rule set name as internal. The named-rule-set format()
// overloads reject names that start with it.
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
// The arrays below are NUL-terminated UTF-16 spellings of the ASCII text shown
// in the trailing comment of each definition.
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
static const UChar gSemiColon = 0x003B; /* ';' */
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble evaluates to 2^22 as a double and kMaxDouble to its square, 2^44.
// NOTE(review): neither macro is used in the part of the file reviewed here;
// their intended meaning should be confirmed at the point of use.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
58
// Temporary workaround - when noParse is true, do nothing in parse.
// The tag-based constructor sets noParse for URBNF_SPELLOUT formatters whose
// locale language appears in this NULL-terminated list.
// TODO: We need a real fix - see #6895/#6896
static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
62
63U_NAMESPACE_BEGIN
64
65UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
66
67/*
68This is a utility class. It does not use ICU's RTTI.
69If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
70Please make sure that intltest passes on Windows in Release mode,
71since the string pooling per compilation unit will mess up how RTTI works.
72The RTTI code was also removed due to lack of code coverage.
73*/
74class LocalizationInfo : public UMemory {
75protected:
76    virtual ~LocalizationInfo() {};
77    uint32_t refcount;
78
79public:
80    LocalizationInfo() : refcount(0) {}
81
82    LocalizationInfo* ref(void) {
83        ++refcount;
84        return this;
85    }
86
87    LocalizationInfo* unref(void) {
88        if (refcount && --refcount == 0) {
89            delete this;
90        }
91        return NULL;
92    }
93
94    virtual UBool operator==(const LocalizationInfo* rhs) const;
95    inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
96
97    virtual int32_t getNumberOfRuleSets(void) const = 0;
98    virtual const UChar* getRuleSetName(int32_t index) const = 0;
99    virtual int32_t getNumberOfDisplayLocales(void) const = 0;
100    virtual const UChar* getLocaleName(int32_t index) const = 0;
101    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
102
103    virtual int32_t indexForLocale(const UChar* locale) const;
104    virtual int32_t indexForRuleSet(const UChar* ruleset) const;
105
106//    virtual UClassID getDynamicClassID() const = 0;
107//    static UClassID getStaticClassID(void);
108};
109
110//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
111
112// if both strings are NULL, this returns TRUE
113static UBool
114streq(const UChar* lhs, const UChar* rhs) {
115    if (rhs == lhs) {
116        return TRUE;
117    }
118    if (lhs && rhs) {
119        return u_strcmp(lhs, rhs) == 0;
120    }
121    return FALSE;
122}
123
124UBool
125LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
126    if (rhs) {
127        if (this == rhs) {
128            return TRUE;
129        }
130
131        int32_t rsc = getNumberOfRuleSets();
132        if (rsc == rhs->getNumberOfRuleSets()) {
133            for (int i = 0; i < rsc; ++i) {
134                if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
135                    return FALSE;
136                }
137            }
138            int32_t dlc = getNumberOfDisplayLocales();
139            if (dlc == rhs->getNumberOfDisplayLocales()) {
140                for (int i = 0; i < dlc; ++i) {
141                    const UChar* locale = getLocaleName(i);
142                    int32_t ix = rhs->indexForLocale(locale);
143                    // if no locale, ix is -1, getLocaleName returns null, so streq returns false
144                    if (!streq(locale, rhs->getLocaleName(ix))) {
145                        return FALSE;
146                    }
147                    for (int j = 0; j < rsc; ++j) {
148                        if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
149                            return FALSE;
150                        }
151                    }
152                }
153                return TRUE;
154            }
155        }
156    }
157    return FALSE;
158}
159
160int32_t
161LocalizationInfo::indexForLocale(const UChar* locale) const {
162    for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
163        if (streq(locale, getLocaleName(i))) {
164            return i;
165        }
166    }
167    return -1;
168}
169
170int32_t
171LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
172    if (ruleset) {
173        for (int i = 0; i < getNumberOfRuleSets(); ++i) {
174            if (streq(ruleset, getRuleSetName(i))) {
175                return i;
176            }
177        }
178    }
179    return -1;
180}
181
182
183typedef void (*Fn_Deleter)(void*);
184
185class VArray {
186    void** buf;
187    int32_t cap;
188    int32_t size;
189    Fn_Deleter deleter;
190public:
191    VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
192
193    VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
194
195    ~VArray() {
196        if (deleter) {
197            for (int i = 0; i < size; ++i) {
198                (*deleter)(buf[i]);
199            }
200        }
201        uprv_free(buf);
202    }
203
204    int32_t length() {
205        return size;
206    }
207
208    void add(void* elem, UErrorCode& status) {
209        if (U_SUCCESS(status)) {
210            if (size == cap) {
211                if (cap == 0) {
212                    cap = 1;
213                } else if (cap < 256) {
214                    cap *= 2;
215                } else {
216                    cap += 256;
217                }
218                if (buf == NULL) {
219                    buf = (void**)uprv_malloc(cap * sizeof(void*));
220                } else {
221                    buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
222                }
223                if (buf == NULL) {
224                    // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
225                    status = U_MEMORY_ALLOCATION_ERROR;
226                    return;
227                }
228                void* start = &buf[size];
229                size_t count = (cap - size) * sizeof(void*);
230                uprv_memset(start, 0, count); // fill with nulls, just because
231            }
232            buf[size++] = elem;
233        }
234    }
235
236    void** release(void) {
237        void** result = buf;
238        buf = NULL;
239        cap = 0;
240        size = 0;
241        return result;
242    }
243};
244
245class LocDataParser;
246
247class StringLocalizationInfo : public LocalizationInfo {
248    UChar* info;
249    UChar*** data;
250    int32_t numRuleSets;
251    int32_t numLocales;
252
253friend class LocDataParser;
254
255    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
256        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
257    {
258    }
259
260public:
261    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
262
263    virtual ~StringLocalizationInfo();
264    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
265    virtual const UChar* getRuleSetName(int32_t index) const;
266    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
267    virtual const UChar* getLocaleName(int32_t index) const;
268    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
269
270//    virtual UClassID getDynamicClassID() const;
271//    static UClassID getStaticClassID(void);
272
273private:
274    void init(UErrorCode& status) const;
275};
276
277
// Code points of the delimiter characters recognized by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' single quote */
    QUOTE = 0x0022, /* '"' double quote */
    SPACE = 0x0020 /* ' ' */
};
286
287/**
288 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
289 */
290class LocDataParser {
291    UChar* data;
292    const UChar* e;
293    UChar* p;
294    UChar ch;
295    UParseError& pe;
296    UErrorCode& ec;
297
298public:
299    LocDataParser(UParseError& parseError, UErrorCode& status)
300        : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
301    ~LocDataParser() {}
302
303    /*
304    * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
305    * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
306    */
307    StringLocalizationInfo* parse(UChar* data, int32_t len);
308
309private:
310
311    void inc(void) { ++p; ch = 0xffff; }
312    UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
313    UBool check(UChar c) { return p < e && (ch == c || *p == c); }
314    void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
315    UBool inList(UChar c, const UChar* list) const {
316        if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
317        while (*list && *list != c) ++list; return *list == c;
318    }
319    void parseError(const char* msg);
320
321    StringLocalizationInfo* doParse(void);
322
323    UChar** nextArray(int32_t& requiredLength);
324    UChar*  nextString(void);
325};
326
// Reports a parse error and returns NULL from the enclosing parser method.
// The message text is only forwarded in DEBUG builds. The macro expands to two
// statements, so it must be used inside a braced block.
#ifdef DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#else
#define ERROR(msg) parseError(NULL); return NULL;
#endif


// Terminator lists handed to LocDataParser::inList() by nextString().

// A double-quoted string ends at the next double quote.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// A single-quoted string ends at the next single quote.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// An unquoted string ends at any delimiter; the leading SPACE makes inList()
// accept any rule white space as a terminator as well.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
345
346static void
347DeleteFn(void* p) {
348  uprv_free(p);
349}
350
351StringLocalizationInfo*
352LocDataParser::parse(UChar* _data, int32_t len) {
353    if (U_FAILURE(ec)) {
354        if (_data) uprv_free(_data);
355        return NULL;
356    }
357
358    pe.line = 0;
359    pe.offset = -1;
360    pe.postContext[0] = 0;
361    pe.preContext[0] = 0;
362
363    if (_data == NULL) {
364        ec = U_ILLEGAL_ARGUMENT_ERROR;
365        return NULL;
366    }
367
368    if (len <= 0) {
369        ec = U_ILLEGAL_ARGUMENT_ERROR;
370        uprv_free(_data);
371        return NULL;
372    }
373
374    data = _data;
375    e = data + len;
376    p = _data;
377    ch = 0xffff;
378
379    return doParse();
380}
381
382
383StringLocalizationInfo*
384LocDataParser::doParse(void) {
385    skipWhitespace();
386    if (!checkInc(OPEN_ANGLE)) {
387        ERROR("Missing open angle");
388    } else {
389        VArray array(DeleteFn);
390        UBool mightHaveNext = TRUE;
391        int32_t requiredLength = -1;
392        while (mightHaveNext) {
393            mightHaveNext = FALSE;
394            UChar** elem = nextArray(requiredLength);
395            skipWhitespace();
396            UBool haveComma = check(COMMA);
397            if (elem) {
398                array.add(elem, ec);
399                if (haveComma) {
400                    inc();
401                    mightHaveNext = TRUE;
402                }
403            } else if (haveComma) {
404                ERROR("Unexpected character");
405            }
406        }
407
408        skipWhitespace();
409        if (!checkInc(CLOSE_ANGLE)) {
410            if (check(OPEN_ANGLE)) {
411                ERROR("Missing comma in outer array");
412            } else {
413                ERROR("Missing close angle bracket in outer array");
414            }
415        }
416
417        skipWhitespace();
418        if (p != e) {
419            ERROR("Extra text after close of localization data");
420        }
421
422        array.add(NULL, ec);
423        if (U_SUCCESS(ec)) {
424            int32_t numLocs = array.length() - 2; // subtract first, NULL
425            UChar*** result = (UChar***)array.release();
426
427            return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
428        }
429    }
430
431    ERROR("Unknown error");
432}
433
434UChar**
435LocDataParser::nextArray(int32_t& requiredLength) {
436    if (U_FAILURE(ec)) {
437        return NULL;
438    }
439
440    skipWhitespace();
441    if (!checkInc(OPEN_ANGLE)) {
442        ERROR("Missing open angle");
443    }
444
445    VArray array;
446    UBool mightHaveNext = TRUE;
447    while (mightHaveNext) {
448        mightHaveNext = FALSE;
449        UChar* elem = nextString();
450        skipWhitespace();
451        UBool haveComma = check(COMMA);
452        if (elem) {
453            array.add(elem, ec);
454            if (haveComma) {
455                inc();
456                mightHaveNext = TRUE;
457            }
458        } else if (haveComma) {
459            ERROR("Unexpected comma");
460        }
461    }
462    skipWhitespace();
463    if (!checkInc(CLOSE_ANGLE)) {
464        if (check(OPEN_ANGLE)) {
465            ERROR("Missing close angle bracket in inner array");
466        } else {
467            ERROR("Missing comma in inner array");
468        }
469    }
470
471    array.add(NULL, ec);
472    if (U_SUCCESS(ec)) {
473        if (requiredLength == -1) {
474            requiredLength = array.length() + 1;
475        } else if (array.length() != requiredLength) {
476            ec = U_ILLEGAL_ARGUMENT_ERROR;
477            ERROR("Array not of required length");
478        }
479
480        return (UChar**)array.release();
481    }
482    ERROR("Unknown Error");
483}
484
485UChar*
486LocDataParser::nextString() {
487    UChar* result = NULL;
488
489    skipWhitespace();
490    if (p < e) {
491        const UChar* terminators;
492        UChar c = *p;
493        UBool haveQuote = c == QUOTE || c == TICK;
494        if (haveQuote) {
495            inc();
496            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
497        } else {
498            terminators = NOQUOTE_STOPLIST;
499        }
500        UChar* start = p;
501        while (p < e && !inList(*p, terminators)) ++p;
502        if (p == e) {
503            ERROR("Unexpected end of data");
504        }
505
506        UChar x = *p;
507        if (p > start) {
508            ch = x;
509            *p = 0x0; // terminate by writing to data
510            result = start; // just point into data
511        }
512        if (haveQuote) {
513            if (x != c) {
514                ERROR("Missing matching quote");
515            } else if (p == start) {
516                ERROR("Empty string");
517            }
518            inc();
519        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
520            ERROR("Unexpected character in string");
521        }
522    }
523
524    // ok for there to be no next string
525    return result;
526}
527
528void
529LocDataParser::parseError(const char* /*str*/) {
530    if (!data) {
531        return;
532    }
533
534    const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
535    if (start < data) {
536        start = data;
537    }
538    for (UChar* x = p; --x >= start;) {
539        if (!*x) {
540            start = x+1;
541            break;
542        }
543    }
544    const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
545    if (limit > e) {
546        limit = e;
547    }
548    u_strncpy(pe.preContext, start, (int32_t)(p-start));
549    pe.preContext[p-start] = 0;
550    u_strncpy(pe.postContext, p, (int32_t)(limit-p));
551    pe.postContext[limit-p] = 0;
552    pe.offset = (int32_t)(p - data);
553
554#ifdef DEBUG
555    fprintf(stderr, "%s at or near character %d: ", str, p-data);
556
557    UnicodeString msg;
558    msg.append(start, p - start);
559    msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
560    msg.append(p, limit-p);
561    msg.append("'");
562
563    char buf[128];
564    int32_t len = msg.extract(0, msg.length(), buf, 128);
565    if (len >= 128) {
566        buf[127] = 0;
567    } else {
568        buf[len] = 0;
569    }
570    fprintf(stderr, "%s\n", buf);
571    fflush(stderr);
572#endif
573
574    uprv_free(data);
575    data = NULL;
576    p = NULL;
577    e = NULL;
578
579    if (U_SUCCESS(ec)) {
580        ec = U_PARSE_ERROR;
581    }
582}
583
584//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
585
586StringLocalizationInfo*
587StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
588    if (U_FAILURE(status)) {
589        return NULL;
590    }
591
592    int32_t len = info.length();
593    if (len == 0) {
594        return NULL; // no error;
595    }
596
597    UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
598    if (!p) {
599        status = U_MEMORY_ALLOCATION_ERROR;
600        return NULL;
601    }
602    info.extract(p, len, status);
603    if (!U_FAILURE(status)) {
604        status = U_ZERO_ERROR; // clear warning about non-termination
605    }
606
607    LocDataParser parser(perror, status);
608    return parser.parse(p, len);
609}
610
611StringLocalizationInfo::~StringLocalizationInfo() {
612    for (UChar*** p = (UChar***)data; *p; ++p) {
613        // remaining data is simply pointer into our unicode string data.
614        if (*p) uprv_free(*p);
615    }
616    if (data) uprv_free(data);
617    if (info) uprv_free(info);
618}
619
620
621const UChar*
622StringLocalizationInfo::getRuleSetName(int32_t index) const {
623    if (index >= 0 && index < getNumberOfRuleSets()) {
624        return data[0][index];
625    }
626    return NULL;
627}
628
629const UChar*
630StringLocalizationInfo::getLocaleName(int32_t index) const {
631    if (index >= 0 && index < getNumberOfDisplayLocales()) {
632        return data[index+1][0];
633    }
634    return NULL;
635}
636
637const UChar*
638StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
639    if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
640        ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
641        return data[localeIndex+1][ruleIndex+1];
642    }
643    return NULL;
644}
645
646// ----------
647
648RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
649                                             const UnicodeString& locs,
650                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
651  : ruleSets(NULL)
652  , defaultRuleSet(NULL)
653  , locale(alocale)
654  , collator(NULL)
655  , decimalFormatSymbols(NULL)
656  , lenient(FALSE)
657  , lenientParseRules(NULL)
658  , localizations(NULL)
659  , noParse(FALSE) //TODO: to be removed after #6895
660{
661  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
662  init(description, locinfo, perror, status);
663}
664
665RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
666                                             const UnicodeString& locs,
667                                             UParseError& perror, UErrorCode& status)
668  : ruleSets(NULL)
669  , defaultRuleSet(NULL)
670  , locale(Locale::getDefault())
671  , collator(NULL)
672  , decimalFormatSymbols(NULL)
673  , lenient(FALSE)
674  , lenientParseRules(NULL)
675  , localizations(NULL)
676  , noParse(FALSE) //TODO: to be removed after #6895
677{
678  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
679  init(description, locinfo, perror, status);
680}
681
682RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
683                                             LocalizationInfo* info,
684                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
685  : ruleSets(NULL)
686  , defaultRuleSet(NULL)
687  , locale(alocale)
688  , collator(NULL)
689  , decimalFormatSymbols(NULL)
690  , lenient(FALSE)
691  , lenientParseRules(NULL)
692  , localizations(NULL)
693  , noParse(FALSE) //TODO: to be removed after #6895
694{
695  init(description, info, perror, status);
696}
697
698RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
699                         UParseError& perror,
700                         UErrorCode& status)
701  : ruleSets(NULL)
702  , defaultRuleSet(NULL)
703  , locale(Locale::getDefault())
704  , collator(NULL)
705  , decimalFormatSymbols(NULL)
706  , lenient(FALSE)
707  , lenientParseRules(NULL)
708  , localizations(NULL)
709  , noParse(FALSE) //TODO: to be removed after #6895
710{
711    init(description, NULL, perror, status);
712}
713
714RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
715                         const Locale& aLocale,
716                         UParseError& perror,
717                         UErrorCode& status)
718  : ruleSets(NULL)
719  , defaultRuleSet(NULL)
720  , locale(aLocale)
721  , collator(NULL)
722  , decimalFormatSymbols(NULL)
723  , lenient(FALSE)
724  , lenientParseRules(NULL)
725  , localizations(NULL)
726  , noParse(FALSE) //TODO: to be removed after #6895
727{
728    init(description, NULL, perror, status);
729}
730
731RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
732  : ruleSets(NULL)
733  , defaultRuleSet(NULL)
734  , locale(alocale)
735  , collator(NULL)
736  , decimalFormatSymbols(NULL)
737  , lenient(FALSE)
738  , lenientParseRules(NULL)
739  , localizations(NULL)
740{
741    if (U_FAILURE(status)) {
742        return;
743    }
744
745    const char* rules_tag = "RBNFRules";
746    const char* fmt_tag = "";
747    switch (tag) {
748    case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
749    case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
750    case URBNF_DURATION: fmt_tag = "DurationRules"; break;
751    case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
752    default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
753    }
754
755    // TODO: read localization info from resource
756    LocalizationInfo* locinfo = NULL;
757
758    int32_t len = 0;
759    UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
760    if (U_SUCCESS(status)) {
761        setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
762                     ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
763
764        UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
765        if (U_FAILURE(status)) {
766            ures_close(nfrb);
767        }
768        UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
769        if (U_FAILURE(status)) {
770            ures_close(rbnfRules);
771            ures_close(nfrb);
772            return;
773        }
774
775        UnicodeString desc;
776        while (ures_hasNext(ruleSets)) {
777           const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
778           desc.append(currentString);
779        }
780        UParseError perror;
781
782
783        init (desc, locinfo, perror, status);
784
785        //TODO: we need a real fix - see #6895 / #6896
786        noParse = FALSE;
787        if (tag == URBNF_SPELLOUT) {
788            const char *lang = alocale.getLanguage();
789            for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
790                if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
791                    noParse = TRUE;
792                    break;
793                }
794            }
795        }
796        //TODO: end
797
798        ures_close(ruleSets);
799        ures_close(rbnfRules);
800    }
801    ures_close(nfrb);
802}
803
804RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
805  : NumberFormat(rhs)
806  , ruleSets(NULL)
807  , defaultRuleSet(NULL)
808  , locale(rhs.locale)
809  , collator(NULL)
810  , decimalFormatSymbols(NULL)
811  , lenient(FALSE)
812  , lenientParseRules(NULL)
813  , localizations(NULL)
814{
815    this->operator=(rhs);
816}
817
818// --------
819
820RuleBasedNumberFormat&
821RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
822{
823    UErrorCode status = U_ZERO_ERROR;
824    dispose();
825    locale = rhs.locale;
826    lenient = rhs.lenient;
827
828    UnicodeString rules = rhs.getRules();
829    UParseError perror;
830    init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
831
832    //TODO: remove below when we fix the parse bug - See #6895 / #6896
833    noParse = rhs.noParse;
834
835    return *this;
836}
837
838RuleBasedNumberFormat::~RuleBasedNumberFormat()
839{
840    dispose();
841}
842
843Format*
844RuleBasedNumberFormat::clone(void) const
845{
846    RuleBasedNumberFormat * result = NULL;
847    UnicodeString rules = getRules();
848    UErrorCode status = U_ZERO_ERROR;
849    UParseError perror;
850    result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
851    /* test for NULL */
852    if (result == 0) {
853        status = U_MEMORY_ALLOCATION_ERROR;
854        return 0;
855    }
856    if (U_FAILURE(status)) {
857        delete result;
858        result = 0;
859    } else {
860        result->lenient = lenient;
861
862        //TODO: remove below when we fix the parse bug - See #6895 / #6896
863        result->noParse = noParse;
864    }
865    return result;
866}
867
868UBool
869RuleBasedNumberFormat::operator==(const Format& other) const
870{
871    if (this == &other) {
872        return TRUE;
873    }
874
875    if (other.getDynamicClassID() == getStaticClassID()) {
876        const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
877        if (locale == rhs.locale &&
878            lenient == rhs.lenient &&
879            (localizations == NULL
880                ? rhs.localizations == NULL
881                : (rhs.localizations == NULL
882                    ? FALSE
883                    : *localizations == rhs.localizations))) {
884
885            NFRuleSet** p = ruleSets;
886            NFRuleSet** q = rhs.ruleSets;
887            if (p == NULL) {
888                return q == NULL;
889            } else if (q == NULL) {
890                return FALSE;
891            }
892            while (*p && *q && (**p == **q)) {
893                ++p;
894                ++q;
895            }
896            return *q == NULL && *p == NULL;
897        }
898    }
899
900    return FALSE;
901}
902
903UnicodeString
904RuleBasedNumberFormat::getRules() const
905{
906    UnicodeString result;
907    if (ruleSets != NULL) {
908        for (NFRuleSet** p = ruleSets; *p; ++p) {
909            (*p)->appendRules(result);
910        }
911    }
912    return result;
913}
914
915UnicodeString
916RuleBasedNumberFormat::getRuleSetName(int32_t index) const
917{
918    if (localizations) {
919      UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
920      return string;
921    } else if (ruleSets) {
922        UnicodeString result;
923        for (NFRuleSet** p = ruleSets; *p; ++p) {
924            NFRuleSet* rs = *p;
925            if (rs->isPublic()) {
926                if (--index == -1) {
927                    rs->getName(result);
928                    return result;
929                }
930            }
931        }
932    }
933    UnicodeString empty;
934    return empty;
935}
936
937int32_t
938RuleBasedNumberFormat::getNumberOfRuleSetNames() const
939{
940    int32_t result = 0;
941    if (localizations) {
942      result = localizations->getNumberOfRuleSets();
943    } else if (ruleSets) {
944        for (NFRuleSet** p = ruleSets; *p; ++p) {
945            if ((**p).isPublic()) {
946                ++result;
947            }
948        }
949    }
950    return result;
951}
952
953int32_t
954RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
955    if (localizations) {
956        return localizations->getNumberOfDisplayLocales();
957    }
958    return 0;
959}
960
961Locale
962RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
963    if (U_FAILURE(status)) {
964        return Locale("");
965    }
966    if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
967        UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
968        char buffer[64];
969        int32_t cap = name.length() + 1;
970        char* bp = buffer;
971        if (cap > 64) {
972            bp = (char *)uprv_malloc(cap);
973            if (bp == NULL) {
974                status = U_MEMORY_ALLOCATION_ERROR;
975                return Locale("");
976            }
977        }
978        name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
979        Locale retLocale(bp);
980        if (bp != buffer) {
981            uprv_free(bp);
982        }
983        return retLocale;
984    }
985    status = U_ILLEGAL_ARGUMENT_ERROR;
986    Locale retLocale;
987    return retLocale;
988}
989
990UnicodeString
991RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
992    if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
993        UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
994        int32_t len = localeName.length();
995        UChar* localeStr = localeName.getBuffer(len + 1);
996        while (len >= 0) {
997            localeStr[len] = 0;
998            int32_t ix = localizations->indexForLocale(localeStr);
999            if (ix >= 0) {
1000                UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1001                return name;
1002            }
1003
1004            // trim trailing portion, skipping over ommitted sections
1005            do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1006            while (len > 0 && localeStr[len-1] == 0x005F) --len;
1007        }
1008        UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1009        return name;
1010    }
1011    UnicodeString bogus;
1012    bogus.setToBogus();
1013    return bogus;
1014}
1015
1016UnicodeString
1017RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1018    if (localizations) {
1019        UnicodeString rsn(ruleSetName);
1020        int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1021        return getRuleSetDisplayName(ix, localeParam);
1022    }
1023    UnicodeString bogus;
1024    bogus.setToBogus();
1025    return bogus;
1026}
1027
1028NFRuleSet*
1029RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1030{
1031    if (U_SUCCESS(status) && ruleSets) {
1032        for (NFRuleSet** p = ruleSets; *p; ++p) {
1033            NFRuleSet* rs = *p;
1034            if (rs->isNamed(name)) {
1035                return rs;
1036            }
1037        }
1038        status = U_ILLEGAL_ARGUMENT_ERROR;
1039    }
1040    return NULL;
1041}
1042
1043UnicodeString&
1044RuleBasedNumberFormat::format(int32_t number,
1045                              UnicodeString& toAppendTo,
1046                              FieldPosition& /* pos */) const
1047{
1048    if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1049    return toAppendTo;
1050}
1051
1052
1053UnicodeString&
1054RuleBasedNumberFormat::format(int64_t number,
1055                              UnicodeString& toAppendTo,
1056                              FieldPosition& /* pos */) const
1057{
1058    if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1059    return toAppendTo;
1060}
1061
1062
1063UnicodeString&
1064RuleBasedNumberFormat::format(double number,
1065                              UnicodeString& toAppendTo,
1066                              FieldPosition& /* pos */) const
1067{
1068    if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1069    return toAppendTo;
1070}
1071
1072
1073UnicodeString&
1074RuleBasedNumberFormat::format(int32_t number,
1075                              const UnicodeString& ruleSetName,
1076                              UnicodeString& toAppendTo,
1077                              FieldPosition& /* pos */,
1078                              UErrorCode& status) const
1079{
1080    // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1081    if (U_SUCCESS(status)) {
1082        if (ruleSetName.indexOf(gPercentPercent) == 0) {
1083            // throw new IllegalArgumentException("Can't use internal rule set");
1084            status = U_ILLEGAL_ARGUMENT_ERROR;
1085        } else {
1086            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1087            if (rs) {
1088                rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1089            }
1090        }
1091    }
1092    return toAppendTo;
1093}
1094
1095
1096UnicodeString&
1097RuleBasedNumberFormat::format(int64_t number,
1098                              const UnicodeString& ruleSetName,
1099                              UnicodeString& toAppendTo,
1100                              FieldPosition& /* pos */,
1101                              UErrorCode& status) const
1102{
1103    if (U_SUCCESS(status)) {
1104        if (ruleSetName.indexOf(gPercentPercent) == 0) {
1105            // throw new IllegalArgumentException("Can't use internal rule set");
1106            status = U_ILLEGAL_ARGUMENT_ERROR;
1107        } else {
1108            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1109            if (rs) {
1110                rs->format(number, toAppendTo, toAppendTo.length());
1111            }
1112        }
1113    }
1114    return toAppendTo;
1115}
1116
1117
1118// make linker happy
1119UnicodeString&
1120RuleBasedNumberFormat::format(const Formattable& obj,
1121                              UnicodeString& toAppendTo,
1122                              FieldPosition& pos,
1123                              UErrorCode& status) const
1124{
1125    return NumberFormat::format(obj, toAppendTo, pos, status);
1126}
1127
1128UnicodeString&
1129RuleBasedNumberFormat::format(double number,
1130                              const UnicodeString& ruleSetName,
1131                              UnicodeString& toAppendTo,
1132                              FieldPosition& /* pos */,
1133                              UErrorCode& status) const
1134{
1135    if (U_SUCCESS(status)) {
1136        if (ruleSetName.indexOf(gPercentPercent) == 0) {
1137            // throw new IllegalArgumentException("Can't use internal rule set");
1138            status = U_ILLEGAL_ARGUMENT_ERROR;
1139        } else {
1140            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1141            if (rs) {
1142                rs->format(number, toAppendTo, toAppendTo.length());
1143            }
1144        }
1145    }
1146    return toAppendTo;
1147}
1148
1149void
1150RuleBasedNumberFormat::parse(const UnicodeString& text,
1151                             Formattable& result,
1152                             ParsePosition& parsePosition) const
1153{
1154    //TODO: We need a real fix.  See #6895 / #6896
1155    if (noParse) {
1156        // skip parsing
1157        parsePosition.setErrorIndex(0);
1158        return;
1159    }
1160
1161    if (!ruleSets) {
1162        parsePosition.setErrorIndex(0);
1163        return;
1164    }
1165
1166    UnicodeString workingText(text, parsePosition.getIndex());
1167    ParsePosition workingPos(0);
1168
1169    ParsePosition high_pp(0);
1170    Formattable high_result;
1171
1172    for (NFRuleSet** p = ruleSets; *p; ++p) {
1173        NFRuleSet *rp = *p;
1174        if (rp->isPublic() && rp->isParseable()) {
1175            ParsePosition working_pp(0);
1176            Formattable working_result;
1177
1178            rp->parse(workingText, working_pp, kMaxDouble, working_result);
1179            if (working_pp.getIndex() > high_pp.getIndex()) {
1180                high_pp = working_pp;
1181                high_result = working_result;
1182
1183                if (high_pp.getIndex() == workingText.length()) {
1184                    break;
1185                }
1186            }
1187        }
1188    }
1189
1190    int32_t startIndex = parsePosition.getIndex();
1191    parsePosition.setIndex(startIndex + high_pp.getIndex());
1192    if (high_pp.getIndex() > 0) {
1193        parsePosition.setErrorIndex(-1);
1194    } else {
1195        int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1196        parsePosition.setErrorIndex(startIndex + errorIndex);
1197    }
1198    result = high_result;
1199    if (result.getType() == Formattable::kDouble) {
1200        int32_t r = (int32_t)result.getDouble();
1201        if ((double)r == result.getDouble()) {
1202            result.setLong(r);
1203        }
1204    }
1205}
1206
1207#if !UCONFIG_NO_COLLATION
1208
1209void
1210RuleBasedNumberFormat::setLenient(UBool enabled)
1211{
1212    lenient = enabled;
1213    if (!enabled && collator) {
1214        delete collator;
1215        collator = NULL;
1216    }
1217}
1218
1219#endif
1220
1221void
1222RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1223    if (U_SUCCESS(status)) {
1224        if (ruleSetName.isEmpty()) {
1225          if (localizations) {
1226              UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1227              defaultRuleSet = findRuleSet(name, status);
1228          } else {
1229            initDefaultRuleSet();
1230          }
1231        } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1232            status = U_ILLEGAL_ARGUMENT_ERROR;
1233        } else {
1234            NFRuleSet* result = findRuleSet(ruleSetName, status);
1235            if (result != NULL) {
1236                defaultRuleSet = result;
1237            }
1238        }
1239    }
1240}
1241
1242UnicodeString
1243RuleBasedNumberFormat::getDefaultRuleSetName() const {
1244  UnicodeString result;
1245  if (defaultRuleSet && defaultRuleSet->isPublic()) {
1246    defaultRuleSet->getName(result);
1247  } else {
1248    result.setToBogus();
1249  }
1250  return result;
1251}
1252
1253void
1254RuleBasedNumberFormat::initDefaultRuleSet()
1255{
1256    defaultRuleSet = NULL;
1257    if (!ruleSets) {
1258      return;
1259    }
1260
1261    const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1262    const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1263    const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1264
1265    NFRuleSet**p = &ruleSets[0];
1266    while (*p) {
1267        if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1268            defaultRuleSet = *p;
1269            return;
1270        } else {
1271            ++p;
1272        }
1273    }
1274
1275    defaultRuleSet = *--p;
1276    if (!defaultRuleSet->isPublic()) {
1277        while (p != ruleSets) {
1278            if ((*--p)->isPublic()) {
1279                defaultRuleSet = *p;
1280                break;
1281            }
1282        }
1283    }
1284}
1285
1286
/**
 * Builds this formatter's rule sets from a textual rule description.
 *
 * The steps are: strip the whitespace that follows each semicolon, pull an
 * optional "%%lenient-parse:" section out into lenientParseRules, split the
 * remaining description at each ";%" into per-rule-set descriptions, create
 * one NFRuleSet per description, pick an initial default rule set, parse the
 * rules of every rule set, and finally validate the rule set names listed in
 * the localization data (the first of which becomes the default rule set).
 *
 * @param rules the rule description
 * @param localizationInfos optional localization data; if non-NULL, ref() is
 * called on it and the result is stored in the localizations member
 * @param pErr zero-filled on entry; not otherwise used yet (see TODO)
 * @param status error code; if it already indicates failure nothing is done.
 * Set to U_MEMORY_ALLOCATION_ERROR when an allocation fails or the
 * description is empty, and to U_ILLEGAL_ARGUMENT_ERROR (by findRuleSet())
 * when a localized rule set name is not present in the rules.
 */
void
RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
                            UParseError& pErr, UErrorCode& status)
{
    // TODO: implement UParseError
    uprv_memset(&pErr, 0, sizeof(UParseError));
    // Note: this can leave ruleSets == NULL, so remaining code should check
    if (U_FAILURE(status)) {
        return;
    }

    this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();

    // NOTE(review): an empty description is reported as a memory allocation
    // error; presumably this is meant to catch a failed copy of the rules --
    // confirm whether empty input should get a different error code.
    UnicodeString description(rules);
    if (!description.length()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // start by stripping the trailing whitespace from all the rules
    // (this is all the whitespace following each semicolon in the
    // description).  This allows us to look for rule-set boundaries
    // by searching for ";%" without having to worry about whitespace
    // between the ; and the %
    stripWhitespace(description);

    // check to see if there's a set of lenient-parse rules.  If there
    // is, pull them out into our temporary holding place for them,
    // and delete them from the description before the real description-
    // parsing code sees them
    int32_t lp = description.indexOf(gLenientParse);
    if (lp != -1) {
        // we've got to make sure we're not in the middle of a rule
        // (where "%%lenient-parse" would actually get treated as
        // rule text)
        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
            // locate the beginning and end of the actual collation
            // rules (there may be whitespace between the name and
            // the first token in the description)
            int lpEnd = description.indexOf(gSemiPercent, lp);

            // no following rule set: the lenient-parse rules run to the
            // last character of the description
            if (lpEnd == -1) {
                lpEnd = description.length() - 1;
            }
            int lpStart = lp + u_strlen(gLenientParse);
            while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
                ++lpStart;
            }

            // copy out the lenient-parse rules and delete them
            // from the description
            lenientParseRules = new UnicodeString();
            /* test for NULL */
            if (lenientParseRules == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);

            // remove the whole section, including the character at lpEnd
            description.remove(lp, lpEnd + 1 - lp);
        }
    }

    // pre-flight parsing the description and count the number of
    // rule sets (";%" marks the end of one rule set and the beginning
    // of the next)
    int numRuleSets = 0;
    for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
        ++numRuleSets;
        ++p;
    }
    // the text after the last ";%" (or the whole description) is a rule set too
    ++numRuleSets;

    // our rule list is an array of the appropriate size
    // (one extra slot for the terminating NULL that the iteration loops rely on)
    ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
    /* test for NULL */
    if (ruleSets == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    for (int i = 0; i <= numRuleSets; ++i) {
        ruleSets[i] = NULL;
    }

    // divide up the descriptions into individual rule-set descriptions
    // and store them in a temporary array.  At each step, we also
    // new up a rule set, but all this does is initialize its name
    // and remove it from its description.  We can't actually parse
    // the rest of the descriptions and finish initializing everything
    // because we have to know the names and locations of all the rule
    // sets before we can actually set everything up
    // NOTE(review): numRuleSets is at least 1 here because of the
    // unconditional increment above, so this check looks unreachable.
    if(!numRuleSets) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
    if (ruleSetDescriptions == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    {
        int curRuleSet = 0;
        int32_t start = 0;
        for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
            // each description keeps its terminating semicolon (p + 1)
            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
            ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
            if (ruleSets[curRuleSet] == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                goto cleanup;
            }
            ++curRuleSet;
            start = p + 1;
        }
        // the final rule set runs to the end of the description
        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
        ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
        if (ruleSets[curRuleSet] == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            goto cleanup;
        }
    }

    // now we can take note of the formatter's default rule set, which
    // is the last public rule set in the description (it's the last
    // rather than the first so that a user can create a new formatter
    // from an existing formatter and change its default behavior just
    // by appending more rule sets to the end)

    // {dlf} Initialization of a fraction rule set requires the default rule
    // set to be known.  For purposes of initialization, this is always the
    // last public rule set, no matter what the localization data says.
    // NOTE(review): initDefaultRuleSet() first looks for certain well-known
    // rule set names before falling back to the last public rule set; see
    // that function for the exact selection.
    initDefaultRuleSet();

    // finally, we can go back through the temporary descriptions
    // list and finish setting up the substructure (and we throw
    // away the temporary descriptions as we go)
    {
        for (int i = 0; i < numRuleSets; i++) {
            ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
        }
    }

    // Now that the rules are initialized, the 'real' default rule
    // set can be adjusted by the localization data.

    // The C code keeps the localization array as is, rather than building
    // a separate array of the public rule set names, so we have less work
    // to do here-- but we still need to check the names.

    if (localizationInfos) {
        // confirm the names, if any aren't in the rules, that's an error
        // it is ok if the rules contain public rule sets that are not in this list
        for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
            UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
            NFRuleSet* rs = findRuleSet(name, status);
            if (rs == NULL) {
                break; // error
            }
            // the first localized rule set name designates the default rule set
            if (i == 0) {
                defaultRuleSet = rs;
            }
        }
    } else {
        defaultRuleSet = getDefaultRuleSet();
    }

    // the temporary descriptions are released on both the success path and
    // the allocation-failure paths that jump here
cleanup:
    delete[] ruleSetDescriptions;
}
1457
1458void
1459RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1460{
1461    // iterate through the characters...
1462    UnicodeString result;
1463
1464    int start = 0;
1465    while (start != -1 && start < description.length()) {
1466        // seek to the first non-whitespace character...
1467        while (start < description.length()
1468            && uprv_isRuleWhiteSpace(description.charAt(start))) {
1469            ++start;
1470        }
1471
1472        // locate the next semicolon in the text and copy the text from
1473        // our current position up to that semicolon into the result
1474        int32_t p = description.indexOf(gSemiColon, start);
1475        if (p == -1) {
1476            // or if we don't find a semicolon, just copy the rest of
1477            // the string into the result
1478            result.append(description, start, description.length() - start);
1479            start = -1;
1480        }
1481        else if (p < description.length()) {
1482            result.append(description, start, p + 1 - start);
1483            start = p + 1;
1484        }
1485
1486        // when we get here, we've seeked off the end of the sring, and
1487        // we terminate the loop (we continue until *start* is -1 rather
1488        // than until *p* is -1, because otherwise we'd miss the last
1489        // rule in the description)
1490        else {
1491            start = -1;
1492        }
1493    }
1494
1495    description.setTo(result);
1496}
1497
1498
1499void
1500RuleBasedNumberFormat::dispose()
1501{
1502    if (ruleSets) {
1503        for (NFRuleSet** p = ruleSets; *p; ++p) {
1504            delete *p;
1505        }
1506        uprv_free(ruleSets);
1507        ruleSets = NULL;
1508    }
1509
1510#if !UCONFIG_NO_COLLATION
1511    delete collator;
1512#endif
1513    collator = NULL;
1514
1515    delete decimalFormatSymbols;
1516    decimalFormatSymbols = NULL;
1517
1518    delete lenientParseRules;
1519    lenientParseRules = NULL;
1520
1521    if (localizations) localizations = localizations->unref();
1522}
1523
1524
1525//-----------------------------------------------------------------------
1526// package-internal API
1527//-----------------------------------------------------------------------
1528
1529/**
1530 * Returns the collator to use for lenient parsing.  The collator is lazily created:
1531 * this function creates it the first time it's called.
1532 * @return The collator to use for lenient parsing, or null if lenient parsing
1533 * is turned off.
1534*/
1535Collator*
1536RuleBasedNumberFormat::getCollator() const
1537{
1538#if !UCONFIG_NO_COLLATION
1539    if (!ruleSets) {
1540        return NULL;
1541    }
1542
1543    // lazy-evaulate the collator
1544    if (collator == NULL && lenient) {
1545        // create a default collator based on the formatter's locale,
1546        // then pull out that collator's rules, append any additional
1547        // rules specified in the description, and create a _new_
1548        // collator based on the combinaiton of those rules
1549
1550        UErrorCode status = U_ZERO_ERROR;
1551
1552        Collator* temp = Collator::createInstance(locale, status);
1553        if (U_SUCCESS(status) &&
1554            temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1555
1556            RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
1557            if (lenientParseRules) {
1558                UnicodeString rules(newCollator->getRules());
1559                rules.append(*lenientParseRules);
1560
1561                newCollator = new RuleBasedCollator(rules, status);
1562                // Exit if newCollator could not be created.
1563                if (newCollator == NULL) {
1564                	return NULL;
1565                }
1566            } else {
1567                temp = NULL;
1568            }
1569            if (U_SUCCESS(status)) {
1570                newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1571                // cast away const
1572                ((RuleBasedNumberFormat*)this)->collator = newCollator;
1573            } else {
1574                delete newCollator;
1575            }
1576        }
1577        delete temp;
1578    }
1579#endif
1580
1581    // if lenient-parse mode is off, this will be null
1582    // (see setLenientParseMode())
1583    return collator;
1584}
1585
1586
1587/**
1588 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1589 * instances owned by this formatter.  This object is lazily created: this function
1590 * creates it the first time it's called.
1591 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1592 * instances owned by this formatter.
1593*/
1594DecimalFormatSymbols*
1595RuleBasedNumberFormat::getDecimalFormatSymbols() const
1596{
1597    // lazy-evaluate the DecimalFormatSymbols object.  This object
1598    // is shared by all DecimalFormat instances belonging to this
1599    // formatter
1600    if (decimalFormatSymbols == NULL) {
1601        UErrorCode status = U_ZERO_ERROR;
1602        DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1603        if (U_SUCCESS(status)) {
1604            ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1605        } else {
1606            delete temp;
1607        }
1608    }
1609    return decimalFormatSymbols;
1610}
1611
1612U_NAMESPACE_END
1613
1614/* U_HAVE_RBNF */
1615#endif
1616