1/*
2*******************************************************************************
3* Copyright (C) 1997-2011, International Business Machines Corporation
4* and others. All Rights Reserved.
5*******************************************************************************
6*/
7
8#include <typeinfo>  // for 'typeid' to work
9
10#include "unicode/rbnf.h"
11
12#if U_HAVE_RBNF
13
14#include "unicode/normlzr.h"
15#include "unicode/tblcoll.h"
16#include "unicode/uchar.h"
17#include "unicode/ucol.h"
18#include "unicode/uloc.h"
19#include "unicode/unum.h"
20#include "unicode/ures.h"
21#include "unicode/ustring.h"
22#include "unicode/utf16.h"
23#include "unicode/udata.h"
24#include "nfrs.h"
25
26#include "cmemory.h"
27#include "cstring.h"
28#include "patternprops.h"
29#include "uresimp.h"
30
31// debugging
32// #define DEBUG
33
34#ifdef DEBUG
35#include "stdio.h"
36#endif
37
38#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
39
40static const UChar gPercentPercent[] =
41{
42    0x25, 0x25, 0
43}; /* "%%" */
44
45// All urbnf objects are created through openRules, so we init all of the
46// Unicode string constants required by rbnf, nfrs, or nfr here.
47static const UChar gLenientParse[] =
48{
49    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50}; /* "%%lenient-parse:" */
51static const UChar gSemiColon = 0x003B;
52static const UChar gSemiPercent[] =
53{
54    0x3B, 0x25, 0
55}; /* ";%" */
56
57#define kSomeNumberOfBitsDiv2 22
58#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
59#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
60
61// Temporary workaround - when noParse is true, do noting in parse.
62// TODO: We need a real fix - see #6895/#6896
63static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
64
65U_NAMESPACE_BEGIN
66
67UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68
69/*
70This is a utility class. It does not use ICU's RTTI.
71If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72Please make sure that intltest passes on Windows in Release mode,
73since the string pooling per compilation unit will mess up how RTTI works.
74The RTTI code was also removed due to lack of code coverage.
75*/
76class LocalizationInfo : public UMemory {
77protected:
78    virtual ~LocalizationInfo() {}
79    uint32_t refcount;
80
81public:
82    LocalizationInfo() : refcount(0) {}
83
84    LocalizationInfo* ref(void) {
85        ++refcount;
86        return this;
87    }
88
89    LocalizationInfo* unref(void) {
90        if (refcount && --refcount == 0) {
91            delete this;
92        }
93        return NULL;
94    }
95
96    virtual UBool operator==(const LocalizationInfo* rhs) const;
97    inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98
99    virtual int32_t getNumberOfRuleSets(void) const = 0;
100    virtual const UChar* getRuleSetName(int32_t index) const = 0;
101    virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102    virtual const UChar* getLocaleName(int32_t index) const = 0;
103    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104
105    virtual int32_t indexForLocale(const UChar* locale) const;
106    virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107
108//    virtual UClassID getDynamicClassID() const = 0;
109//    static UClassID getStaticClassID(void);
110};
111
112//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
113
114// if both strings are NULL, this returns TRUE
115static UBool
116streq(const UChar* lhs, const UChar* rhs) {
117    if (rhs == lhs) {
118        return TRUE;
119    }
120    if (lhs && rhs) {
121        return u_strcmp(lhs, rhs) == 0;
122    }
123    return FALSE;
124}
125
126UBool
127LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
128    if (rhs) {
129        if (this == rhs) {
130            return TRUE;
131        }
132
133        int32_t rsc = getNumberOfRuleSets();
134        if (rsc == rhs->getNumberOfRuleSets()) {
135            for (int i = 0; i < rsc; ++i) {
136                if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
137                    return FALSE;
138                }
139            }
140            int32_t dlc = getNumberOfDisplayLocales();
141            if (dlc == rhs->getNumberOfDisplayLocales()) {
142                for (int i = 0; i < dlc; ++i) {
143                    const UChar* locale = getLocaleName(i);
144                    int32_t ix = rhs->indexForLocale(locale);
145                    // if no locale, ix is -1, getLocaleName returns null, so streq returns false
146                    if (!streq(locale, rhs->getLocaleName(ix))) {
147                        return FALSE;
148                    }
149                    for (int j = 0; j < rsc; ++j) {
150                        if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
151                            return FALSE;
152                        }
153                    }
154                }
155                return TRUE;
156            }
157        }
158    }
159    return FALSE;
160}
161
162int32_t
163LocalizationInfo::indexForLocale(const UChar* locale) const {
164    for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
165        if (streq(locale, getLocaleName(i))) {
166            return i;
167        }
168    }
169    return -1;
170}
171
172int32_t
173LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
174    if (ruleset) {
175        for (int i = 0; i < getNumberOfRuleSets(); ++i) {
176            if (streq(ruleset, getRuleSetName(i))) {
177                return i;
178            }
179        }
180    }
181    return -1;
182}
183
184
185typedef void (*Fn_Deleter)(void*);
186
187class VArray {
188    void** buf;
189    int32_t cap;
190    int32_t size;
191    Fn_Deleter deleter;
192public:
193    VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
194
195    VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
196
197    ~VArray() {
198        if (deleter) {
199            for (int i = 0; i < size; ++i) {
200                (*deleter)(buf[i]);
201            }
202        }
203        uprv_free(buf);
204    }
205
206    int32_t length() {
207        return size;
208    }
209
210    void add(void* elem, UErrorCode& status) {
211        if (U_SUCCESS(status)) {
212            if (size == cap) {
213                if (cap == 0) {
214                    cap = 1;
215                } else if (cap < 256) {
216                    cap *= 2;
217                } else {
218                    cap += 256;
219                }
220                if (buf == NULL) {
221                    buf = (void**)uprv_malloc(cap * sizeof(void*));
222                } else {
223                    buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
224                }
225                if (buf == NULL) {
226                    // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
227                    status = U_MEMORY_ALLOCATION_ERROR;
228                    return;
229                }
230                void* start = &buf[size];
231                size_t count = (cap - size) * sizeof(void*);
232                uprv_memset(start, 0, count); // fill with nulls, just because
233            }
234            buf[size++] = elem;
235        }
236    }
237
238    void** release(void) {
239        void** result = buf;
240        buf = NULL;
241        cap = 0;
242        size = 0;
243        return result;
244    }
245};
246
247class LocDataParser;
248
249class StringLocalizationInfo : public LocalizationInfo {
250    UChar* info;
251    UChar*** data;
252    int32_t numRuleSets;
253    int32_t numLocales;
254
255friend class LocDataParser;
256
257    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
258        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
259    {
260    }
261
262public:
263    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
264
265    virtual ~StringLocalizationInfo();
266    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
267    virtual const UChar* getRuleSetName(int32_t index) const;
268    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
269    virtual const UChar* getLocaleName(int32_t index) const;
270    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
271
272//    virtual UClassID getDynamicClassID() const;
273//    static UClassID getStaticClassID(void);
274
275private:
276    void init(UErrorCode& status) const;
277};
278
279
// Code points of the delimiter characters recognized by LocDataParser.
enum {
    OPEN_ANGLE  = 0x003C, /* '<' */
    CLOSE_ANGLE = 0x003E, /* '>' */
    COMMA       = 0x002C, /* ',' */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"' */
    SPACE       = 0x0020  /* ' ' */
};
288
289/**
290 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
291 */
292class LocDataParser {
293    UChar* data;
294    const UChar* e;
295    UChar* p;
296    UChar ch;
297    UParseError& pe;
298    UErrorCode& ec;
299
300public:
301    LocDataParser(UParseError& parseError, UErrorCode& status)
302        : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
303    ~LocDataParser() {}
304
305    /*
306    * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
307    * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
308    */
309    StringLocalizationInfo* parse(UChar* data, int32_t len);
310
311private:
312
313    void inc(void) { ++p; ch = 0xffff; }
314    UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
315    UBool check(UChar c) { return p < e && (ch == c || *p == c); }
316    void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
317    UBool inList(UChar c, const UChar* list) const {
318        if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
319        while (*list && *list != c) ++list; return *list == c;
320    }
321    void parseError(const char* msg);
322
323    StringLocalizationInfo* doParse(void);
324
325    UChar** nextArray(int32_t& requiredLength);
326    UChar*  nextString(void);
327};
328
329#ifdef DEBUG
330#define ERROR(msg) parseError(msg); return NULL;
331#else
332#define ERROR(msg) parseError(NULL); return NULL;
333#endif
334
335
336static const UChar DQUOTE_STOPLIST[] = {
337    QUOTE, 0
338};
339
340static const UChar SQUOTE_STOPLIST[] = {
341    TICK, 0
342};
343
344static const UChar NOQUOTE_STOPLIST[] = {
345    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
346};
347
348static void
349DeleteFn(void* p) {
350  uprv_free(p);
351}
352
353StringLocalizationInfo*
354LocDataParser::parse(UChar* _data, int32_t len) {
355    if (U_FAILURE(ec)) {
356        if (_data) uprv_free(_data);
357        return NULL;
358    }
359
360    pe.line = 0;
361    pe.offset = -1;
362    pe.postContext[0] = 0;
363    pe.preContext[0] = 0;
364
365    if (_data == NULL) {
366        ec = U_ILLEGAL_ARGUMENT_ERROR;
367        return NULL;
368    }
369
370    if (len <= 0) {
371        ec = U_ILLEGAL_ARGUMENT_ERROR;
372        uprv_free(_data);
373        return NULL;
374    }
375
376    data = _data;
377    e = data + len;
378    p = _data;
379    ch = 0xffff;
380
381    return doParse();
382}
383
384
385StringLocalizationInfo*
386LocDataParser::doParse(void) {
387    skipWhitespace();
388    if (!checkInc(OPEN_ANGLE)) {
389        ERROR("Missing open angle");
390    } else {
391        VArray array(DeleteFn);
392        UBool mightHaveNext = TRUE;
393        int32_t requiredLength = -1;
394        while (mightHaveNext) {
395            mightHaveNext = FALSE;
396            UChar** elem = nextArray(requiredLength);
397            skipWhitespace();
398            UBool haveComma = check(COMMA);
399            if (elem) {
400                array.add(elem, ec);
401                if (haveComma) {
402                    inc();
403                    mightHaveNext = TRUE;
404                }
405            } else if (haveComma) {
406                ERROR("Unexpected character");
407            }
408        }
409
410        skipWhitespace();
411        if (!checkInc(CLOSE_ANGLE)) {
412            if (check(OPEN_ANGLE)) {
413                ERROR("Missing comma in outer array");
414            } else {
415                ERROR("Missing close angle bracket in outer array");
416            }
417        }
418
419        skipWhitespace();
420        if (p != e) {
421            ERROR("Extra text after close of localization data");
422        }
423
424        array.add(NULL, ec);
425        if (U_SUCCESS(ec)) {
426            int32_t numLocs = array.length() - 2; // subtract first, NULL
427            UChar*** result = (UChar***)array.release();
428
429            return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
430        }
431    }
432
433    ERROR("Unknown error");
434}
435
436UChar**
437LocDataParser::nextArray(int32_t& requiredLength) {
438    if (U_FAILURE(ec)) {
439        return NULL;
440    }
441
442    skipWhitespace();
443    if (!checkInc(OPEN_ANGLE)) {
444        ERROR("Missing open angle");
445    }
446
447    VArray array;
448    UBool mightHaveNext = TRUE;
449    while (mightHaveNext) {
450        mightHaveNext = FALSE;
451        UChar* elem = nextString();
452        skipWhitespace();
453        UBool haveComma = check(COMMA);
454        if (elem) {
455            array.add(elem, ec);
456            if (haveComma) {
457                inc();
458                mightHaveNext = TRUE;
459            }
460        } else if (haveComma) {
461            ERROR("Unexpected comma");
462        }
463    }
464    skipWhitespace();
465    if (!checkInc(CLOSE_ANGLE)) {
466        if (check(OPEN_ANGLE)) {
467            ERROR("Missing close angle bracket in inner array");
468        } else {
469            ERROR("Missing comma in inner array");
470        }
471    }
472
473    array.add(NULL, ec);
474    if (U_SUCCESS(ec)) {
475        if (requiredLength == -1) {
476            requiredLength = array.length() + 1;
477        } else if (array.length() != requiredLength) {
478            ec = U_ILLEGAL_ARGUMENT_ERROR;
479            ERROR("Array not of required length");
480        }
481
482        return (UChar**)array.release();
483    }
484    ERROR("Unknown Error");
485}
486
487UChar*
488LocDataParser::nextString() {
489    UChar* result = NULL;
490
491    skipWhitespace();
492    if (p < e) {
493        const UChar* terminators;
494        UChar c = *p;
495        UBool haveQuote = c == QUOTE || c == TICK;
496        if (haveQuote) {
497            inc();
498            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
499        } else {
500            terminators = NOQUOTE_STOPLIST;
501        }
502        UChar* start = p;
503        while (p < e && !inList(*p, terminators)) ++p;
504        if (p == e) {
505            ERROR("Unexpected end of data");
506        }
507
508        UChar x = *p;
509        if (p > start) {
510            ch = x;
511            *p = 0x0; // terminate by writing to data
512            result = start; // just point into data
513        }
514        if (haveQuote) {
515            if (x != c) {
516                ERROR("Missing matching quote");
517            } else if (p == start) {
518                ERROR("Empty string");
519            }
520            inc();
521        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
522            ERROR("Unexpected character in string");
523        }
524    }
525
526    // ok for there to be no next string
527    return result;
528}
529
530void
531LocDataParser::parseError(const char* /*str*/) {
532    if (!data) {
533        return;
534    }
535
536    const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
537    if (start < data) {
538        start = data;
539    }
540    for (UChar* x = p; --x >= start;) {
541        if (!*x) {
542            start = x+1;
543            break;
544        }
545    }
546    const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
547    if (limit > e) {
548        limit = e;
549    }
550    u_strncpy(pe.preContext, start, (int32_t)(p-start));
551    pe.preContext[p-start] = 0;
552    u_strncpy(pe.postContext, p, (int32_t)(limit-p));
553    pe.postContext[limit-p] = 0;
554    pe.offset = (int32_t)(p - data);
555
556#ifdef DEBUG
557    fprintf(stderr, "%s at or near character %d: ", str, p-data);
558
559    UnicodeString msg;
560    msg.append(start, p - start);
561    msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
562    msg.append(p, limit-p);
563    msg.append("'");
564
565    char buf[128];
566    int32_t len = msg.extract(0, msg.length(), buf, 128);
567    if (len >= 128) {
568        buf[127] = 0;
569    } else {
570        buf[len] = 0;
571    }
572    fprintf(stderr, "%s\n", buf);
573    fflush(stderr);
574#endif
575
576    uprv_free(data);
577    data = NULL;
578    p = NULL;
579    e = NULL;
580
581    if (U_SUCCESS(ec)) {
582        ec = U_PARSE_ERROR;
583    }
584}
585
586//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
587
588StringLocalizationInfo*
589StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
590    if (U_FAILURE(status)) {
591        return NULL;
592    }
593
594    int32_t len = info.length();
595    if (len == 0) {
596        return NULL; // no error;
597    }
598
599    UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
600    if (!p) {
601        status = U_MEMORY_ALLOCATION_ERROR;
602        return NULL;
603    }
604    info.extract(p, len, status);
605    if (!U_FAILURE(status)) {
606        status = U_ZERO_ERROR; // clear warning about non-termination
607    }
608
609    LocDataParser parser(perror, status);
610    return parser.parse(p, len);
611}
612
613StringLocalizationInfo::~StringLocalizationInfo() {
614    for (UChar*** p = (UChar***)data; *p; ++p) {
615        // remaining data is simply pointer into our unicode string data.
616        if (*p) uprv_free(*p);
617    }
618    if (data) uprv_free(data);
619    if (info) uprv_free(info);
620}
621
622
623const UChar*
624StringLocalizationInfo::getRuleSetName(int32_t index) const {
625    if (index >= 0 && index < getNumberOfRuleSets()) {
626        return data[0][index];
627    }
628    return NULL;
629}
630
631const UChar*
632StringLocalizationInfo::getLocaleName(int32_t index) const {
633    if (index >= 0 && index < getNumberOfDisplayLocales()) {
634        return data[index+1][0];
635    }
636    return NULL;
637}
638
639const UChar*
640StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
641    if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
642        ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
643        return data[localeIndex+1][ruleIndex+1];
644    }
645    return NULL;
646}
647
648// ----------
649
650RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
651                                             const UnicodeString& locs,
652                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
653  : ruleSets(NULL)
654  , defaultRuleSet(NULL)
655  , locale(alocale)
656  , collator(NULL)
657  , decimalFormatSymbols(NULL)
658  , lenient(FALSE)
659  , lenientParseRules(NULL)
660  , localizations(NULL)
661  , noParse(FALSE) //TODO: to be removed after #6895
662{
663  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
664  init(description, locinfo, perror, status);
665}
666
667RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
668                                             const UnicodeString& locs,
669                                             UParseError& perror, UErrorCode& status)
670  : ruleSets(NULL)
671  , defaultRuleSet(NULL)
672  , locale(Locale::getDefault())
673  , collator(NULL)
674  , decimalFormatSymbols(NULL)
675  , lenient(FALSE)
676  , lenientParseRules(NULL)
677  , localizations(NULL)
678  , noParse(FALSE) //TODO: to be removed after #6895
679{
680  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
681  init(description, locinfo, perror, status);
682}
683
684RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
685                                             LocalizationInfo* info,
686                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
687  : ruleSets(NULL)
688  , defaultRuleSet(NULL)
689  , locale(alocale)
690  , collator(NULL)
691  , decimalFormatSymbols(NULL)
692  , lenient(FALSE)
693  , lenientParseRules(NULL)
694  , localizations(NULL)
695  , noParse(FALSE) //TODO: to be removed after #6895
696{
697  init(description, info, perror, status);
698}
699
700RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
701                         UParseError& perror,
702                         UErrorCode& status)
703  : ruleSets(NULL)
704  , defaultRuleSet(NULL)
705  , locale(Locale::getDefault())
706  , collator(NULL)
707  , decimalFormatSymbols(NULL)
708  , lenient(FALSE)
709  , lenientParseRules(NULL)
710  , localizations(NULL)
711  , noParse(FALSE) //TODO: to be removed after #6895
712{
713    init(description, NULL, perror, status);
714}
715
716RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
717                         const Locale& aLocale,
718                         UParseError& perror,
719                         UErrorCode& status)
720  : ruleSets(NULL)
721  , defaultRuleSet(NULL)
722  , locale(aLocale)
723  , collator(NULL)
724  , decimalFormatSymbols(NULL)
725  , lenient(FALSE)
726  , lenientParseRules(NULL)
727  , localizations(NULL)
728  , noParse(FALSE) //TODO: to be removed after #6895
729{
730    init(description, NULL, perror, status);
731}
732
733RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
734  : ruleSets(NULL)
735  , defaultRuleSet(NULL)
736  , locale(alocale)
737  , collator(NULL)
738  , decimalFormatSymbols(NULL)
739  , lenient(FALSE)
740  , lenientParseRules(NULL)
741  , localizations(NULL)
742{
743    if (U_FAILURE(status)) {
744        return;
745    }
746
747    const char* rules_tag = "RBNFRules";
748    const char* fmt_tag = "";
749    switch (tag) {
750    case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
751    case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
752    case URBNF_DURATION: fmt_tag = "DurationRules"; break;
753    case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
754    default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
755    }
756
757    // TODO: read localization info from resource
758    LocalizationInfo* locinfo = NULL;
759
760    int32_t len = 0;
761    UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
762    if (U_SUCCESS(status)) {
763        setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
764                     ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
765
766        UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
767        if (U_FAILURE(status)) {
768            ures_close(nfrb);
769        }
770        UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
771        if (U_FAILURE(status)) {
772            ures_close(rbnfRules);
773            ures_close(nfrb);
774            return;
775        }
776
777        UnicodeString desc;
778        while (ures_hasNext(ruleSets)) {
779           const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
780           desc.append(currentString);
781        }
782        UParseError perror;
783
784
785        init (desc, locinfo, perror, status);
786
787        //TODO: we need a real fix - see #6895 / #6896
788        noParse = FALSE;
789        if (tag == URBNF_SPELLOUT) {
790            const char *lang = alocale.getLanguage();
791            for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
792                if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
793                    noParse = TRUE;
794                    break;
795                }
796            }
797        }
798        //TODO: end
799
800        ures_close(ruleSets);
801        ures_close(rbnfRules);
802    }
803    ures_close(nfrb);
804}
805
806RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
807  : NumberFormat(rhs)
808  , ruleSets(NULL)
809  , defaultRuleSet(NULL)
810  , locale(rhs.locale)
811  , collator(NULL)
812  , decimalFormatSymbols(NULL)
813  , lenient(FALSE)
814  , lenientParseRules(NULL)
815  , localizations(NULL)
816{
817    this->operator=(rhs);
818}
819
820// --------
821
822RuleBasedNumberFormat&
823RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
824{
825    UErrorCode status = U_ZERO_ERROR;
826    dispose();
827    locale = rhs.locale;
828    lenient = rhs.lenient;
829
830    UnicodeString rules = rhs.getRules();
831    UParseError perror;
832    init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
833
834    //TODO: remove below when we fix the parse bug - See #6895 / #6896
835    noParse = rhs.noParse;
836
837    return *this;
838}
839
840RuleBasedNumberFormat::~RuleBasedNumberFormat()
841{
842    dispose();
843}
844
845Format*
846RuleBasedNumberFormat::clone(void) const
847{
848    RuleBasedNumberFormat * result = NULL;
849    UnicodeString rules = getRules();
850    UErrorCode status = U_ZERO_ERROR;
851    UParseError perror;
852    result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
853    /* test for NULL */
854    if (result == 0) {
855        status = U_MEMORY_ALLOCATION_ERROR;
856        return 0;
857    }
858    if (U_FAILURE(status)) {
859        delete result;
860        result = 0;
861    } else {
862        result->lenient = lenient;
863
864        //TODO: remove below when we fix the parse bug - See #6895 / #6896
865        result->noParse = noParse;
866    }
867    return result;
868}
869
870UBool
871RuleBasedNumberFormat::operator==(const Format& other) const
872{
873    if (this == &other) {
874        return TRUE;
875    }
876
877    if (typeid(*this) == typeid(other)) {
878        const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
879        if (locale == rhs.locale &&
880            lenient == rhs.lenient &&
881            (localizations == NULL
882                ? rhs.localizations == NULL
883                : (rhs.localizations == NULL
884                    ? FALSE
885                    : *localizations == rhs.localizations))) {
886
887            NFRuleSet** p = ruleSets;
888            NFRuleSet** q = rhs.ruleSets;
889            if (p == NULL) {
890                return q == NULL;
891            } else if (q == NULL) {
892                return FALSE;
893            }
894            while (*p && *q && (**p == **q)) {
895                ++p;
896                ++q;
897            }
898            return *q == NULL && *p == NULL;
899        }
900    }
901
902    return FALSE;
903}
904
905UnicodeString
906RuleBasedNumberFormat::getRules() const
907{
908    UnicodeString result;
909    if (ruleSets != NULL) {
910        for (NFRuleSet** p = ruleSets; *p; ++p) {
911            (*p)->appendRules(result);
912        }
913    }
914    return result;
915}
916
917UnicodeString
918RuleBasedNumberFormat::getRuleSetName(int32_t index) const
919{
920    if (localizations) {
921      UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
922      return string;
923    } else if (ruleSets) {
924        UnicodeString result;
925        for (NFRuleSet** p = ruleSets; *p; ++p) {
926            NFRuleSet* rs = *p;
927            if (rs->isPublic()) {
928                if (--index == -1) {
929                    rs->getName(result);
930                    return result;
931                }
932            }
933        }
934    }
935    UnicodeString empty;
936    return empty;
937}
938
939int32_t
940RuleBasedNumberFormat::getNumberOfRuleSetNames() const
941{
942    int32_t result = 0;
943    if (localizations) {
944      result = localizations->getNumberOfRuleSets();
945    } else if (ruleSets) {
946        for (NFRuleSet** p = ruleSets; *p; ++p) {
947            if ((**p).isPublic()) {
948                ++result;
949            }
950        }
951    }
952    return result;
953}
954
955int32_t
956RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
957    if (localizations) {
958        return localizations->getNumberOfDisplayLocales();
959    }
960    return 0;
961}
962
963Locale
964RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
965    if (U_FAILURE(status)) {
966        return Locale("");
967    }
968    if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
969        UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
970        char buffer[64];
971        int32_t cap = name.length() + 1;
972        char* bp = buffer;
973        if (cap > 64) {
974            bp = (char *)uprv_malloc(cap);
975            if (bp == NULL) {
976                status = U_MEMORY_ALLOCATION_ERROR;
977                return Locale("");
978            }
979        }
980        name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
981        Locale retLocale(bp);
982        if (bp != buffer) {
983            uprv_free(bp);
984        }
985        return retLocale;
986    }
987    status = U_ILLEGAL_ARGUMENT_ERROR;
988    Locale retLocale;
989    return retLocale;
990}
991
992UnicodeString
993RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
994    if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
995        UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
996        int32_t len = localeName.length();
997        UChar* localeStr = localeName.getBuffer(len + 1);
998        while (len >= 0) {
999            localeStr[len] = 0;
1000            int32_t ix = localizations->indexForLocale(localeStr);
1001            if (ix >= 0) {
1002                UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1003                return name;
1004            }
1005
1006            // trim trailing portion, skipping over ommitted sections
1007            do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1008            while (len > 0 && localeStr[len-1] == 0x005F) --len;
1009        }
1010        UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1011        return name;
1012    }
1013    UnicodeString bogus;
1014    bogus.setToBogus();
1015    return bogus;
1016}
1017
1018UnicodeString
1019RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1020    if (localizations) {
1021        UnicodeString rsn(ruleSetName);
1022        int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1023        return getRuleSetDisplayName(ix, localeParam);
1024    }
1025    UnicodeString bogus;
1026    bogus.setToBogus();
1027    return bogus;
1028}
1029
1030NFRuleSet*
1031RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1032{
1033    if (U_SUCCESS(status) && ruleSets) {
1034        for (NFRuleSet** p = ruleSets; *p; ++p) {
1035            NFRuleSet* rs = *p;
1036            if (rs->isNamed(name)) {
1037                return rs;
1038            }
1039        }
1040        status = U_ILLEGAL_ARGUMENT_ERROR;
1041    }
1042    return NULL;
1043}
1044
1045UnicodeString&
1046RuleBasedNumberFormat::format(int32_t number,
1047                              UnicodeString& toAppendTo,
1048                              FieldPosition& /* pos */) const
1049{
1050    if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1051    return toAppendTo;
1052}
1053
1054
1055UnicodeString&
1056RuleBasedNumberFormat::format(int64_t number,
1057                              UnicodeString& toAppendTo,
1058                              FieldPosition& /* pos */) const
1059{
1060    if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1061    return toAppendTo;
1062}
1063
1064
1065UnicodeString&
1066RuleBasedNumberFormat::format(double number,
1067                              UnicodeString& toAppendTo,
1068                              FieldPosition& /* pos */) const
1069{
1070    // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1071    if (uprv_isNaN(number)) {
1072        DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1073        if (decFmtSyms) {
1074            toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1075        }
1076    } else if (defaultRuleSet) {
1077        defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1078    }
1079    return toAppendTo;
1080}
1081
1082
1083UnicodeString&
1084RuleBasedNumberFormat::format(int32_t number,
1085                              const UnicodeString& ruleSetName,
1086                              UnicodeString& toAppendTo,
1087                              FieldPosition& /* pos */,
1088                              UErrorCode& status) const
1089{
1090    // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1091    if (U_SUCCESS(status)) {
1092        if (ruleSetName.indexOf(gPercentPercent) == 0) {
1093            // throw new IllegalArgumentException("Can't use internal rule set");
1094            status = U_ILLEGAL_ARGUMENT_ERROR;
1095        } else {
1096            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1097            if (rs) {
1098                rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1099            }
1100        }
1101    }
1102    return toAppendTo;
1103}
1104
1105
1106UnicodeString&
1107RuleBasedNumberFormat::format(int64_t number,
1108                              const UnicodeString& ruleSetName,
1109                              UnicodeString& toAppendTo,
1110                              FieldPosition& /* pos */,
1111                              UErrorCode& status) const
1112{
1113    if (U_SUCCESS(status)) {
1114        if (ruleSetName.indexOf(gPercentPercent) == 0) {
1115            // throw new IllegalArgumentException("Can't use internal rule set");
1116            status = U_ILLEGAL_ARGUMENT_ERROR;
1117        } else {
1118            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1119            if (rs) {
1120                rs->format(number, toAppendTo, toAppendTo.length());
1121            }
1122        }
1123    }
1124    return toAppendTo;
1125}
1126
1127
1128// make linker happy
1129UnicodeString&
1130RuleBasedNumberFormat::format(const Formattable& obj,
1131                              UnicodeString& toAppendTo,
1132                              FieldPosition& pos,
1133                              UErrorCode& status) const
1134{
1135    return NumberFormat::format(obj, toAppendTo, pos, status);
1136}
1137
1138UnicodeString&
1139RuleBasedNumberFormat::format(double number,
1140                              const UnicodeString& ruleSetName,
1141                              UnicodeString& toAppendTo,
1142                              FieldPosition& /* pos */,
1143                              UErrorCode& status) const
1144{
1145    if (U_SUCCESS(status)) {
1146        if (ruleSetName.indexOf(gPercentPercent) == 0) {
1147            // throw new IllegalArgumentException("Can't use internal rule set");
1148            status = U_ILLEGAL_ARGUMENT_ERROR;
1149        } else {
1150            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1151            if (rs) {
1152                rs->format(number, toAppendTo, toAppendTo.length());
1153            }
1154        }
1155    }
1156    return toAppendTo;
1157}
1158
1159void
1160RuleBasedNumberFormat::parse(const UnicodeString& text,
1161                             Formattable& result,
1162                             ParsePosition& parsePosition) const
1163{
1164    //TODO: We need a real fix.  See #6895 / #6896
1165    if (noParse) {
1166        // skip parsing
1167        parsePosition.setErrorIndex(0);
1168        return;
1169    }
1170
1171    if (!ruleSets) {
1172        parsePosition.setErrorIndex(0);
1173        return;
1174    }
1175
1176    UnicodeString workingText(text, parsePosition.getIndex());
1177    ParsePosition workingPos(0);
1178
1179    ParsePosition high_pp(0);
1180    Formattable high_result;
1181
1182    for (NFRuleSet** p = ruleSets; *p; ++p) {
1183        NFRuleSet *rp = *p;
1184        if (rp->isPublic() && rp->isParseable()) {
1185            ParsePosition working_pp(0);
1186            Formattable working_result;
1187
1188            rp->parse(workingText, working_pp, kMaxDouble, working_result);
1189            if (working_pp.getIndex() > high_pp.getIndex()) {
1190                high_pp = working_pp;
1191                high_result = working_result;
1192
1193                if (high_pp.getIndex() == workingText.length()) {
1194                    break;
1195                }
1196            }
1197        }
1198    }
1199
1200    int32_t startIndex = parsePosition.getIndex();
1201    parsePosition.setIndex(startIndex + high_pp.getIndex());
1202    if (high_pp.getIndex() > 0) {
1203        parsePosition.setErrorIndex(-1);
1204    } else {
1205        int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1206        parsePosition.setErrorIndex(startIndex + errorIndex);
1207    }
1208    result = high_result;
1209    if (result.getType() == Formattable::kDouble) {
1210        int32_t r = (int32_t)result.getDouble();
1211        if ((double)r == result.getDouble()) {
1212            result.setLong(r);
1213        }
1214    }
1215}
1216
1217#if !UCONFIG_NO_COLLATION
1218
1219void
1220RuleBasedNumberFormat::setLenient(UBool enabled)
1221{
1222    lenient = enabled;
1223    if (!enabled && collator) {
1224        delete collator;
1225        collator = NULL;
1226    }
1227}
1228
1229#endif
1230
1231void
1232RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1233    if (U_SUCCESS(status)) {
1234        if (ruleSetName.isEmpty()) {
1235          if (localizations) {
1236              UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1237              defaultRuleSet = findRuleSet(name, status);
1238          } else {
1239            initDefaultRuleSet();
1240          }
1241        } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1242            status = U_ILLEGAL_ARGUMENT_ERROR;
1243        } else {
1244            NFRuleSet* result = findRuleSet(ruleSetName, status);
1245            if (result != NULL) {
1246                defaultRuleSet = result;
1247            }
1248        }
1249    }
1250}
1251
1252UnicodeString
1253RuleBasedNumberFormat::getDefaultRuleSetName() const {
1254  UnicodeString result;
1255  if (defaultRuleSet && defaultRuleSet->isPublic()) {
1256    defaultRuleSet->getName(result);
1257  } else {
1258    result.setToBogus();
1259  }
1260  return result;
1261}
1262
1263void
1264RuleBasedNumberFormat::initDefaultRuleSet()
1265{
1266    defaultRuleSet = NULL;
1267    if (!ruleSets) {
1268      return;
1269    }
1270
1271    const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1272    const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1273    const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1274
1275    NFRuleSet**p = &ruleSets[0];
1276    while (*p) {
1277        if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1278            defaultRuleSet = *p;
1279            return;
1280        } else {
1281            ++p;
1282        }
1283    }
1284
1285    defaultRuleSet = *--p;
1286    if (!defaultRuleSet->isPublic()) {
1287        while (p != ruleSets) {
1288            if ((*--p)->isPublic()) {
1289                defaultRuleSet = *p;
1290                break;
1291            }
1292        }
1293    }
1294}
1295
1296
1297void
1298RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1299                            UParseError& pErr, UErrorCode& status)
1300{
1301    // TODO: implement UParseError
1302    uprv_memset(&pErr, 0, sizeof(UParseError));
1303    // Note: this can leave ruleSets == NULL, so remaining code should check
1304    if (U_FAILURE(status)) {
1305        return;
1306    }
1307
1308    this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1309
1310    UnicodeString description(rules);
1311    if (!description.length()) {
1312        status = U_MEMORY_ALLOCATION_ERROR;
1313        return;
1314    }
1315
1316    // start by stripping the trailing whitespace from all the rules
1317    // (this is all the whitespace follwing each semicolon in the
1318    // description).  This allows us to look for rule-set boundaries
1319    // by searching for ";%" without having to worry about whitespace
1320    // between the ; and the %
1321    stripWhitespace(description);
1322
1323    // check to see if there's a set of lenient-parse rules.  If there
1324    // is, pull them out into our temporary holding place for them,
1325    // and delete them from the description before the real desciption-
1326    // parsing code sees them
1327    int32_t lp = description.indexOf(gLenientParse);
1328    if (lp != -1) {
1329        // we've got to make sure we're not in the middle of a rule
1330        // (where "%%lenient-parse" would actually get treated as
1331        // rule text)
1332        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1333            // locate the beginning and end of the actual collation
1334            // rules (there may be whitespace between the name and
1335            // the first token in the description)
1336            int lpEnd = description.indexOf(gSemiPercent, lp);
1337
1338            if (lpEnd == -1) {
1339                lpEnd = description.length() - 1;
1340            }
1341            int lpStart = lp + u_strlen(gLenientParse);
1342            while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1343                ++lpStart;
1344            }
1345
1346            // copy out the lenient-parse rules and delete them
1347            // from the description
1348            lenientParseRules = new UnicodeString();
1349            /* test for NULL */
1350            if (lenientParseRules == 0) {
1351                status = U_MEMORY_ALLOCATION_ERROR;
1352                return;
1353            }
1354            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1355
1356            description.remove(lp, lpEnd + 1 - lp);
1357        }
1358    }
1359
1360    // pre-flight parsing the description and count the number of
1361    // rule sets (";%" marks the end of one rule set and the beginning
1362    // of the next)
1363    int numRuleSets = 0;
1364    for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1365        ++numRuleSets;
1366        ++p;
1367    }
1368    ++numRuleSets;
1369
1370    // our rule list is an array of the appropriate size
1371    ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1372    /* test for NULL */
1373    if (ruleSets == 0) {
1374        status = U_MEMORY_ALLOCATION_ERROR;
1375        return;
1376    }
1377
1378    for (int i = 0; i <= numRuleSets; ++i) {
1379        ruleSets[i] = NULL;
1380    }
1381
1382    // divide up the descriptions into individual rule-set descriptions
1383    // and store them in a temporary array.  At each step, we also
1384    // new up a rule set, but all this does is initialize its name
1385    // and remove it from its description.  We can't actually parse
1386    // the rest of the descriptions and finish initializing everything
1387    // because we have to know the names and locations of all the rule
1388    // sets before we can actually set everything up
1389    if(!numRuleSets) {
1390        status = U_ILLEGAL_ARGUMENT_ERROR;
1391        return;
1392    }
1393    UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1394    if (ruleSetDescriptions == 0) {
1395        status = U_MEMORY_ALLOCATION_ERROR;
1396        return;
1397    }
1398
1399    {
1400        int curRuleSet = 0;
1401        int32_t start = 0;
1402        for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1403            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1404            ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1405            if (ruleSets[curRuleSet] == 0) {
1406                status = U_MEMORY_ALLOCATION_ERROR;
1407                goto cleanup;
1408            }
1409            ++curRuleSet;
1410            start = p + 1;
1411        }
1412        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1413        ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1414        if (ruleSets[curRuleSet] == 0) {
1415            status = U_MEMORY_ALLOCATION_ERROR;
1416            goto cleanup;
1417        }
1418    }
1419
1420    // now we can take note of the formatter's default rule set, which
1421    // is the last public rule set in the description (it's the last
1422    // rather than the first so that a user can create a new formatter
1423    // from an existing formatter and change its default behavior just
1424    // by appending more rule sets to the end)
1425
1426    // {dlf} Initialization of a fraction rule set requires the default rule
1427    // set to be known.  For purposes of initialization, this is always the
1428    // last public rule set, no matter what the localization data says.
1429    initDefaultRuleSet();
1430
1431    // finally, we can go back through the temporary descriptions
1432    // list and finish seting up the substructure (and we throw
1433    // away the temporary descriptions as we go)
1434    {
1435        for (int i = 0; i < numRuleSets; i++) {
1436            ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1437        }
1438    }
1439
1440    // Now that the rules are initialized, the 'real' default rule
1441    // set can be adjusted by the localization data.
1442
1443    // The C code keeps the localization array as is, rather than building
1444    // a separate array of the public rule set names, so we have less work
1445    // to do here-- but we still need to check the names.
1446
1447    if (localizationInfos) {
1448        // confirm the names, if any aren't in the rules, that's an error
1449        // it is ok if the rules contain public rule sets that are not in this list
1450        for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1451            UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1452            NFRuleSet* rs = findRuleSet(name, status);
1453            if (rs == NULL) {
1454                break; // error
1455            }
1456            if (i == 0) {
1457                defaultRuleSet = rs;
1458            }
1459        }
1460    } else {
1461        defaultRuleSet = getDefaultRuleSet();
1462    }
1463
1464cleanup:
1465    delete[] ruleSetDescriptions;
1466}
1467
1468void
1469RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1470{
1471    // iterate through the characters...
1472    UnicodeString result;
1473
1474    int start = 0;
1475    while (start != -1 && start < description.length()) {
1476        // seek to the first non-whitespace character...
1477        while (start < description.length()
1478            && PatternProps::isWhiteSpace(description.charAt(start))) {
1479            ++start;
1480        }
1481
1482        // locate the next semicolon in the text and copy the text from
1483        // our current position up to that semicolon into the result
1484        int32_t p = description.indexOf(gSemiColon, start);
1485        if (p == -1) {
1486            // or if we don't find a semicolon, just copy the rest of
1487            // the string into the result
1488            result.append(description, start, description.length() - start);
1489            start = -1;
1490        }
1491        else if (p < description.length()) {
1492            result.append(description, start, p + 1 - start);
1493            start = p + 1;
1494        }
1495
1496        // when we get here, we've seeked off the end of the sring, and
1497        // we terminate the loop (we continue until *start* is -1 rather
1498        // than until *p* is -1, because otherwise we'd miss the last
1499        // rule in the description)
1500        else {
1501            start = -1;
1502        }
1503    }
1504
1505    description.setTo(result);
1506}
1507
1508
1509void
1510RuleBasedNumberFormat::dispose()
1511{
1512    if (ruleSets) {
1513        for (NFRuleSet** p = ruleSets; *p; ++p) {
1514            delete *p;
1515        }
1516        uprv_free(ruleSets);
1517        ruleSets = NULL;
1518    }
1519
1520#if !UCONFIG_NO_COLLATION
1521    delete collator;
1522#endif
1523    collator = NULL;
1524
1525    delete decimalFormatSymbols;
1526    decimalFormatSymbols = NULL;
1527
1528    delete lenientParseRules;
1529    lenientParseRules = NULL;
1530
1531    if (localizations) localizations = localizations->unref();
1532}
1533
1534
1535//-----------------------------------------------------------------------
1536// package-internal API
1537//-----------------------------------------------------------------------
1538
1539/**
1540 * Returns the collator to use for lenient parsing.  The collator is lazily created:
1541 * this function creates it the first time it's called.
1542 * @return The collator to use for lenient parsing, or null if lenient parsing
1543 * is turned off.
1544*/
1545Collator*
1546RuleBasedNumberFormat::getCollator() const
1547{
1548#if !UCONFIG_NO_COLLATION
1549    if (!ruleSets) {
1550        return NULL;
1551    }
1552
1553    // lazy-evaulate the collator
1554    if (collator == NULL && lenient) {
1555        // create a default collator based on the formatter's locale,
1556        // then pull out that collator's rules, append any additional
1557        // rules specified in the description, and create a _new_
1558        // collator based on the combinaiton of those rules
1559
1560        UErrorCode status = U_ZERO_ERROR;
1561
1562        Collator* temp = Collator::createInstance(locale, status);
1563        RuleBasedCollator* newCollator;
1564        if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1565            if (lenientParseRules) {
1566                UnicodeString rules(newCollator->getRules());
1567                rules.append(*lenientParseRules);
1568
1569                newCollator = new RuleBasedCollator(rules, status);
1570                // Exit if newCollator could not be created.
1571                if (newCollator == NULL) {
1572                	return NULL;
1573                }
1574            } else {
1575                temp = NULL;
1576            }
1577            if (U_SUCCESS(status)) {
1578                newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1579                // cast away const
1580                ((RuleBasedNumberFormat*)this)->collator = newCollator;
1581            } else {
1582                delete newCollator;
1583            }
1584        }
1585        delete temp;
1586    }
1587#endif
1588
1589    // if lenient-parse mode is off, this will be null
1590    // (see setLenientParseMode())
1591    return collator;
1592}
1593
1594
1595/**
1596 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1597 * instances owned by this formatter.  This object is lazily created: this function
1598 * creates it the first time it's called.
1599 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1600 * instances owned by this formatter.
1601*/
1602DecimalFormatSymbols*
1603RuleBasedNumberFormat::getDecimalFormatSymbols() const
1604{
1605    // lazy-evaluate the DecimalFormatSymbols object.  This object
1606    // is shared by all DecimalFormat instances belonging to this
1607    // formatter
1608    if (decimalFormatSymbols == NULL) {
1609        UErrorCode status = U_ZERO_ERROR;
1610        DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1611        if (U_SUCCESS(status)) {
1612            ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1613        } else {
1614            delete temp;
1615        }
1616    }
1617    return decimalFormatSymbols;
1618}
1619
1620U_NAMESPACE_END
1621
1622/* U_HAVE_RBNF */
1623#endif
1624