1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 1997-2015, International Business Machines Corporation
6* and others. All Rights Reserved.
7*******************************************************************************
8*/
9
10#include "unicode/utypes.h"
11#include "utypeinfo.h"  // for 'typeid' to work
12
13#include "unicode/rbnf.h"
14
15#if U_HAVE_RBNF
16
17#include "unicode/normlzr.h"
18#include "unicode/plurfmt.h"
19#include "unicode/tblcoll.h"
20#include "unicode/uchar.h"
21#include "unicode/ucol.h"
22#include "unicode/uloc.h"
23#include "unicode/unum.h"
24#include "unicode/ures.h"
25#include "unicode/ustring.h"
26#include "unicode/utf16.h"
27#include "unicode/udata.h"
28#include "unicode/udisplaycontext.h"
29#include "unicode/brkiter.h"
30#include "unicode/ucasemap.h"
31
32#include "cmemory.h"
33#include "cstring.h"
34#include "patternprops.h"
35#include "uresimp.h"
36#include "nfrs.h"
37#include "digitlst.h"
38
39// debugging
40// #define RBNF_DEBUG
41
42#ifdef RBNF_DEBUG
43#include <stdio.h>
44#endif
45
46#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
47
48static const UChar gPercentPercent[] =
49{
50    0x25, 0x25, 0
51}; /* "%%" */
52
53// All urbnf objects are created through openRules, so we init all of the
54// Unicode string constants required by rbnf, nfrs, or nfr here.
55static const UChar gLenientParse[] =
56{
57    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
58}; /* "%%lenient-parse:" */
59static const UChar gSemiColon = 0x003B;
60static const UChar gSemiPercent[] =
61{
62    0x3B, 0x25, 0
63}; /* ";%" */
64
// kHalfMaxDouble is 2^kSomeNumberOfBitsDiv2 as a double; kMaxDouble is its
// square (2^44). Every macro body is fully parenthesized so that it expands
// to a single primary expression regardless of the surrounding operators.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
68
69U_NAMESPACE_BEGIN
70
71UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
72
73/*
74This is a utility class. It does not use ICU's RTTI.
75If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
76Please make sure that intltest passes on Windows in Release mode,
77since the string pooling per compilation unit will mess up how RTTI works.
78The RTTI code was also removed due to lack of code coverage.
79*/
80class LocalizationInfo : public UMemory {
81protected:
82    virtual ~LocalizationInfo();
83    uint32_t refcount;
84
85public:
86    LocalizationInfo() : refcount(0) {}
87
88    LocalizationInfo* ref(void) {
89        ++refcount;
90        return this;
91    }
92
93    LocalizationInfo* unref(void) {
94        if (refcount && --refcount == 0) {
95            delete this;
96        }
97        return NULL;
98    }
99
100    virtual UBool operator==(const LocalizationInfo* rhs) const;
101    inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
102
103    virtual int32_t getNumberOfRuleSets(void) const = 0;
104    virtual const UChar* getRuleSetName(int32_t index) const = 0;
105    virtual int32_t getNumberOfDisplayLocales(void) const = 0;
106    virtual const UChar* getLocaleName(int32_t index) const = 0;
107    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
108
109    virtual int32_t indexForLocale(const UChar* locale) const;
110    virtual int32_t indexForRuleSet(const UChar* ruleset) const;
111
112//    virtual UClassID getDynamicClassID() const = 0;
113//    static UClassID getStaticClassID(void);
114};
115
116LocalizationInfo::~LocalizationInfo() {}
117
118//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
119
120// if both strings are NULL, this returns TRUE
121static UBool
122streq(const UChar* lhs, const UChar* rhs) {
123    if (rhs == lhs) {
124        return TRUE;
125    }
126    if (lhs && rhs) {
127        return u_strcmp(lhs, rhs) == 0;
128    }
129    return FALSE;
130}
131
132UBool
133LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
134    if (rhs) {
135        if (this == rhs) {
136            return TRUE;
137        }
138
139        int32_t rsc = getNumberOfRuleSets();
140        if (rsc == rhs->getNumberOfRuleSets()) {
141            for (int i = 0; i < rsc; ++i) {
142                if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
143                    return FALSE;
144                }
145            }
146            int32_t dlc = getNumberOfDisplayLocales();
147            if (dlc == rhs->getNumberOfDisplayLocales()) {
148                for (int i = 0; i < dlc; ++i) {
149                    const UChar* locale = getLocaleName(i);
150                    int32_t ix = rhs->indexForLocale(locale);
151                    // if no locale, ix is -1, getLocaleName returns null, so streq returns false
152                    if (!streq(locale, rhs->getLocaleName(ix))) {
153                        return FALSE;
154                    }
155                    for (int j = 0; j < rsc; ++j) {
156                        if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
157                            return FALSE;
158                        }
159                    }
160                }
161                return TRUE;
162            }
163        }
164    }
165    return FALSE;
166}
167
168int32_t
169LocalizationInfo::indexForLocale(const UChar* locale) const {
170    for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
171        if (streq(locale, getLocaleName(i))) {
172            return i;
173        }
174    }
175    return -1;
176}
177
178int32_t
179LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
180    if (ruleset) {
181        for (int i = 0; i < getNumberOfRuleSets(); ++i) {
182            if (streq(ruleset, getRuleSetName(i))) {
183                return i;
184            }
185        }
186    }
187    return -1;
188}
189
190
191typedef void (*Fn_Deleter)(void*);
192
193class VArray {
194    void** buf;
195    int32_t cap;
196    int32_t size;
197    Fn_Deleter deleter;
198public:
199    VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
200
201    VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
202
203    ~VArray() {
204        if (deleter) {
205            for (int i = 0; i < size; ++i) {
206                (*deleter)(buf[i]);
207            }
208        }
209        uprv_free(buf);
210    }
211
212    int32_t length() {
213        return size;
214    }
215
216    void add(void* elem, UErrorCode& status) {
217        if (U_SUCCESS(status)) {
218            if (size == cap) {
219                if (cap == 0) {
220                    cap = 1;
221                } else if (cap < 256) {
222                    cap *= 2;
223                } else {
224                    cap += 256;
225                }
226                if (buf == NULL) {
227                    buf = (void**)uprv_malloc(cap * sizeof(void*));
228                } else {
229                    buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
230                }
231                if (buf == NULL) {
232                    // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
233                    status = U_MEMORY_ALLOCATION_ERROR;
234                    return;
235                }
236                void* start = &buf[size];
237                size_t count = (cap - size) * sizeof(void*);
238                uprv_memset(start, 0, count); // fill with nulls, just because
239            }
240            buf[size++] = elem;
241        }
242    }
243
244    void** release(void) {
245        void** result = buf;
246        buf = NULL;
247        cap = 0;
248        size = 0;
249        return result;
250    }
251};
252
253class LocDataParser;
254
255class StringLocalizationInfo : public LocalizationInfo {
256    UChar* info;
257    UChar*** data;
258    int32_t numRuleSets;
259    int32_t numLocales;
260
261friend class LocDataParser;
262
263    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
264        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
265    {
266    }
267
268public:
269    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
270
271    virtual ~StringLocalizationInfo();
272    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
273    virtual const UChar* getRuleSetName(int32_t index) const;
274    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
275    virtual const UChar* getLocaleName(int32_t index) const;
276    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
277
278//    virtual UClassID getDynamicClassID() const;
279//    static UClassID getStaticClassID(void);
280
281private:
282    void init(UErrorCode& status) const;
283};
284
285
// Code points of the punctuation recognized by the localization-data parser.
enum {
    OPEN_ANGLE  = 0x003C, /* '<'  */
    CLOSE_ANGLE = 0x003E, /* '>'  */
    COMMA       = 0x002C, /* ','  */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"'  */
    SPACE       = 0x0020  /* ' '  */
};
294
295/**
296 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
297 */
298class LocDataParser {
299    UChar* data;
300    const UChar* e;
301    UChar* p;
302    UChar ch;
303    UParseError& pe;
304    UErrorCode& ec;
305
306public:
307    LocDataParser(UParseError& parseError, UErrorCode& status)
308        : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
309    ~LocDataParser() {}
310
311    /*
312    * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
313    * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
314    */
315    StringLocalizationInfo* parse(UChar* data, int32_t len);
316
317private:
318
319    inline void inc(void) {
320        ++p;
321        ch = 0xffff;
322    }
323    inline UBool checkInc(UChar c) {
324        if (p < e && (ch == c || *p == c)) {
325            inc();
326            return TRUE;
327        }
328        return FALSE;
329    }
330    inline UBool check(UChar c) {
331        return p < e && (ch == c || *p == c);
332    }
333    inline void skipWhitespace(void) {
334        while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) {
335            inc();
336        }
337    }
338    inline UBool inList(UChar c, const UChar* list) const {
339        if (*list == SPACE && PatternProps::isWhiteSpace(c)) {
340            return TRUE;
341        }
342        while (*list && *list != c) {
343            ++list;
344        }
345        return *list == c;
346    }
347    void parseError(const char* msg);
348
349    StringLocalizationInfo* doParse(void);
350
351    UChar** nextArray(int32_t& requiredLength);
352    UChar*  nextString(void);
353};
354
// ERROR(msg) reports a parse error through parseError() and returns NULL from
// the enclosing function. The message is forwarded only in RBNF_DEBUG builds.
// The two statements are wrapped in do { } while (0) so that the macro acts as
// a single statement and stays correct inside an unbraced if/else.
// EXPLANATION_ARG names parseError()'s parameter only in RBNF_DEBUG builds,
// leaving it unnamed (and therefore unused) otherwise.
#ifdef RBNF_DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#define EXPLANATION_ARG
#endif
362
363
364static const UChar DQUOTE_STOPLIST[] = {
365    QUOTE, 0
366};
367
368static const UChar SQUOTE_STOPLIST[] = {
369    TICK, 0
370};
371
372static const UChar NOQUOTE_STOPLIST[] = {
373    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
374};
375
376static void
377DeleteFn(void* p) {
378  uprv_free(p);
379}
380
381StringLocalizationInfo*
382LocDataParser::parse(UChar* _data, int32_t len) {
383    if (U_FAILURE(ec)) {
384        if (_data) uprv_free(_data);
385        return NULL;
386    }
387
388    pe.line = 0;
389    pe.offset = -1;
390    pe.postContext[0] = 0;
391    pe.preContext[0] = 0;
392
393    if (_data == NULL) {
394        ec = U_ILLEGAL_ARGUMENT_ERROR;
395        return NULL;
396    }
397
398    if (len <= 0) {
399        ec = U_ILLEGAL_ARGUMENT_ERROR;
400        uprv_free(_data);
401        return NULL;
402    }
403
404    data = _data;
405    e = data + len;
406    p = _data;
407    ch = 0xffff;
408
409    return doParse();
410}
411
412
413StringLocalizationInfo*
414LocDataParser::doParse(void) {
415    skipWhitespace();
416    if (!checkInc(OPEN_ANGLE)) {
417        ERROR("Missing open angle");
418    } else {
419        VArray array(DeleteFn);
420        UBool mightHaveNext = TRUE;
421        int32_t requiredLength = -1;
422        while (mightHaveNext) {
423            mightHaveNext = FALSE;
424            UChar** elem = nextArray(requiredLength);
425            skipWhitespace();
426            UBool haveComma = check(COMMA);
427            if (elem) {
428                array.add(elem, ec);
429                if (haveComma) {
430                    inc();
431                    mightHaveNext = TRUE;
432                }
433            } else if (haveComma) {
434                ERROR("Unexpected character");
435            }
436        }
437
438        skipWhitespace();
439        if (!checkInc(CLOSE_ANGLE)) {
440            if (check(OPEN_ANGLE)) {
441                ERROR("Missing comma in outer array");
442            } else {
443                ERROR("Missing close angle bracket in outer array");
444            }
445        }
446
447        skipWhitespace();
448        if (p != e) {
449            ERROR("Extra text after close of localization data");
450        }
451
452        array.add(NULL, ec);
453        if (U_SUCCESS(ec)) {
454            int32_t numLocs = array.length() - 2; // subtract first, NULL
455            UChar*** result = (UChar***)array.release();
456
457            return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
458        }
459    }
460
461    ERROR("Unknown error");
462}
463
464UChar**
465LocDataParser::nextArray(int32_t& requiredLength) {
466    if (U_FAILURE(ec)) {
467        return NULL;
468    }
469
470    skipWhitespace();
471    if (!checkInc(OPEN_ANGLE)) {
472        ERROR("Missing open angle");
473    }
474
475    VArray array;
476    UBool mightHaveNext = TRUE;
477    while (mightHaveNext) {
478        mightHaveNext = FALSE;
479        UChar* elem = nextString();
480        skipWhitespace();
481        UBool haveComma = check(COMMA);
482        if (elem) {
483            array.add(elem, ec);
484            if (haveComma) {
485                inc();
486                mightHaveNext = TRUE;
487            }
488        } else if (haveComma) {
489            ERROR("Unexpected comma");
490        }
491    }
492    skipWhitespace();
493    if (!checkInc(CLOSE_ANGLE)) {
494        if (check(OPEN_ANGLE)) {
495            ERROR("Missing close angle bracket in inner array");
496        } else {
497            ERROR("Missing comma in inner array");
498        }
499    }
500
501    array.add(NULL, ec);
502    if (U_SUCCESS(ec)) {
503        if (requiredLength == -1) {
504            requiredLength = array.length() + 1;
505        } else if (array.length() != requiredLength) {
506            ec = U_ILLEGAL_ARGUMENT_ERROR;
507            ERROR("Array not of required length");
508        }
509
510        return (UChar**)array.release();
511    }
512    ERROR("Unknown Error");
513}
514
515UChar*
516LocDataParser::nextString() {
517    UChar* result = NULL;
518
519    skipWhitespace();
520    if (p < e) {
521        const UChar* terminators;
522        UChar c = *p;
523        UBool haveQuote = c == QUOTE || c == TICK;
524        if (haveQuote) {
525            inc();
526            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
527        } else {
528            terminators = NOQUOTE_STOPLIST;
529        }
530        UChar* start = p;
531        while (p < e && !inList(*p, terminators)) ++p;
532        if (p == e) {
533            ERROR("Unexpected end of data");
534        }
535
536        UChar x = *p;
537        if (p > start) {
538            ch = x;
539            *p = 0x0; // terminate by writing to data
540            result = start; // just point into data
541        }
542        if (haveQuote) {
543            if (x != c) {
544                ERROR("Missing matching quote");
545            } else if (p == start) {
546                ERROR("Empty string");
547            }
548            inc();
549        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
550            ERROR("Unexpected character in string");
551        }
552    }
553
554    // ok for there to be no next string
555    return result;
556}
557
558void LocDataParser::parseError(const char* EXPLANATION_ARG)
559{
560    if (!data) {
561        return;
562    }
563
564    const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
565    if (start < data) {
566        start = data;
567    }
568    for (UChar* x = p; --x >= start;) {
569        if (!*x) {
570            start = x+1;
571            break;
572        }
573    }
574    const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
575    if (limit > e) {
576        limit = e;
577    }
578    u_strncpy(pe.preContext, start, (int32_t)(p-start));
579    pe.preContext[p-start] = 0;
580    u_strncpy(pe.postContext, p, (int32_t)(limit-p));
581    pe.postContext[limit-p] = 0;
582    pe.offset = (int32_t)(p - data);
583
584#ifdef RBNF_DEBUG
585    fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
586
587    UnicodeString msg;
588    msg.append(start, p - start);
589    msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
590    msg.append(p, limit-p);
591    msg.append(UNICODE_STRING_SIMPLE("'"));
592
593    char buf[128];
594    int32_t len = msg.extract(0, msg.length(), buf, 128);
595    if (len >= 128) {
596        buf[127] = 0;
597    } else {
598        buf[len] = 0;
599    }
600    fprintf(stderr, "%s\n", buf);
601    fflush(stderr);
602#endif
603
604    uprv_free(data);
605    data = NULL;
606    p = NULL;
607    e = NULL;
608
609    if (U_SUCCESS(ec)) {
610        ec = U_PARSE_ERROR;
611    }
612}
613
614//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
615
616StringLocalizationInfo*
617StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
618    if (U_FAILURE(status)) {
619        return NULL;
620    }
621
622    int32_t len = info.length();
623    if (len == 0) {
624        return NULL; // no error;
625    }
626
627    UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
628    if (!p) {
629        status = U_MEMORY_ALLOCATION_ERROR;
630        return NULL;
631    }
632    info.extract(p, len, status);
633    if (!U_FAILURE(status)) {
634        status = U_ZERO_ERROR; // clear warning about non-termination
635    }
636
637    LocDataParser parser(perror, status);
638    return parser.parse(p, len);
639}
640
641StringLocalizationInfo::~StringLocalizationInfo() {
642    for (UChar*** p = (UChar***)data; *p; ++p) {
643        // remaining data is simply pointer into our unicode string data.
644        if (*p) uprv_free(*p);
645    }
646    if (data) uprv_free(data);
647    if (info) uprv_free(info);
648}
649
650
651const UChar*
652StringLocalizationInfo::getRuleSetName(int32_t index) const {
653    if (index >= 0 && index < getNumberOfRuleSets()) {
654        return data[0][index];
655    }
656    return NULL;
657}
658
659const UChar*
660StringLocalizationInfo::getLocaleName(int32_t index) const {
661    if (index >= 0 && index < getNumberOfDisplayLocales()) {
662        return data[index+1][0];
663    }
664    return NULL;
665}
666
667const UChar*
668StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
669    if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
670        ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
671        return data[localeIndex+1][ruleIndex+1];
672    }
673    return NULL;
674}
675
676// ----------
677
678RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
679                                             const UnicodeString& locs,
680                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
681  : ruleSets(NULL)
682  , ruleSetDescriptions(NULL)
683  , numRuleSets(0)
684  , defaultRuleSet(NULL)
685  , locale(alocale)
686  , collator(NULL)
687  , decimalFormatSymbols(NULL)
688  , defaultInfinityRule(NULL)
689  , defaultNaNRule(NULL)
690  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
691  , lenient(FALSE)
692  , lenientParseRules(NULL)
693  , localizations(NULL)
694  , capitalizationInfoSet(FALSE)
695  , capitalizationForUIListMenu(FALSE)
696  , capitalizationForStandAlone(FALSE)
697  , capitalizationBrkIter(NULL)
698{
699  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
700  init(description, locinfo, perror, status);
701}
702
703RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
704                                             const UnicodeString& locs,
705                                             UParseError& perror, UErrorCode& status)
706  : ruleSets(NULL)
707  , ruleSetDescriptions(NULL)
708  , numRuleSets(0)
709  , defaultRuleSet(NULL)
710  , locale(Locale::getDefault())
711  , collator(NULL)
712  , decimalFormatSymbols(NULL)
713  , defaultInfinityRule(NULL)
714  , defaultNaNRule(NULL)
715  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
716  , lenient(FALSE)
717  , lenientParseRules(NULL)
718  , localizations(NULL)
719  , capitalizationInfoSet(FALSE)
720  , capitalizationForUIListMenu(FALSE)
721  , capitalizationForStandAlone(FALSE)
722  , capitalizationBrkIter(NULL)
723{
724  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
725  init(description, locinfo, perror, status);
726}
727
728RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
729                                             LocalizationInfo* info,
730                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
731  : ruleSets(NULL)
732  , ruleSetDescriptions(NULL)
733  , numRuleSets(0)
734  , defaultRuleSet(NULL)
735  , locale(alocale)
736  , collator(NULL)
737  , decimalFormatSymbols(NULL)
738  , defaultInfinityRule(NULL)
739  , defaultNaNRule(NULL)
740  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
741  , lenient(FALSE)
742  , lenientParseRules(NULL)
743  , localizations(NULL)
744  , capitalizationInfoSet(FALSE)
745  , capitalizationForUIListMenu(FALSE)
746  , capitalizationForStandAlone(FALSE)
747  , capitalizationBrkIter(NULL)
748{
749  init(description, info, perror, status);
750}
751
752RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
753                         UParseError& perror,
754                         UErrorCode& status)
755  : ruleSets(NULL)
756  , ruleSetDescriptions(NULL)
757  , numRuleSets(0)
758  , defaultRuleSet(NULL)
759  , locale(Locale::getDefault())
760  , collator(NULL)
761  , decimalFormatSymbols(NULL)
762  , defaultInfinityRule(NULL)
763  , defaultNaNRule(NULL)
764  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
765  , lenient(FALSE)
766  , lenientParseRules(NULL)
767  , localizations(NULL)
768  , capitalizationInfoSet(FALSE)
769  , capitalizationForUIListMenu(FALSE)
770  , capitalizationForStandAlone(FALSE)
771  , capitalizationBrkIter(NULL)
772{
773    init(description, NULL, perror, status);
774}
775
776RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
777                         const Locale& aLocale,
778                         UParseError& perror,
779                         UErrorCode& status)
780  : ruleSets(NULL)
781  , ruleSetDescriptions(NULL)
782  , numRuleSets(0)
783  , defaultRuleSet(NULL)
784  , locale(aLocale)
785  , collator(NULL)
786  , decimalFormatSymbols(NULL)
787  , defaultInfinityRule(NULL)
788  , defaultNaNRule(NULL)
789  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
790  , lenient(FALSE)
791  , lenientParseRules(NULL)
792  , localizations(NULL)
793  , capitalizationInfoSet(FALSE)
794  , capitalizationForUIListMenu(FALSE)
795  , capitalizationForStandAlone(FALSE)
796  , capitalizationBrkIter(NULL)
797{
798    init(description, NULL, perror, status);
799}
800
801RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
802  : ruleSets(NULL)
803  , ruleSetDescriptions(NULL)
804  , numRuleSets(0)
805  , defaultRuleSet(NULL)
806  , locale(alocale)
807  , collator(NULL)
808  , decimalFormatSymbols(NULL)
809  , defaultInfinityRule(NULL)
810  , defaultNaNRule(NULL)
811  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
812  , lenient(FALSE)
813  , lenientParseRules(NULL)
814  , localizations(NULL)
815  , capitalizationInfoSet(FALSE)
816  , capitalizationForUIListMenu(FALSE)
817  , capitalizationForStandAlone(FALSE)
818  , capitalizationBrkIter(NULL)
819{
820    if (U_FAILURE(status)) {
821        return;
822    }
823
824    const char* rules_tag = "RBNFRules";
825    const char* fmt_tag = "";
826    switch (tag) {
827    case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
828    case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
829    case URBNF_DURATION: fmt_tag = "DurationRules"; break;
830    case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
831    default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
832    }
833
834    // TODO: read localization info from resource
835    LocalizationInfo* locinfo = NULL;
836
837    UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
838    if (U_SUCCESS(status)) {
839        setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
840                     ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
841
842        UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
843        if (U_FAILURE(status)) {
844            ures_close(nfrb);
845        }
846        UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
847        if (U_FAILURE(status)) {
848            ures_close(rbnfRules);
849            ures_close(nfrb);
850            return;
851        }
852
853        UnicodeString desc;
854        while (ures_hasNext(ruleSets)) {
855           desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
856        }
857        UParseError perror;
858
859        init(desc, locinfo, perror, status);
860
861        ures_close(ruleSets);
862        ures_close(rbnfRules);
863    }
864    ures_close(nfrb);
865}
866
867RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
868  : NumberFormat(rhs)
869  , ruleSets(NULL)
870  , ruleSetDescriptions(NULL)
871  , numRuleSets(0)
872  , defaultRuleSet(NULL)
873  , locale(rhs.locale)
874  , collator(NULL)
875  , decimalFormatSymbols(NULL)
876  , defaultInfinityRule(NULL)
877  , defaultNaNRule(NULL)
878  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
879  , lenient(FALSE)
880  , lenientParseRules(NULL)
881  , localizations(NULL)
882  , capitalizationInfoSet(FALSE)
883  , capitalizationForUIListMenu(FALSE)
884  , capitalizationForStandAlone(FALSE)
885  , capitalizationBrkIter(NULL)
886{
887    this->operator=(rhs);
888}
889
890// --------
891
892RuleBasedNumberFormat&
893RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
894{
895    if (this == &rhs) {
896        return *this;
897    }
898    NumberFormat::operator=(rhs);
899    UErrorCode status = U_ZERO_ERROR;
900    dispose();
901    locale = rhs.locale;
902    lenient = rhs.lenient;
903
904    UParseError perror;
905    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
906    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
907    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
908    setRoundingMode(rhs.getRoundingMode());
909
910    capitalizationInfoSet = rhs.capitalizationInfoSet;
911    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
912    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
913#if !UCONFIG_NO_BREAK_ITERATION
914    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
915#endif
916
917    return *this;
918}
919
920RuleBasedNumberFormat::~RuleBasedNumberFormat()
921{
922    dispose();
923}
924
925Format*
926RuleBasedNumberFormat::clone(void) const
927{
928    return new RuleBasedNumberFormat(*this);
929}
930
931UBool
932RuleBasedNumberFormat::operator==(const Format& other) const
933{
934    if (this == &other) {
935        return TRUE;
936    }
937
938    if (typeid(*this) == typeid(other)) {
939        const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
940        // test for capitalization info equality is adequately handled
941        // by the NumberFormat test for fCapitalizationContext equality;
942        // the info here is just derived from that.
943        if (locale == rhs.locale &&
944            lenient == rhs.lenient &&
945            (localizations == NULL
946                ? rhs.localizations == NULL
947                : (rhs.localizations == NULL
948                    ? FALSE
949                    : *localizations == rhs.localizations))) {
950
951            NFRuleSet** p = ruleSets;
952            NFRuleSet** q = rhs.ruleSets;
953            if (p == NULL) {
954                return q == NULL;
955            } else if (q == NULL) {
956                return FALSE;
957            }
958            while (*p && *q && (**p == **q)) {
959                ++p;
960                ++q;
961            }
962            return *q == NULL && *p == NULL;
963        }
964    }
965
966    return FALSE;
967}
968
969UnicodeString
970RuleBasedNumberFormat::getRules() const
971{
972    UnicodeString result;
973    if (ruleSets != NULL) {
974        for (NFRuleSet** p = ruleSets; *p; ++p) {
975            (*p)->appendRules(result);
976        }
977    }
978    return result;
979}
980
981UnicodeString
982RuleBasedNumberFormat::getRuleSetName(int32_t index) const
983{
984    if (localizations) {
985        UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
986        return string;
987    }
988    else if (ruleSets) {
989        UnicodeString result;
990        for (NFRuleSet** p = ruleSets; *p; ++p) {
991            NFRuleSet* rs = *p;
992            if (rs->isPublic()) {
993                if (--index == -1) {
994                    rs->getName(result);
995                    return result;
996                }
997            }
998        }
999    }
1000    UnicodeString empty;
1001    return empty;
1002}
1003
1004int32_t
1005RuleBasedNumberFormat::getNumberOfRuleSetNames() const
1006{
1007    int32_t result = 0;
1008    if (localizations) {
1009        result = localizations->getNumberOfRuleSets();
1010    }
1011    else if (ruleSets) {
1012        for (NFRuleSet** p = ruleSets; *p; ++p) {
1013            if ((**p).isPublic()) {
1014                ++result;
1015            }
1016        }
1017    }
1018    return result;
1019}
1020
1021int32_t
1022RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
1023    if (localizations) {
1024        return localizations->getNumberOfDisplayLocales();
1025    }
1026    return 0;
1027}
1028
1029Locale
1030RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
1031    if (U_FAILURE(status)) {
1032        return Locale("");
1033    }
1034    if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1035        UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1036        char buffer[64];
1037        int32_t cap = name.length() + 1;
1038        char* bp = buffer;
1039        if (cap > 64) {
1040            bp = (char *)uprv_malloc(cap);
1041            if (bp == NULL) {
1042                status = U_MEMORY_ALLOCATION_ERROR;
1043                return Locale("");
1044            }
1045        }
1046        name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1047        Locale retLocale(bp);
1048        if (bp != buffer) {
1049            uprv_free(bp);
1050        }
1051        return retLocale;
1052    }
1053    status = U_ILLEGAL_ARGUMENT_ERROR;
1054    Locale retLocale;
1055    return retLocale;
1056}
1057
1058UnicodeString
1059RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1060    if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1061        UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1062        int32_t len = localeName.length();
1063        UChar* localeStr = localeName.getBuffer(len + 1);
1064        while (len >= 0) {
1065            localeStr[len] = 0;
1066            int32_t ix = localizations->indexForLocale(localeStr);
1067            if (ix >= 0) {
1068                UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1069                return name;
1070            }
1071
1072            // trim trailing portion, skipping over ommitted sections
1073            do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1074            while (len > 0 && localeStr[len-1] == 0x005F) --len;
1075        }
1076        UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1077        return name;
1078    }
1079    UnicodeString bogus;
1080    bogus.setToBogus();
1081    return bogus;
1082}
1083
1084UnicodeString
1085RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1086    if (localizations) {
1087        UnicodeString rsn(ruleSetName);
1088        int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1089        return getRuleSetDisplayName(ix, localeParam);
1090    }
1091    UnicodeString bogus;
1092    bogus.setToBogus();
1093    return bogus;
1094}
1095
1096NFRuleSet*
1097RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1098{
1099    if (U_SUCCESS(status) && ruleSets) {
1100        for (NFRuleSet** p = ruleSets; *p; ++p) {
1101            NFRuleSet* rs = *p;
1102            if (rs->isNamed(name)) {
1103                return rs;
1104            }
1105        }
1106        status = U_ILLEGAL_ARGUMENT_ERROR;
1107    }
1108    return NULL;
1109}
1110
1111UnicodeString&
1112RuleBasedNumberFormat::format(const DigitList &number,
1113                      UnicodeString &appendTo,
1114                      FieldPositionIterator *posIter,
1115                      UErrorCode &status) const {
1116    if (U_FAILURE(status)) {
1117        return appendTo;
1118    }
1119    DigitList copy(number);
1120    if (copy.fitsIntoInt64(false)) {
1121        format(((DigitList &)number).getInt64(), appendTo, posIter, status);
1122    }
1123    else {
1124        copy.roundAtExponent(0);
1125        if (copy.fitsIntoInt64(false)) {
1126            format(number.getDouble(), appendTo, posIter, status);
1127        }
1128        else {
1129            // We're outside of our normal range that this framework can handle.
1130            // The DecimalFormat will provide more accurate results.
1131
1132            // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1133            NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1134            Formattable f;
1135            f.adoptDigitList(new DigitList(number));
1136            decimalFormat->format(f, appendTo, posIter, status);
1137            delete decimalFormat;
1138        }
1139    }
1140    return appendTo;
1141}
1142
1143
1144UnicodeString&
1145RuleBasedNumberFormat::format(const DigitList &number,
1146                     UnicodeString& appendTo,
1147                     FieldPosition& pos,
1148                     UErrorCode &status) const {
1149    if (U_FAILURE(status)) {
1150        return appendTo;
1151    }
1152    DigitList copy(number);
1153    if (copy.fitsIntoInt64(false)) {
1154        format(((DigitList &)number).getInt64(), appendTo, pos, status);
1155    }
1156    else {
1157        copy.roundAtExponent(0);
1158        if (copy.fitsIntoInt64(false)) {
1159            format(number.getDouble(), appendTo, pos, status);
1160        }
1161        else {
1162            // We're outside of our normal range that this framework can handle.
1163            // The DecimalFormat will provide more accurate results.
1164
1165            // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1166            NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1167            Formattable f;
1168            f.adoptDigitList(new DigitList(number));
1169            decimalFormat->format(f, appendTo, pos, status);
1170            delete decimalFormat;
1171        }
1172    }
1173    return appendTo;
1174}
1175
1176UnicodeString&
1177RuleBasedNumberFormat::format(int32_t number,
1178                              UnicodeString& toAppendTo,
1179                              FieldPosition& pos) const
1180{
1181    return format((int64_t)number, toAppendTo, pos);
1182}
1183
1184
1185UnicodeString&
1186RuleBasedNumberFormat::format(int64_t number,
1187                              UnicodeString& toAppendTo,
1188                              FieldPosition& /* pos */) const
1189{
1190    if (defaultRuleSet) {
1191        UErrorCode status = U_ZERO_ERROR;
1192        format(number, defaultRuleSet, toAppendTo, status);
1193    }
1194    return toAppendTo;
1195}
1196
1197
1198UnicodeString&
1199RuleBasedNumberFormat::format(double number,
1200                              UnicodeString& toAppendTo,
1201                              FieldPosition& /* pos */) const
1202{
1203    UErrorCode status = U_ZERO_ERROR;
1204    if (defaultRuleSet) {
1205        format(number, *defaultRuleSet, toAppendTo, status);
1206    }
1207    return toAppendTo;
1208}
1209
1210
1211UnicodeString&
1212RuleBasedNumberFormat::format(int32_t number,
1213                              const UnicodeString& ruleSetName,
1214                              UnicodeString& toAppendTo,
1215                              FieldPosition& pos,
1216                              UErrorCode& status) const
1217{
1218    return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1219}
1220
1221
1222UnicodeString&
1223RuleBasedNumberFormat::format(int64_t number,
1224                              const UnicodeString& ruleSetName,
1225                              UnicodeString& toAppendTo,
1226                              FieldPosition& /* pos */,
1227                              UErrorCode& status) const
1228{
1229    if (U_SUCCESS(status)) {
1230        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1231            // throw new IllegalArgumentException("Can't use internal rule set");
1232            status = U_ILLEGAL_ARGUMENT_ERROR;
1233        } else {
1234            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1235            if (rs) {
1236                format(number, rs, toAppendTo, status);
1237            }
1238        }
1239    }
1240    return toAppendTo;
1241}
1242
1243
1244UnicodeString&
1245RuleBasedNumberFormat::format(double number,
1246                              const UnicodeString& ruleSetName,
1247                              UnicodeString& toAppendTo,
1248                              FieldPosition& /* pos */,
1249                              UErrorCode& status) const
1250{
1251    if (U_SUCCESS(status)) {
1252        if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1253            // throw new IllegalArgumentException("Can't use internal rule set");
1254            status = U_ILLEGAL_ARGUMENT_ERROR;
1255        } else {
1256            NFRuleSet *rs = findRuleSet(ruleSetName, status);
1257            if (rs) {
1258                format(number, *rs, toAppendTo, status);
1259            }
1260        }
1261    }
1262    return toAppendTo;
1263}
1264
1265void
1266RuleBasedNumberFormat::format(double number,
1267                              NFRuleSet& rs,
1268                              UnicodeString& toAppendTo,
1269                              UErrorCode& status) const
1270{
1271    int32_t startPos = toAppendTo.length();
1272    if (getRoundingMode() != DecimalFormat::ERoundingMode::kRoundUnnecessary && !uprv_isNaN(number) && !uprv_isInfinite(number)) {
1273        DigitList digitList;
1274        digitList.set(number);
1275        digitList.setRoundingMode(getRoundingMode());
1276        digitList.roundFixedPoint(getMaximumFractionDigits());
1277        number = digitList.getDouble();
1278    }
1279    rs.format(number, toAppendTo, toAppendTo.length(), 0, status);
1280    adjustForCapitalizationContext(startPos, toAppendTo, status);
1281}
1282
1283/**
1284 * Bottleneck through which all the public format() methods
1285 * that take a long pass. By the time we get here, we know
1286 * which rule set we're using to do the formatting.
1287 * @param number The number to format
1288 * @param ruleSet The rule set to use to format the number
1289 * @return The text that resulted from formatting the number
1290 */
1291UnicodeString&
1292RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const
1293{
1294    // all API format() routines that take a double vector through
1295    // here.  We have these two identical functions-- one taking a
1296    // double and one taking a long-- the couple digits of precision
1297    // that long has but double doesn't (both types are 8 bytes long,
1298    // but double has to borrow some of the mantissa bits to hold
1299    // the exponent).
1300    // Create an empty string buffer where the result will
1301    // be built, and pass it to the rule set (along with an insertion
1302    // position of 0 and the number being formatted) to the rule set
1303    // for formatting
1304
1305    if (U_SUCCESS(status)) {
1306        if (number == U_INT64_MIN) {
1307            // We can't handle this value right now. Provide an accurate default value.
1308
1309            // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1310            NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1311            Formattable f;
1312            FieldPosition pos(FieldPosition::DONT_CARE);
1313            DigitList *digitList = new DigitList();
1314            digitList->set(number);
1315            f.adoptDigitList(digitList);
1316            decimalFormat->format(f, toAppendTo, pos, status);
1317            delete decimalFormat;
1318        }
1319        else {
1320            int32_t startPos = toAppendTo.length();
1321            ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1322            adjustForCapitalizationContext(startPos, toAppendTo, status);
1323        }
1324    }
1325    return toAppendTo;
1326}
1327
1328UnicodeString&
1329RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1330                                                      UnicodeString& currentResult,
1331                                                      UErrorCode& status) const
1332{
1333#if !UCONFIG_NO_BREAK_ITERATION
1334    UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1335    if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) {
1336        // capitalize currentResult according to context
1337        UChar32 ch = currentResult.char32At(0);
1338        if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL &&
1339              ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1340                (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1341                (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1342            // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1343            // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1344            currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1345        }
1346    }
1347#endif
1348    return currentResult;
1349}
1350
1351
1352void
1353RuleBasedNumberFormat::parse(const UnicodeString& text,
1354                             Formattable& result,
1355                             ParsePosition& parsePosition) const
1356{
1357    if (!ruleSets) {
1358        parsePosition.setErrorIndex(0);
1359        return;
1360    }
1361
1362    UnicodeString workingText(text, parsePosition.getIndex());
1363    ParsePosition workingPos(0);
1364
1365    ParsePosition high_pp(0);
1366    Formattable high_result;
1367
1368    for (NFRuleSet** p = ruleSets; *p; ++p) {
1369        NFRuleSet *rp = *p;
1370        if (rp->isPublic() && rp->isParseable()) {
1371            ParsePosition working_pp(0);
1372            Formattable working_result;
1373
1374            rp->parse(workingText, working_pp, kMaxDouble, working_result);
1375            if (working_pp.getIndex() > high_pp.getIndex()) {
1376                high_pp = working_pp;
1377                high_result = working_result;
1378
1379                if (high_pp.getIndex() == workingText.length()) {
1380                    break;
1381                }
1382            }
1383        }
1384    }
1385
1386    int32_t startIndex = parsePosition.getIndex();
1387    parsePosition.setIndex(startIndex + high_pp.getIndex());
1388    if (high_pp.getIndex() > 0) {
1389        parsePosition.setErrorIndex(-1);
1390    } else {
1391        int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1392        parsePosition.setErrorIndex(startIndex + errorIndex);
1393    }
1394    result = high_result;
1395    if (result.getType() == Formattable::kDouble) {
1396        double d = result.getDouble();
1397        if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1398            // Note: casting a double to an int when the double is too large or small
1399            //       to fit the destination is undefined behavior. The explicit range checks,
1400            //       above, are required. Just casting and checking the result value is undefined.
1401            result.setLong(static_cast<int32_t>(d));
1402        }
1403    }
1404}
1405
1406#if !UCONFIG_NO_COLLATION
1407
1408void
1409RuleBasedNumberFormat::setLenient(UBool enabled)
1410{
1411    lenient = enabled;
1412    if (!enabled && collator) {
1413        delete collator;
1414        collator = NULL;
1415    }
1416}
1417
1418#endif
1419
1420void
1421RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1422    if (U_SUCCESS(status)) {
1423        if (ruleSetName.isEmpty()) {
1424          if (localizations) {
1425              UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1426              defaultRuleSet = findRuleSet(name, status);
1427          } else {
1428            initDefaultRuleSet();
1429          }
1430        } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1431            status = U_ILLEGAL_ARGUMENT_ERROR;
1432        } else {
1433            NFRuleSet* result = findRuleSet(ruleSetName, status);
1434            if (result != NULL) {
1435                defaultRuleSet = result;
1436            }
1437        }
1438    }
1439}
1440
1441UnicodeString
1442RuleBasedNumberFormat::getDefaultRuleSetName() const {
1443    UnicodeString result;
1444    if (defaultRuleSet && defaultRuleSet->isPublic()) {
1445        defaultRuleSet->getName(result);
1446    } else {
1447        result.setToBogus();
1448    }
1449    return result;
1450}
1451
1452void
1453RuleBasedNumberFormat::initDefaultRuleSet()
1454{
1455    defaultRuleSet = NULL;
1456    if (!ruleSets) {
1457        return;
1458    }
1459
1460    const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1461    const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1462    const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1463
1464    NFRuleSet**p = &ruleSets[0];
1465    while (*p) {
1466        if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1467            defaultRuleSet = *p;
1468            return;
1469        } else {
1470            ++p;
1471        }
1472    }
1473
1474    defaultRuleSet = *--p;
1475    if (!defaultRuleSet->isPublic()) {
1476        while (p != ruleSets) {
1477            if ((*--p)->isPublic()) {
1478                defaultRuleSet = *p;
1479                break;
1480            }
1481        }
1482    }
1483}
1484
1485
// Builds the formatter's internal state from a rule description.
//
// Steps, in order:
//   1. zero pErr and create the decimal format symbols and the default
//      infinity / NaN rules;
//   2. take a reference on the localization info, if any;
//   3. strip whitespace after semicolons and pull out an optional
//      "%%lenient-parse:" section into lenientParseRules;
//   4. count the rule sets (separated by ";%") and allocate the
//      NULL-terminated ruleSets array and the temporary descriptions;
//   5. create one NFRuleSet per description, pick a provisional default rule
//      set, then parse the rules of every rule set;
//   6. check that every rule set named in the localization info exists (the
//      first one becomes the default rule set);
//   7. remember the original description.
//
// @param rules             the rule description text
// @param localizationInfos optional localization data; may be NULL
// @param pErr              parse error info; currently only zeroed
// @param status            error code; on failure the object may be left
//                          partially initialized (ruleSets can be NULL)
void
RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
                            UParseError& pErr, UErrorCode& status)
{
    // TODO: implement UParseError
    uprv_memset(&pErr, 0, sizeof(UParseError));
    // Note: this can leave ruleSets == NULL, so remaining code should check
    if (U_FAILURE(status)) {
        return;
    }

    initializeDecimalFormatSymbols(status);
    initializeDefaultInfinityRule(status);
    initializeDefaultNaNRule(status);
    if (U_FAILURE(status)) {
        return;
    }

    this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();

    // An empty description is reported as U_MEMORY_ALLOCATION_ERROR.
    UnicodeString description(rules);
    if (!description.length()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // start by stripping the trailing whitespace from all the rules
    // (this is all the whitespace following each semicolon in the
    // description).  This allows us to look for rule-set boundaries
    // by searching for ";%" without having to worry about whitespace
    // between the ; and the %
    stripWhitespace(description);

    // check to see if there's a set of lenient-parse rules.  If there
    // is, pull them out into our temporary holding place for them,
    // and delete them from the description before the real description-
    // parsing code sees them
    int32_t lp = description.indexOf(gLenientParse, -1, 0);
    if (lp != -1) {
        // we've got to make sure we're not in the middle of a rule
        // (where "%%lenient-parse" would actually get treated as
        // rule text)
        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
            // locate the beginning and end of the actual collation
            // rules (there may be whitespace between the name and
            // the first token in the description)
            int lpEnd = description.indexOf(gSemiPercent, 2, lp);

            // no following rule set: the section runs to the end of the text
            if (lpEnd == -1) {
                lpEnd = description.length() - 1;
            }
            int lpStart = lp + u_strlen(gLenientParse);
            while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
                ++lpStart;
            }

            // copy out the lenient-parse rules and delete them
            // from the description
            lenientParseRules = new UnicodeString();
            /* test for NULL */
            if (lenientParseRules == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);

            description.remove(lp, lpEnd + 1 - lp);
        }
    }

    // pre-flight parsing the description and count the number of
    // rule sets (";%" marks the end of one rule set and the beginning
    // of the next)
    numRuleSets = 0;
    for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
        ++numRuleSets;
        // step past the ';' just found so the next search moves forward
        ++p;
    }
    // one more rule set than there are ";%" separators
    ++numRuleSets;

    // our rule list is an array of the appropriate size
    // (one extra slot for the NULL terminator)
    ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
    /* test for NULL */
    if (ruleSets == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // NULL-fill every slot, including the terminator, so a partially built
    // array can still be walked
    for (int i = 0; i <= numRuleSets; ++i) {
        ruleSets[i] = NULL;
    }

    // divide up the descriptions into individual rule-set descriptions
    // and store them in a temporary array.  At each step, we also
    // new up a rule set, but all this does is initialize its name
    // and remove it from its description.  We can't actually parse
    // the rest of the descriptions and finish initializing everything
    // because we have to know the names and locations of all the rule
    // sets before we can actually set everything up
    if(!numRuleSets) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    ruleSetDescriptions = new UnicodeString[numRuleSets];
    if (ruleSetDescriptions == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    {
        int curRuleSet = 0;
        int32_t start = 0;
        for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
            // each description keeps its terminating ';' (hence p + 1)
            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
            ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
            if (ruleSets[curRuleSet] == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            ++curRuleSet;
            start = p + 1;
        }
        // the last rule set is whatever remains after the final ";%"
        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
        ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
        if (ruleSets[curRuleSet] == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    // now we can take note of the formatter's default rule set, which
    // is the last public rule set in the description (it's the last
    // rather than the first so that a user can create a new formatter
    // from an existing formatter and change its default behavior just
    // by appending more rule sets to the end)
    // NOTE(review): initDefaultRuleSet() performs the actual selection;
    // confirm the description above against its implementation.

    // {dlf} Initialization of a fraction rule set requires the default rule
    // set to be known.  For purposes of initialization, this is always the
    // last public rule set, no matter what the localization data says.
    initDefaultRuleSet();

    // finally, we can go back through the temporary descriptions
    // list and finish setting up the substructure (and we throw
    // away the temporary descriptions as we go)
    {
        for (int i = 0; i < numRuleSets; i++) {
            ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
        }
    }

    // Now that the rules are initialized, the 'real' default rule
    // set can be adjusted by the localization data.

    // The C code keeps the localization array as is, rather than building
    // a separate array of the public rule set names, so we have less work
    // to do here-- but we still need to check the names.

    if (localizationInfos) {
        // confirm the names, if any aren't in the rules, that's an error
        // it is ok if the rules contain public rule sets that are not in this list
        for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
            UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
            NFRuleSet* rs = findRuleSet(name, status);
            if (rs == NULL) {
                break; // error
            }
            // the first localized rule set becomes the default
            if (i == 0) {
                defaultRuleSet = rs;
            }
        }
    } else {
        defaultRuleSet = getDefaultRuleSet();
    }
    originalDescription = rules;
}
1662
1663// override the NumberFormat implementation in order to
1664// lazily initialize relevant items
1665void
1666RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1667{
1668    NumberFormat::setContext(value, status);
1669    if (U_SUCCESS(status)) {
1670    	if (!capitalizationInfoSet &&
1671    	        (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1672    	    initCapitalizationContextInfo(locale);
1673    	    capitalizationInfoSet = TRUE;
1674        }
1675#if !UCONFIG_NO_BREAK_ITERATION
1676        if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1677                (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1678                (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1679            UErrorCode status = U_ZERO_ERROR;
1680            capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1681            if (U_FAILURE(status)) {
1682                delete capitalizationBrkIter;
1683                capitalizationBrkIter = NULL;
1684            }
1685        }
1686#endif
1687    }
1688}
1689
1690void
1691RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1692{
1693#if !UCONFIG_NO_BREAK_ITERATION
1694    const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1695    UErrorCode status = U_ZERO_ERROR;
1696    UResourceBundle *rb = ures_open(NULL, localeID, &status);
1697    rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1698    rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1699    if (U_SUCCESS(status) && rb != NULL) {
1700        int32_t len = 0;
1701        const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1702        if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1703            capitalizationForUIListMenu = intVector[0];
1704            capitalizationForStandAlone = intVector[1];
1705        }
1706    }
1707    ures_close(rb);
1708#endif
1709}
1710
1711void
1712RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1713{
1714    // iterate through the characters...
1715    UnicodeString result;
1716
1717    int start = 0;
1718    while (start != -1 && start < description.length()) {
1719        // seek to the first non-whitespace character...
1720        while (start < description.length()
1721            && PatternProps::isWhiteSpace(description.charAt(start))) {
1722            ++start;
1723        }
1724
1725        // locate the next semicolon in the text and copy the text from
1726        // our current position up to that semicolon into the result
1727        int32_t p = description.indexOf(gSemiColon, start);
1728        if (p == -1) {
1729            // or if we don't find a semicolon, just copy the rest of
1730            // the string into the result
1731            result.append(description, start, description.length() - start);
1732            start = -1;
1733        }
1734        else if (p < description.length()) {
1735            result.append(description, start, p + 1 - start);
1736            start = p + 1;
1737        }
1738
1739        // when we get here, we've seeked off the end of the sring, and
1740        // we terminate the loop (we continue until *start* is -1 rather
1741        // than until *p* is -1, because otherwise we'd miss the last
1742        // rule in the description)
1743        else {
1744            start = -1;
1745        }
1746    }
1747
1748    description.setTo(result);
1749}
1750
1751
1752void
1753RuleBasedNumberFormat::dispose()
1754{
1755    if (ruleSets) {
1756        for (NFRuleSet** p = ruleSets; *p; ++p) {
1757            delete *p;
1758        }
1759        uprv_free(ruleSets);
1760        ruleSets = NULL;
1761    }
1762
1763    if (ruleSetDescriptions) {
1764        delete [] ruleSetDescriptions;
1765        ruleSetDescriptions = NULL;
1766    }
1767
1768#if !UCONFIG_NO_COLLATION
1769    delete collator;
1770#endif
1771    collator = NULL;
1772
1773    delete decimalFormatSymbols;
1774    decimalFormatSymbols = NULL;
1775
1776    delete defaultInfinityRule;
1777    defaultInfinityRule = NULL;
1778
1779    delete defaultNaNRule;
1780    defaultNaNRule = NULL;
1781
1782    delete lenientParseRules;
1783    lenientParseRules = NULL;
1784
1785#if !UCONFIG_NO_BREAK_ITERATION
1786    delete capitalizationBrkIter;
1787    capitalizationBrkIter = NULL;
1788#endif
1789
1790    if (localizations) {
1791        localizations = localizations->unref();
1792    }
1793}
1794
1795
1796//-----------------------------------------------------------------------
1797// package-internal API
1798//-----------------------------------------------------------------------
1799
1800/**
1801 * Returns the collator to use for lenient parsing.  The collator is lazily created:
1802 * this function creates it the first time it's called.
1803 * @return The collator to use for lenient parsing, or null if lenient parsing
1804 * is turned off.
1805*/
1806const RuleBasedCollator*
1807RuleBasedNumberFormat::getCollator() const
1808{
1809#if !UCONFIG_NO_COLLATION
1810    if (!ruleSets) {
1811        return NULL;
1812    }
1813
1814    // lazy-evaluate the collator
1815    if (collator == NULL && lenient) {
1816        // create a default collator based on the formatter's locale,
1817        // then pull out that collator's rules, append any additional
1818        // rules specified in the description, and create a _new_
1819        // collator based on the combinaiton of those rules
1820
1821        UErrorCode status = U_ZERO_ERROR;
1822
1823        Collator* temp = Collator::createInstance(locale, status);
1824        RuleBasedCollator* newCollator;
1825        if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1826            if (lenientParseRules) {
1827                UnicodeString rules(newCollator->getRules());
1828                rules.append(*lenientParseRules);
1829
1830                newCollator = new RuleBasedCollator(rules, status);
1831                // Exit if newCollator could not be created.
1832                if (newCollator == NULL) {
1833                    return NULL;
1834                }
1835            } else {
1836                temp = NULL;
1837            }
1838            if (U_SUCCESS(status)) {
1839                newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1840                // cast away const
1841                ((RuleBasedNumberFormat*)this)->collator = newCollator;
1842            } else {
1843                delete newCollator;
1844            }
1845        }
1846        delete temp;
1847    }
1848#endif
1849
1850    // if lenient-parse mode is off, this will be null
1851    // (see setLenientParseMode())
1852    return collator;
1853}
1854
1855
1856DecimalFormatSymbols*
1857RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1858{
1859    // lazy-evaluate the DecimalFormatSymbols object.  This object
1860    // is shared by all DecimalFormat instances belonging to this
1861    // formatter
1862    if (decimalFormatSymbols == NULL) {
1863        DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1864        if (U_SUCCESS(status)) {
1865            decimalFormatSymbols = temp;
1866        }
1867        else {
1868            delete temp;
1869        }
1870    }
1871    return decimalFormatSymbols;
1872}
1873
1874/**
1875 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1876 * instances owned by this formatter.
1877*/
1878const DecimalFormatSymbols*
1879RuleBasedNumberFormat::getDecimalFormatSymbols() const
1880{
1881    return decimalFormatSymbols;
1882}
1883
1884NFRule*
1885RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1886{
1887    if (U_FAILURE(status)) {
1888        return NULL;
1889    }
1890    if (defaultInfinityRule == NULL) {
1891        UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1892        rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1893        NFRule* temp = new NFRule(this, rule, status);
1894        if (U_SUCCESS(status)) {
1895            defaultInfinityRule = temp;
1896        }
1897        else {
1898            delete temp;
1899        }
1900    }
1901    return defaultInfinityRule;
1902}
1903
1904const NFRule*
1905RuleBasedNumberFormat::getDefaultInfinityRule() const
1906{
1907    return defaultInfinityRule;
1908}
1909
1910NFRule*
1911RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1912{
1913    if (U_FAILURE(status)) {
1914        return NULL;
1915    }
1916    if (defaultNaNRule == NULL) {
1917        UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1918        rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1919        NFRule* temp = new NFRule(this, rule, status);
1920        if (U_SUCCESS(status)) {
1921            defaultNaNRule = temp;
1922        }
1923        else {
1924            delete temp;
1925        }
1926    }
1927    return defaultNaNRule;
1928}
1929
1930const NFRule*
1931RuleBasedNumberFormat::getDefaultNaNRule() const
1932{
1933    return defaultNaNRule;
1934}
1935
1936// De-owning the current localized symbols and adopt the new symbols.
1937void
1938RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1939{
1940    if (symbolsToAdopt == NULL) {
1941        return; // do not allow caller to set decimalFormatSymbols to NULL
1942    }
1943
1944    if (decimalFormatSymbols != NULL) {
1945        delete decimalFormatSymbols;
1946    }
1947
1948    decimalFormatSymbols = symbolsToAdopt;
1949
1950    {
1951        // Apply the new decimalFormatSymbols by reparsing the rulesets
1952        UErrorCode status = U_ZERO_ERROR;
1953
1954        delete defaultInfinityRule;
1955        defaultInfinityRule = NULL;
1956        initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1957
1958        delete defaultNaNRule;
1959        defaultNaNRule = NULL;
1960        initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1961
1962        if (ruleSets) {
1963            for (int32_t i = 0; i < numRuleSets; i++) {
1964                ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1965            }
1966        }
1967    }
1968}
1969
1970// Setting the symbols is equlivalent to adopting a newly created localized symbols.
1971void
1972RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1973{
1974    adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1975}
1976
1977PluralFormat *
1978RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1979                                          const UnicodeString &pattern,
1980                                          UErrorCode& status) const
1981{
1982    return new PluralFormat(locale, pluralType, pattern, status);
1983}
1984
1985/**
1986 * Get the rounding mode.
1987 * @return A rounding mode
1988 */
1989DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const {
1990    return roundingMode;
1991}
1992
1993/**
1994 * Set the rounding mode.  This has no effect unless the rounding
1995 * increment is greater than zero.
1996 * @param roundingMode A rounding mode
1997 */
1998void RuleBasedNumberFormat::setRoundingMode(DecimalFormat::ERoundingMode roundingMode) {
1999    this->roundingMode = roundingMode;
2000}
2001
2002U_NAMESPACE_END
2003
2004/* U_HAVE_RBNF */
2005#endif
2006