1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* Copyright (C) 1997-2015, International Business Machines Corporation 6* and others. All Rights Reserved. 7******************************************************************************* 8*/ 9 10#include "unicode/utypes.h" 11#include "utypeinfo.h" // for 'typeid' to work 12 13#include "unicode/rbnf.h" 14 15#if U_HAVE_RBNF 16 17#include "unicode/normlzr.h" 18#include "unicode/plurfmt.h" 19#include "unicode/tblcoll.h" 20#include "unicode/uchar.h" 21#include "unicode/ucol.h" 22#include "unicode/uloc.h" 23#include "unicode/unum.h" 24#include "unicode/ures.h" 25#include "unicode/ustring.h" 26#include "unicode/utf16.h" 27#include "unicode/udata.h" 28#include "unicode/udisplaycontext.h" 29#include "unicode/brkiter.h" 30#include "unicode/ucasemap.h" 31 32#include "cmemory.h" 33#include "cstring.h" 34#include "patternprops.h" 35#include "uresimp.h" 36#include "nfrs.h" 37#include "digitlst.h" 38 39// debugging 40// #define RBNF_DEBUG 41 42#ifdef RBNF_DEBUG 43#include <stdio.h> 44#endif 45 46#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" 47 48static const UChar gPercentPercent[] = 49{ 50 0x25, 0x25, 0 51}; /* "%%" */ 52 53// All urbnf objects are created through openRules, so we init all of the 54// Unicode string constants required by rbnf, nfrs, or nfr here. 
55static const UChar gLenientParse[] = 56{ 57 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 58}; /* "%%lenient-parse:" */ 59static const UChar gSemiColon = 0x003B; 60static const UChar gSemiPercent[] = 61{ 62 0x3B, 0x25, 0 63}; /* ";%" */ 64 65#define kSomeNumberOfBitsDiv2 22 66#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 67#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 68 69U_NAMESPACE_BEGIN 70 71UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 72 73/* 74This is a utility class. It does not use ICU's RTTI. 75If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 76Please make sure that intltest passes on Windows in Release mode, 77since the string pooling per compilation unit will mess up how RTTI works. 78The RTTI code was also removed due to lack of code coverage. 79*/ 80class LocalizationInfo : public UMemory { 81protected: 82 virtual ~LocalizationInfo(); 83 uint32_t refcount; 84 85public: 86 LocalizationInfo() : refcount(0) {} 87 88 LocalizationInfo* ref(void) { 89 ++refcount; 90 return this; 91 } 92 93 LocalizationInfo* unref(void) { 94 if (refcount && --refcount == 0) { 95 delete this; 96 } 97 return NULL; 98 } 99 100 virtual UBool operator==(const LocalizationInfo* rhs) const; 101 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 102 103 virtual int32_t getNumberOfRuleSets(void) const = 0; 104 virtual const UChar* getRuleSetName(int32_t index) const = 0; 105 virtual int32_t getNumberOfDisplayLocales(void) const = 0; 106 virtual const UChar* getLocaleName(int32_t index) const = 0; 107 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 108 109 virtual int32_t indexForLocale(const UChar* locale) const; 110 virtual int32_t indexForRuleSet(const UChar* ruleset) const; 111 112// virtual UClassID getDynamicClassID() const = 0; 113// static UClassID getStaticClassID(void); 114}; 
115 116LocalizationInfo::~LocalizationInfo() {} 117 118//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 119 120// if both strings are NULL, this returns TRUE 121static UBool 122streq(const UChar* lhs, const UChar* rhs) { 123 if (rhs == lhs) { 124 return TRUE; 125 } 126 if (lhs && rhs) { 127 return u_strcmp(lhs, rhs) == 0; 128 } 129 return FALSE; 130} 131 132UBool 133LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 134 if (rhs) { 135 if (this == rhs) { 136 return TRUE; 137 } 138 139 int32_t rsc = getNumberOfRuleSets(); 140 if (rsc == rhs->getNumberOfRuleSets()) { 141 for (int i = 0; i < rsc; ++i) { 142 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 143 return FALSE; 144 } 145 } 146 int32_t dlc = getNumberOfDisplayLocales(); 147 if (dlc == rhs->getNumberOfDisplayLocales()) { 148 for (int i = 0; i < dlc; ++i) { 149 const UChar* locale = getLocaleName(i); 150 int32_t ix = rhs->indexForLocale(locale); 151 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 152 if (!streq(locale, rhs->getLocaleName(ix))) { 153 return FALSE; 154 } 155 for (int j = 0; j < rsc; ++j) { 156 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 157 return FALSE; 158 } 159 } 160 } 161 return TRUE; 162 } 163 } 164 } 165 return FALSE; 166} 167 168int32_t 169LocalizationInfo::indexForLocale(const UChar* locale) const { 170 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 171 if (streq(locale, getLocaleName(i))) { 172 return i; 173 } 174 } 175 return -1; 176} 177 178int32_t 179LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { 180 if (ruleset) { 181 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 182 if (streq(ruleset, getRuleSetName(i))) { 183 return i; 184 } 185 } 186 } 187 return -1; 188} 189 190 191typedef void (*Fn_Deleter)(void*); 192 193class VArray { 194 void** buf; 195 int32_t cap; 196 int32_t size; 197 Fn_Deleter deleter; 198public: 199 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) 
{} 200 201 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} 202 203 ~VArray() { 204 if (deleter) { 205 for (int i = 0; i < size; ++i) { 206 (*deleter)(buf[i]); 207 } 208 } 209 uprv_free(buf); 210 } 211 212 int32_t length() { 213 return size; 214 } 215 216 void add(void* elem, UErrorCode& status) { 217 if (U_SUCCESS(status)) { 218 if (size == cap) { 219 if (cap == 0) { 220 cap = 1; 221 } else if (cap < 256) { 222 cap *= 2; 223 } else { 224 cap += 256; 225 } 226 if (buf == NULL) { 227 buf = (void**)uprv_malloc(cap * sizeof(void*)); 228 } else { 229 buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); 230 } 231 if (buf == NULL) { 232 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 233 status = U_MEMORY_ALLOCATION_ERROR; 234 return; 235 } 236 void* start = &buf[size]; 237 size_t count = (cap - size) * sizeof(void*); 238 uprv_memset(start, 0, count); // fill with nulls, just because 239 } 240 buf[size++] = elem; 241 } 242 } 243 244 void** release(void) { 245 void** result = buf; 246 buf = NULL; 247 cap = 0; 248 size = 0; 249 return result; 250 } 251}; 252 253class LocDataParser; 254 255class StringLocalizationInfo : public LocalizationInfo { 256 UChar* info; 257 UChar*** data; 258 int32_t numRuleSets; 259 int32_t numLocales; 260 261friend class LocDataParser; 262 263 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) 264 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 265 { 266 } 267 268public: 269 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 270 271 virtual ~StringLocalizationInfo(); 272 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } 273 virtual const UChar* getRuleSetName(int32_t index) const; 274 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } 275 virtual const UChar* getLocaleName(int32_t index) const; 276 virtual const UChar* 
getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

//    virtual UClassID getDynamicClassID() const;
//    static UClassID getStaticClassID(void);

private:
    // NOTE(review): declared here, but no definition is visible in this part of the file.
    void init(UErrorCode& status) const;
};


// Characters that have syntactic meaning in a localization-data string.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c,      /* ',' */
    TICK = 0x0027,       /* '\'' */
    QUOTE = 0x0022,      /* '"' */
    SPACE = 0x0020       /* ' ' */
};

/**
 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
 *
 * The parser works in place on a caller-supplied UChar buffer: nextString()
 * terminates each string by overwriting the character that follows it with
 * NUL, and remembers the overwritten character in `ch` so the next syntax
 * check can still see it.
 */
class LocDataParser {
    UChar* data;        // start of the buffer being parsed; NULL when no parse is active
    const UChar* e;     // one past the last character of the buffer
    UChar* p;           // current read position
    UChar ch;           // character overwritten at *p, or 0xffff when *p is intact
    UParseError& pe;    // receives error context
    UErrorCode& ec;     // receives the error code

public:
    LocDataParser(UParseError& parseError, UErrorCode& status)
        : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
    ~LocDataParser() {}

    /*
    * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
    * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
    */
    StringLocalizationInfo* parse(UChar* data, int32_t len);

private:

    // Advances one character and forgets any remembered (overwritten) character.
    inline void inc(void) {
        ++p;
        ch = 0xffff;
    }
    // If the current character is c, consumes it and returns TRUE.
    inline UBool checkInc(UChar c) {
        if (p < e && (ch == c || *p == c)) {
            inc();
            return TRUE;
        }
        return FALSE;
    }
    // Returns TRUE if the current character is c, without consuming it.
    inline UBool check(UChar c) {
        return p < e && (ch == c || *p == c);
    }
    // Skips pattern white space, looking at the remembered character when there is one.
    inline void skipWhitespace(void) {
        while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ?
ch : *p)) { 335 inc(); 336 } 337 } 338 inline UBool inList(UChar c, const UChar* list) const { 339 if (*list == SPACE && PatternProps::isWhiteSpace(c)) { 340 return TRUE; 341 } 342 while (*list && *list != c) { 343 ++list; 344 } 345 return *list == c; 346 } 347 void parseError(const char* msg); 348 349 StringLocalizationInfo* doParse(void); 350 351 UChar** nextArray(int32_t& requiredLength); 352 UChar* nextString(void); 353}; 354 355#ifdef RBNF_DEBUG 356#define ERROR(msg) parseError(msg); return NULL; 357#define EXPLANATION_ARG explanationArg 358#else 359#define ERROR(msg) parseError(NULL); return NULL; 360#define EXPLANATION_ARG 361#endif 362 363 364static const UChar DQUOTE_STOPLIST[] = { 365 QUOTE, 0 366}; 367 368static const UChar SQUOTE_STOPLIST[] = { 369 TICK, 0 370}; 371 372static const UChar NOQUOTE_STOPLIST[] = { 373 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 374}; 375 376static void 377DeleteFn(void* p) { 378 uprv_free(p); 379} 380 381StringLocalizationInfo* 382LocDataParser::parse(UChar* _data, int32_t len) { 383 if (U_FAILURE(ec)) { 384 if (_data) uprv_free(_data); 385 return NULL; 386 } 387 388 pe.line = 0; 389 pe.offset = -1; 390 pe.postContext[0] = 0; 391 pe.preContext[0] = 0; 392 393 if (_data == NULL) { 394 ec = U_ILLEGAL_ARGUMENT_ERROR; 395 return NULL; 396 } 397 398 if (len <= 0) { 399 ec = U_ILLEGAL_ARGUMENT_ERROR; 400 uprv_free(_data); 401 return NULL; 402 } 403 404 data = _data; 405 e = data + len; 406 p = _data; 407 ch = 0xffff; 408 409 return doParse(); 410} 411 412 413StringLocalizationInfo* 414LocDataParser::doParse(void) { 415 skipWhitespace(); 416 if (!checkInc(OPEN_ANGLE)) { 417 ERROR("Missing open angle"); 418 } else { 419 VArray array(DeleteFn); 420 UBool mightHaveNext = TRUE; 421 int32_t requiredLength = -1; 422 while (mightHaveNext) { 423 mightHaveNext = FALSE; 424 UChar** elem = nextArray(requiredLength); 425 skipWhitespace(); 426 UBool haveComma = check(COMMA); 427 if (elem) { 428 array.add(elem, ec); 429 if (haveComma) 
{ 430 inc(); 431 mightHaveNext = TRUE; 432 } 433 } else if (haveComma) { 434 ERROR("Unexpected character"); 435 } 436 } 437 438 skipWhitespace(); 439 if (!checkInc(CLOSE_ANGLE)) { 440 if (check(OPEN_ANGLE)) { 441 ERROR("Missing comma in outer array"); 442 } else { 443 ERROR("Missing close angle bracket in outer array"); 444 } 445 } 446 447 skipWhitespace(); 448 if (p != e) { 449 ERROR("Extra text after close of localization data"); 450 } 451 452 array.add(NULL, ec); 453 if (U_SUCCESS(ec)) { 454 int32_t numLocs = array.length() - 2; // subtract first, NULL 455 UChar*** result = (UChar***)array.release(); 456 457 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL 458 } 459 } 460 461 ERROR("Unknown error"); 462} 463 464UChar** 465LocDataParser::nextArray(int32_t& requiredLength) { 466 if (U_FAILURE(ec)) { 467 return NULL; 468 } 469 470 skipWhitespace(); 471 if (!checkInc(OPEN_ANGLE)) { 472 ERROR("Missing open angle"); 473 } 474 475 VArray array; 476 UBool mightHaveNext = TRUE; 477 while (mightHaveNext) { 478 mightHaveNext = FALSE; 479 UChar* elem = nextString(); 480 skipWhitespace(); 481 UBool haveComma = check(COMMA); 482 if (elem) { 483 array.add(elem, ec); 484 if (haveComma) { 485 inc(); 486 mightHaveNext = TRUE; 487 } 488 } else if (haveComma) { 489 ERROR("Unexpected comma"); 490 } 491 } 492 skipWhitespace(); 493 if (!checkInc(CLOSE_ANGLE)) { 494 if (check(OPEN_ANGLE)) { 495 ERROR("Missing close angle bracket in inner array"); 496 } else { 497 ERROR("Missing comma in inner array"); 498 } 499 } 500 501 array.add(NULL, ec); 502 if (U_SUCCESS(ec)) { 503 if (requiredLength == -1) { 504 requiredLength = array.length() + 1; 505 } else if (array.length() != requiredLength) { 506 ec = U_ILLEGAL_ARGUMENT_ERROR; 507 ERROR("Array not of required length"); 508 } 509 510 return (UChar**)array.release(); 511 } 512 ERROR("Unknown Error"); 513} 514 515UChar* 516LocDataParser::nextString() { 517 UChar* result = NULL; 518 519 
skipWhitespace(); 520 if (p < e) { 521 const UChar* terminators; 522 UChar c = *p; 523 UBool haveQuote = c == QUOTE || c == TICK; 524 if (haveQuote) { 525 inc(); 526 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 527 } else { 528 terminators = NOQUOTE_STOPLIST; 529 } 530 UChar* start = p; 531 while (p < e && !inList(*p, terminators)) ++p; 532 if (p == e) { 533 ERROR("Unexpected end of data"); 534 } 535 536 UChar x = *p; 537 if (p > start) { 538 ch = x; 539 *p = 0x0; // terminate by writing to data 540 result = start; // just point into data 541 } 542 if (haveQuote) { 543 if (x != c) { 544 ERROR("Missing matching quote"); 545 } else if (p == start) { 546 ERROR("Empty string"); 547 } 548 inc(); 549 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 550 ERROR("Unexpected character in string"); 551 } 552 } 553 554 // ok for there to be no next string 555 return result; 556} 557 558void LocDataParser::parseError(const char* EXPLANATION_ARG) 559{ 560 if (!data) { 561 return; 562 } 563 564 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; 565 if (start < data) { 566 start = data; 567 } 568 for (UChar* x = p; --x >= start;) { 569 if (!*x) { 570 start = x+1; 571 break; 572 } 573 } 574 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; 575 if (limit > e) { 576 limit = e; 577 } 578 u_strncpy(pe.preContext, start, (int32_t)(p-start)); 579 pe.preContext[p-start] = 0; 580 u_strncpy(pe.postContext, p, (int32_t)(limit-p)); 581 pe.postContext[limit-p] = 0; 582 pe.offset = (int32_t)(p - data); 583 584#ifdef RBNF_DEBUG 585 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data); 586 587 UnicodeString msg; 588 msg.append(start, p - start); 589 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ 590 msg.append(p, limit-p); 591 msg.append(UNICODE_STRING_SIMPLE("'")); 592 593 char buf[128]; 594 int32_t len = msg.extract(0, msg.length(), buf, 128); 595 if (len >= 128) { 596 buf[127] = 0; 597 } else { 598 buf[len] = 0; 599 } 600 fprintf(stderr, "%s\n", 
buf); 601 fflush(stderr); 602#endif 603 604 uprv_free(data); 605 data = NULL; 606 p = NULL; 607 e = NULL; 608 609 if (U_SUCCESS(ec)) { 610 ec = U_PARSE_ERROR; 611 } 612} 613 614//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 615 616StringLocalizationInfo* 617StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 618 if (U_FAILURE(status)) { 619 return NULL; 620 } 621 622 int32_t len = info.length(); 623 if (len == 0) { 624 return NULL; // no error; 625 } 626 627 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); 628 if (!p) { 629 status = U_MEMORY_ALLOCATION_ERROR; 630 return NULL; 631 } 632 info.extract(p, len, status); 633 if (!U_FAILURE(status)) { 634 status = U_ZERO_ERROR; // clear warning about non-termination 635 } 636 637 LocDataParser parser(perror, status); 638 return parser.parse(p, len); 639} 640 641StringLocalizationInfo::~StringLocalizationInfo() { 642 for (UChar*** p = (UChar***)data; *p; ++p) { 643 // remaining data is simply pointer into our unicode string data. 
644 if (*p) uprv_free(*p); 645 } 646 if (data) uprv_free(data); 647 if (info) uprv_free(info); 648} 649 650 651const UChar* 652StringLocalizationInfo::getRuleSetName(int32_t index) const { 653 if (index >= 0 && index < getNumberOfRuleSets()) { 654 return data[0][index]; 655 } 656 return NULL; 657} 658 659const UChar* 660StringLocalizationInfo::getLocaleName(int32_t index) const { 661 if (index >= 0 && index < getNumberOfDisplayLocales()) { 662 return data[index+1][0]; 663 } 664 return NULL; 665} 666 667const UChar* 668StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 669 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 670 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 671 return data[localeIndex+1][ruleIndex+1]; 672 } 673 return NULL; 674} 675 676// ---------- 677 678RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 679 const UnicodeString& locs, 680 const Locale& alocale, UParseError& perror, UErrorCode& status) 681 : ruleSets(NULL) 682 , ruleSetDescriptions(NULL) 683 , numRuleSets(0) 684 , defaultRuleSet(NULL) 685 , locale(alocale) 686 , collator(NULL) 687 , decimalFormatSymbols(NULL) 688 , defaultInfinityRule(NULL) 689 , defaultNaNRule(NULL) 690 , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 691 , lenient(FALSE) 692 , lenientParseRules(NULL) 693 , localizations(NULL) 694 , capitalizationInfoSet(FALSE) 695 , capitalizationForUIListMenu(FALSE) 696 , capitalizationForStandAlone(FALSE) 697 , capitalizationBrkIter(NULL) 698{ 699 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 700 init(description, locinfo, perror, status); 701} 702 703RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 704 const UnicodeString& locs, 705 UParseError& perror, UErrorCode& status) 706 : ruleSets(NULL) 707 , ruleSetDescriptions(NULL) 708 , numRuleSets(0) 709 , defaultRuleSet(NULL) 710 , locale(Locale::getDefault()) 711 
, collator(NULL) 712 , decimalFormatSymbols(NULL) 713 , defaultInfinityRule(NULL) 714 , defaultNaNRule(NULL) 715 , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 716 , lenient(FALSE) 717 , lenientParseRules(NULL) 718 , localizations(NULL) 719 , capitalizationInfoSet(FALSE) 720 , capitalizationForUIListMenu(FALSE) 721 , capitalizationForStandAlone(FALSE) 722 , capitalizationBrkIter(NULL) 723{ 724 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 725 init(description, locinfo, perror, status); 726} 727 728RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 729 LocalizationInfo* info, 730 const Locale& alocale, UParseError& perror, UErrorCode& status) 731 : ruleSets(NULL) 732 , ruleSetDescriptions(NULL) 733 , numRuleSets(0) 734 , defaultRuleSet(NULL) 735 , locale(alocale) 736 , collator(NULL) 737 , decimalFormatSymbols(NULL) 738 , defaultInfinityRule(NULL) 739 , defaultNaNRule(NULL) 740 , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 741 , lenient(FALSE) 742 , lenientParseRules(NULL) 743 , localizations(NULL) 744 , capitalizationInfoSet(FALSE) 745 , capitalizationForUIListMenu(FALSE) 746 , capitalizationForStandAlone(FALSE) 747 , capitalizationBrkIter(NULL) 748{ 749 init(description, info, perror, status); 750} 751 752RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 753 UParseError& perror, 754 UErrorCode& status) 755 : ruleSets(NULL) 756 , ruleSetDescriptions(NULL) 757 , numRuleSets(0) 758 , defaultRuleSet(NULL) 759 , locale(Locale::getDefault()) 760 , collator(NULL) 761 , decimalFormatSymbols(NULL) 762 , defaultInfinityRule(NULL) 763 , defaultNaNRule(NULL) 764 , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 765 , lenient(FALSE) 766 , lenientParseRules(NULL) 767 , localizations(NULL) 768 , capitalizationInfoSet(FALSE) 769 , capitalizationForUIListMenu(FALSE) 770 , capitalizationForStandAlone(FALSE) 771 , capitalizationBrkIter(NULL) 
{
    // No localization data for this overload.
    init(description, NULL, perror, status);
}

/**
 * Builds a formatter from a rule description for an explicit locale, with no
 * localization data.
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const Locale& aLocale,
                                             UParseError& perror,
                                             UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(aLocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    init(description, NULL, perror, status);
}

/**
 * Builds one of the predefined formatters for a locale by loading its rules
 * from the "rbnf" resource tree.
 *
 * @param tag     which predefined rule group to load; any value other than
 *                the four handled below sets U_ILLEGAL_ARGUMENT_ERROR.
 * @param alocale locale whose rules are wanted.
 * @param status  in/out error code; nothing is done if it already indicates
 *                failure on entry.
 */
RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    if (U_FAILURE(status)) {
        return;
    }

    // rules_tag names the table inside the bundle; fmt_tag names the entry
    // within that table that corresponds to the requested tag.
    const char* rules_tag = "RBNFRules";
    const char* fmt_tag = "";
    switch (tag) {
    case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
    case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
    case URBNF_DURATION: fmt_tag = "DurationRules"; break;
    case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
    default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
    }

    // TODO: read localization info from resource
    LocalizationInfo* locinfo =
NULL; 836 837 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 838 if (U_SUCCESS(status)) { 839 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 840 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 841 842 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); 843 if (U_FAILURE(status)) { 844 ures_close(nfrb); 845 } 846 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); 847 if (U_FAILURE(status)) { 848 ures_close(rbnfRules); 849 ures_close(nfrb); 850 return; 851 } 852 853 UnicodeString desc; 854 while (ures_hasNext(ruleSets)) { 855 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status)); 856 } 857 UParseError perror; 858 859 init(desc, locinfo, perror, status); 860 861 ures_close(ruleSets); 862 ures_close(rbnfRules); 863 } 864 ures_close(nfrb); 865} 866 867RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 868 : NumberFormat(rhs) 869 , ruleSets(NULL) 870 , ruleSetDescriptions(NULL) 871 , numRuleSets(0) 872 , defaultRuleSet(NULL) 873 , locale(rhs.locale) 874 , collator(NULL) 875 , decimalFormatSymbols(NULL) 876 , defaultInfinityRule(NULL) 877 , defaultNaNRule(NULL) 878 , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) 879 , lenient(FALSE) 880 , lenientParseRules(NULL) 881 , localizations(NULL) 882 , capitalizationInfoSet(FALSE) 883 , capitalizationForUIListMenu(FALSE) 884 , capitalizationForStandAlone(FALSE) 885 , capitalizationBrkIter(NULL) 886{ 887 this->operator=(rhs); 888} 889 890// -------- 891 892RuleBasedNumberFormat& 893RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 894{ 895 if (this == &rhs) { 896 return *this; 897 } 898 NumberFormat::operator=(rhs); 899 UErrorCode status = U_ZERO_ERROR; 900 dispose(); 901 locale = rhs.locale; 902 lenient = rhs.lenient; 903 904 UParseError perror; 905 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols()); 906 
init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    // Rebuilt from rhs's original rule text; the localization table is shared
    // (one more reference), not copied.  Note that `status` and `perror` are
    // locals: any failure in init() or setDefaultRuleSet() is not reported to
    // the caller.
    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
    setRoundingMode(rhs.getRoundingMode());

    capitalizationInfoSet = rhs.capitalizationInfoSet;
    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
#endif

    return *this;
}

// Releases everything the formatter owns via dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}

// Returns a heap-allocated copy made with the copy constructor.
Format*
RuleBasedNumberFormat::clone(void) const
{
    return new RuleBasedNumberFormat(*this);
}

/**
 * Two formatters are equal when they have the same dynamic type, locale and
 * lenient flag, equal localization tables (or none on both sides), and
 * pairwise-equal rule sets in the same order.
 */
UBool
RuleBasedNumberFormat::operator==(const Format& other) const
{
    if (this == &other) {
        return TRUE;
    }

    if (typeid(*this) == typeid(other)) {
        // The typeid check above makes this downcast valid.
        const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
        // test for capitalization info equality is adequately handled
        // by the NumberFormat test for fCapitalizationContext equality;
        // the info here is just derived from that.
        if (locale == rhs.locale &&
            lenient == rhs.lenient &&
            (localizations == NULL
                ? rhs.localizations == NULL
                : (rhs.localizations == NULL
                    ?
FALSE 949 : *localizations == rhs.localizations))) { 950 951 NFRuleSet** p = ruleSets; 952 NFRuleSet** q = rhs.ruleSets; 953 if (p == NULL) { 954 return q == NULL; 955 } else if (q == NULL) { 956 return FALSE; 957 } 958 while (*p && *q && (**p == **q)) { 959 ++p; 960 ++q; 961 } 962 return *q == NULL && *p == NULL; 963 } 964 } 965 966 return FALSE; 967} 968 969UnicodeString 970RuleBasedNumberFormat::getRules() const 971{ 972 UnicodeString result; 973 if (ruleSets != NULL) { 974 for (NFRuleSet** p = ruleSets; *p; ++p) { 975 (*p)->appendRules(result); 976 } 977 } 978 return result; 979} 980 981UnicodeString 982RuleBasedNumberFormat::getRuleSetName(int32_t index) const 983{ 984 if (localizations) { 985 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); 986 return string; 987 } 988 else if (ruleSets) { 989 UnicodeString result; 990 for (NFRuleSet** p = ruleSets; *p; ++p) { 991 NFRuleSet* rs = *p; 992 if (rs->isPublic()) { 993 if (--index == -1) { 994 rs->getName(result); 995 return result; 996 } 997 } 998 } 999 } 1000 UnicodeString empty; 1001 return empty; 1002} 1003 1004int32_t 1005RuleBasedNumberFormat::getNumberOfRuleSetNames() const 1006{ 1007 int32_t result = 0; 1008 if (localizations) { 1009 result = localizations->getNumberOfRuleSets(); 1010 } 1011 else if (ruleSets) { 1012 for (NFRuleSet** p = ruleSets; *p; ++p) { 1013 if ((**p).isPublic()) { 1014 ++result; 1015 } 1016 } 1017 } 1018 return result; 1019} 1020 1021int32_t 1022RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { 1023 if (localizations) { 1024 return localizations->getNumberOfDisplayLocales(); 1025 } 1026 return 0; 1027} 1028 1029Locale 1030RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { 1031 if (U_FAILURE(status)) { 1032 return Locale(""); 1033 } 1034 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { 1035 UnicodeString name(TRUE, localizations->getLocaleName(index), 
-1); 1036 char buffer[64]; 1037 int32_t cap = name.length() + 1; 1038 char* bp = buffer; 1039 if (cap > 64) { 1040 bp = (char *)uprv_malloc(cap); 1041 if (bp == NULL) { 1042 status = U_MEMORY_ALLOCATION_ERROR; 1043 return Locale(""); 1044 } 1045 } 1046 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); 1047 Locale retLocale(bp); 1048 if (bp != buffer) { 1049 uprv_free(bp); 1050 } 1051 return retLocale; 1052 } 1053 status = U_ILLEGAL_ARGUMENT_ERROR; 1054 Locale retLocale; 1055 return retLocale; 1056} 1057 1058UnicodeString 1059RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { 1060 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { 1061 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); 1062 int32_t len = localeName.length(); 1063 UChar* localeStr = localeName.getBuffer(len + 1); 1064 while (len >= 0) { 1065 localeStr[len] = 0; 1066 int32_t ix = localizations->indexForLocale(localeStr); 1067 if (ix >= 0) { 1068 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); 1069 return name; 1070 } 1071 1072 // trim trailing portion, skipping over ommitted sections 1073 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore 1074 while (len > 0 && localeStr[len-1] == 0x005F) --len; 1075 } 1076 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); 1077 return name; 1078 } 1079 UnicodeString bogus; 1080 bogus.setToBogus(); 1081 return bogus; 1082} 1083 1084UnicodeString 1085RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { 1086 if (localizations) { 1087 UnicodeString rsn(ruleSetName); 1088 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); 1089 return getRuleSetDisplayName(ix, localeParam); 1090 } 1091 UnicodeString bogus; 1092 bogus.setToBogus(); 1093 return bogus; 1094} 1095 1096NFRuleSet* 1097RuleBasedNumberFormat::findRuleSet(const 
UnicodeString& name, UErrorCode& status) const
{
    if (U_SUCCESS(status) && ruleSets) {
        for (NFRuleSet** p = ruleSets; *p; ++p) {
            NFRuleSet* rs = *p;
            if (rs->isNamed(name)) {
                return rs;
            }
        }
        // Searched every rule set without a match.
        status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return NULL;
}

/**
 * Formats an arbitrary-precision number.
 *
 * Values that fit in an int64_t are formatted as integers; values whose
 * integer part fits are formatted as doubles; anything larger is delegated to
 * a locale decimal formatter.
 *
 * @return appendTo.  If the fallback formatter cannot be created, status
 *         reports the failure and appendTo is left unchanged.
 */
UnicodeString&
RuleBasedNumberFormat::format(const DigitList &number,
                              UnicodeString &appendTo,
                              FieldPositionIterator *posIter,
                              UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    DigitList copy(number);
    if (copy.fitsIntoInt64(false)) {
        format(((DigitList &)number).getInt64(), appendTo, posIter, status);
    }
    else {
        copy.roundAtExponent(0);
        if (copy.fitsIntoInt64(false)) {
            format(number.getDouble(), appendTo, posIter, status);
        }
        else {
            // We're outside of our normal range that this framework can handle.
            // The DecimalFormat will provide more accurate results.

            // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
            NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
            if (U_FAILURE(status) || decimalFormat == NULL) {
                // Previously the result was dereferenced unconditionally.
                delete decimalFormat;
                if (U_SUCCESS(status)) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                }
                return appendTo;
            }
            Formattable f;
            f.adoptDigitList(new DigitList(number));
            decimalFormat->format(f, appendTo, posIter, status);
            delete decimalFormat;
        }
    }
    return appendTo;
}


UnicodeString&
RuleBasedNumberFormat::format(const DigitList &number,
                     UnicodeString& appendTo,
                     FieldPosition& pos,
                     UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    DigitList copy(number);
    if (copy.fitsIntoInt64(false)) {
        format(((DigitList &)number).getInt64(), appendTo, pos, status);
    }
    else {
        copy.roundAtExponent(0);
        if (copy.fitsIntoInt64(false)) {
            format(number.getDouble(), appendTo, pos, status);
        }
        else {
            // We're outside of our normal range that this framework can handle.
1163 // The DecimalFormat will provide more accurate results. 1164 1165 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 1166 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); 1167 Formattable f; 1168 f.adoptDigitList(new DigitList(number)); 1169 decimalFormat->format(f, appendTo, pos, status); 1170 delete decimalFormat; 1171 } 1172 } 1173 return appendTo; 1174} 1175 1176UnicodeString& 1177RuleBasedNumberFormat::format(int32_t number, 1178 UnicodeString& toAppendTo, 1179 FieldPosition& pos) const 1180{ 1181 return format((int64_t)number, toAppendTo, pos); 1182} 1183 1184 1185UnicodeString& 1186RuleBasedNumberFormat::format(int64_t number, 1187 UnicodeString& toAppendTo, 1188 FieldPosition& /* pos */) const 1189{ 1190 if (defaultRuleSet) { 1191 UErrorCode status = U_ZERO_ERROR; 1192 format(number, defaultRuleSet, toAppendTo, status); 1193 } 1194 return toAppendTo; 1195} 1196 1197 1198UnicodeString& 1199RuleBasedNumberFormat::format(double number, 1200 UnicodeString& toAppendTo, 1201 FieldPosition& /* pos */) const 1202{ 1203 UErrorCode status = U_ZERO_ERROR; 1204 if (defaultRuleSet) { 1205 format(number, *defaultRuleSet, toAppendTo, status); 1206 } 1207 return toAppendTo; 1208} 1209 1210 1211UnicodeString& 1212RuleBasedNumberFormat::format(int32_t number, 1213 const UnicodeString& ruleSetName, 1214 UnicodeString& toAppendTo, 1215 FieldPosition& pos, 1216 UErrorCode& status) const 1217{ 1218 return format((int64_t)number, ruleSetName, toAppendTo, pos, status); 1219} 1220 1221 1222UnicodeString& 1223RuleBasedNumberFormat::format(int64_t number, 1224 const UnicodeString& ruleSetName, 1225 UnicodeString& toAppendTo, 1226 FieldPosition& /* pos */, 1227 UErrorCode& status) const 1228{ 1229 if (U_SUCCESS(status)) { 1230 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1231 // throw new IllegalArgumentException("Can't use internal rule set"); 1232 status = U_ILLEGAL_ARGUMENT_ERROR; 1233 } 
else { 1234 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1235 if (rs) { 1236 format(number, rs, toAppendTo, status); 1237 } 1238 } 1239 } 1240 return toAppendTo; 1241} 1242 1243 1244UnicodeString& 1245RuleBasedNumberFormat::format(double number, 1246 const UnicodeString& ruleSetName, 1247 UnicodeString& toAppendTo, 1248 FieldPosition& /* pos */, 1249 UErrorCode& status) const 1250{ 1251 if (U_SUCCESS(status)) { 1252 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) { 1253 // throw new IllegalArgumentException("Can't use internal rule set"); 1254 status = U_ILLEGAL_ARGUMENT_ERROR; 1255 } else { 1256 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1257 if (rs) { 1258 format(number, *rs, toAppendTo, status); 1259 } 1260 } 1261 } 1262 return toAppendTo; 1263} 1264 1265void 1266RuleBasedNumberFormat::format(double number, 1267 NFRuleSet& rs, 1268 UnicodeString& toAppendTo, 1269 UErrorCode& status) const 1270{ 1271 int32_t startPos = toAppendTo.length(); 1272 if (getRoundingMode() != DecimalFormat::ERoundingMode::kRoundUnnecessary && !uprv_isNaN(number) && !uprv_isInfinite(number)) { 1273 DigitList digitList; 1274 digitList.set(number); 1275 digitList.setRoundingMode(getRoundingMode()); 1276 digitList.roundFixedPoint(getMaximumFractionDigits()); 1277 number = digitList.getDouble(); 1278 } 1279 rs.format(number, toAppendTo, toAppendTo.length(), 0, status); 1280 adjustForCapitalizationContext(startPos, toAppendTo, status); 1281} 1282 1283/** 1284 * Bottleneck through which all the public format() methods 1285 * that take a long pass. By the time we get here, we know 1286 * which rule set we're using to do the formatting. 
1287 * @param number The number to format 1288 * @param ruleSet The rule set to use to format the number 1289 * @return The text that resulted from formatting the number 1290 */ 1291UnicodeString& 1292RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const 1293{ 1294 // all API format() routines that take a double vector through 1295 // here. We have these two identical functions-- one taking a 1296 // double and one taking a long-- the couple digits of precision 1297 // that long has but double doesn't (both types are 8 bytes long, 1298 // but double has to borrow some of the mantissa bits to hold 1299 // the exponent). 1300 // Create an empty string buffer where the result will 1301 // be built, and pass it to the rule set (along with an insertion 1302 // position of 0 and the number being formatted) to the rule set 1303 // for formatting 1304 1305 if (U_SUCCESS(status)) { 1306 if (number == U_INT64_MIN) { 1307 // We can't handle this value right now. Provide an accurate default value. 1308 1309 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. 
1310 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); 1311 Formattable f; 1312 FieldPosition pos(FieldPosition::DONT_CARE); 1313 DigitList *digitList = new DigitList(); 1314 digitList->set(number); 1315 f.adoptDigitList(digitList); 1316 decimalFormat->format(f, toAppendTo, pos, status); 1317 delete decimalFormat; 1318 } 1319 else { 1320 int32_t startPos = toAppendTo.length(); 1321 ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status); 1322 adjustForCapitalizationContext(startPos, toAppendTo, status); 1323 } 1324 } 1325 return toAppendTo; 1326} 1327 1328UnicodeString& 1329RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos, 1330 UnicodeString& currentResult, 1331 UErrorCode& status) const 1332{ 1333#if !UCONFIG_NO_BREAK_ITERATION 1334 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); 1335 if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) { 1336 // capitalize currentResult according to context 1337 UChar32 ch = currentResult.char32At(0); 1338 if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL && 1339 ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1340 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1341 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1342 // titlecase first word of currentResult, here use sentence iterator unlike current implementations 1343 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format 1344 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); 1345 } 1346 } 1347#endif 1348 return currentResult; 1349} 1350 1351 1352void 1353RuleBasedNumberFormat::parse(const UnicodeString& text, 1354 Formattable& result, 1355 ParsePosition& 
parsePosition) const 1356{ 1357 if (!ruleSets) { 1358 parsePosition.setErrorIndex(0); 1359 return; 1360 } 1361 1362 UnicodeString workingText(text, parsePosition.getIndex()); 1363 ParsePosition workingPos(0); 1364 1365 ParsePosition high_pp(0); 1366 Formattable high_result; 1367 1368 for (NFRuleSet** p = ruleSets; *p; ++p) { 1369 NFRuleSet *rp = *p; 1370 if (rp->isPublic() && rp->isParseable()) { 1371 ParsePosition working_pp(0); 1372 Formattable working_result; 1373 1374 rp->parse(workingText, working_pp, kMaxDouble, working_result); 1375 if (working_pp.getIndex() > high_pp.getIndex()) { 1376 high_pp = working_pp; 1377 high_result = working_result; 1378 1379 if (high_pp.getIndex() == workingText.length()) { 1380 break; 1381 } 1382 } 1383 } 1384 } 1385 1386 int32_t startIndex = parsePosition.getIndex(); 1387 parsePosition.setIndex(startIndex + high_pp.getIndex()); 1388 if (high_pp.getIndex() > 0) { 1389 parsePosition.setErrorIndex(-1); 1390 } else { 1391 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; 1392 parsePosition.setErrorIndex(startIndex + errorIndex); 1393 } 1394 result = high_result; 1395 if (result.getType() == Formattable::kDouble) { 1396 double d = result.getDouble(); 1397 if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) { 1398 // Note: casting a double to an int when the double is too large or small 1399 // to fit the destination is undefined behavior. The explicit range checks, 1400 // above, are required. Just casting and checking the result value is undefined. 
1401 result.setLong(static_cast<int32_t>(d)); 1402 } 1403 } 1404} 1405 1406#if !UCONFIG_NO_COLLATION 1407 1408void 1409RuleBasedNumberFormat::setLenient(UBool enabled) 1410{ 1411 lenient = enabled; 1412 if (!enabled && collator) { 1413 delete collator; 1414 collator = NULL; 1415 } 1416} 1417 1418#endif 1419 1420void 1421RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { 1422 if (U_SUCCESS(status)) { 1423 if (ruleSetName.isEmpty()) { 1424 if (localizations) { 1425 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); 1426 defaultRuleSet = findRuleSet(name, status); 1427 } else { 1428 initDefaultRuleSet(); 1429 } 1430 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { 1431 status = U_ILLEGAL_ARGUMENT_ERROR; 1432 } else { 1433 NFRuleSet* result = findRuleSet(ruleSetName, status); 1434 if (result != NULL) { 1435 defaultRuleSet = result; 1436 } 1437 } 1438 } 1439} 1440 1441UnicodeString 1442RuleBasedNumberFormat::getDefaultRuleSetName() const { 1443 UnicodeString result; 1444 if (defaultRuleSet && defaultRuleSet->isPublic()) { 1445 defaultRuleSet->getName(result); 1446 } else { 1447 result.setToBogus(); 1448 } 1449 return result; 1450} 1451 1452void 1453RuleBasedNumberFormat::initDefaultRuleSet() 1454{ 1455 defaultRuleSet = NULL; 1456 if (!ruleSets) { 1457 return; 1458 } 1459 1460 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering")); 1461 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal")); 1462 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration")); 1463 1464 NFRuleSet**p = &ruleSets[0]; 1465 while (*p) { 1466 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { 1467 defaultRuleSet = *p; 1468 return; 1469 } else { 1470 ++p; 1471 } 1472 } 1473 1474 defaultRuleSet = *--p; 1475 if (!defaultRuleSet->isPublic()) { 1476 while (p != ruleSets) { 1477 if ((*--p)->isPublic()) { 1478 defaultRuleSet = *p; 1479 break; 1480 } 1481 } 
1482 } 1483} 1484 1485 1486void 1487RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1488 UParseError& pErr, UErrorCode& status) 1489{ 1490 // TODO: implement UParseError 1491 uprv_memset(&pErr, 0, sizeof(UParseError)); 1492 // Note: this can leave ruleSets == NULL, so remaining code should check 1493 if (U_FAILURE(status)) { 1494 return; 1495 } 1496 1497 initializeDecimalFormatSymbols(status); 1498 initializeDefaultInfinityRule(status); 1499 initializeDefaultNaNRule(status); 1500 if (U_FAILURE(status)) { 1501 return; 1502 } 1503 1504 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); 1505 1506 UnicodeString description(rules); 1507 if (!description.length()) { 1508 status = U_MEMORY_ALLOCATION_ERROR; 1509 return; 1510 } 1511 1512 // start by stripping the trailing whitespace from all the rules 1513 // (this is all the whitespace follwing each semicolon in the 1514 // description). This allows us to look for rule-set boundaries 1515 // by searching for ";%" without having to worry about whitespace 1516 // between the ; and the % 1517 stripWhitespace(description); 1518 1519 // check to see if there's a set of lenient-parse rules. 
If there 1520 // is, pull them out into our temporary holding place for them, 1521 // and delete them from the description before the real desciption- 1522 // parsing code sees them 1523 int32_t lp = description.indexOf(gLenientParse, -1, 0); 1524 if (lp != -1) { 1525 // we've got to make sure we're not in the middle of a rule 1526 // (where "%%lenient-parse" would actually get treated as 1527 // rule text) 1528 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1529 // locate the beginning and end of the actual collation 1530 // rules (there may be whitespace between the name and 1531 // the first token in the description) 1532 int lpEnd = description.indexOf(gSemiPercent, 2, lp); 1533 1534 if (lpEnd == -1) { 1535 lpEnd = description.length() - 1; 1536 } 1537 int lpStart = lp + u_strlen(gLenientParse); 1538 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) { 1539 ++lpStart; 1540 } 1541 1542 // copy out the lenient-parse rules and delete them 1543 // from the description 1544 lenientParseRules = new UnicodeString(); 1545 /* test for NULL */ 1546 if (lenientParseRules == 0) { 1547 status = U_MEMORY_ALLOCATION_ERROR; 1548 return; 1549 } 1550 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1551 1552 description.remove(lp, lpEnd + 1 - lp); 1553 } 1554 } 1555 1556 // pre-flight parsing the description and count the number of 1557 // rule sets (";%" marks the end of one rule set and the beginning 1558 // of the next) 1559 numRuleSets = 0; 1560 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) { 1561 ++numRuleSets; 1562 ++p; 1563 } 1564 ++numRuleSets; 1565 1566 // our rule list is an array of the appropriate size 1567 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); 1568 /* test for NULL */ 1569 if (ruleSets == 0) { 1570 status = U_MEMORY_ALLOCATION_ERROR; 1571 return; 1572 } 1573 1574 for (int i = 0; i <= numRuleSets; ++i) { 1575 ruleSets[i] 
= NULL; 1576 } 1577 1578 // divide up the descriptions into individual rule-set descriptions 1579 // and store them in a temporary array. At each step, we also 1580 // new up a rule set, but all this does is initialize its name 1581 // and remove it from its description. We can't actually parse 1582 // the rest of the descriptions and finish initializing everything 1583 // because we have to know the names and locations of all the rule 1584 // sets before we can actually set everything up 1585 if(!numRuleSets) { 1586 status = U_ILLEGAL_ARGUMENT_ERROR; 1587 return; 1588 } 1589 1590 ruleSetDescriptions = new UnicodeString[numRuleSets]; 1591 if (ruleSetDescriptions == 0) { 1592 status = U_MEMORY_ALLOCATION_ERROR; 1593 return; 1594 } 1595 1596 { 1597 int curRuleSet = 0; 1598 int32_t start = 0; 1599 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { 1600 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1601 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1602 if (ruleSets[curRuleSet] == 0) { 1603 status = U_MEMORY_ALLOCATION_ERROR; 1604 return; 1605 } 1606 ++curRuleSet; 1607 start = p + 1; 1608 } 1609 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1610 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); 1611 if (ruleSets[curRuleSet] == 0) { 1612 status = U_MEMORY_ALLOCATION_ERROR; 1613 return; 1614 } 1615 } 1616 1617 // now we can take note of the formatter's default rule set, which 1618 // is the last public rule set in the description (it's the last 1619 // rather than the first so that a user can create a new formatter 1620 // from an existing formatter and change its default behavior just 1621 // by appending more rule sets to the end) 1622 1623 // {dlf} Initialization of a fraction rule set requires the default rule 1624 // set to be known. 
For purposes of initialization, this is always the 1625 // last public rule set, no matter what the localization data says. 1626 initDefaultRuleSet(); 1627 1628 // finally, we can go back through the temporary descriptions 1629 // list and finish seting up the substructure (and we throw 1630 // away the temporary descriptions as we go) 1631 { 1632 for (int i = 0; i < numRuleSets; i++) { 1633 ruleSets[i]->parseRules(ruleSetDescriptions[i], status); 1634 } 1635 } 1636 1637 // Now that the rules are initialized, the 'real' default rule 1638 // set can be adjusted by the localization data. 1639 1640 // The C code keeps the localization array as is, rather than building 1641 // a separate array of the public rule set names, so we have less work 1642 // to do here-- but we still need to check the names. 1643 1644 if (localizationInfos) { 1645 // confirm the names, if any aren't in the rules, that's an error 1646 // it is ok if the rules contain public rule sets that are not in this list 1647 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1648 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); 1649 NFRuleSet* rs = findRuleSet(name, status); 1650 if (rs == NULL) { 1651 break; // error 1652 } 1653 if (i == 0) { 1654 defaultRuleSet = rs; 1655 } 1656 } 1657 } else { 1658 defaultRuleSet = getDefaultRuleSet(); 1659 } 1660 originalDescription = rules; 1661} 1662 1663// override the NumberFormat implementation in order to 1664// lazily initialize relevant items 1665void 1666RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) 1667{ 1668 NumberFormat::setContext(value, status); 1669 if (U_SUCCESS(status)) { 1670 if (!capitalizationInfoSet && 1671 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) { 1672 initCapitalizationContextInfo(locale); 1673 capitalizationInfoSet = TRUE; 1674 } 1675#if !UCONFIG_NO_BREAK_ITERATION 1676 if ( capitalizationBrkIter == NULL && 
(value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || 1677 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || 1678 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { 1679 UErrorCode status = U_ZERO_ERROR; 1680 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); 1681 if (U_FAILURE(status)) { 1682 delete capitalizationBrkIter; 1683 capitalizationBrkIter = NULL; 1684 } 1685 } 1686#endif 1687 } 1688} 1689 1690void 1691RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) 1692{ 1693#if !UCONFIG_NO_BREAK_ITERATION 1694 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL; 1695 UErrorCode status = U_ZERO_ERROR; 1696 UResourceBundle *rb = ures_open(NULL, localeID, &status); 1697 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status); 1698 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status); 1699 if (U_SUCCESS(status) && rb != NULL) { 1700 int32_t len = 0; 1701 const int32_t * intVector = ures_getIntVector(rb, &len, &status); 1702 if (U_SUCCESS(status) && intVector != NULL && len >= 2) { 1703 capitalizationForUIListMenu = intVector[0]; 1704 capitalizationForStandAlone = intVector[1]; 1705 } 1706 } 1707 ures_close(rb); 1708#endif 1709} 1710 1711void 1712RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1713{ 1714 // iterate through the characters... 1715 UnicodeString result; 1716 1717 int start = 0; 1718 while (start != -1 && start < description.length()) { 1719 // seek to the first non-whitespace character... 
1720 while (start < description.length() 1721 && PatternProps::isWhiteSpace(description.charAt(start))) { 1722 ++start; 1723 } 1724 1725 // locate the next semicolon in the text and copy the text from 1726 // our current position up to that semicolon into the result 1727 int32_t p = description.indexOf(gSemiColon, start); 1728 if (p == -1) { 1729 // or if we don't find a semicolon, just copy the rest of 1730 // the string into the result 1731 result.append(description, start, description.length() - start); 1732 start = -1; 1733 } 1734 else if (p < description.length()) { 1735 result.append(description, start, p + 1 - start); 1736 start = p + 1; 1737 } 1738 1739 // when we get here, we've seeked off the end of the sring, and 1740 // we terminate the loop (we continue until *start* is -1 rather 1741 // than until *p* is -1, because otherwise we'd miss the last 1742 // rule in the description) 1743 else { 1744 start = -1; 1745 } 1746 } 1747 1748 description.setTo(result); 1749} 1750 1751 1752void 1753RuleBasedNumberFormat::dispose() 1754{ 1755 if (ruleSets) { 1756 for (NFRuleSet** p = ruleSets; *p; ++p) { 1757 delete *p; 1758 } 1759 uprv_free(ruleSets); 1760 ruleSets = NULL; 1761 } 1762 1763 if (ruleSetDescriptions) { 1764 delete [] ruleSetDescriptions; 1765 ruleSetDescriptions = NULL; 1766 } 1767 1768#if !UCONFIG_NO_COLLATION 1769 delete collator; 1770#endif 1771 collator = NULL; 1772 1773 delete decimalFormatSymbols; 1774 decimalFormatSymbols = NULL; 1775 1776 delete defaultInfinityRule; 1777 defaultInfinityRule = NULL; 1778 1779 delete defaultNaNRule; 1780 defaultNaNRule = NULL; 1781 1782 delete lenientParseRules; 1783 lenientParseRules = NULL; 1784 1785#if !UCONFIG_NO_BREAK_ITERATION 1786 delete capitalizationBrkIter; 1787 capitalizationBrkIter = NULL; 1788#endif 1789 1790 if (localizations) { 1791 localizations = localizations->unref(); 1792 } 1793} 1794 1795 1796//----------------------------------------------------------------------- 1797// package-internal API 
1798//----------------------------------------------------------------------- 1799 1800/** 1801 * Returns the collator to use for lenient parsing. The collator is lazily created: 1802 * this function creates it the first time it's called. 1803 * @return The collator to use for lenient parsing, or null if lenient parsing 1804 * is turned off. 1805*/ 1806const RuleBasedCollator* 1807RuleBasedNumberFormat::getCollator() const 1808{ 1809#if !UCONFIG_NO_COLLATION 1810 if (!ruleSets) { 1811 return NULL; 1812 } 1813 1814 // lazy-evaluate the collator 1815 if (collator == NULL && lenient) { 1816 // create a default collator based on the formatter's locale, 1817 // then pull out that collator's rules, append any additional 1818 // rules specified in the description, and create a _new_ 1819 // collator based on the combinaiton of those rules 1820 1821 UErrorCode status = U_ZERO_ERROR; 1822 1823 Collator* temp = Collator::createInstance(locale, status); 1824 RuleBasedCollator* newCollator; 1825 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) { 1826 if (lenientParseRules) { 1827 UnicodeString rules(newCollator->getRules()); 1828 rules.append(*lenientParseRules); 1829 1830 newCollator = new RuleBasedCollator(rules, status); 1831 // Exit if newCollator could not be created. 1832 if (newCollator == NULL) { 1833 return NULL; 1834 } 1835 } else { 1836 temp = NULL; 1837 } 1838 if (U_SUCCESS(status)) { 1839 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1840 // cast away const 1841 ((RuleBasedNumberFormat*)this)->collator = newCollator; 1842 } else { 1843 delete newCollator; 1844 } 1845 } 1846 delete temp; 1847 } 1848#endif 1849 1850 // if lenient-parse mode is off, this will be null 1851 // (see setLenientParseMode()) 1852 return collator; 1853} 1854 1855 1856DecimalFormatSymbols* 1857RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status) 1858{ 1859 // lazy-evaluate the DecimalFormatSymbols object. 
This object 1860 // is shared by all DecimalFormat instances belonging to this 1861 // formatter 1862 if (decimalFormatSymbols == NULL) { 1863 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); 1864 if (U_SUCCESS(status)) { 1865 decimalFormatSymbols = temp; 1866 } 1867 else { 1868 delete temp; 1869 } 1870 } 1871 return decimalFormatSymbols; 1872} 1873 1874/** 1875 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1876 * instances owned by this formatter. 1877*/ 1878const DecimalFormatSymbols* 1879RuleBasedNumberFormat::getDecimalFormatSymbols() const 1880{ 1881 return decimalFormatSymbols; 1882} 1883 1884NFRule* 1885RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status) 1886{ 1887 if (U_FAILURE(status)) { 1888 return NULL; 1889 } 1890 if (defaultInfinityRule == NULL) { 1891 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: ")); 1892 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol)); 1893 NFRule* temp = new NFRule(this, rule, status); 1894 if (U_SUCCESS(status)) { 1895 defaultInfinityRule = temp; 1896 } 1897 else { 1898 delete temp; 1899 } 1900 } 1901 return defaultInfinityRule; 1902} 1903 1904const NFRule* 1905RuleBasedNumberFormat::getDefaultInfinityRule() const 1906{ 1907 return defaultInfinityRule; 1908} 1909 1910NFRule* 1911RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status) 1912{ 1913 if (U_FAILURE(status)) { 1914 return NULL; 1915 } 1916 if (defaultNaNRule == NULL) { 1917 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: ")); 1918 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol)); 1919 NFRule* temp = new NFRule(this, rule, status); 1920 if (U_SUCCESS(status)) { 1921 defaultNaNRule = temp; 1922 } 1923 else { 1924 delete temp; 1925 } 1926 } 1927 return defaultNaNRule; 1928} 1929 1930const NFRule* 1931RuleBasedNumberFormat::getDefaultNaNRule() const 1932{ 1933 return defaultNaNRule; 1934} 1935 1936// De-owning 
the current localized symbols and adopt the new symbols. 1937void 1938RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt) 1939{ 1940 if (symbolsToAdopt == NULL) { 1941 return; // do not allow caller to set decimalFormatSymbols to NULL 1942 } 1943 1944 if (decimalFormatSymbols != NULL) { 1945 delete decimalFormatSymbols; 1946 } 1947 1948 decimalFormatSymbols = symbolsToAdopt; 1949 1950 { 1951 // Apply the new decimalFormatSymbols by reparsing the rulesets 1952 UErrorCode status = U_ZERO_ERROR; 1953 1954 delete defaultInfinityRule; 1955 defaultInfinityRule = NULL; 1956 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols 1957 1958 delete defaultNaNRule; 1959 defaultNaNRule = NULL; 1960 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols 1961 1962 if (ruleSets) { 1963 for (int32_t i = 0; i < numRuleSets; i++) { 1964 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status); 1965 } 1966 } 1967 } 1968} 1969 1970// Setting the symbols is equlivalent to adopting a newly created localized symbols. 1971void 1972RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) 1973{ 1974 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); 1975} 1976 1977PluralFormat * 1978RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, 1979 const UnicodeString &pattern, 1980 UErrorCode& status) const 1981{ 1982 return new PluralFormat(locale, pluralType, pattern, status); 1983} 1984 1985/** 1986 * Get the rounding mode. 1987 * @return A rounding mode 1988 */ 1989DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const { 1990 return roundingMode; 1991} 1992 1993/** 1994 * Set the rounding mode. This has no effect unless the rounding 1995 * increment is greater than zero. 
1996 * @param roundingMode A rounding mode 1997 */ 1998void RuleBasedNumberFormat::setRoundingMode(DecimalFormat::ERoundingMode roundingMode) { 1999 this->roundingMode = roundingMode; 2000} 2001 2002U_NAMESPACE_END 2003 2004/* U_HAVE_RBNF */ 2005#endif 2006