1/* 2******************************************************************************* 3* Copyright (C) 1997-2009, International Business Machines Corporation 4* and others. All Rights Reserved. 5******************************************************************************* 6*/ 7 8#include "unicode/rbnf.h" 9 10#if U_HAVE_RBNF 11 12#include "unicode/normlzr.h" 13#include "unicode/tblcoll.h" 14#include "unicode/uchar.h" 15#include "unicode/ucol.h" 16#include "unicode/uloc.h" 17#include "unicode/unum.h" 18#include "unicode/ures.h" 19#include "unicode/ustring.h" 20#include "unicode/utf16.h" 21#include "unicode/udata.h" 22#include "nfrs.h" 23 24#include "cmemory.h" 25#include "cstring.h" 26#include "../common/util.h" 27#include "uresimp.h" 28 29// debugging 30// #define DEBUG 31 32#ifdef DEBUG 33#include "stdio.h" 34#endif 35 36#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" 37 38static const UChar gPercentPercent[] = 39{ 40 0x25, 0x25, 0 41}; /* "%%" */ 42 43// All urbnf objects are created through openRules, so we init all of the 44// Unicode string constants required by rbnf, nfrs, or nfr here. 45static const UChar gLenientParse[] = 46{ 47 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0 48}; /* "%%lenient-parse:" */ 49static const UChar gSemiColon = 0x003B; 50static const UChar gSemiPercent[] = 51{ 52 0x3B, 0x25, 0 53}; /* ";%" */ 54 55#define kSomeNumberOfBitsDiv2 22 56#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2) 57#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble) 58 59// Temporary workaround - when noParse is true, do noting in parse. 60// TODO: We need a real fix - see #6895/#6896 61static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL }; 62 63U_NAMESPACE_BEGIN 64 65UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) 66 67/* 68This is a utility class. It does not use ICU's RTTI. 69If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject. 70Please make sure that intltest passes on Windows in Release mode, 71since the string pooling per compilation unit will mess up how RTTI works. 72The RTTI code was also removed due to lack of code coverage. 73*/ 74class LocalizationInfo : public UMemory { 75protected: 76 virtual ~LocalizationInfo() {}; 77 uint32_t refcount; 78 79public: 80 LocalizationInfo() : refcount(0) {} 81 82 LocalizationInfo* ref(void) { 83 ++refcount; 84 return this; 85 } 86 87 LocalizationInfo* unref(void) { 88 if (refcount && --refcount == 0) { 89 delete this; 90 } 91 return NULL; 92 } 93 94 virtual UBool operator==(const LocalizationInfo* rhs) const; 95 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } 96 97 virtual int32_t getNumberOfRuleSets(void) const = 0; 98 virtual const UChar* getRuleSetName(int32_t index) const = 0; 99 virtual int32_t getNumberOfDisplayLocales(void) const = 0; 100 virtual const UChar* getLocaleName(int32_t index) const = 0; 101 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; 102 103 virtual int32_t indexForLocale(const UChar* locale) const; 104 virtual int32_t indexForRuleSet(const UChar* ruleset) const; 105 106// virtual UClassID getDynamicClassID() const = 0; 107// static UClassID getStaticClassID(void); 108}; 109 110//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) 111 112// if both strings are NULL, this returns TRUE 113static UBool 114streq(const UChar* lhs, const UChar* rhs) { 115 if (rhs == lhs) { 116 return TRUE; 117 } 118 if (lhs && rhs) { 119 return u_strcmp(lhs, rhs) == 0; 120 } 121 return FALSE; 122} 123 124UBool 125LocalizationInfo::operator==(const LocalizationInfo* rhs) const { 126 if (rhs) { 127 if (this == rhs) { 128 return TRUE; 129 } 130 131 int32_t rsc = getNumberOfRuleSets(); 132 if (rsc == rhs->getNumberOfRuleSets()) { 133 for (int i = 0; i < rsc; ++i) { 134 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { 135 return FALSE; 136 } 137 } 138 int32_t dlc = getNumberOfDisplayLocales(); 139 if (dlc == rhs->getNumberOfDisplayLocales()) { 140 for (int i = 0; i < dlc; ++i) { 141 const UChar* locale = getLocaleName(i); 142 int32_t ix = rhs->indexForLocale(locale); 143 // if no locale, ix is -1, getLocaleName returns null, so streq returns false 144 if (!streq(locale, rhs->getLocaleName(ix))) { 145 return FALSE; 146 } 147 for (int j = 0; j < rsc; ++j) { 148 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { 149 return FALSE; 150 } 151 } 152 } 153 return TRUE; 154 } 155 } 156 } 157 return FALSE; 158} 159 160int32_t 161LocalizationInfo::indexForLocale(const UChar* locale) const { 162 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { 163 if (streq(locale, getLocaleName(i))) { 164 return i; 165 } 166 } 167 return -1; 168} 169 170int32_t 171LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { 172 if (ruleset) { 173 for (int i = 0; i < getNumberOfRuleSets(); ++i) { 174 if (streq(ruleset, getRuleSetName(i))) { 175 return i; 176 } 177 } 178 } 179 return -1; 180} 181 182 183typedef void (*Fn_Deleter)(void*); 184 185class VArray { 186 void** buf; 187 int32_t cap; 188 int32_t size; 189 Fn_Deleter deleter; 190public: 191 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} 192 193 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} 194 195 ~VArray() { 196 if (deleter) { 197 for (int i = 0; i < size; ++i) { 198 (*deleter)(buf[i]); 199 } 200 } 201 uprv_free(buf); 202 } 203 204 int32_t length() { 205 return size; 206 } 207 208 void add(void* elem, UErrorCode& status) { 209 if (U_SUCCESS(status)) { 210 if (size == cap) { 211 if (cap == 0) { 212 cap = 1; 213 } else if (cap < 256) { 214 cap *= 2; 215 } else { 216 cap += 256; 217 } 218 if (buf == NULL) { 219 buf = (void**)uprv_malloc(cap * sizeof(void*)); 220 } else { 221 buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); 222 } 223 if (buf == NULL) { 224 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway 225 status = U_MEMORY_ALLOCATION_ERROR; 226 return; 227 } 228 void* start = &buf[size]; 229 size_t count = (cap - size) * sizeof(void*); 230 uprv_memset(start, 0, count); // fill with nulls, just because 231 } 232 buf[size++] = elem; 233 } 234 } 235 236 void** release(void) { 237 void** result = buf; 238 buf = NULL; 239 cap = 0; 240 size = 0; 241 return result; 242 } 243}; 244 245class LocDataParser; 246 247class StringLocalizationInfo : public LocalizationInfo { 248 UChar* info; 249 UChar*** data; 250 int32_t numRuleSets; 251 int32_t numLocales; 252 253friend class LocDataParser; 254 255 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) 256 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) 257 { 258 } 259 260public: 261 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); 262 263 virtual ~StringLocalizationInfo(); 264 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } 265 virtual const UChar* getRuleSetName(int32_t index) const; 266 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } 267 virtual const UChar* getLocaleName(int32_t index) const; 268 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; 269 270// virtual UClassID getDynamicClassID() const; 271// static UClassID getStaticClassID(void); 272 273private: 274 void init(UErrorCode& status) const; 275}; 276 277 278enum { 279 OPEN_ANGLE = 0x003c, /* '<' */ 280 CLOSE_ANGLE = 0x003e, /* '>' */ 281 COMMA = 0x002c, 282 TICK = 0x0027, 283 QUOTE = 0x0022, 284 SPACE = 0x0020 285}; 286 287/** 288 * Utility for parsing a localization string and returning a StringLocalizationInfo*. 289 */ 290class LocDataParser { 291 UChar* data; 292 const UChar* e; 293 UChar* p; 294 UChar ch; 295 UParseError& pe; 296 UErrorCode& ec; 297 298public: 299 LocDataParser(UParseError& parseError, UErrorCode& status) 300 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} 301 ~LocDataParser() {} 302 303 /* 304 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, 305 * and return NULL. The StringLocalizationInfo will adopt locData if it is created. 306 */ 307 StringLocalizationInfo* parse(UChar* data, int32_t len); 308 309private: 310 311 void inc(void) { ++p; ch = 0xffff; } 312 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } 313 UBool check(UChar c) { return p < e && (ch == c || *p == c); } 314 void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();} 315 UBool inList(UChar c, const UChar* list) const { 316 if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE; 317 while (*list && *list != c) ++list; return *list == c; 318 } 319 void parseError(const char* msg); 320 321 StringLocalizationInfo* doParse(void); 322 323 UChar** nextArray(int32_t& requiredLength); 324 UChar* nextString(void); 325}; 326 327#ifdef DEBUG 328#define ERROR(msg) parseError(msg); return NULL; 329#else 330#define ERROR(msg) parseError(NULL); return NULL; 331#endif 332 333 334static const UChar DQUOTE_STOPLIST[] = { 335 QUOTE, 0 336}; 337 338static const UChar SQUOTE_STOPLIST[] = { 339 TICK, 0 340}; 341 342static const UChar NOQUOTE_STOPLIST[] = { 343 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0 344}; 345 346static void 347DeleteFn(void* p) { 348 uprv_free(p); 349} 350 351StringLocalizationInfo* 352LocDataParser::parse(UChar* _data, int32_t len) { 353 if (U_FAILURE(ec)) { 354 if (_data) uprv_free(_data); 355 return NULL; 356 } 357 358 pe.line = 0; 359 pe.offset = -1; 360 pe.postContext[0] = 0; 361 pe.preContext[0] = 0; 362 363 if (_data == NULL) { 364 ec = U_ILLEGAL_ARGUMENT_ERROR; 365 return NULL; 366 } 367 368 if (len <= 0) { 369 ec = U_ILLEGAL_ARGUMENT_ERROR; 370 uprv_free(_data); 371 return NULL; 372 } 373 374 data = _data; 375 e = data + len; 376 p = _data; 377 ch = 0xffff; 378 379 return doParse(); 380} 381 382 383StringLocalizationInfo* 384LocDataParser::doParse(void) { 385 skipWhitespace(); 386 if (!checkInc(OPEN_ANGLE)) { 387 ERROR("Missing open angle"); 388 } else { 389 VArray array(DeleteFn); 390 UBool mightHaveNext = TRUE; 391 int32_t requiredLength = -1; 392 while (mightHaveNext) { 393 mightHaveNext = FALSE; 394 UChar** elem = nextArray(requiredLength); 395 skipWhitespace(); 396 UBool haveComma = check(COMMA); 397 if (elem) { 398 array.add(elem, ec); 399 if (haveComma) { 400 inc(); 401 mightHaveNext = TRUE; 402 } 403 } else if (haveComma) { 404 ERROR("Unexpected character"); 405 } 406 } 407 408 skipWhitespace(); 409 if (!checkInc(CLOSE_ANGLE)) { 410 if (check(OPEN_ANGLE)) { 411 ERROR("Missing comma in outer array"); 412 } else { 413 ERROR("Missing close angle bracket in outer array"); 414 } 415 } 416 417 skipWhitespace(); 418 if (p != e) { 419 ERROR("Extra text after close of localization data"); 420 } 421 422 array.add(NULL, ec); 423 if (U_SUCCESS(ec)) { 424 int32_t numLocs = array.length() - 2; // subtract first, NULL 425 UChar*** result = (UChar***)array.release(); 426 427 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL 428 } 429 } 430 431 ERROR("Unknown error"); 432} 433 434UChar** 435LocDataParser::nextArray(int32_t& requiredLength) { 436 if (U_FAILURE(ec)) { 437 return NULL; 438 } 439 440 skipWhitespace(); 441 if (!checkInc(OPEN_ANGLE)) { 442 ERROR("Missing open angle"); 443 } 444 445 VArray array; 446 UBool mightHaveNext = TRUE; 447 while (mightHaveNext) { 448 mightHaveNext = FALSE; 449 UChar* elem = nextString(); 450 skipWhitespace(); 451 UBool haveComma = check(COMMA); 452 if (elem) { 453 array.add(elem, ec); 454 if (haveComma) { 455 inc(); 456 mightHaveNext = TRUE; 457 } 458 } else if (haveComma) { 459 ERROR("Unexpected comma"); 460 } 461 } 462 skipWhitespace(); 463 if (!checkInc(CLOSE_ANGLE)) { 464 if (check(OPEN_ANGLE)) { 465 ERROR("Missing close angle bracket in inner array"); 466 } else { 467 ERROR("Missing comma in inner array"); 468 } 469 } 470 471 array.add(NULL, ec); 472 if (U_SUCCESS(ec)) { 473 if (requiredLength == -1) { 474 requiredLength = array.length() + 1; 475 } else if (array.length() != requiredLength) { 476 ec = U_ILLEGAL_ARGUMENT_ERROR; 477 ERROR("Array not of required length"); 478 } 479 480 return (UChar**)array.release(); 481 } 482 ERROR("Unknown Error"); 483} 484 485UChar* 486LocDataParser::nextString() { 487 UChar* result = NULL; 488 489 skipWhitespace(); 490 if (p < e) { 491 const UChar* terminators; 492 UChar c = *p; 493 UBool haveQuote = c == QUOTE || c == TICK; 494 if (haveQuote) { 495 inc(); 496 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; 497 } else { 498 terminators = NOQUOTE_STOPLIST; 499 } 500 UChar* start = p; 501 while (p < e && !inList(*p, terminators)) ++p; 502 if (p == e) { 503 ERROR("Unexpected end of data"); 504 } 505 506 UChar x = *p; 507 if (p > start) { 508 ch = x; 509 *p = 0x0; // terminate by writing to data 510 result = start; // just point into data 511 } 512 if (haveQuote) { 513 if (x != c) { 514 ERROR("Missing matching quote"); 515 } else if (p == start) { 516 ERROR("Empty string"); 517 } 518 inc(); 519 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { 520 ERROR("Unexpected character in string"); 521 } 522 } 523 524 // ok for there to be no next string 525 return result; 526} 527 528void 529LocDataParser::parseError(const char* /*str*/) { 530 if (!data) { 531 return; 532 } 533 534 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; 535 if (start < data) { 536 start = data; 537 } 538 for (UChar* x = p; --x >= start;) { 539 if (!*x) { 540 start = x+1; 541 break; 542 } 543 } 544 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; 545 if (limit > e) { 546 limit = e; 547 } 548 u_strncpy(pe.preContext, start, (int32_t)(p-start)); 549 pe.preContext[p-start] = 0; 550 u_strncpy(pe.postContext, p, (int32_t)(limit-p)); 551 pe.postContext[limit-p] = 0; 552 pe.offset = (int32_t)(p - data); 553 554#ifdef DEBUG 555 fprintf(stderr, "%s at or near character %d: ", str, p-data); 556 557 UnicodeString msg; 558 msg.append(start, p - start); 559 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ 560 msg.append(p, limit-p); 561 msg.append("'"); 562 563 char buf[128]; 564 int32_t len = msg.extract(0, msg.length(), buf, 128); 565 if (len >= 128) { 566 buf[127] = 0; 567 } else { 568 buf[len] = 0; 569 } 570 fprintf(stderr, "%s\n", buf); 571 fflush(stderr); 572#endif 573 574 uprv_free(data); 575 data = NULL; 576 p = NULL; 577 e = NULL; 578 579 if (U_SUCCESS(ec)) { 580 ec = U_PARSE_ERROR; 581 } 582} 583 584//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) 585 586StringLocalizationInfo* 587StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { 588 if (U_FAILURE(status)) { 589 return NULL; 590 } 591 592 int32_t len = info.length(); 593 if (len == 0) { 594 return NULL; // no error; 595 } 596 597 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); 598 if (!p) { 599 status = U_MEMORY_ALLOCATION_ERROR; 600 return NULL; 601 } 602 info.extract(p, len, status); 603 if (!U_FAILURE(status)) { 604 status = U_ZERO_ERROR; // clear warning about non-termination 605 } 606 607 LocDataParser parser(perror, status); 608 return parser.parse(p, len); 609} 610 611StringLocalizationInfo::~StringLocalizationInfo() { 612 for (UChar*** p = (UChar***)data; *p; ++p) { 613 // remaining data is simply pointer into our unicode string data. 614 if (*p) uprv_free(*p); 615 } 616 if (data) uprv_free(data); 617 if (info) uprv_free(info); 618} 619 620 621const UChar* 622StringLocalizationInfo::getRuleSetName(int32_t index) const { 623 if (index >= 0 && index < getNumberOfRuleSets()) { 624 return data[0][index]; 625 } 626 return NULL; 627} 628 629const UChar* 630StringLocalizationInfo::getLocaleName(int32_t index) const { 631 if (index >= 0 && index < getNumberOfDisplayLocales()) { 632 return data[index+1][0]; 633 } 634 return NULL; 635} 636 637const UChar* 638StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { 639 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && 640 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { 641 return data[localeIndex+1][ruleIndex+1]; 642 } 643 return NULL; 644} 645 646// ---------- 647 648RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 649 const UnicodeString& locs, 650 const Locale& alocale, UParseError& perror, UErrorCode& status) 651 : ruleSets(NULL) 652 , defaultRuleSet(NULL) 653 , locale(alocale) 654 , collator(NULL) 655 , decimalFormatSymbols(NULL) 656 , lenient(FALSE) 657 , lenientParseRules(NULL) 658 , localizations(NULL) 659 , noParse(FALSE) //TODO: to be removed after #6895 660{ 661 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 662 init(description, locinfo, perror, status); 663} 664 665RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 666 const UnicodeString& locs, 667 UParseError& perror, UErrorCode& status) 668 : ruleSets(NULL) 669 , defaultRuleSet(NULL) 670 , locale(Locale::getDefault()) 671 , collator(NULL) 672 , decimalFormatSymbols(NULL) 673 , lenient(FALSE) 674 , lenientParseRules(NULL) 675 , localizations(NULL) 676 , noParse(FALSE) //TODO: to be removed after #6895 677{ 678 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); 679 init(description, locinfo, perror, status); 680} 681 682RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 683 LocalizationInfo* info, 684 const Locale& alocale, UParseError& perror, UErrorCode& status) 685 : ruleSets(NULL) 686 , defaultRuleSet(NULL) 687 , locale(alocale) 688 , collator(NULL) 689 , decimalFormatSymbols(NULL) 690 , lenient(FALSE) 691 , lenientParseRules(NULL) 692 , localizations(NULL) 693 , noParse(FALSE) //TODO: to be removed after #6895 694{ 695 init(description, info, perror, status); 696} 697 698RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 699 UParseError& perror, 700 UErrorCode& status) 701 : ruleSets(NULL) 702 , defaultRuleSet(NULL) 703 , locale(Locale::getDefault()) 704 , collator(NULL) 705 , decimalFormatSymbols(NULL) 706 , lenient(FALSE) 707 , lenientParseRules(NULL) 708 , localizations(NULL) 709 , noParse(FALSE) //TODO: to be removed after #6895 710{ 711 init(description, NULL, perror, status); 712} 713 714RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, 715 const Locale& aLocale, 716 UParseError& perror, 717 UErrorCode& status) 718 : ruleSets(NULL) 719 , defaultRuleSet(NULL) 720 , locale(aLocale) 721 , collator(NULL) 722 , decimalFormatSymbols(NULL) 723 , lenient(FALSE) 724 , lenientParseRules(NULL) 725 , localizations(NULL) 726 , noParse(FALSE) //TODO: to be removed after #6895 727{ 728 init(description, NULL, perror, status); 729} 730 731RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) 732 : ruleSets(NULL) 733 , defaultRuleSet(NULL) 734 , locale(alocale) 735 , collator(NULL) 736 , decimalFormatSymbols(NULL) 737 , lenient(FALSE) 738 , lenientParseRules(NULL) 739 , localizations(NULL) 740{ 741 if (U_FAILURE(status)) { 742 return; 743 } 744 745 const char* rules_tag = "RBNFRules"; 746 const char* fmt_tag = ""; 747 switch (tag) { 748 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break; 749 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break; 750 case URBNF_DURATION: fmt_tag = "DurationRules"; break; 751 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break; 752 default: status = U_ILLEGAL_ARGUMENT_ERROR; return; 753 } 754 755 // TODO: read localization info from resource 756 LocalizationInfo* locinfo = NULL; 757 758 int32_t len = 0; 759 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); 760 if (U_SUCCESS(status)) { 761 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), 762 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); 763 764 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status); 765 if (U_FAILURE(status)) { 766 ures_close(nfrb); 767 } 768 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status); 769 if (U_FAILURE(status)) { 770 ures_close(rbnfRules); 771 ures_close(nfrb); 772 return; 773 } 774 775 UnicodeString desc; 776 while (ures_hasNext(ruleSets)) { 777 const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status); 778 desc.append(currentString); 779 } 780 UParseError perror; 781 782 783 init (desc, locinfo, perror, status); 784 785 //TODO: we need a real fix - see #6895 / #6896 786 noParse = FALSE; 787 if (tag == URBNF_SPELLOUT) { 788 const char *lang = alocale.getLanguage(); 789 for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) { 790 if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) { 791 noParse = TRUE; 792 break; 793 } 794 } 795 } 796 //TODO: end 797 798 ures_close(ruleSets); 799 ures_close(rbnfRules); 800 } 801 ures_close(nfrb); 802} 803 804RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) 805 : NumberFormat(rhs) 806 , ruleSets(NULL) 807 , defaultRuleSet(NULL) 808 , locale(rhs.locale) 809 , collator(NULL) 810 , decimalFormatSymbols(NULL) 811 , lenient(FALSE) 812 , lenientParseRules(NULL) 813 , localizations(NULL) 814{ 815 this->operator=(rhs); 816} 817 818// -------- 819 820RuleBasedNumberFormat& 821RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) 822{ 823 UErrorCode status = U_ZERO_ERROR; 824 dispose(); 825 locale = rhs.locale; 826 lenient = rhs.lenient; 827 828 UnicodeString rules = rhs.getRules(); 829 UParseError perror; 830 init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); 831 832 //TODO: remove below when we fix the parse bug - See #6895 / #6896 833 noParse = rhs.noParse; 834 835 return *this; 836} 837 838RuleBasedNumberFormat::~RuleBasedNumberFormat() 839{ 840 dispose(); 841} 842 843Format* 844RuleBasedNumberFormat::clone(void) const 845{ 846 RuleBasedNumberFormat * result = NULL; 847 UnicodeString rules = getRules(); 848 UErrorCode status = U_ZERO_ERROR; 849 UParseError perror; 850 result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status); 851 /* test for NULL */ 852 if (result == 0) { 853 status = U_MEMORY_ALLOCATION_ERROR; 854 return 0; 855 } 856 if (U_FAILURE(status)) { 857 delete result; 858 result = 0; 859 } else { 860 result->lenient = lenient; 861 862 //TODO: remove below when we fix the parse bug - See #6895 / #6896 863 result->noParse = noParse; 864 } 865 return result; 866} 867 868UBool 869RuleBasedNumberFormat::operator==(const Format& other) const 870{ 871 if (this == &other) { 872 return TRUE; 873 } 874 875 if (other.getDynamicClassID() == getStaticClassID()) { 876 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; 877 if (locale == rhs.locale && 878 lenient == rhs.lenient && 879 (localizations == NULL 880 ? rhs.localizations == NULL 881 : (rhs.localizations == NULL 882 ? FALSE 883 : *localizations == rhs.localizations))) { 884 885 NFRuleSet** p = ruleSets; 886 NFRuleSet** q = rhs.ruleSets; 887 if (p == NULL) { 888 return q == NULL; 889 } else if (q == NULL) { 890 return FALSE; 891 } 892 while (*p && *q && (**p == **q)) { 893 ++p; 894 ++q; 895 } 896 return *q == NULL && *p == NULL; 897 } 898 } 899 900 return FALSE; 901} 902 903UnicodeString 904RuleBasedNumberFormat::getRules() const 905{ 906 UnicodeString result; 907 if (ruleSets != NULL) { 908 for (NFRuleSet** p = ruleSets; *p; ++p) { 909 (*p)->appendRules(result); 910 } 911 } 912 return result; 913} 914 915UnicodeString 916RuleBasedNumberFormat::getRuleSetName(int32_t index) const 917{ 918 if (localizations) { 919 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); 920 return string; 921 } else if (ruleSets) { 922 UnicodeString result; 923 for (NFRuleSet** p = ruleSets; *p; ++p) { 924 NFRuleSet* rs = *p; 925 if (rs->isPublic()) { 926 if (--index == -1) { 927 rs->getName(result); 928 return result; 929 } 930 } 931 } 932 } 933 UnicodeString empty; 934 return empty; 935} 936 937int32_t 938RuleBasedNumberFormat::getNumberOfRuleSetNames() const 939{ 940 int32_t result = 0; 941 if (localizations) { 942 result = localizations->getNumberOfRuleSets(); 943 } else if (ruleSets) { 944 for (NFRuleSet** p = ruleSets; *p; ++p) { 945 if ((**p).isPublic()) { 946 ++result; 947 } 948 } 949 } 950 return result; 951} 952 953int32_t 954RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { 955 if (localizations) { 956 return localizations->getNumberOfDisplayLocales(); 957 } 958 return 0; 959} 960 961Locale 962RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { 963 if (U_FAILURE(status)) { 964 return Locale(""); 965 } 966 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { 967 UnicodeString name(TRUE, localizations->getLocaleName(index), -1); 968 char buffer[64]; 969 int32_t cap = name.length() + 1; 970 char* bp = buffer; 971 if (cap > 64) { 972 bp = (char *)uprv_malloc(cap); 973 if (bp == NULL) { 974 status = U_MEMORY_ALLOCATION_ERROR; 975 return Locale(""); 976 } 977 } 978 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); 979 Locale retLocale(bp); 980 if (bp != buffer) { 981 uprv_free(bp); 982 } 983 return retLocale; 984 } 985 status = U_ILLEGAL_ARGUMENT_ERROR; 986 Locale retLocale; 987 return retLocale; 988} 989 990UnicodeString 991RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) { 992 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { 993 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant); 994 int32_t len = localeName.length(); 995 UChar* localeStr = localeName.getBuffer(len + 1); 996 while (len >= 0) { 997 localeStr[len] = 0; 998 int32_t ix = localizations->indexForLocale(localeStr); 999 if (ix >= 0) { 1000 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); 1001 return name; 1002 } 1003 1004 // trim trailing portion, skipping over ommitted sections 1005 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore 1006 while (len > 0 && localeStr[len-1] == 0x005F) --len; 1007 } 1008 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); 1009 return name; 1010 } 1011 UnicodeString bogus; 1012 bogus.setToBogus(); 1013 return bogus; 1014} 1015 1016UnicodeString 1017RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) { 1018 if (localizations) { 1019 UnicodeString rsn(ruleSetName); 1020 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); 1021 return getRuleSetDisplayName(ix, localeParam); 1022 } 1023 UnicodeString bogus; 1024 bogus.setToBogus(); 1025 return bogus; 1026} 1027 1028NFRuleSet* 1029RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const 1030{ 1031 if (U_SUCCESS(status) && ruleSets) { 1032 for (NFRuleSet** p = ruleSets; *p; ++p) { 1033 NFRuleSet* rs = *p; 1034 if (rs->isNamed(name)) { 1035 return rs; 1036 } 1037 } 1038 status = U_ILLEGAL_ARGUMENT_ERROR; 1039 } 1040 return NULL; 1041} 1042 1043UnicodeString& 1044RuleBasedNumberFormat::format(int32_t number, 1045 UnicodeString& toAppendTo, 1046 FieldPosition& /* pos */) const 1047{ 1048 if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length()); 1049 return toAppendTo; 1050} 1051 1052 1053UnicodeString& 1054RuleBasedNumberFormat::format(int64_t number, 1055 UnicodeString& toAppendTo, 1056 FieldPosition& /* pos */) const 1057{ 1058 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); 1059 return toAppendTo; 1060} 1061 1062 1063UnicodeString& 1064RuleBasedNumberFormat::format(double number, 1065 UnicodeString& toAppendTo, 1066 FieldPosition& /* pos */) const 1067{ 1068 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); 1069 return toAppendTo; 1070} 1071 1072 1073UnicodeString& 1074RuleBasedNumberFormat::format(int32_t number, 1075 const UnicodeString& ruleSetName, 1076 UnicodeString& toAppendTo, 1077 FieldPosition& /* pos */, 1078 UErrorCode& status) const 1079{ 1080 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status); 1081 if (U_SUCCESS(status)) { 1082 if (ruleSetName.indexOf(gPercentPercent) == 0) { 1083 // throw new IllegalArgumentException("Can't use internal rule set"); 1084 status = U_ILLEGAL_ARGUMENT_ERROR; 1085 } else { 1086 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1087 if (rs) { 1088 rs->format((int64_t)number, toAppendTo, toAppendTo.length()); 1089 } 1090 } 1091 } 1092 return toAppendTo; 1093} 1094 1095 1096UnicodeString& 1097RuleBasedNumberFormat::format(int64_t number, 1098 const UnicodeString& ruleSetName, 1099 UnicodeString& toAppendTo, 1100 FieldPosition& /* pos */, 1101 UErrorCode& status) const 1102{ 1103 if (U_SUCCESS(status)) { 1104 if (ruleSetName.indexOf(gPercentPercent) == 0) { 1105 // throw new IllegalArgumentException("Can't use internal rule set"); 1106 status = U_ILLEGAL_ARGUMENT_ERROR; 1107 } else { 1108 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1109 if (rs) { 1110 rs->format(number, toAppendTo, toAppendTo.length()); 1111 } 1112 } 1113 } 1114 return toAppendTo; 1115} 1116 1117 1118// make linker happy 1119UnicodeString& 1120RuleBasedNumberFormat::format(const Formattable& obj, 1121 UnicodeString& toAppendTo, 1122 FieldPosition& pos, 1123 UErrorCode& status) const 1124{ 1125 return NumberFormat::format(obj, toAppendTo, pos, status); 1126} 1127 1128UnicodeString& 1129RuleBasedNumberFormat::format(double number, 1130 const UnicodeString& ruleSetName, 1131 UnicodeString& toAppendTo, 1132 FieldPosition& /* pos */, 1133 UErrorCode& status) const 1134{ 1135 if (U_SUCCESS(status)) { 1136 if (ruleSetName.indexOf(gPercentPercent) == 0) { 1137 // throw new IllegalArgumentException("Can't use internal rule set"); 1138 status = U_ILLEGAL_ARGUMENT_ERROR; 1139 } else { 1140 NFRuleSet *rs = findRuleSet(ruleSetName, status); 1141 if (rs) { 1142 rs->format(number, toAppendTo, toAppendTo.length()); 1143 } 1144 } 1145 } 1146 return toAppendTo; 1147} 1148 1149void 1150RuleBasedNumberFormat::parse(const UnicodeString& text, 1151 Formattable& result, 1152 ParsePosition& parsePosition) const 1153{ 1154 //TODO: We need a real fix. See #6895 / #6896 1155 if (noParse) { 1156 // skip parsing 1157 parsePosition.setErrorIndex(0); 1158 return; 1159 } 1160 1161 if (!ruleSets) { 1162 parsePosition.setErrorIndex(0); 1163 return; 1164 } 1165 1166 UnicodeString workingText(text, parsePosition.getIndex()); 1167 ParsePosition workingPos(0); 1168 1169 ParsePosition high_pp(0); 1170 Formattable high_result; 1171 1172 for (NFRuleSet** p = ruleSets; *p; ++p) { 1173 NFRuleSet *rp = *p; 1174 if (rp->isPublic() && rp->isParseable()) { 1175 ParsePosition working_pp(0); 1176 Formattable working_result; 1177 1178 rp->parse(workingText, working_pp, kMaxDouble, working_result); 1179 if (working_pp.getIndex() > high_pp.getIndex()) { 1180 high_pp = working_pp; 1181 high_result = working_result; 1182 1183 if (high_pp.getIndex() == workingText.length()) { 1184 break; 1185 } 1186 } 1187 } 1188 } 1189 1190 int32_t startIndex = parsePosition.getIndex(); 1191 parsePosition.setIndex(startIndex + high_pp.getIndex()); 1192 if (high_pp.getIndex() > 0) { 1193 parsePosition.setErrorIndex(-1); 1194 } else { 1195 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0; 1196 parsePosition.setErrorIndex(startIndex + errorIndex); 1197 } 1198 result = high_result; 1199 if (result.getType() == Formattable::kDouble) { 1200 int32_t r = (int32_t)result.getDouble(); 1201 if ((double)r == result.getDouble()) { 1202 result.setLong(r); 1203 } 1204 } 1205} 1206 1207#if !UCONFIG_NO_COLLATION 1208 1209void 1210RuleBasedNumberFormat::setLenient(UBool enabled) 1211{ 1212 lenient = enabled; 1213 if (!enabled && collator) { 1214 delete collator; 1215 collator = NULL; 1216 } 1217} 1218 1219#endif 1220 1221void 1222RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { 1223 if (U_SUCCESS(status)) { 1224 if (ruleSetName.isEmpty()) { 1225 if (localizations) { 1226 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); 1227 defaultRuleSet = findRuleSet(name, status); 1228 } else { 1229 initDefaultRuleSet(); 1230 } 1231 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { 1232 status = U_ILLEGAL_ARGUMENT_ERROR; 1233 } else { 1234 NFRuleSet* result = findRuleSet(ruleSetName, status); 1235 if (result != NULL) { 1236 defaultRuleSet = result; 1237 } 1238 } 1239 } 1240} 1241 1242UnicodeString 1243RuleBasedNumberFormat::getDefaultRuleSetName() const { 1244 UnicodeString result; 1245 if (defaultRuleSet && defaultRuleSet->isPublic()) { 1246 defaultRuleSet->getName(result); 1247 } else { 1248 result.setToBogus(); 1249 } 1250 return result; 1251} 1252 1253void 1254RuleBasedNumberFormat::initDefaultRuleSet() 1255{ 1256 defaultRuleSet = NULL; 1257 if (!ruleSets) { 1258 return; 1259 } 1260 1261 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering"); 1262 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal"); 1263 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration"); 1264 1265 NFRuleSet**p = &ruleSets[0]; 1266 while (*p) { 1267 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { 1268 defaultRuleSet = *p; 1269 return; 1270 } else { 1271 ++p; 1272 } 1273 } 1274 1275 defaultRuleSet = *--p; 1276 if (!defaultRuleSet->isPublic()) { 1277 while (p != ruleSets) { 1278 if ((*--p)->isPublic()) { 1279 defaultRuleSet = *p; 1280 break; 1281 } 1282 } 1283 } 1284} 1285 1286 1287void 1288RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos, 1289 UParseError& pErr, UErrorCode& status) 1290{ 1291 // TODO: implement UParseError 1292 uprv_memset(&pErr, 0, sizeof(UParseError)); 1293 // Note: this can leave ruleSets == NULL, so remaining code should check 1294 if (U_FAILURE(status)) { 1295 return; 1296 } 1297 1298 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref(); 1299 1300 UnicodeString description(rules); 1301 if (!description.length()) { 1302 status = U_MEMORY_ALLOCATION_ERROR; 1303 return; 1304 } 1305 1306 // start by stripping the trailing whitespace from all the rules 1307 // (this is all the whitespace follwing each semicolon in the 1308 // description). This allows us to look for rule-set boundaries 1309 // by searching for ";%" without having to worry about whitespace 1310 // between the ; and the % 1311 stripWhitespace(description); 1312 1313 // check to see if there's a set of lenient-parse rules. If there 1314 // is, pull them out into our temporary holding place for them, 1315 // and delete them from the description before the real desciption- 1316 // parsing code sees them 1317 int32_t lp = description.indexOf(gLenientParse); 1318 if (lp != -1) { 1319 // we've got to make sure we're not in the middle of a rule 1320 // (where "%%lenient-parse" would actually get treated as 1321 // rule text) 1322 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) { 1323 // locate the beginning and end of the actual collation 1324 // rules (there may be whitespace between the name and 1325 // the first token in the description) 1326 int lpEnd = description.indexOf(gSemiPercent, lp); 1327 1328 if (lpEnd == -1) { 1329 lpEnd = description.length() - 1; 1330 } 1331 int lpStart = lp + u_strlen(gLenientParse); 1332 while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) { 1333 ++lpStart; 1334 } 1335 1336 // copy out the lenient-parse rules and delete them 1337 // from the description 1338 lenientParseRules = new UnicodeString(); 1339 /* test for NULL */ 1340 if (lenientParseRules == 0) { 1341 status = U_MEMORY_ALLOCATION_ERROR; 1342 return; 1343 } 1344 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart); 1345 1346 description.remove(lp, lpEnd + 1 - lp); 1347 } 1348 } 1349 1350 // pre-flight parsing the description and count the number of 1351 // rule sets (";%" marks the end of one rule set and the beginning 1352 // of the next) 1353 int numRuleSets = 0; 1354 for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) { 1355 ++numRuleSets; 1356 ++p; 1357 } 1358 ++numRuleSets; 1359 1360 // our rule list is an array of the appropriate size 1361 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); 1362 /* test for NULL */ 1363 if (ruleSets == 0) { 1364 status = U_MEMORY_ALLOCATION_ERROR; 1365 return; 1366 } 1367 1368 for (int i = 0; i <= numRuleSets; ++i) { 1369 ruleSets[i] = NULL; 1370 } 1371 1372 // divide up the descriptions into individual rule-set descriptions 1373 // and store them in a temporary array. At each step, we also 1374 // new up a rule set, but all this does is initialize its name 1375 // and remove it from its description. We can't actually parse 1376 // the rest of the descriptions and finish initializing everything 1377 // because we have to know the names and locations of all the rule 1378 // sets before we can actually set everything up 1379 if(!numRuleSets) { 1380 status = U_ILLEGAL_ARGUMENT_ERROR; 1381 return; 1382 } 1383 UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets]; 1384 if (ruleSetDescriptions == 0) { 1385 status = U_MEMORY_ALLOCATION_ERROR; 1386 return; 1387 } 1388 1389 { 1390 int curRuleSet = 0; 1391 int32_t start = 0; 1392 for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) { 1393 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); 1394 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); 1395 if (ruleSets[curRuleSet] == 0) { 1396 status = U_MEMORY_ALLOCATION_ERROR; 1397 goto cleanup; 1398 } 1399 ++curRuleSet; 1400 start = p + 1; 1401 } 1402 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); 1403 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status); 1404 if (ruleSets[curRuleSet] == 0) { 1405 status = U_MEMORY_ALLOCATION_ERROR; 1406 goto cleanup; 1407 } 1408 } 1409 1410 // now we can take note of the formatter's default rule set, which 1411 // is the last public rule set in the description (it's the last 1412 // rather than the first so that a user can create a new formatter 1413 // from an existing formatter and change its default behavior just 1414 // by appending more rule sets to the end) 1415 1416 // {dlf} Initialization of a fraction rule set requires the default rule 1417 // set to be known. For purposes of initialization, this is always the 1418 // last public rule set, no matter what the localization data says. 1419 initDefaultRuleSet(); 1420 1421 // finally, we can go back through the temporary descriptions 1422 // list and finish seting up the substructure (and we throw 1423 // away the temporary descriptions as we go) 1424 { 1425 for (int i = 0; i < numRuleSets; i++) { 1426 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status); 1427 } 1428 } 1429 1430 // Now that the rules are initialized, the 'real' default rule 1431 // set can be adjusted by the localization data. 1432 1433 // The C code keeps the localization array as is, rather than building 1434 // a separate array of the public rule set names, so we have less work 1435 // to do here-- but we still need to check the names. 1436 1437 if (localizationInfos) { 1438 // confirm the names, if any aren't in the rules, that's an error 1439 // it is ok if the rules contain public rule sets that are not in this list 1440 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) { 1441 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1); 1442 NFRuleSet* rs = findRuleSet(name, status); 1443 if (rs == NULL) { 1444 break; // error 1445 } 1446 if (i == 0) { 1447 defaultRuleSet = rs; 1448 } 1449 } 1450 } else { 1451 defaultRuleSet = getDefaultRuleSet(); 1452 } 1453 1454cleanup: 1455 delete[] ruleSetDescriptions; 1456} 1457 1458void 1459RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) 1460{ 1461 // iterate through the characters... 1462 UnicodeString result; 1463 1464 int start = 0; 1465 while (start != -1 && start < description.length()) { 1466 // seek to the first non-whitespace character... 1467 while (start < description.length() 1468 && uprv_isRuleWhiteSpace(description.charAt(start))) { 1469 ++start; 1470 } 1471 1472 // locate the next semicolon in the text and copy the text from 1473 // our current position up to that semicolon into the result 1474 int32_t p = description.indexOf(gSemiColon, start); 1475 if (p == -1) { 1476 // or if we don't find a semicolon, just copy the rest of 1477 // the string into the result 1478 result.append(description, start, description.length() - start); 1479 start = -1; 1480 } 1481 else if (p < description.length()) { 1482 result.append(description, start, p + 1 - start); 1483 start = p + 1; 1484 } 1485 1486 // when we get here, we've seeked off the end of the sring, and 1487 // we terminate the loop (we continue until *start* is -1 rather 1488 // than until *p* is -1, because otherwise we'd miss the last 1489 // rule in the description) 1490 else { 1491 start = -1; 1492 } 1493 } 1494 1495 description.setTo(result); 1496} 1497 1498 1499void 1500RuleBasedNumberFormat::dispose() 1501{ 1502 if (ruleSets) { 1503 for (NFRuleSet** p = ruleSets; *p; ++p) { 1504 delete *p; 1505 } 1506 uprv_free(ruleSets); 1507 ruleSets = NULL; 1508 } 1509 1510#if !UCONFIG_NO_COLLATION 1511 delete collator; 1512#endif 1513 collator = NULL; 1514 1515 delete decimalFormatSymbols; 1516 decimalFormatSymbols = NULL; 1517 1518 delete lenientParseRules; 1519 lenientParseRules = NULL; 1520 1521 if (localizations) localizations = localizations->unref(); 1522} 1523 1524 1525//----------------------------------------------------------------------- 1526// package-internal API 1527//----------------------------------------------------------------------- 1528 1529/** 1530 * Returns the collator to use for lenient parsing. The collator is lazily created: 1531 * this function creates it the first time it's called. 1532 * @return The collator to use for lenient parsing, or null if lenient parsing 1533 * is turned off. 1534*/ 1535Collator* 1536RuleBasedNumberFormat::getCollator() const 1537{ 1538#if !UCONFIG_NO_COLLATION 1539 if (!ruleSets) { 1540 return NULL; 1541 } 1542 1543 // lazy-evaulate the collator 1544 if (collator == NULL && lenient) { 1545 // create a default collator based on the formatter's locale, 1546 // then pull out that collator's rules, append any additional 1547 // rules specified in the description, and create a _new_ 1548 // collator based on the combinaiton of those rules 1549 1550 UErrorCode status = U_ZERO_ERROR; 1551 1552 Collator* temp = Collator::createInstance(locale, status); 1553 if (U_SUCCESS(status) && 1554 temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) { 1555 1556 RuleBasedCollator* newCollator = (RuleBasedCollator*)temp; 1557 if (lenientParseRules) { 1558 UnicodeString rules(newCollator->getRules()); 1559 rules.append(*lenientParseRules); 1560 1561 newCollator = new RuleBasedCollator(rules, status); 1562 // Exit if newCollator could not be created. 1563 if (newCollator == NULL) { 1564 return NULL; 1565 } 1566 } else { 1567 temp = NULL; 1568 } 1569 if (U_SUCCESS(status)) { 1570 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status); 1571 // cast away const 1572 ((RuleBasedNumberFormat*)this)->collator = newCollator; 1573 } else { 1574 delete newCollator; 1575 } 1576 } 1577 delete temp; 1578 } 1579#endif 1580 1581 // if lenient-parse mode is off, this will be null 1582 // (see setLenientParseMode()) 1583 return collator; 1584} 1585 1586 1587/** 1588 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat 1589 * instances owned by this formatter. This object is lazily created: this function 1590 * creates it the first time it's called. 1591 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat 1592 * instances owned by this formatter. 1593*/ 1594DecimalFormatSymbols* 1595RuleBasedNumberFormat::getDecimalFormatSymbols() const 1596{ 1597 // lazy-evaluate the DecimalFormatSymbols object. This object 1598 // is shared by all DecimalFormat instances belonging to this 1599 // formatter 1600 if (decimalFormatSymbols == NULL) { 1601 UErrorCode status = U_ZERO_ERROR; 1602 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); 1603 if (U_SUCCESS(status)) { 1604 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp; 1605 } else { 1606 delete temp; 1607 } 1608 } 1609 return decimalFormatSymbols; 1610} 1611 1612U_NAMESPACE_END 1613 1614/* U_HAVE_RBNF */ 1615#endif 1616