1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4****************************************************************************** 5* Copyright (C) 1997-2015, International Business Machines 6* Corporation and others. All Rights Reserved. 7****************************************************************************** 8* file name: nfrs.cpp 9* encoding: UTF-8 10* tab size: 8 (not used) 11* indentation:4 12* 13* Modification history 14* Date Name Comments 15* 10/11/2001 Doug Ported from ICU4J 16*/ 17 18#include "nfrs.h" 19 20#if U_HAVE_RBNF 21 22#include "unicode/uchar.h" 23#include "nfrule.h" 24#include "nfrlist.h" 25#include "patternprops.h" 26#include "putilimp.h" 27 28#ifdef RBNF_DEBUG 29#include "cmemory.h" 30#endif 31 32enum { 33 /** -x */ 34 NEGATIVE_RULE_INDEX = 0, 35 /** x.x */ 36 IMPROPER_FRACTION_RULE_INDEX = 1, 37 /** 0.x */ 38 PROPER_FRACTION_RULE_INDEX = 2, 39 /** x.0 */ 40 MASTER_RULE_INDEX = 3, 41 /** Inf */ 42 INFINITY_RULE_INDEX = 4, 43 /** NaN */ 44 NAN_RULE_INDEX = 5, 45 NON_NUMERICAL_RULE_LENGTH = 6 46}; 47 48U_NAMESPACE_BEGIN 49 50#if 0 51// euclid's algorithm works with doubles 52// note, doubles only get us up to one quadrillion or so, which 53// isn't as much range as we get with longs. We probably still 54// want either 64-bit math, or BigInteger. 55 56static int64_t 57util_lcm(int64_t x, int64_t y) 58{ 59 x.abs(); 60 y.abs(); 61 62 if (x == 0 || y == 0) { 63 return 0; 64 } else { 65 do { 66 if (x < y) { 67 int64_t t = x; x = y; y = t; 68 } 69 x -= y * (x/y); 70 } while (x != 0); 71 72 return y; 73 } 74} 75 76#else 77/** 78 * Calculates the least common multiple of x and y. 79 */ 80static int64_t 81util_lcm(int64_t x, int64_t y) 82{ 83 // binary gcd algorithm from Knuth, "The Art of Computer Programming," 84 // vol. 2, 1st ed., pp. 298-299 85 int64_t x1 = x; 86 int64_t y1 = y; 87 88 int p2 = 0; 89 while ((x1 & 1) == 0 && (y1 & 1) == 0) { 90 ++p2; 91 x1 >>= 1; 92 y1 >>= 1; 93 } 94 95 int64_t t; 96 if ((x1 & 1) == 1) { 97 t = -y1; 98 } else { 99 t = x1; 100 } 101 102 while (t != 0) { 103 while ((t & 1) == 0) { 104 t = t >> 1; 105 } 106 if (t > 0) { 107 x1 = t; 108 } else { 109 y1 = -t; 110 } 111 t = x1 - y1; 112 } 113 114 int64_t gcd = x1 << p2; 115 116 // x * y == gcd(x, y) * lcm(x, y) 117 return x / gcd * y; 118} 119#endif 120 121static const UChar gPercent = 0x0025; 122static const UChar gColon = 0x003a; 123static const UChar gSemicolon = 0x003b; 124static const UChar gLineFeed = 0x000a; 125 126static const UChar gPercentPercent[] = 127{ 128 0x25, 0x25, 0 129}; /* "%%" */ 130 131static const UChar gNoparse[] = 132{ 133 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 134}; /* "@noparse" */ 135 136NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status) 137 : name() 138 , rules(0) 139 , owner(_owner) 140 , fractionRules() 141 , fIsFractionRuleSet(FALSE) 142 , fIsPublic(FALSE) 143 , fIsParseable(TRUE) 144{ 145 for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { 146 nonNumericalRules[i] = NULL; 147 } 148 149 if (U_FAILURE(status)) { 150 return; 151 } 152 153 UnicodeString& description = descriptions[index]; // !!! make sure index is valid 154 155 if (description.length() == 0) { 156 // throw new IllegalArgumentException("Empty rule set description"); 157 status = U_PARSE_ERROR; 158 return; 159 } 160 161 // if the description begins with a rule set name (the rule set 162 // name can be omitted in formatter descriptions that consist 163 // of only one rule set), copy it out into our "name" member 164 // and delete it from the description 165 if (description.charAt(0) == gPercent) { 166 int32_t pos = description.indexOf(gColon); 167 if (pos == -1) { 168 // throw new IllegalArgumentException("Rule set name doesn't end in colon"); 169 status = U_PARSE_ERROR; 170 } else { 171 name.setTo(description, 0, pos); 172 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { 173 } 174 description.remove(0, pos); 175 } 176 } else { 177 name.setTo(UNICODE_STRING_SIMPLE("%default")); 178 } 179 180 if (description.length() == 0) { 181 // throw new IllegalArgumentException("Empty rule set description"); 182 status = U_PARSE_ERROR; 183 } 184 185 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; 186 187 if ( name.endsWith(gNoparse,8) ) { 188 fIsParseable = FALSE; 189 name.truncate(name.length()-8); // remove the @noparse from the name 190 } 191 192 // all of the other members of NFRuleSet are initialized 193 // by parseRules() 194} 195 196void 197NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) 198{ 199 // start by creating a Vector whose elements are Strings containing 200 // the descriptions of the rules (one rule per element). The rules 201 // are separated by semicolons (there's no escape facility: ALL 202 // semicolons are rule delimiters) 203 204 if (U_FAILURE(status)) { 205 return; 206 } 207 208 // ensure we are starting with an empty rule list 209 rules.deleteAll(); 210 211 // dlf - the original code kept a separate description array for no reason, 212 // so I got rid of it. The loop was too complex so I simplified it. 213 214 UnicodeString currentDescription; 215 int32_t oldP = 0; 216 while (oldP < description.length()) { 217 int32_t p = description.indexOf(gSemicolon, oldP); 218 if (p == -1) { 219 p = description.length(); 220 } 221 currentDescription.setTo(description, oldP, p - oldP); 222 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); 223 oldP = p + 1; 224 } 225 226 // for rules that didn't specify a base value, their base values 227 // were initialized to 0. Make another pass through the list and 228 // set all those rules' base values. We also remove any special 229 // rules from the list and put them into their own member variables 230 int64_t defaultBaseValue = 0; 231 232 // (this isn't a for loop because we might be deleting items from 233 // the vector-- we want to make sure we only increment i when 234 // we _didn't_ delete aything from the vector) 235 int32_t rulesSize = rules.size(); 236 for (int32_t i = 0; i < rulesSize; i++) { 237 NFRule* rule = rules[i]; 238 int64_t baseValue = rule->getBaseValue(); 239 240 if (baseValue == 0) { 241 // if the rule's base value is 0, fill in a default 242 // base value (this will be 1 plus the preceding 243 // rule's base value for regular rule sets, and the 244 // same as the preceding rule's base value in fraction 245 // rule sets) 246 rule->setBaseValue(defaultBaseValue, status); 247 } 248 else { 249 // if it's a regular rule that already knows its base value, 250 // check to make sure the rules are in order, and update 251 // the default base value for the next rule 252 if (baseValue < defaultBaseValue) { 253 // throw new IllegalArgumentException("Rules are not in order"); 254 status = U_PARSE_ERROR; 255 return; 256 } 257 defaultBaseValue = baseValue; 258 } 259 if (!fIsFractionRuleSet) { 260 ++defaultBaseValue; 261 } 262 } 263} 264 265/** 266 * Set one of the non-numerical rules. 267 * @param rule The rule to set. 268 */ 269void NFRuleSet::setNonNumericalRule(NFRule *rule) { 270 int64_t baseValue = rule->getBaseValue(); 271 if (baseValue == NFRule::kNegativeNumberRule) { 272 delete nonNumericalRules[NEGATIVE_RULE_INDEX]; 273 nonNumericalRules[NEGATIVE_RULE_INDEX] = rule; 274 } 275 else if (baseValue == NFRule::kImproperFractionRule) { 276 setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE); 277 } 278 else if (baseValue == NFRule::kProperFractionRule) { 279 setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE); 280 } 281 else if (baseValue == NFRule::kMasterRule) { 282 setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE); 283 } 284 else if (baseValue == NFRule::kInfinityRule) { 285 delete nonNumericalRules[INFINITY_RULE_INDEX]; 286 nonNumericalRules[INFINITY_RULE_INDEX] = rule; 287 } 288 else if (baseValue == NFRule::kNaNRule) { 289 delete nonNumericalRules[NAN_RULE_INDEX]; 290 nonNumericalRules[NAN_RULE_INDEX] = rule; 291 } 292} 293 294/** 295 * Determine the best fraction rule to use. Rules matching the decimal point from 296 * DecimalFormatSymbols become the main set of rules to use. 297 * @param originalIndex The index into nonNumericalRules 298 * @param newRule The new rule to consider 299 * @param rememberRule Should the new rule be added to fractionRules. 300 */ 301void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) { 302 if (rememberRule) { 303 fractionRules.add(newRule); 304 } 305 NFRule *bestResult = nonNumericalRules[originalIndex]; 306 if (bestResult == NULL) { 307 nonNumericalRules[originalIndex] = newRule; 308 } 309 else { 310 // We have more than one. Which one is better? 311 const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols(); 312 if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0) 313 == newRule->getDecimalPoint()) 314 { 315 nonNumericalRules[originalIndex] = newRule; 316 } 317 // else leave it alone 318 } 319} 320 321NFRuleSet::~NFRuleSet() 322{ 323 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 324 if (i != IMPROPER_FRACTION_RULE_INDEX 325 && i != PROPER_FRACTION_RULE_INDEX 326 && i != MASTER_RULE_INDEX) 327 { 328 delete nonNumericalRules[i]; 329 } 330 // else it will be deleted via NFRuleList fractionRules 331 } 332} 333 334static UBool 335util_equalRules(const NFRule* rule1, const NFRule* rule2) 336{ 337 if (rule1) { 338 if (rule2) { 339 return *rule1 == *rule2; 340 } 341 } else if (!rule2) { 342 return TRUE; 343 } 344 return FALSE; 345} 346 347UBool 348NFRuleSet::operator==(const NFRuleSet& rhs) const 349{ 350 if (rules.size() == rhs.rules.size() && 351 fIsFractionRuleSet == rhs.fIsFractionRuleSet && 352 name == rhs.name) { 353 354 // ...then compare the non-numerical rule lists... 355 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 356 if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) { 357 return FALSE; 358 } 359 } 360 361 // ...then compare the rule lists... 362 for (uint32_t i = 0; i < rules.size(); ++i) { 363 if (*rules[i] != *rhs.rules[i]) { 364 return FALSE; 365 } 366 } 367 return TRUE; 368 } 369 return FALSE; 370} 371 372void 373NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) { 374 for (uint32_t i = 0; i < rules.size(); ++i) { 375 rules[i]->setDecimalFormatSymbols(newSymbols, status); 376 } 377 // Switch the fraction rules to mirror the DecimalFormatSymbols. 378 for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) { 379 if (nonNumericalRules[nonNumericalIdx]) { 380 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { 381 NFRule *fractionRule = fractionRules[fIdx]; 382 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) { 383 setBestFractionRule(nonNumericalIdx, fractionRule, FALSE); 384 } 385 } 386 } 387 } 388 389 for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) { 390 NFRule *rule = nonNumericalRules[nnrIdx]; 391 if (rule) { 392 rule->setDecimalFormatSymbols(newSymbols, status); 393 } 394 } 395} 396 397#define RECURSION_LIMIT 64 398 399void 400NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const 401{ 402 if (recursionCount >= RECURSION_LIMIT) { 403 // stop recursion 404 status = U_INVALID_STATE_ERROR; 405 return; 406 } 407 const NFRule *rule = findNormalRule(number); 408 if (rule) { // else error, but can't report it 409 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); 410 } 411} 412 413void 414NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const 415{ 416 if (recursionCount >= RECURSION_LIMIT) { 417 // stop recursion 418 status = U_INVALID_STATE_ERROR; 419 return; 420 } 421 const NFRule *rule = findDoubleRule(number); 422 if (rule) { // else error, but can't report it 423 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); 424 } 425} 426 427const NFRule* 428NFRuleSet::findDoubleRule(double number) const 429{ 430 // if this is a fraction rule set, use findFractionRuleSetRule() 431 if (isFractionRuleSet()) { 432 return findFractionRuleSetRule(number); 433 } 434 435 if (uprv_isNaN(number)) { 436 const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX]; 437 if (!rule) { 438 rule = owner->getDefaultNaNRule(); 439 } 440 return rule; 441 } 442 443 // if the number is negative, return the negative number rule 444 // (if there isn't a negative-number rule, we pretend it's a 445 // positive number) 446 if (number < 0) { 447 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { 448 return nonNumericalRules[NEGATIVE_RULE_INDEX]; 449 } else { 450 number = -number; 451 } 452 } 453 454 if (uprv_isInfinite(number)) { 455 const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX]; 456 if (!rule) { 457 rule = owner->getDefaultInfinityRule(); 458 } 459 return rule; 460 } 461 462 // if the number isn't an integer, we use one of the fraction rules... 463 if (number != uprv_floor(number)) { 464 // if the number is between 0 and 1, return the proper 465 // fraction rule 466 if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) { 467 return nonNumericalRules[PROPER_FRACTION_RULE_INDEX]; 468 } 469 // otherwise, return the improper fraction rule 470 else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) { 471 return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]; 472 } 473 } 474 475 // if there's a master rule, use it to format the number 476 if (nonNumericalRules[MASTER_RULE_INDEX]) { 477 return nonNumericalRules[MASTER_RULE_INDEX]; 478 } 479 480 // and if we haven't yet returned a rule, use findNormalRule() 481 // to find the applicable rule 482 int64_t r = util64_fromDouble(number + 0.5); 483 return findNormalRule(r); 484} 485 486const NFRule * 487NFRuleSet::findNormalRule(int64_t number) const 488{ 489 // if this is a fraction rule set, use findFractionRuleSetRule() 490 // to find the rule (we should only go into this clause if the 491 // value is 0) 492 if (fIsFractionRuleSet) { 493 return findFractionRuleSetRule((double)number); 494 } 495 496 // if the number is negative, return the negative-number rule 497 // (if there isn't one, pretend the number is positive) 498 if (number < 0) { 499 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { 500 return nonNumericalRules[NEGATIVE_RULE_INDEX]; 501 } else { 502 number = -number; 503 } 504 } 505 506 // we have to repeat the preceding two checks, even though we 507 // do them in findRule(), because the version of format() that 508 // takes a long bypasses findRule() and goes straight to this 509 // function. This function does skip the fraction rules since 510 // we know the value is an integer (it also skips the master 511 // rule, since it's considered a fraction rule. Skipping the 512 // master rule in this function is also how we avoid infinite 513 // recursion) 514 515 // {dlf} unfortunately this fails if there are no rules except 516 // special rules. If there are no rules, use the master rule. 517 518 // binary-search the rule list for the applicable rule 519 // (a rule is used for all values from its base value to 520 // the next rule's base value) 521 int32_t hi = rules.size(); 522 if (hi > 0) { 523 int32_t lo = 0; 524 525 while (lo < hi) { 526 int32_t mid = (lo + hi) / 2; 527 if (rules[mid]->getBaseValue() == number) { 528 return rules[mid]; 529 } 530 else if (rules[mid]->getBaseValue() > number) { 531 hi = mid; 532 } 533 else { 534 lo = mid + 1; 535 } 536 } 537 if (hi == 0) { // bad rule set, minimum base > 0 538 return NULL; // want to throw exception here 539 } 540 541 NFRule *result = rules[hi - 1]; 542 543 // use shouldRollBack() to see whether we need to invoke the 544 // rollback rule (see shouldRollBack()'s documentation for 545 // an explanation of the rollback rule). If we do, roll back 546 // one rule and return that one instead of the one we'd normally 547 // return 548 if (result->shouldRollBack(number)) { 549 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base 550 return NULL; 551 } 552 result = rules[hi - 2]; 553 } 554 return result; 555 } 556 // else use the master rule 557 return nonNumericalRules[MASTER_RULE_INDEX]; 558} 559 560/** 561 * If this rule is a fraction rule set, this function is used by 562 * findRule() to select the most appropriate rule for formatting 563 * the number. Basically, the base value of each rule in the rule 564 * set is treated as the denominator of a fraction. Whichever 565 * denominator can produce the fraction closest in value to the 566 * number passed in is the result. If there's a tie, the earlier 567 * one in the list wins. (If there are two rules in a row with the 568 * same base value, the first one is used when the numerator of the 569 * fraction would be 1, and the second rule is used the rest of the 570 * time. 571 * @param number The number being formatted (which will always be 572 * a number between 0 and 1) 573 * @return The rule to use to format this number 574 */ 575const NFRule* 576NFRuleSet::findFractionRuleSetRule(double number) const 577{ 578 // the obvious way to do this (multiply the value being formatted 579 // by each rule's base value until you get an integral result) 580 // doesn't work because of rounding error. This method is more 581 // accurate 582 583 // find the least common multiple of the rules' base values 584 // and multiply this by the number being formatted. This is 585 // all the precision we need, and we can do all of the rest 586 // of the math using integer arithmetic 587 int64_t leastCommonMultiple = rules[0]->getBaseValue(); 588 int64_t numerator; 589 { 590 for (uint32_t i = 1; i < rules.size(); ++i) { 591 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); 592 } 593 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); 594 } 595 // for each rule, do the following... 596 int64_t tempDifference; 597 int64_t difference = util64_fromDouble(uprv_maxMantissa()); 598 int32_t winner = 0; 599 for (uint32_t i = 0; i < rules.size(); ++i) { 600 // "numerator" is the numerator of the fraction if the 601 // denominator is the LCD. The numerator if the rule's 602 // base value is the denominator is "numerator" times the 603 // base value divided bythe LCD. Here we check to see if 604 // that's an integer, and if not, how close it is to being 605 // an integer. 606 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; 607 608 609 // normalize the result of the above calculation: we want 610 // the numerator's distance from the CLOSEST multiple 611 // of the LCD 612 if (leastCommonMultiple - tempDifference < tempDifference) { 613 tempDifference = leastCommonMultiple - tempDifference; 614 } 615 616 // if this is as close as we've come, keep track of how close 617 // that is, and the line number of the rule that did it. If 618 // we've scored a direct hit, we don't have to look at any more 619 // rules 620 if (tempDifference < difference) { 621 difference = tempDifference; 622 winner = i; 623 if (difference == 0) { 624 break; 625 } 626 } 627 } 628 629 // if we have two successive rules that both have the winning base 630 // value, then the first one (the one we found above) is used if 631 // the numerator of the fraction is 1 and the second one is used if 632 // the numerator of the fraction is anything else (this lets us 633 // do things like "one third"/"two thirds" without haveing to define 634 // a whole bunch of extra rule sets) 635 if ((unsigned)(winner + 1) < rules.size() && 636 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { 637 double n = ((double)rules[winner]->getBaseValue()) * number; 638 if (n < 0.5 || n >= 2) { 639 ++winner; 640 } 641 } 642 643 // finally, return the winning rule 644 return rules[winner]; 645} 646 647/** 648 * Parses a string. Matches the string to be parsed against each 649 * of its rules (with a base value less than upperBound) and returns 650 * the value produced by the rule that matched the most charcters 651 * in the source string. 652 * @param text The string to parse 653 * @param parsePosition The initial position is ignored and assumed 654 * to be 0. On exit, this object has been updated to point to the 655 * first character position this rule set didn't consume. 656 * @param upperBound Limits the rules that can be allowed to match. 657 * Only rules whose base values are strictly less than upperBound 658 * are considered. 659 * @return The numerical result of parsing this string. This will 660 * be the matching rule's base value, composed appropriately with 661 * the results of matching any of its substitutions. The object 662 * will be an instance of Long if it's an integral value; otherwise, 663 * it will be an instance of Double. This function always returns 664 * a valid object: If nothing matched the input string at all, 665 * this function returns new Long(0), and the parse position is 666 * left unchanged. 667 */ 668#ifdef RBNF_DEBUG 669#include <stdio.h> 670 671static void dumpUS(FILE* f, const UnicodeString& us) { 672 int len = us.length(); 673 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; 674 if (buf != NULL) { 675 us.extract(0, len, buf); 676 buf[len] = 0; 677 fprintf(f, "%s", buf); 678 uprv_free(buf); //delete[] buf; 679 } 680} 681#endif 682 683UBool 684NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const 685{ 686 // try matching each rule in the rule set against the text being 687 // parsed. Whichever one matches the most characters is the one 688 // that determines the value we return. 689 690 result.setLong(0); 691 692 // dump out if there's no text to parse 693 if (text.length() == 0) { 694 return 0; 695 } 696 697 ParsePosition highWaterMark; 698 ParsePosition workingPos = pos; 699 700#ifdef RBNF_DEBUG 701 fprintf(stderr, "<nfrs> %x '", this); 702 dumpUS(stderr, name); 703 fprintf(stderr, "' text '"); 704 dumpUS(stderr, text); 705 fprintf(stderr, "'\n"); 706 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); 707#endif 708 // Try each of the negative rules, fraction rules, infinity rules and NaN rules 709 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { 710 if (nonNumericalRules[i]) { 711 Formattable tempResult; 712 UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); 713 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { 714 result = tempResult; 715 highWaterMark = workingPos; 716 } 717 workingPos = pos; 718 } 719 } 720#ifdef RBNF_DEBUG 721 fprintf(stderr, "<nfrs> continue other with text '"); 722 dumpUS(stderr, text); 723 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); 724#endif 725 726 // finally, go through the regular rules one at a time. We start 727 // at the end of the list because we want to try matching the most 728 // sigificant rule first (this helps ensure that we parse 729 // "five thousand three hundred six" as 730 // "(five thousand) (three hundred) (six)" rather than 731 // "((five thousand three) hundred) (six)"). Skip rules whose 732 // base values are higher than the upper bound (again, this helps 733 // limit ambiguity by making sure the rules that match a rule's 734 // are less significant than the rule containing the substitutions)/ 735 { 736 int64_t ub = util64_fromDouble(upperBound); 737#ifdef RBNF_DEBUG 738 { 739 char ubstr[64]; 740 util64_toa(ub, ubstr, 64); 741 char ubstrhex[64]; 742 util64_toa(ub, ubstrhex, 64, 16); 743 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); 744 } 745#endif 746 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { 747 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { 748 continue; 749 } 750 Formattable tempResult; 751 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); 752 if (success && workingPos.getIndex() > highWaterMark.getIndex()) { 753 result = tempResult; 754 highWaterMark = workingPos; 755 } 756 workingPos = pos; 757 } 758 } 759#ifdef RBNF_DEBUG 760 fprintf(stderr, "<nfrs> exit\n"); 761#endif 762 // finally, update the parse postion we were passed to point to the 763 // first character we didn't use, and return the result that 764 // corresponds to that string of characters 765 pos = highWaterMark; 766 767 return 1; 768} 769 770void 771NFRuleSet::appendRules(UnicodeString& result) const 772{ 773 uint32_t i; 774 775 // the rule set name goes first... 776 result.append(name); 777 result.append(gColon); 778 result.append(gLineFeed); 779 780 // followed by the regular rules... 781 for (i = 0; i < rules.size(); i++) { 782 rules[i]->_appendRuleText(result); 783 result.append(gLineFeed); 784 } 785 786 // followed by the special rules (if they exist) 787 for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { 788 NFRule *rule = nonNumericalRules[i]; 789 if (nonNumericalRules[i]) { 790 if (rule->getBaseValue() == NFRule::kImproperFractionRule 791 || rule->getBaseValue() == NFRule::kProperFractionRule 792 || rule->getBaseValue() == NFRule::kMasterRule) 793 { 794 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { 795 NFRule *fractionRule = fractionRules[fIdx]; 796 if (fractionRule->getBaseValue() == rule->getBaseValue()) { 797 fractionRule->_appendRuleText(result); 798 result.append(gLineFeed); 799 } 800 } 801 } 802 else { 803 rule->_appendRuleText(result); 804 result.append(gLineFeed); 805 } 806 } 807 } 808} 809 810// utility functions 811 812int64_t util64_fromDouble(double d) { 813 int64_t result = 0; 814 if (!uprv_isNaN(d)) { 815 double mant = uprv_maxMantissa(); 816 if (d < -mant) { 817 d = -mant; 818 } else if (d > mant) { 819 d = mant; 820 } 821 UBool neg = d < 0; 822 if (neg) { 823 d = -d; 824 } 825 result = (int64_t)uprv_floor(d); 826 if (neg) { 827 result = -result; 828 } 829 } 830 return result; 831} 832 833uint64_t util64_pow(uint32_t base, uint16_t exponent) { 834 if (base == 0) { 835 return 0; 836 } 837 uint64_t result = 1; 838 uint64_t pow = base; 839 while (true) { 840 if ((exponent & 1) == 1) { 841 result *= pow; 842 } 843 exponent >>= 1; 844 if (exponent == 0) { 845 break; 846 } 847 pow *= pow; 848 } 849 return result; 850} 851 852static const uint8_t asciiDigits[] = { 853 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 854 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 855 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu, 856 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 857 0x77u, 0x78u, 0x79u, 0x7au, 858}; 859 860static const UChar kUMinus = (UChar)0x002d; 861 862#ifdef RBNF_DEBUG 863static const char kMinus = '-'; 864 865static const uint8_t digitInfo[] = { 866 0, 0, 0, 0, 0, 0, 0, 0, 867 0, 0, 0, 0, 0, 0, 0, 0, 868 0, 0, 0, 0, 0, 0, 0, 0, 869 0, 0, 0, 0, 0, 0, 0, 0, 870 0, 0, 0, 0, 0, 0, 0, 0, 871 0, 0, 0, 0, 0, 0, 0, 0, 872 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u, 873 0x88u, 0x89u, 0, 0, 0, 0, 0, 0, 874 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 875 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 876 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 877 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 878 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u, 879 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u, 880 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u, 881 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0, 882}; 883 884int64_t util64_atoi(const char* str, uint32_t radix) 885{ 886 if (radix > 36) { 887 radix = 36; 888 } else if (radix < 2) { 889 radix = 2; 890 } 891 int64_t lradix = radix; 892 893 int neg = 0; 894 if (*str == kMinus) { 895 ++str; 896 neg = 1; 897 } 898 int64_t result = 0; 899 uint8_t b; 900 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { 901 result *= lradix; 902 result += (int32_t)b; 903 } 904 if (neg) { 905 result = -result; 906 } 907 return result; 908} 909 910int64_t util64_utoi(const UChar* str, uint32_t radix) 911{ 912 if (radix > 36) { 913 radix = 36; 914 } else if (radix < 2) { 915 radix = 2; 916 } 917 int64_t lradix = radix; 918 919 int neg = 0; 920 if (*str == kUMinus) { 921 ++str; 922 neg = 1; 923 } 924 int64_t result = 0; 925 UChar c; 926 uint8_t b; 927 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { 928 result *= lradix; 929 result += (int32_t)b; 930 } 931 if (neg) { 932 result = -result; 933 } 934 return result; 935} 936 937uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) 938{ 939 if (radix > 36) { 940 radix = 36; 941 } else if (radix < 2) { 942 radix = 2; 943 } 944 int64_t base = radix; 945 946 char* p = buf; 947 if (len && (w < 0) && (radix == 10) && !raw) { 948 w = -w; 949 *p++ = kMinus; 950 --len; 951 } else if (len && (w == 0)) { 952 *p++ = (char)raw ? 0 : asciiDigits[0]; 953 --len; 954 } 955 956 while (len && w != 0) { 957 int64_t n = w / base; 958 int64_t m = n * base; 959 int32_t d = (int32_t)(w-m); 960 *p++ = raw ? (char)d : asciiDigits[d]; 961 w = n; 962 --len; 963 } 964 if (len) { 965 *p = 0; // null terminate if room for caller convenience 966 } 967 968 len = p - buf; 969 if (*buf == kMinus) { 970 ++buf; 971 } 972 while (--p > buf) { 973 char c = *p; 974 *p = *buf; 975 *buf = c; 976 ++buf; 977 } 978 979 return len; 980} 981#endif 982 983uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) 984{ 985 if (radix > 36) { 986 radix = 36; 987 } else if (radix < 2) { 988 radix = 2; 989 } 990 int64_t base = radix; 991 992 UChar* p = buf; 993 if (len && (w < 0) && (radix == 10) && !raw) { 994 w = -w; 995 *p++ = kUMinus; 996 --len; 997 } else if (len && (w == 0)) { 998 *p++ = (UChar)raw ? 0 : asciiDigits[0]; 999 --len; 1000 } 1001 1002 while (len && (w != 0)) { 1003 int64_t n = w / base; 1004 int64_t m = n * base; 1005 int32_t d = (int32_t)(w-m); 1006 *p++ = (UChar)(raw ? d : asciiDigits[d]); 1007 w = n; 1008 --len; 1009 } 1010 if (len) { 1011 *p = 0; // null terminate if room for caller convenience 1012 } 1013 1014 len = (uint32_t)(p - buf); 1015 if (*buf == kUMinus) { 1016 ++buf; 1017 } 1018 while (--p > buf) { 1019 UChar c = *p; 1020 *p = *buf; 1021 *buf = c; 1022 ++buf; 1023 } 1024 1025 return len; 1026} 1027 1028 1029U_NAMESPACE_END 1030 1031/* U_HAVE_RBNF */ 1032#endif 1033