1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*   Copyright (C) 1997-2015, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7******************************************************************************
8*   file name:  nfrs.cpp
9*   encoding:   UTF-8
10*   tab size:   8 (not used)
11*   indentation:4
12*
13* Modification history
14* Date        Name      Comments
15* 10/11/2001  Doug      Ported from ICU4J
16*/
17
18#include "nfrs.h"
19
20#if U_HAVE_RBNF
21
22#include "unicode/uchar.h"
23#include "nfrule.h"
24#include "nfrlist.h"
25#include "patternprops.h"
26#include "putilimp.h"
27
28#ifdef RBNF_DEBUG
29#include "cmemory.h"
30#endif
31
32enum {
33    /** -x */
34    NEGATIVE_RULE_INDEX = 0,
35    /** x.x */
36    IMPROPER_FRACTION_RULE_INDEX = 1,
37    /** 0.x */
38    PROPER_FRACTION_RULE_INDEX = 2,
39    /** x.0 */
40    MASTER_RULE_INDEX = 3,
41    /** Inf */
42    INFINITY_RULE_INDEX = 4,
43    /** NaN */
44    NAN_RULE_INDEX = 5,
45    NON_NUMERICAL_RULE_LENGTH = 6
46};
47
48U_NAMESPACE_BEGIN
49
50#if 0
51// euclid's algorithm works with doubles
52// note, doubles only get us up to one quadrillion or so, which
53// isn't as much range as we get with longs.  We probably still
54// want either 64-bit math, or BigInteger.
55
56static int64_t
57util_lcm(int64_t x, int64_t y)
58{
59    x.abs();
60    y.abs();
61
62    if (x == 0 || y == 0) {
63        return 0;
64    } else {
65        do {
66            if (x < y) {
67                int64_t t = x; x = y; y = t;
68            }
69            x -= y * (x/y);
70        } while (x != 0);
71
72        return y;
73    }
74}
75
76#else
77/**
78 * Calculates the least common multiple of x and y.
79 */
80static int64_t
81util_lcm(int64_t x, int64_t y)
82{
83    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
84    // vol. 2, 1st ed., pp. 298-299
85    int64_t x1 = x;
86    int64_t y1 = y;
87
88    int p2 = 0;
89    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
90        ++p2;
91        x1 >>= 1;
92        y1 >>= 1;
93    }
94
95    int64_t t;
96    if ((x1 & 1) == 1) {
97        t = -y1;
98    } else {
99        t = x1;
100    }
101
102    while (t != 0) {
103        while ((t & 1) == 0) {
104            t = t >> 1;
105        }
106        if (t > 0) {
107            x1 = t;
108        } else {
109            y1 = -t;
110        }
111        t = x1 - y1;
112    }
113
114    int64_t gcd = x1 << p2;
115
116    // x * y == gcd(x, y) * lcm(x, y)
117    return x / gcd * y;
118}
119#endif
120
121static const UChar gPercent = 0x0025;
122static const UChar gColon = 0x003a;
123static const UChar gSemicolon = 0x003b;
124static const UChar gLineFeed = 0x000a;
125
126static const UChar gPercentPercent[] =
127{
128    0x25, 0x25, 0
129}; /* "%%" */
130
131static const UChar gNoparse[] =
132{
133    0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
134}; /* "@noparse" */
135
136NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
137  : name()
138  , rules(0)
139  , owner(_owner)
140  , fractionRules()
141  , fIsFractionRuleSet(FALSE)
142  , fIsPublic(FALSE)
143  , fIsParseable(TRUE)
144{
145    for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
146        nonNumericalRules[i] = NULL;
147    }
148
149    if (U_FAILURE(status)) {
150        return;
151    }
152
153    UnicodeString& description = descriptions[index]; // !!! make sure index is valid
154
155    if (description.length() == 0) {
156        // throw new IllegalArgumentException("Empty rule set description");
157        status = U_PARSE_ERROR;
158        return;
159    }
160
161    // if the description begins with a rule set name (the rule set
162    // name can be omitted in formatter descriptions that consist
163    // of only one rule set), copy it out into our "name" member
164    // and delete it from the description
165    if (description.charAt(0) == gPercent) {
166        int32_t pos = description.indexOf(gColon);
167        if (pos == -1) {
168            // throw new IllegalArgumentException("Rule set name doesn't end in colon");
169            status = U_PARSE_ERROR;
170        } else {
171            name.setTo(description, 0, pos);
172            while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
173            }
174            description.remove(0, pos);
175        }
176    } else {
177        name.setTo(UNICODE_STRING_SIMPLE("%default"));
178    }
179
180    if (description.length() == 0) {
181        // throw new IllegalArgumentException("Empty rule set description");
182        status = U_PARSE_ERROR;
183    }
184
185    fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
186
187    if ( name.endsWith(gNoparse,8) ) {
188        fIsParseable = FALSE;
189        name.truncate(name.length()-8); // remove the @noparse from the name
190    }
191
192    // all of the other members of NFRuleSet are initialized
193    // by parseRules()
194}
195
196void
197NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
198{
199    // start by creating a Vector whose elements are Strings containing
200    // the descriptions of the rules (one rule per element).  The rules
201    // are separated by semicolons (there's no escape facility: ALL
202    // semicolons are rule delimiters)
203
204    if (U_FAILURE(status)) {
205        return;
206    }
207
208    // ensure we are starting with an empty rule list
209    rules.deleteAll();
210
211    // dlf - the original code kept a separate description array for no reason,
212    // so I got rid of it.  The loop was too complex so I simplified it.
213
214    UnicodeString currentDescription;
215    int32_t oldP = 0;
216    while (oldP < description.length()) {
217        int32_t p = description.indexOf(gSemicolon, oldP);
218        if (p == -1) {
219            p = description.length();
220        }
221        currentDescription.setTo(description, oldP, p - oldP);
222        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
223        oldP = p + 1;
224    }
225
226    // for rules that didn't specify a base value, their base values
227    // were initialized to 0.  Make another pass through the list and
228    // set all those rules' base values.  We also remove any special
229    // rules from the list and put them into their own member variables
230    int64_t defaultBaseValue = 0;
231
232    // (this isn't a for loop because we might be deleting items from
233    // the vector-- we want to make sure we only increment i when
234    // we _didn't_ delete aything from the vector)
235    int32_t rulesSize = rules.size();
236    for (int32_t i = 0; i < rulesSize; i++) {
237        NFRule* rule = rules[i];
238        int64_t baseValue = rule->getBaseValue();
239
240        if (baseValue == 0) {
241            // if the rule's base value is 0, fill in a default
242            // base value (this will be 1 plus the preceding
243            // rule's base value for regular rule sets, and the
244            // same as the preceding rule's base value in fraction
245            // rule sets)
246            rule->setBaseValue(defaultBaseValue, status);
247        }
248        else {
249            // if it's a regular rule that already knows its base value,
250            // check to make sure the rules are in order, and update
251            // the default base value for the next rule
252            if (baseValue < defaultBaseValue) {
253                // throw new IllegalArgumentException("Rules are not in order");
254                status = U_PARSE_ERROR;
255                return;
256            }
257            defaultBaseValue = baseValue;
258        }
259        if (!fIsFractionRuleSet) {
260            ++defaultBaseValue;
261        }
262    }
263}
264
265/**
266 * Set one of the non-numerical rules.
267 * @param rule The rule to set.
268 */
269void NFRuleSet::setNonNumericalRule(NFRule *rule) {
270    int64_t baseValue = rule->getBaseValue();
271    if (baseValue == NFRule::kNegativeNumberRule) {
272        delete nonNumericalRules[NEGATIVE_RULE_INDEX];
273        nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
274    }
275    else if (baseValue == NFRule::kImproperFractionRule) {
276        setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
277    }
278    else if (baseValue == NFRule::kProperFractionRule) {
279        setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
280    }
281    else if (baseValue == NFRule::kMasterRule) {
282        setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
283    }
284    else if (baseValue == NFRule::kInfinityRule) {
285        delete nonNumericalRules[INFINITY_RULE_INDEX];
286        nonNumericalRules[INFINITY_RULE_INDEX] = rule;
287    }
288    else if (baseValue == NFRule::kNaNRule) {
289        delete nonNumericalRules[NAN_RULE_INDEX];
290        nonNumericalRules[NAN_RULE_INDEX] = rule;
291    }
292}
293
294/**
295 * Determine the best fraction rule to use. Rules matching the decimal point from
296 * DecimalFormatSymbols become the main set of rules to use.
297 * @param originalIndex The index into nonNumericalRules
298 * @param newRule The new rule to consider
299 * @param rememberRule Should the new rule be added to fractionRules.
300 */
301void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
302    if (rememberRule) {
303        fractionRules.add(newRule);
304    }
305    NFRule *bestResult = nonNumericalRules[originalIndex];
306    if (bestResult == NULL) {
307        nonNumericalRules[originalIndex] = newRule;
308    }
309    else {
310        // We have more than one. Which one is better?
311        const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
312        if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
313            == newRule->getDecimalPoint())
314        {
315            nonNumericalRules[originalIndex] = newRule;
316        }
317        // else leave it alone
318    }
319}
320
321NFRuleSet::~NFRuleSet()
322{
323    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
324        if (i != IMPROPER_FRACTION_RULE_INDEX
325            && i != PROPER_FRACTION_RULE_INDEX
326            && i != MASTER_RULE_INDEX)
327        {
328            delete nonNumericalRules[i];
329        }
330        // else it will be deleted via NFRuleList fractionRules
331    }
332}
333
334static UBool
335util_equalRules(const NFRule* rule1, const NFRule* rule2)
336{
337    if (rule1) {
338        if (rule2) {
339            return *rule1 == *rule2;
340        }
341    } else if (!rule2) {
342        return TRUE;
343    }
344    return FALSE;
345}
346
347UBool
348NFRuleSet::operator==(const NFRuleSet& rhs) const
349{
350    if (rules.size() == rhs.rules.size() &&
351        fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
352        name == rhs.name) {
353
354        // ...then compare the non-numerical rule lists...
355        for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
356            if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
357                return FALSE;
358            }
359        }
360
361        // ...then compare the rule lists...
362        for (uint32_t i = 0; i < rules.size(); ++i) {
363            if (*rules[i] != *rhs.rules[i]) {
364                return FALSE;
365            }
366        }
367        return TRUE;
368    }
369    return FALSE;
370}
371
372void
373NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
374    for (uint32_t i = 0; i < rules.size(); ++i) {
375        rules[i]->setDecimalFormatSymbols(newSymbols, status);
376    }
377    // Switch the fraction rules to mirror the DecimalFormatSymbols.
378    for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
379        if (nonNumericalRules[nonNumericalIdx]) {
380            for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
381                NFRule *fractionRule = fractionRules[fIdx];
382                if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
383                    setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
384                }
385            }
386        }
387    }
388
389    for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
390        NFRule *rule = nonNumericalRules[nnrIdx];
391        if (rule) {
392            rule->setDecimalFormatSymbols(newSymbols, status);
393        }
394    }
395}
396
397#define RECURSION_LIMIT 64
398
399void
400NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
401{
402    if (recursionCount >= RECURSION_LIMIT) {
403        // stop recursion
404        status = U_INVALID_STATE_ERROR;
405        return;
406    }
407    const NFRule *rule = findNormalRule(number);
408    if (rule) { // else error, but can't report it
409        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
410    }
411}
412
413void
414NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
415{
416    if (recursionCount >= RECURSION_LIMIT) {
417        // stop recursion
418        status = U_INVALID_STATE_ERROR;
419        return;
420    }
421    const NFRule *rule = findDoubleRule(number);
422    if (rule) { // else error, but can't report it
423        rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
424    }
425}
426
427const NFRule*
428NFRuleSet::findDoubleRule(double number) const
429{
430    // if this is a fraction rule set, use findFractionRuleSetRule()
431    if (isFractionRuleSet()) {
432        return findFractionRuleSetRule(number);
433    }
434
435    if (uprv_isNaN(number)) {
436        const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
437        if (!rule) {
438            rule = owner->getDefaultNaNRule();
439        }
440        return rule;
441    }
442
443    // if the number is negative, return the negative number rule
444    // (if there isn't a negative-number rule, we pretend it's a
445    // positive number)
446    if (number < 0) {
447        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
448            return  nonNumericalRules[NEGATIVE_RULE_INDEX];
449        } else {
450            number = -number;
451        }
452    }
453
454    if (uprv_isInfinite(number)) {
455        const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
456        if (!rule) {
457            rule = owner->getDefaultInfinityRule();
458        }
459        return rule;
460    }
461
462    // if the number isn't an integer, we use one of the fraction rules...
463    if (number != uprv_floor(number)) {
464        // if the number is between 0 and 1, return the proper
465        // fraction rule
466        if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
467            return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
468        }
469        // otherwise, return the improper fraction rule
470        else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
471            return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
472        }
473    }
474
475    // if there's a master rule, use it to format the number
476    if (nonNumericalRules[MASTER_RULE_INDEX]) {
477        return nonNumericalRules[MASTER_RULE_INDEX];
478    }
479
480    // and if we haven't yet returned a rule, use findNormalRule()
481    // to find the applicable rule
482    int64_t r = util64_fromDouble(number + 0.5);
483    return findNormalRule(r);
484}
485
486const NFRule *
487NFRuleSet::findNormalRule(int64_t number) const
488{
489    // if this is a fraction rule set, use findFractionRuleSetRule()
490    // to find the rule (we should only go into this clause if the
491    // value is 0)
492    if (fIsFractionRuleSet) {
493        return findFractionRuleSetRule((double)number);
494    }
495
496    // if the number is negative, return the negative-number rule
497    // (if there isn't one, pretend the number is positive)
498    if (number < 0) {
499        if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
500            return nonNumericalRules[NEGATIVE_RULE_INDEX];
501        } else {
502            number = -number;
503        }
504    }
505
506    // we have to repeat the preceding two checks, even though we
507    // do them in findRule(), because the version of format() that
508    // takes a long bypasses findRule() and goes straight to this
509    // function.  This function does skip the fraction rules since
510    // we know the value is an integer (it also skips the master
511    // rule, since it's considered a fraction rule.  Skipping the
512    // master rule in this function is also how we avoid infinite
513    // recursion)
514
515    // {dlf} unfortunately this fails if there are no rules except
516    // special rules.  If there are no rules, use the master rule.
517
518    // binary-search the rule list for the applicable rule
519    // (a rule is used for all values from its base value to
520    // the next rule's base value)
521    int32_t hi = rules.size();
522    if (hi > 0) {
523        int32_t lo = 0;
524
525        while (lo < hi) {
526            int32_t mid = (lo + hi) / 2;
527            if (rules[mid]->getBaseValue() == number) {
528                return rules[mid];
529            }
530            else if (rules[mid]->getBaseValue() > number) {
531                hi = mid;
532            }
533            else {
534                lo = mid + 1;
535            }
536        }
537        if (hi == 0) { // bad rule set, minimum base > 0
538            return NULL; // want to throw exception here
539        }
540
541        NFRule *result = rules[hi - 1];
542
543        // use shouldRollBack() to see whether we need to invoke the
544        // rollback rule (see shouldRollBack()'s documentation for
545        // an explanation of the rollback rule).  If we do, roll back
546        // one rule and return that one instead of the one we'd normally
547        // return
548        if (result->shouldRollBack(number)) {
549            if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
550                return NULL;
551            }
552            result = rules[hi - 2];
553        }
554        return result;
555    }
556    // else use the master rule
557    return nonNumericalRules[MASTER_RULE_INDEX];
558}
559
560/**
561 * If this rule is a fraction rule set, this function is used by
562 * findRule() to select the most appropriate rule for formatting
563 * the number.  Basically, the base value of each rule in the rule
564 * set is treated as the denominator of a fraction.  Whichever
565 * denominator can produce the fraction closest in value to the
566 * number passed in is the result.  If there's a tie, the earlier
567 * one in the list wins.  (If there are two rules in a row with the
568 * same base value, the first one is used when the numerator of the
569 * fraction would be 1, and the second rule is used the rest of the
570 * time.
571 * @param number The number being formatted (which will always be
572 * a number between 0 and 1)
573 * @return The rule to use to format this number
574 */
575const NFRule*
576NFRuleSet::findFractionRuleSetRule(double number) const
577{
578    // the obvious way to do this (multiply the value being formatted
579    // by each rule's base value until you get an integral result)
580    // doesn't work because of rounding error.  This method is more
581    // accurate
582
583    // find the least common multiple of the rules' base values
584    // and multiply this by the number being formatted.  This is
585    // all the precision we need, and we can do all of the rest
586    // of the math using integer arithmetic
587    int64_t leastCommonMultiple = rules[0]->getBaseValue();
588    int64_t numerator;
589    {
590        for (uint32_t i = 1; i < rules.size(); ++i) {
591            leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
592        }
593        numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
594    }
595    // for each rule, do the following...
596    int64_t tempDifference;
597    int64_t difference = util64_fromDouble(uprv_maxMantissa());
598    int32_t winner = 0;
599    for (uint32_t i = 0; i < rules.size(); ++i) {
600        // "numerator" is the numerator of the fraction if the
601        // denominator is the LCD.  The numerator if the rule's
602        // base value is the denominator is "numerator" times the
603        // base value divided bythe LCD.  Here we check to see if
604        // that's an integer, and if not, how close it is to being
605        // an integer.
606        tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
607
608
609        // normalize the result of the above calculation: we want
610        // the numerator's distance from the CLOSEST multiple
611        // of the LCD
612        if (leastCommonMultiple - tempDifference < tempDifference) {
613            tempDifference = leastCommonMultiple - tempDifference;
614        }
615
616        // if this is as close as we've come, keep track of how close
617        // that is, and the line number of the rule that did it.  If
618        // we've scored a direct hit, we don't have to look at any more
619        // rules
620        if (tempDifference < difference) {
621            difference = tempDifference;
622            winner = i;
623            if (difference == 0) {
624                break;
625            }
626        }
627    }
628
629    // if we have two successive rules that both have the winning base
630    // value, then the first one (the one we found above) is used if
631    // the numerator of the fraction is 1 and the second one is used if
632    // the numerator of the fraction is anything else (this lets us
633    // do things like "one third"/"two thirds" without haveing to define
634    // a whole bunch of extra rule sets)
635    if ((unsigned)(winner + 1) < rules.size() &&
636        rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
637        double n = ((double)rules[winner]->getBaseValue()) * number;
638        if (n < 0.5 || n >= 2) {
639            ++winner;
640        }
641    }
642
643    // finally, return the winning rule
644    return rules[winner];
645}
646
647/**
648 * Parses a string.  Matches the string to be parsed against each
649 * of its rules (with a base value less than upperBound) and returns
650 * the value produced by the rule that matched the most charcters
651 * in the source string.
652 * @param text The string to parse
653 * @param parsePosition The initial position is ignored and assumed
654 * to be 0.  On exit, this object has been updated to point to the
655 * first character position this rule set didn't consume.
656 * @param upperBound Limits the rules that can be allowed to match.
657 * Only rules whose base values are strictly less than upperBound
658 * are considered.
659 * @return The numerical result of parsing this string.  This will
660 * be the matching rule's base value, composed appropriately with
661 * the results of matching any of its substitutions.  The object
662 * will be an instance of Long if it's an integral value; otherwise,
663 * it will be an instance of Double.  This function always returns
664 * a valid object: If nothing matched the input string at all,
665 * this function returns new Long(0), and the parse position is
666 * left unchanged.
667 */
668#ifdef RBNF_DEBUG
669#include <stdio.h>
670
671static void dumpUS(FILE* f, const UnicodeString& us) {
672  int len = us.length();
673  char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
674  if (buf != NULL) {
675	  us.extract(0, len, buf);
676	  buf[len] = 0;
677	  fprintf(f, "%s", buf);
678	  uprv_free(buf); //delete[] buf;
679  }
680}
681#endif
682
683UBool
684NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
685{
686    // try matching each rule in the rule set against the text being
687    // parsed.  Whichever one matches the most characters is the one
688    // that determines the value we return.
689
690    result.setLong(0);
691
692    // dump out if there's no text to parse
693    if (text.length() == 0) {
694        return 0;
695    }
696
697    ParsePosition highWaterMark;
698    ParsePosition workingPos = pos;
699
700#ifdef RBNF_DEBUG
701    fprintf(stderr, "<nfrs> %x '", this);
702    dumpUS(stderr, name);
703    fprintf(stderr, "' text '");
704    dumpUS(stderr, text);
705    fprintf(stderr, "'\n");
706    fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
707#endif
708    // Try each of the negative rules, fraction rules, infinity rules and NaN rules
709    for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
710        if (nonNumericalRules[i]) {
711            Formattable tempResult;
712            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
713            if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
714                result = tempResult;
715                highWaterMark = workingPos;
716            }
717            workingPos = pos;
718        }
719    }
720#ifdef RBNF_DEBUG
721    fprintf(stderr, "<nfrs> continue other with text '");
722    dumpUS(stderr, text);
723    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
724#endif
725
726    // finally, go through the regular rules one at a time.  We start
727    // at the end of the list because we want to try matching the most
728    // sigificant rule first (this helps ensure that we parse
729    // "five thousand three hundred six" as
730    // "(five thousand) (three hundred) (six)" rather than
731    // "((five thousand three) hundred) (six)").  Skip rules whose
732    // base values are higher than the upper bound (again, this helps
733    // limit ambiguity by making sure the rules that match a rule's
734    // are less significant than the rule containing the substitutions)/
735    {
736        int64_t ub = util64_fromDouble(upperBound);
737#ifdef RBNF_DEBUG
738        {
739            char ubstr[64];
740            util64_toa(ub, ubstr, 64);
741            char ubstrhex[64];
742            util64_toa(ub, ubstrhex, 64, 16);
743            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
744        }
745#endif
746        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
747            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
748                continue;
749            }
750            Formattable tempResult;
751            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
752            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
753                result = tempResult;
754                highWaterMark = workingPos;
755            }
756            workingPos = pos;
757        }
758    }
759#ifdef RBNF_DEBUG
760    fprintf(stderr, "<nfrs> exit\n");
761#endif
762    // finally, update the parse postion we were passed to point to the
763    // first character we didn't use, and return the result that
764    // corresponds to that string of characters
765    pos = highWaterMark;
766
767    return 1;
768}
769
770void
771NFRuleSet::appendRules(UnicodeString& result) const
772{
773    uint32_t i;
774
775    // the rule set name goes first...
776    result.append(name);
777    result.append(gColon);
778    result.append(gLineFeed);
779
780    // followed by the regular rules...
781    for (i = 0; i < rules.size(); i++) {
782        rules[i]->_appendRuleText(result);
783        result.append(gLineFeed);
784    }
785
786    // followed by the special rules (if they exist)
787    for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
788        NFRule *rule = nonNumericalRules[i];
789        if (nonNumericalRules[i]) {
790            if (rule->getBaseValue() == NFRule::kImproperFractionRule
791                || rule->getBaseValue() == NFRule::kProperFractionRule
792                || rule->getBaseValue() == NFRule::kMasterRule)
793            {
794                for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
795                    NFRule *fractionRule = fractionRules[fIdx];
796                    if (fractionRule->getBaseValue() == rule->getBaseValue()) {
797                        fractionRule->_appendRuleText(result);
798                        result.append(gLineFeed);
799                    }
800                }
801            }
802            else {
803                rule->_appendRuleText(result);
804                result.append(gLineFeed);
805            }
806        }
807    }
808}
809
810// utility functions
811
812int64_t util64_fromDouble(double d) {
813    int64_t result = 0;
814    if (!uprv_isNaN(d)) {
815        double mant = uprv_maxMantissa();
816        if (d < -mant) {
817            d = -mant;
818        } else if (d > mant) {
819            d = mant;
820        }
821        UBool neg = d < 0;
822        if (neg) {
823            d = -d;
824        }
825        result = (int64_t)uprv_floor(d);
826        if (neg) {
827            result = -result;
828        }
829    }
830    return result;
831}
832
833uint64_t util64_pow(uint32_t base, uint16_t exponent)  {
834    if (base == 0) {
835        return 0;
836    }
837    uint64_t result = 1;
838    uint64_t pow = base;
839    while (true) {
840        if ((exponent & 1) == 1) {
841            result *= pow;
842        }
843        exponent >>= 1;
844        if (exponent == 0) {
845            break;
846        }
847        pow *= pow;
848    }
849    return result;
850}
851
852static const uint8_t asciiDigits[] = {
853    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
854    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
855    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
856    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
857    0x77u, 0x78u, 0x79u, 0x7au,
858};
859
860static const UChar kUMinus = (UChar)0x002d;
861
862#ifdef RBNF_DEBUG
863static const char kMinus = '-';
864
865static const uint8_t digitInfo[] = {
866        0,     0,     0,     0,     0,     0,     0,     0,
867        0,     0,     0,     0,     0,     0,     0,     0,
868        0,     0,     0,     0,     0,     0,     0,     0,
869        0,     0,     0,     0,     0,     0,     0,     0,
870        0,     0,     0,     0,     0,     0,     0,     0,
871        0,     0,     0,     0,     0,     0,     0,     0,
872    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
873    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
874        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
875    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
876    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
877    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
878        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
879    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
880    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
881    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
882};
883
884int64_t util64_atoi(const char* str, uint32_t radix)
885{
886    if (radix > 36) {
887        radix = 36;
888    } else if (radix < 2) {
889        radix = 2;
890    }
891    int64_t lradix = radix;
892
893    int neg = 0;
894    if (*str == kMinus) {
895        ++str;
896        neg = 1;
897    }
898    int64_t result = 0;
899    uint8_t b;
900    while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
901        result *= lradix;
902        result += (int32_t)b;
903    }
904    if (neg) {
905        result = -result;
906    }
907    return result;
908}
909
910int64_t util64_utoi(const UChar* str, uint32_t radix)
911{
912    if (radix > 36) {
913        radix = 36;
914    } else if (radix < 2) {
915        radix = 2;
916    }
917    int64_t lradix = radix;
918
919    int neg = 0;
920    if (*str == kUMinus) {
921        ++str;
922        neg = 1;
923    }
924    int64_t result = 0;
925    UChar c;
926    uint8_t b;
927    while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
928        result *= lradix;
929        result += (int32_t)b;
930    }
931    if (neg) {
932        result = -result;
933    }
934    return result;
935}
936
937uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
938{
939    if (radix > 36) {
940        radix = 36;
941    } else if (radix < 2) {
942        radix = 2;
943    }
944    int64_t base = radix;
945
946    char* p = buf;
947    if (len && (w < 0) && (radix == 10) && !raw) {
948        w = -w;
949        *p++ = kMinus;
950        --len;
951    } else if (len && (w == 0)) {
952        *p++ = (char)raw ? 0 : asciiDigits[0];
953        --len;
954    }
955
956    while (len && w != 0) {
957        int64_t n = w / base;
958        int64_t m = n * base;
959        int32_t d = (int32_t)(w-m);
960        *p++ = raw ? (char)d : asciiDigits[d];
961        w = n;
962        --len;
963    }
964    if (len) {
965        *p = 0; // null terminate if room for caller convenience
966    }
967
968    len = p - buf;
969    if (*buf == kMinus) {
970        ++buf;
971    }
972    while (--p > buf) {
973        char c = *p;
974        *p = *buf;
975        *buf = c;
976        ++buf;
977    }
978
979    return len;
980}
981#endif
982
983uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
984{
985    if (radix > 36) {
986        radix = 36;
987    } else if (radix < 2) {
988        radix = 2;
989    }
990    int64_t base = radix;
991
992    UChar* p = buf;
993    if (len && (w < 0) && (radix == 10) && !raw) {
994        w = -w;
995        *p++ = kUMinus;
996        --len;
997    } else if (len && (w == 0)) {
998        *p++ = (UChar)raw ? 0 : asciiDigits[0];
999        --len;
1000    }
1001
1002    while (len && (w != 0)) {
1003        int64_t n = w / base;
1004        int64_t m = n * base;
1005        int32_t d = (int32_t)(w-m);
1006        *p++ = (UChar)(raw ? d : asciiDigits[d]);
1007        w = n;
1008        --len;
1009    }
1010    if (len) {
1011        *p = 0; // null terminate if room for caller convenience
1012    }
1013
1014    len = (uint32_t)(p - buf);
1015    if (*buf == kUMinus) {
1016        ++buf;
1017    }
1018    while (--p > buf) {
1019        UChar c = *p;
1020        *p = *buf;
1021        *buf = c;
1022        ++buf;
1023    }
1024
1025    return len;
1026}
1027
1028
1029U_NAMESPACE_END
1030
1031/* U_HAVE_RBNF */
1032#endif
1033