1/*
2******************************************************************************
3*   Copyright (C) 1997-2008, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5******************************************************************************
6*   file name:  nfrs.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11* Modification history
12* Date        Name      Comments
13* 10/11/2001  Doug      Ported from ICU4J
14*/
15
16#include "nfrs.h"
17
18#if U_HAVE_RBNF
19
20#include "unicode/uchar.h"
21#include "nfrule.h"
22#include "nfrlist.h"
23
24#ifdef RBNF_DEBUG
25#include "cmemory.h"
26#endif
27
28#include "util.h"
29
30U_NAMESPACE_BEGIN
31
32#if 0
33// euclid's algorithm works with doubles
34// note, doubles only get us up to one quadrillion or so, which
35// isn't as much range as we get with longs.  We probably still
36// want either 64-bit math, or BigInteger.
37
38static int64_t
39util_lcm(int64_t x, int64_t y)
40{
41    x.abs();
42    y.abs();
43
44    if (x == 0 || y == 0) {
45        return 0;
46    } else {
47        do {
48            if (x < y) {
49                int64_t t = x; x = y; y = t;
50            }
51            x -= y * (x/y);
52        } while (x != 0);
53
54        return y;
55    }
56}
57
58#else
59/**
60 * Calculates the least common multiple of x and y.
61 */
62static int64_t
63util_lcm(int64_t x, int64_t y)
64{
65    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
66    // vol. 2, 1st ed., pp. 298-299
67    int64_t x1 = x;
68    int64_t y1 = y;
69
70    int p2 = 0;
71    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
72        ++p2;
73        x1 >>= 1;
74        y1 >>= 1;
75    }
76
77    int64_t t;
78    if ((x1 & 1) == 1) {
79        t = -y1;
80    } else {
81        t = x1;
82    }
83
84    while (t != 0) {
85        while ((t & 1) == 0) {
86            t = t >> 1;
87        }
88        if (t > 0) {
89            x1 = t;
90        } else {
91            y1 = -t;
92        }
93        t = x1 - y1;
94    }
95
96    int64_t gcd = x1 << p2;
97
98    // x * y == gcd(x, y) * lcm(x, y)
99    return x / gcd * y;
100}
101#endif
102
103static const UChar gPercent = 0x0025;
104static const UChar gColon = 0x003a;
105static const UChar gSemicolon = 0x003b;
106static const UChar gLineFeed = 0x000a;
107
108static const UChar gFourSpaces[] =
109{
110    0x20, 0x20, 0x20, 0x20, 0
111}; /* "    " */
112static const UChar gPercentPercent[] =
113{
114    0x25, 0x25, 0
115}; /* "%%" */
116
117NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
118  : name()
119  , rules(0)
120  , negativeNumberRule(NULL)
121  , fIsFractionRuleSet(FALSE)
122  , fIsPublic(FALSE)
123  , fRecursionCount(0)
124{
125    for (int i = 0; i < 3; ++i) {
126        fractionRules[i] = NULL;
127    }
128
129    if (U_FAILURE(status)) {
130        return;
131    }
132
133    UnicodeString& description = descriptions[index]; // !!! make sure index is valid
134
135    if (description.length() == 0) {
136        // throw new IllegalArgumentException("Empty rule set description");
137        status = U_PARSE_ERROR;
138        return;
139    }
140
141    // if the description begins with a rule set name (the rule set
142    // name can be omitted in formatter descriptions that consist
143    // of only one rule set), copy it out into our "name" member
144    // and delete it from the description
145    if (description.charAt(0) == gPercent) {
146        int32_t pos = description.indexOf(gColon);
147        if (pos == -1) {
148            // throw new IllegalArgumentException("Rule set name doesn't end in colon");
149            status = U_PARSE_ERROR;
150        } else {
151            name.setTo(description, 0, pos);
152            while (pos < description.length() && uprv_isRuleWhiteSpace(description.charAt(++pos))) {
153            }
154            description.remove(0, pos);
155        }
156    } else {
157        name.setTo(UNICODE_STRING_SIMPLE("%default"));
158    }
159
160    if (description.length() == 0) {
161        // throw new IllegalArgumentException("Empty rule set description");
162        status = U_PARSE_ERROR;
163    }
164
165    fIsPublic = name.indexOf(gPercentPercent) != 0;
166
167    // all of the other members of NFRuleSet are initialized
168    // by parseRules()
169}
170
171void
172NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
173{
174    // start by creating a Vector whose elements are Strings containing
175    // the descriptions of the rules (one rule per element).  The rules
176    // are separated by semicolons (there's no escape facility: ALL
177    // semicolons are rule delimiters)
178
179    if (U_FAILURE(status)) {
180        return;
181    }
182
183    // dlf - the original code kept a separate description array for no reason,
184    // so I got rid of it.  The loop was too complex so I simplified it.
185
186    UnicodeString currentDescription;
187    int32_t oldP = 0;
188    while (oldP < description.length()) {
189        int32_t p = description.indexOf(gSemicolon, oldP);
190        if (p == -1) {
191            p = description.length();
192        }
193        currentDescription.setTo(description, oldP, p - oldP);
194        NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
195        oldP = p + 1;
196    }
197
198    // for rules that didn't specify a base value, their base values
199    // were initialized to 0.  Make another pass through the list and
200    // set all those rules' base values.  We also remove any special
201    // rules from the list and put them into their own member variables
202    int64_t defaultBaseValue = 0;
203
204    // (this isn't a for loop because we might be deleting items from
205    // the vector-- we want to make sure we only increment i when
206    // we _didn't_ delete aything from the vector)
207    uint32_t i = 0;
208    while (i < rules.size()) {
209        NFRule* rule = rules[i];
210
211        switch (rule->getType()) {
212            // if the rule's base value is 0, fill in a default
213            // base value (this will be 1 plus the preceding
214            // rule's base value for regular rule sets, and the
215            // same as the preceding rule's base value in fraction
216            // rule sets)
217        case NFRule::kNoBase:
218            rule->setBaseValue(defaultBaseValue, status);
219            if (!isFractionRuleSet()) {
220                ++defaultBaseValue;
221            }
222            ++i;
223            break;
224
225            // if it's the negative-number rule, copy it into its own
226            // data member and delete it from the list
227        case NFRule::kNegativeNumberRule:
228            negativeNumberRule = rules.remove(i);
229            break;
230
231            // if it's the improper fraction rule, copy it into the
232            // correct element of fractionRules
233        case NFRule::kImproperFractionRule:
234            fractionRules[0] = rules.remove(i);
235            break;
236
237            // if it's the proper fraction rule, copy it into the
238            // correct element of fractionRules
239        case NFRule::kProperFractionRule:
240            fractionRules[1] = rules.remove(i);
241            break;
242
243            // if it's the master rule, copy it into the
244            // correct element of fractionRules
245        case NFRule::kMasterRule:
246            fractionRules[2] = rules.remove(i);
247            break;
248
249            // if it's a regular rule that already knows its base value,
250            // check to make sure the rules are in order, and update
251            // the default base value for the next rule
252        default:
253            if (rule->getBaseValue() < defaultBaseValue) {
254                // throw new IllegalArgumentException("Rules are not in order");
255                status = U_PARSE_ERROR;
256                return;
257            }
258            defaultBaseValue = rule->getBaseValue();
259            if (!isFractionRuleSet()) {
260                ++defaultBaseValue;
261            }
262            ++i;
263            break;
264        }
265    }
266}
267
268NFRuleSet::~NFRuleSet()
269{
270    delete negativeNumberRule;
271    delete fractionRules[0];
272    delete fractionRules[1];
273    delete fractionRules[2];
274}
275
276static UBool
277util_equalRules(const NFRule* rule1, const NFRule* rule2)
278{
279    if (rule1) {
280        if (rule2) {
281            return *rule1 == *rule2;
282        }
283    } else if (!rule2) {
284        return TRUE;
285    }
286    return FALSE;
287}
288
289UBool
290NFRuleSet::operator==(const NFRuleSet& rhs) const
291{
292    if (rules.size() == rhs.rules.size() &&
293        fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
294        name == rhs.name &&
295        util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
296        util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
297        util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
298        util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
299
300        for (uint32_t i = 0; i < rules.size(); ++i) {
301            if (*rules[i] != *rhs.rules[i]) {
302                return FALSE;
303            }
304        }
305        return TRUE;
306    }
307    return FALSE;
308}
309
310#define RECURSION_LIMIT 50
311
312void
313NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
314{
315    NFRule *rule = findNormalRule(number);
316    if (rule) { // else error, but can't report it
317        NFRuleSet* ncThis = (NFRuleSet*)this;
318        if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
319            // stop recursion
320            ncThis->fRecursionCount = 0;
321        } else {
322            rule->doFormat(number, toAppendTo, pos);
323            ncThis->fRecursionCount--;
324        }
325    }
326}
327
328void
329NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
330{
331    NFRule *rule = findDoubleRule(number);
332    if (rule) { // else error, but can't report it
333        NFRuleSet* ncThis = (NFRuleSet*)this;
334        if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
335            // stop recursion
336            ncThis->fRecursionCount = 0;
337        } else {
338            rule->doFormat(number, toAppendTo, pos);
339            ncThis->fRecursionCount--;
340        }
341    }
342}
343
344NFRule*
345NFRuleSet::findDoubleRule(double number) const
346{
347    // if this is a fraction rule set, use findFractionRuleSetRule()
348    if (isFractionRuleSet()) {
349        return findFractionRuleSetRule(number);
350    }
351
352    // if the number is negative, return the negative number rule
353    // (if there isn't a negative-number rule, we pretend it's a
354    // positive number)
355    if (number < 0) {
356        if (negativeNumberRule) {
357            return  negativeNumberRule;
358        } else {
359            number = -number;
360        }
361    }
362
363    // if the number isn't an integer, we use one of the fraction rules...
364    if (number != uprv_floor(number)) {
365        // if the number is between 0 and 1, return the proper
366        // fraction rule
367        if (number < 1 && fractionRules[1]) {
368            return fractionRules[1];
369        }
370        // otherwise, return the improper fraction rule
371        else if (fractionRules[0]) {
372            return fractionRules[0];
373        }
374    }
375
376    // if there's a master rule, use it to format the number
377    if (fractionRules[2]) {
378        return fractionRules[2];
379    }
380
381    // and if we haven't yet returned a rule, use findNormalRule()
382    // to find the applicable rule
383    int64_t r = util64_fromDouble(number + 0.5);
384    return findNormalRule(r);
385}
386
387NFRule *
388NFRuleSet::findNormalRule(int64_t number) const
389{
390    // if this is a fraction rule set, use findFractionRuleSetRule()
391    // to find the rule (we should only go into this clause if the
392    // value is 0)
393    if (fIsFractionRuleSet) {
394        return findFractionRuleSetRule((double)number);
395    }
396
397    // if the number is negative, return the negative-number rule
398    // (if there isn't one, pretend the number is positive)
399    if (number < 0) {
400        if (negativeNumberRule) {
401            return negativeNumberRule;
402        } else {
403            number = -number;
404        }
405    }
406
407    // we have to repeat the preceding two checks, even though we
408    // do them in findRule(), because the version of format() that
409    // takes a long bypasses findRule() and goes straight to this
410    // function.  This function does skip the fraction rules since
411    // we know the value is an integer (it also skips the master
412    // rule, since it's considered a fraction rule.  Skipping the
413    // master rule in this function is also how we avoid infinite
414    // recursion)
415
416    // {dlf} unfortunately this fails if there are no rules except
417    // special rules.  If there are no rules, use the master rule.
418
419    // binary-search the rule list for the applicable rule
420    // (a rule is used for all values from its base value to
421    // the next rule's base value)
422    int32_t hi = rules.size();
423    if (hi > 0) {
424        int32_t lo = 0;
425
426        while (lo < hi) {
427            int32_t mid = (lo + hi) / 2;
428            if (rules[mid]->getBaseValue() == number) {
429                return rules[mid];
430            }
431            else if (rules[mid]->getBaseValue() > number) {
432                hi = mid;
433            }
434            else {
435                lo = mid + 1;
436            }
437        }
438        if (hi == 0) { // bad rule set, minimum base > 0
439            return NULL; // want to throw exception here
440        }
441
442        NFRule *result = rules[hi - 1];
443
444        // use shouldRollBack() to see whether we need to invoke the
445        // rollback rule (see shouldRollBack()'s documentation for
446        // an explanation of the rollback rule).  If we do, roll back
447        // one rule and return that one instead of the one we'd normally
448        // return
449        if (result->shouldRollBack((double)number)) {
450            if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
451                return NULL;
452            }
453            result = rules[hi - 2];
454        }
455        return result;
456    }
457    // else use the master rule
458    return fractionRules[2];
459}
460
461/**
462 * If this rule is a fraction rule set, this function is used by
463 * findRule() to select the most appropriate rule for formatting
464 * the number.  Basically, the base value of each rule in the rule
465 * set is treated as the denominator of a fraction.  Whichever
466 * denominator can produce the fraction closest in value to the
467 * number passed in is the result.  If there's a tie, the earlier
468 * one in the list wins.  (If there are two rules in a row with the
469 * same base value, the first one is used when the numerator of the
470 * fraction would be 1, and the second rule is used the rest of the
471 * time.
472 * @param number The number being formatted (which will always be
473 * a number between 0 and 1)
474 * @return The rule to use to format this number
475 */
476NFRule*
477NFRuleSet::findFractionRuleSetRule(double number) const
478{
479    // the obvious way to do this (multiply the value being formatted
480    // by each rule's base value until you get an integral result)
481    // doesn't work because of rounding error.  This method is more
482    // accurate
483
484    // find the least common multiple of the rules' base values
485    // and multiply this by the number being formatted.  This is
486    // all the precision we need, and we can do all of the rest
487    // of the math using integer arithmetic
488    int64_t leastCommonMultiple = rules[0]->getBaseValue();
489    int64_t numerator;
490    {
491        for (uint32_t i = 1; i < rules.size(); ++i) {
492            leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
493        }
494        numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
495    }
496    // for each rule, do the following...
497    int64_t tempDifference;
498    int64_t difference = util64_fromDouble(uprv_maxMantissa());
499    int32_t winner = 0;
500    for (uint32_t i = 0; i < rules.size(); ++i) {
501        // "numerator" is the numerator of the fraction if the
502        // denominator is the LCD.  The numerator if the rule's
503        // base value is the denominator is "numerator" times the
504        // base value divided bythe LCD.  Here we check to see if
505        // that's an integer, and if not, how close it is to being
506        // an integer.
507        tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
508
509
510        // normalize the result of the above calculation: we want
511        // the numerator's distance from the CLOSEST multiple
512        // of the LCD
513        if (leastCommonMultiple - tempDifference < tempDifference) {
514            tempDifference = leastCommonMultiple - tempDifference;
515        }
516
517        // if this is as close as we've come, keep track of how close
518        // that is, and the line number of the rule that did it.  If
519        // we've scored a direct hit, we don't have to look at any more
520        // rules
521        if (tempDifference < difference) {
522            difference = tempDifference;
523            winner = i;
524            if (difference == 0) {
525                break;
526            }
527        }
528    }
529
530    // if we have two successive rules that both have the winning base
531    // value, then the first one (the one we found above) is used if
532    // the numerator of the fraction is 1 and the second one is used if
533    // the numerator of the fraction is anything else (this lets us
534    // do things like "one third"/"two thirds" without haveing to define
535    // a whole bunch of extra rule sets)
536    if ((unsigned)(winner + 1) < rules.size() &&
537        rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
538        double n = ((double)rules[winner]->getBaseValue()) * number;
539        if (n < 0.5 || n >= 2) {
540            ++winner;
541        }
542    }
543
544    // finally, return the winning rule
545    return rules[winner];
546}
547
548/**
549 * Parses a string.  Matches the string to be parsed against each
550 * of its rules (with a base value less than upperBound) and returns
551 * the value produced by the rule that matched the most charcters
552 * in the source string.
553 * @param text The string to parse
554 * @param parsePosition The initial position is ignored and assumed
555 * to be 0.  On exit, this object has been updated to point to the
556 * first character position this rule set didn't consume.
557 * @param upperBound Limits the rules that can be allowed to match.
558 * Only rules whose base values are strictly less than upperBound
559 * are considered.
560 * @return The numerical result of parsing this string.  This will
561 * be the matching rule's base value, composed appropriately with
562 * the results of matching any of its substitutions.  The object
563 * will be an instance of Long if it's an integral value; otherwise,
564 * it will be an instance of Double.  This function always returns
565 * a valid object: If nothing matched the input string at all,
566 * this function returns new Long(0), and the parse position is
567 * left unchanged.
568 */
569#ifdef RBNF_DEBUG
570#include <stdio.h>
571
572static void dumpUS(FILE* f, const UnicodeString& us) {
573  int len = us.length();
574  char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
575  if (buf != NULL) {
576	  us.extract(0, len, buf);
577	  buf[len] = 0;
578	  fprintf(f, "%s", buf);
579	  uprv_free(buf); //delete[] buf;
580  }
581}
582#endif
583
584UBool
585NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
586{
587    // try matching each rule in the rule set against the text being
588    // parsed.  Whichever one matches the most characters is the one
589    // that determines the value we return.
590
591    result.setLong(0);
592
593    // dump out if there's no text to parse
594    if (text.length() == 0) {
595        return 0;
596    }
597
598    ParsePosition highWaterMark;
599    ParsePosition workingPos = pos;
600
601#ifdef RBNF_DEBUG
602    fprintf(stderr, "<nfrs> %x '", this);
603    dumpUS(stderr, name);
604    fprintf(stderr, "' text '");
605    dumpUS(stderr, text);
606    fprintf(stderr, "'\n");
607    fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
608#endif
609
610    // start by trying the negative number rule (if there is one)
611    if (negativeNumberRule) {
612        Formattable tempResult;
613#ifdef RBNF_DEBUG
614        fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
615#endif
616        UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
617#ifdef RBNF_DEBUG
618        fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
619#endif
620        if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
621            result = tempResult;
622            highWaterMark = workingPos;
623        }
624        workingPos = pos;
625    }
626#ifdef RBNF_DEBUG
627    fprintf(stderr, "<nfrs> continue fractional with text '");
628    dumpUS(stderr, text);
629    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
630#endif
631    // then try each of the fraction rules
632    {
633        for (int i = 0; i < 3; i++) {
634            if (fractionRules[i]) {
635                Formattable tempResult;
636                UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
637                if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
638                    result = tempResult;
639                    highWaterMark = workingPos;
640                }
641                workingPos = pos;
642            }
643        }
644    }
645#ifdef RBNF_DEBUG
646    fprintf(stderr, "<nfrs> continue other with text '");
647    dumpUS(stderr, text);
648    fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
649#endif
650
651    // finally, go through the regular rules one at a time.  We start
652    // at the end of the list because we want to try matching the most
653    // sigificant rule first (this helps ensure that we parse
654    // "five thousand three hundred six" as
655    // "(five thousand) (three hundred) (six)" rather than
656    // "((five thousand three) hundred) (six)").  Skip rules whose
657    // base values are higher than the upper bound (again, this helps
658    // limit ambiguity by making sure the rules that match a rule's
659    // are less significant than the rule containing the substitutions)/
660    {
661        int64_t ub = util64_fromDouble(upperBound);
662#ifdef RBNF_DEBUG
663        {
664            char ubstr[64];
665            util64_toa(ub, ubstr, 64);
666            char ubstrhex[64];
667            util64_toa(ub, ubstrhex, 64, 16);
668            fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
669        }
670#endif
671        for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
672            if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
673                continue;
674            }
675            Formattable tempResult;
676            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
677            if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
678                result = tempResult;
679                highWaterMark = workingPos;
680            }
681            workingPos = pos;
682        }
683    }
684#ifdef RBNF_DEBUG
685    fprintf(stderr, "<nfrs> exit\n");
686#endif
687    // finally, update the parse postion we were passed to point to the
688    // first character we didn't use, and return the result that
689    // corresponds to that string of characters
690    pos = highWaterMark;
691
692    return 1;
693}
694
695void
696NFRuleSet::appendRules(UnicodeString& result) const
697{
698    // the rule set name goes first...
699    result.append(name);
700    result.append(gColon);
701    result.append(gLineFeed);
702
703    // followed by the regular rules...
704    for (uint32_t i = 0; i < rules.size(); i++) {
705        result.append(gFourSpaces);
706        rules[i]->_appendRuleText(result);
707        result.append(gLineFeed);
708    }
709
710    // followed by the special rules (if they exist)
711    if (negativeNumberRule) {
712        result.append(gFourSpaces);
713        negativeNumberRule->_appendRuleText(result);
714        result.append(gLineFeed);
715    }
716
717    {
718        for (uint32_t i = 0; i < 3; ++i) {
719            if (fractionRules[i]) {
720                result.append(gFourSpaces);
721                fractionRules[i]->_appendRuleText(result);
722                result.append(gLineFeed);
723            }
724        }
725    }
726}
727
728// utility functions
729
730int64_t util64_fromDouble(double d) {
731    int64_t result = 0;
732    if (!uprv_isNaN(d)) {
733        double mant = uprv_maxMantissa();
734        if (d < -mant) {
735            d = -mant;
736        } else if (d > mant) {
737            d = mant;
738        }
739        UBool neg = d < 0;
740        if (neg) {
741            d = -d;
742        }
743        result = (int64_t)uprv_floor(d);
744        if (neg) {
745            result = -result;
746        }
747    }
748    return result;
749}
750
751int64_t util64_pow(int32_t r, uint32_t e)  {
752    if (r == 0) {
753        return 0;
754    } else if (e == 0) {
755        return 1;
756    } else {
757        int64_t n = r;
758        while (--e > 0) {
759            n *= r;
760        }
761        return n;
762    }
763}
764
765static const uint8_t asciiDigits[] = {
766    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
767    0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
768    0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
769    0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
770    0x77u, 0x78u, 0x79u, 0x7au,
771};
772
773static const UChar kUMinus = (UChar)0x002d;
774
775#ifdef RBNF_DEBUG
776static const char kMinus = '-';
777
778static const uint8_t digitInfo[] = {
779        0,     0,     0,     0,     0,     0,     0,     0,
780        0,     0,     0,     0,     0,     0,     0,     0,
781        0,     0,     0,     0,     0,     0,     0,     0,
782        0,     0,     0,     0,     0,     0,     0,     0,
783        0,     0,     0,     0,     0,     0,     0,     0,
784        0,     0,     0,     0,     0,     0,     0,     0,
785    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
786    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
787        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
788    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
789    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
790    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
791        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
792    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
793    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
794    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
795};
796
797int64_t util64_atoi(const char* str, uint32_t radix)
798{
799    if (radix > 36) {
800        radix = 36;
801    } else if (radix < 2) {
802        radix = 2;
803    }
804    int64_t lradix = radix;
805
806    int neg = 0;
807    if (*str == kMinus) {
808        ++str;
809        neg = 1;
810    }
811    int64_t result = 0;
812    uint8_t b;
813    while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
814        result *= lradix;
815        result += (int32_t)b;
816    }
817    if (neg) {
818        result = -result;
819    }
820    return result;
821}
822
823int64_t util64_utoi(const UChar* str, uint32_t radix)
824{
825    if (radix > 36) {
826        radix = 36;
827    } else if (radix < 2) {
828        radix = 2;
829    }
830    int64_t lradix = radix;
831
832    int neg = 0;
833    if (*str == kUMinus) {
834        ++str;
835        neg = 1;
836    }
837    int64_t result = 0;
838    UChar c;
839    uint8_t b;
840    while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
841        result *= lradix;
842        result += (int32_t)b;
843    }
844    if (neg) {
845        result = -result;
846    }
847    return result;
848}
849
850uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
851{
852    if (radix > 36) {
853        radix = 36;
854    } else if (radix < 2) {
855        radix = 2;
856    }
857    int64_t base = radix;
858
859    char* p = buf;
860    if (len && (w < 0) && (radix == 10) && !raw) {
861        w = -w;
862        *p++ = kMinus;
863        --len;
864    } else if (len && (w == 0)) {
865        *p++ = (char)raw ? 0 : asciiDigits[0];
866        --len;
867    }
868
869    while (len && w != 0) {
870        int64_t n = w / base;
871        int64_t m = n * base;
872        int32_t d = (int32_t)(w-m);
873        *p++ = raw ? (char)d : asciiDigits[d];
874        w = n;
875        --len;
876    }
877    if (len) {
878        *p = 0; // null terminate if room for caller convenience
879    }
880
881    len = p - buf;
882    if (*buf == kMinus) {
883        ++buf;
884    }
885    while (--p > buf) {
886        char c = *p;
887        *p = *buf;
888        *buf = c;
889        ++buf;
890    }
891
892    return len;
893}
894#endif
895
896uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
897{
898    if (radix > 36) {
899        radix = 36;
900    } else if (radix < 2) {
901        radix = 2;
902    }
903    int64_t base = radix;
904
905    UChar* p = buf;
906    if (len && (w < 0) && (radix == 10) && !raw) {
907        w = -w;
908        *p++ = kUMinus;
909        --len;
910    } else if (len && (w == 0)) {
911        *p++ = (UChar)raw ? 0 : asciiDigits[0];
912        --len;
913    }
914
915    while (len && (w != 0)) {
916        int64_t n = w / base;
917        int64_t m = n * base;
918        int32_t d = (int32_t)(w-m);
919        *p++ = (UChar)(raw ? d : asciiDigits[d]);
920        w = n;
921        --len;
922    }
923    if (len) {
924        *p = 0; // null terminate if room for caller convenience
925    }
926
927    len = (uint32_t)(p - buf);
928    if (*buf == kUMinus) {
929        ++buf;
930    }
931    while (--p > buf) {
932        UChar c = *p;
933        *p = *buf;
934        *buf = c;
935        ++buf;
936    }
937
938    return len;
939}
940
941
942U_NAMESPACE_END
943
944/* U_HAVE_RBNF */
945#endif
946
947