1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
7
8#include "resource.h"
9#include "number_compact.h"
10#include "unicode/ustring.h"
11#include "unicode/ures.h"
12#include "cstring.h"
13#include "charstr.h"
14#include "uresimp.h"
15
16using namespace icu;
17using namespace icu::number;
18using namespace icu::number::impl;
19
20namespace {
21
22// A dummy object used when a "0" compact decimal entry is encountered. This is necessary
23// in order to prevent falling back to root. Object equality ("==") is intended.
24const UChar *USE_FALLBACK = u"<USE FALLBACK>";
25
26/** Produces a string like "NumberElements/latn/patternsShort/decimalFormat". */
27void getResourceBundleKey(const char *nsName, CompactStyle compactStyle, CompactType compactType,
28                                 CharString &sb, UErrorCode &status) {
29    sb.clear();
30    sb.append("NumberElements/", status);
31    sb.append(nsName, status);
32    sb.append(compactStyle == CompactStyle::UNUM_SHORT ? "/patternsShort" : "/patternsLong", status);
33    sb.append(compactType == CompactType::TYPE_DECIMAL ? "/decimalFormat" : "/currencyFormat", status);
34}
35
36int32_t getIndex(int32_t magnitude, StandardPlural::Form plural) {
37    return magnitude * StandardPlural::COUNT + plural;
38}
39
40int32_t countZeros(const UChar *patternString, int32_t patternLength) {
41    // NOTE: This strategy for computing the number of zeros is a hack for efficiency.
42    // It could break if there are any 0s that aren't part of the main pattern.
43    int32_t numZeros = 0;
44    for (int32_t i = 0; i < patternLength; i++) {
45        if (patternString[i] == u'0') {
46            numZeros++;
47        } else if (numZeros > 0) {
48            break; // zeros should always be contiguous
49        }
50    }
51    return numZeros;
52}
53
54} // namespace
55
56// NOTE: patterns and multipliers both get zero-initialized.
57CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(TRUE) {
58}
59
60void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle,
61                           CompactType compactType, UErrorCode &status) {
62    CompactDataSink sink(*this);
63    LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &status));
64    if (U_FAILURE(status)) { return; }
65
66    bool nsIsLatn = strcmp(nsName, "latn") == 0;
67    bool compactIsShort = compactStyle == CompactStyle::UNUM_SHORT;
68
69    // Fall back to latn numbering system and/or short compact style.
70    CharString resourceKey;
71    getResourceBundleKey(nsName, compactStyle, compactType, resourceKey, status);
72    UErrorCode localStatus = U_ZERO_ERROR;
73    ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
74    if (isEmpty && !nsIsLatn) {
75        getResourceBundleKey("latn", compactStyle, compactType, resourceKey, status);
76        localStatus = U_ZERO_ERROR;
77        ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
78    }
79    if (isEmpty && !compactIsShort) {
80        getResourceBundleKey(nsName, CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
81        localStatus = U_ZERO_ERROR;
82        ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
83    }
84    if (isEmpty && !nsIsLatn && !compactIsShort) {
85        getResourceBundleKey("latn", CompactStyle::UNUM_SHORT, compactType, resourceKey, status);
86        localStatus = U_ZERO_ERROR;
87        ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);
88    }
89
90    // The last fallback should be guaranteed to return data.
91    if (isEmpty) {
92        status = U_INTERNAL_PROGRAM_ERROR;
93    }
94}
95
96int32_t CompactData::getMultiplier(int32_t magnitude) const {
97    if (magnitude < 0) {
98        return 0;
99    }
100    if (magnitude > largestMagnitude) {
101        magnitude = largestMagnitude;
102    }
103    return multipliers[magnitude];
104}
105
106const UChar *CompactData::getPattern(int32_t magnitude, StandardPlural::Form plural) const {
107    if (magnitude < 0) {
108        return nullptr;
109    }
110    if (magnitude > largestMagnitude) {
111        magnitude = largestMagnitude;
112    }
113    const UChar *patternString = patterns[getIndex(magnitude, plural)];
114    if (patternString == nullptr && plural != StandardPlural::OTHER) {
115        // Fall back to "other" plural variant
116        patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)];
117    }
118    if (patternString == USE_FALLBACK) { // == is intended
119        // Return null if USE_FALLBACK is present
120        patternString = nullptr;
121    }
122    return patternString;
123}
124
125void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const {
126    U_ASSERT(output.isEmpty());
127    // NOTE: In C++, this is done more manually with a UVector.
128    // In Java, we can take advantage of JDK HashSet.
129    for (auto pattern : patterns) {
130        if (pattern == nullptr || pattern == USE_FALLBACK) {
131            continue;
132        }
133
134        // Insert pattern into the UVector if the UVector does not already contain the pattern.
135        // Search the UVector from the end since identical patterns are likely to be adjacent.
136        for (int32_t i = output.size() - 1; i >= 0; i--) {
137            if (u_strcmp(pattern, static_cast<const UChar *>(output[i])) == 0) {
138                goto continue_outer;
139            }
140        }
141
142        // The string was not found; add it to the UVector.
143        // ANDY: This requires a const_cast.  Why?
144        output.addElement(const_cast<UChar *>(pattern), status);
145
146        continue_outer:
147        continue;
148    }
149}
150
151void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UBool /*noFallback*/,
152                                       UErrorCode &status) {
153    // traverse into the table of powers of ten
154    ResourceTable powersOfTenTable = value.getTable(status);
155    if (U_FAILURE(status)) { return; }
156    for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
157
158        // Assumes that the keys are always of the form "10000" where the magnitude is the
159        // length of the key minus one.  We expect magnitudes to be less than MAX_DIGITS.
160        auto magnitude = static_cast<int8_t> (strlen(key) - 1);
161        int8_t multiplier = data.multipliers[magnitude];
162        U_ASSERT(magnitude < COMPACT_MAX_DIGITS);
163
164        // Iterate over the plural variants ("one", "other", etc)
165        ResourceTable pluralVariantsTable = value.getTable(status);
166        if (U_FAILURE(status)) { return; }
167        for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
168
169            // Skip this magnitude/plural if we already have it from a child locale.
170            // Note: This also skips USE_FALLBACK entries.
171            StandardPlural::Form plural = StandardPlural::fromString(key, status);
172            if (U_FAILURE(status)) { return; }
173            if (data.patterns[getIndex(magnitude, plural)] != nullptr) {
174                continue;
175            }
176
177            // The value "0" means that we need to use the default pattern and not fall back
178            // to parent locales. Example locale where this is relevant: 'it'.
179            int32_t patternLength;
180            const UChar *patternString = value.getString(patternLength, status);
181            if (U_FAILURE(status)) { return; }
182            if (u_strcmp(patternString, u"0") == 0) {
183                patternString = USE_FALLBACK;
184                patternLength = 0;
185            }
186
187            // Save the pattern string. We will parse it lazily.
188            data.patterns[getIndex(magnitude, plural)] = patternString;
189
190            // If necessary, compute the multiplier: the difference between the magnitude
191            // and the number of zeros in the pattern.
192            if (multiplier == 0) {
193                int32_t numZeros = countZeros(patternString, patternLength);
194                if (numZeros > 0) { // numZeros==0 in certain cases, like Somali "Kun"
195                    multiplier = static_cast<int8_t> (numZeros - magnitude - 1);
196                }
197            }
198        }
199
200        // Save the multiplier.
201        if (data.multipliers[magnitude] == 0) {
202            data.multipliers[magnitude] = multiplier;
203            if (magnitude > data.largestMagnitude) {
204                data.largestMagnitude = magnitude;
205            }
206            data.isEmpty = false;
207        } else {
208            U_ASSERT(data.multipliers[magnitude] == multiplier);
209        }
210    }
211}
212
213///////////////////////////////////////////////////////////
214/// END OF CompactData.java; BEGIN CompactNotation.java ///
215///////////////////////////////////////////////////////////
216
217CompactHandler::CompactHandler(CompactStyle compactStyle, const Locale &locale, const char *nsName,
218                               CompactType compactType, const PluralRules *rules,
219                               MutablePatternModifier *buildReference, const MicroPropsGenerator *parent,
220                               UErrorCode &status)
221        : rules(rules), parent(parent) {
222    data.populate(locale, nsName, compactStyle, compactType, status);
223    if (buildReference != nullptr) {
224        // Safe code path
225        precomputeAllModifiers(*buildReference, status);
226        safe = TRUE;
227    } else {
228        // Unsafe code path
229        safe = FALSE;
230    }
231}
232
233CompactHandler::~CompactHandler() {
234    for (int32_t i = 0; i < precomputedModsLength; i++) {
235        delete precomputedMods[i].mod;
236    }
237}
238
239void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status) {
240    if (U_FAILURE(status)) { return; }
241
242    // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T
243    UVector allPatterns(12, status);
244    if (U_FAILURE(status)) { return; }
245    data.getUniquePatterns(allPatterns, status);
246    if (U_FAILURE(status)) { return; }
247
248    // C++ only: ensure that precomputedMods has room.
249    precomputedModsLength = allPatterns.size();
250    if (precomputedMods.getCapacity() < precomputedModsLength) {
251        precomputedMods.resize(allPatterns.size(), status);
252        if (U_FAILURE(status)) { return; }
253    }
254
255    for (int32_t i = 0; i < precomputedModsLength; i++) {
256        auto patternString = static_cast<const UChar *>(allPatterns[i]);
257        UnicodeString hello(patternString);
258        CompactModInfo &info = precomputedMods[i];
259        ParsedPatternInfo patternInfo;
260        PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);
261        if (U_FAILURE(status)) { return; }
262        buildReference.setPatternInfo(&patternInfo);
263        info.mod = buildReference.createImmutable(status);
264        if (U_FAILURE(status)) { return; }
265        info.numDigits = patternInfo.positive.integerTotal;
266        info.patternString = patternString;
267    }
268}
269
270void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
271                                     UErrorCode &status) const {
272    parent->processQuantity(quantity, micros, status);
273    if (U_FAILURE(status)) { return; }
274
275    // Treat zero as if it had magnitude 0
276    int magnitude;
277    if (quantity.isZero()) {
278        magnitude = 0;
279        micros.rounding.apply(quantity, status);
280    } else {
281        // TODO: Revisit chooseMultiplierAndApply
282        int multiplier = micros.rounding.chooseMultiplierAndApply(quantity, data, status);
283        magnitude = quantity.isZero() ? 0 : quantity.getMagnitude();
284        magnitude -= multiplier;
285    }
286
287    StandardPlural::Form plural = quantity.getStandardPlural(rules);
288    const UChar *patternString = data.getPattern(magnitude, plural);
289    int numDigits = -1;
290    if (patternString == nullptr) {
291        // Use the default (non-compact) modifier.
292        // No need to take any action.
293    } else if (safe) {
294        // Safe code path.
295        // Java uses a hash set here for O(1) lookup.  C++ uses a linear search.
296        // TODO: Benchmark this and maybe change to a binary search or hash table.
297        int32_t i = 0;
298        for (; i < precomputedModsLength; i++) {
299            const CompactModInfo &info = precomputedMods[i];
300            if (u_strcmp(patternString, info.patternString) == 0) {
301                info.mod->applyToMicros(micros, quantity);
302                numDigits = info.numDigits;
303                break;
304            }
305        }
306        // It should be guaranteed that we found the entry.
307        U_ASSERT(i < precomputedModsLength);
308    } else {
309        // Unsafe code path.
310        // Overwrite the PatternInfo in the existing modMiddle.
311        // C++ Note: Use unsafePatternInfo for proper lifecycle.
312        ParsedPatternInfo &patternInfo = const_cast<CompactHandler *>(this)->unsafePatternInfo;
313        PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);
314        static_cast<MutablePatternModifier*>(const_cast<Modifier*>(micros.modMiddle))
315            ->setPatternInfo(&patternInfo);
316        numDigits = patternInfo.positive.integerTotal;
317    }
318
319    // FIXME: Deal with numDigits == 0 (Awaiting a test case)
320    (void)numDigits;
321
322    // We already performed rounding. Do not perform it again.
323    micros.rounding = Rounder::constructPassThrough();
324}
325
326#endif /* #if !UCONFIG_NO_FORMATTING */
327