1// © 2017 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3 4#include "unicode/utypes.h" 5 6#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT 7 8#include "resource.h" 9#include "number_compact.h" 10#include "unicode/ustring.h" 11#include "unicode/ures.h" 12#include "cstring.h" 13#include "charstr.h" 14#include "uresimp.h" 15 16using namespace icu; 17using namespace icu::number; 18using namespace icu::number::impl; 19 20namespace { 21 22// A dummy object used when a "0" compact decimal entry is encountered. This is necessary 23// in order to prevent falling back to root. Object equality ("==") is intended. 24const UChar *USE_FALLBACK = u"<USE FALLBACK>"; 25 26/** Produces a string like "NumberElements/latn/patternsShort/decimalFormat". */ 27void getResourceBundleKey(const char *nsName, CompactStyle compactStyle, CompactType compactType, 28 CharString &sb, UErrorCode &status) { 29 sb.clear(); 30 sb.append("NumberElements/", status); 31 sb.append(nsName, status); 32 sb.append(compactStyle == CompactStyle::UNUM_SHORT ? "/patternsShort" : "/patternsLong", status); 33 sb.append(compactType == CompactType::TYPE_DECIMAL ? "/decimalFormat" : "/currencyFormat", status); 34} 35 36int32_t getIndex(int32_t magnitude, StandardPlural::Form plural) { 37 return magnitude * StandardPlural::COUNT + plural; 38} 39 40int32_t countZeros(const UChar *patternString, int32_t patternLength) { 41 // NOTE: This strategy for computing the number of zeros is a hack for efficiency. 42 // It could break if there are any 0s that aren't part of the main pattern. 43 int32_t numZeros = 0; 44 for (int32_t i = 0; i < patternLength; i++) { 45 if (patternString[i] == u'0') { 46 numZeros++; 47 } else if (numZeros > 0) { 48 break; // zeros should always be contiguous 49 } 50 } 51 return numZeros; 52} 53 54} // namespace 55 56// NOTE: patterns and multipliers both get zero-initialized. 57CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(TRUE) { 58} 59 60void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle, 61 CompactType compactType, UErrorCode &status) { 62 CompactDataSink sink(*this); 63 LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &status)); 64 if (U_FAILURE(status)) { return; } 65 66 bool nsIsLatn = strcmp(nsName, "latn") == 0; 67 bool compactIsShort = compactStyle == CompactStyle::UNUM_SHORT; 68 69 // Fall back to latn numbering system and/or short compact style. 70 CharString resourceKey; 71 getResourceBundleKey(nsName, compactStyle, compactType, resourceKey, status); 72 UErrorCode localStatus = U_ZERO_ERROR; 73 ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus); 74 if (isEmpty && !nsIsLatn) { 75 getResourceBundleKey("latn", compactStyle, compactType, resourceKey, status); 76 localStatus = U_ZERO_ERROR; 77 ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus); 78 } 79 if (isEmpty && !compactIsShort) { 80 getResourceBundleKey(nsName, CompactStyle::UNUM_SHORT, compactType, resourceKey, status); 81 localStatus = U_ZERO_ERROR; 82 ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus); 83 } 84 if (isEmpty && !nsIsLatn && !compactIsShort) { 85 getResourceBundleKey("latn", CompactStyle::UNUM_SHORT, compactType, resourceKey, status); 86 localStatus = U_ZERO_ERROR; 87 ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus); 88 } 89 90 // The last fallback should be guaranteed to return data. 91 if (isEmpty) { 92 status = U_INTERNAL_PROGRAM_ERROR; 93 } 94} 95 96int32_t CompactData::getMultiplier(int32_t magnitude) const { 97 if (magnitude < 0) { 98 return 0; 99 } 100 if (magnitude > largestMagnitude) { 101 magnitude = largestMagnitude; 102 } 103 return multipliers[magnitude]; 104} 105 106const UChar *CompactData::getPattern(int32_t magnitude, StandardPlural::Form plural) const { 107 if (magnitude < 0) { 108 return nullptr; 109 } 110 if (magnitude > largestMagnitude) { 111 magnitude = largestMagnitude; 112 } 113 const UChar *patternString = patterns[getIndex(magnitude, plural)]; 114 if (patternString == nullptr && plural != StandardPlural::OTHER) { 115 // Fall back to "other" plural variant 116 patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)]; 117 } 118 if (patternString == USE_FALLBACK) { // == is intended 119 // Return null if USE_FALLBACK is present 120 patternString = nullptr; 121 } 122 return patternString; 123} 124 125void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const { 126 U_ASSERT(output.isEmpty()); 127 // NOTE: In C++, this is done more manually with a UVector. 128 // In Java, we can take advantage of JDK HashSet. 129 for (auto pattern : patterns) { 130 if (pattern == nullptr || pattern == USE_FALLBACK) { 131 continue; 132 } 133 134 // Insert pattern into the UVector if the UVector does not already contain the pattern. 135 // Search the UVector from the end since identical patterns are likely to be adjacent. 136 for (int32_t i = output.size() - 1; i >= 0; i--) { 137 if (u_strcmp(pattern, static_cast<const UChar *>(output[i])) == 0) { 138 goto continue_outer; 139 } 140 } 141 142 // The string was not found; add it to the UVector. 143 // ANDY: This requires a const_cast. Why? 144 output.addElement(const_cast<UChar *>(pattern), status); 145 146 continue_outer: 147 continue; 148 } 149} 150 151void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UBool /*noFallback*/, 152 UErrorCode &status) { 153 // traverse into the table of powers of ten 154 ResourceTable powersOfTenTable = value.getTable(status); 155 if (U_FAILURE(status)) { return; } 156 for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) { 157 158 // Assumes that the keys are always of the form "10000" where the magnitude is the 159 // length of the key minus one. We expect magnitudes to be less than MAX_DIGITS. 160 auto magnitude = static_cast<int8_t> (strlen(key) - 1); 161 int8_t multiplier = data.multipliers[magnitude]; 162 U_ASSERT(magnitude < COMPACT_MAX_DIGITS); 163 164 // Iterate over the plural variants ("one", "other", etc) 165 ResourceTable pluralVariantsTable = value.getTable(status); 166 if (U_FAILURE(status)) { return; } 167 for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { 168 169 // Skip this magnitude/plural if we already have it from a child locale. 170 // Note: This also skips USE_FALLBACK entries. 171 StandardPlural::Form plural = StandardPlural::fromString(key, status); 172 if (U_FAILURE(status)) { return; } 173 if (data.patterns[getIndex(magnitude, plural)] != nullptr) { 174 continue; 175 } 176 177 // The value "0" means that we need to use the default pattern and not fall back 178 // to parent locales. Example locale where this is relevant: 'it'. 179 int32_t patternLength; 180 const UChar *patternString = value.getString(patternLength, status); 181 if (U_FAILURE(status)) { return; } 182 if (u_strcmp(patternString, u"0") == 0) { 183 patternString = USE_FALLBACK; 184 patternLength = 0; 185 } 186 187 // Save the pattern string. We will parse it lazily. 188 data.patterns[getIndex(magnitude, plural)] = patternString; 189 190 // If necessary, compute the multiplier: the difference between the magnitude 191 // and the number of zeros in the pattern. 192 if (multiplier == 0) { 193 int32_t numZeros = countZeros(patternString, patternLength); 194 if (numZeros > 0) { // numZeros==0 in certain cases, like Somali "Kun" 195 multiplier = static_cast<int8_t> (numZeros - magnitude - 1); 196 } 197 } 198 } 199 200 // Save the multiplier. 201 if (data.multipliers[magnitude] == 0) { 202 data.multipliers[magnitude] = multiplier; 203 if (magnitude > data.largestMagnitude) { 204 data.largestMagnitude = magnitude; 205 } 206 data.isEmpty = false; 207 } else { 208 U_ASSERT(data.multipliers[magnitude] == multiplier); 209 } 210 } 211} 212 213/////////////////////////////////////////////////////////// 214/// END OF CompactData.java; BEGIN CompactNotation.java /// 215/////////////////////////////////////////////////////////// 216 217CompactHandler::CompactHandler(CompactStyle compactStyle, const Locale &locale, const char *nsName, 218 CompactType compactType, const PluralRules *rules, 219 MutablePatternModifier *buildReference, const MicroPropsGenerator *parent, 220 UErrorCode &status) 221 : rules(rules), parent(parent) { 222 data.populate(locale, nsName, compactStyle, compactType, status); 223 if (buildReference != nullptr) { 224 // Safe code path 225 precomputeAllModifiers(*buildReference, status); 226 safe = TRUE; 227 } else { 228 // Unsafe code path 229 safe = FALSE; 230 } 231} 232 233CompactHandler::~CompactHandler() { 234 for (int32_t i = 0; i < precomputedModsLength; i++) { 235 delete precomputedMods[i].mod; 236 } 237} 238 239void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status) { 240 if (U_FAILURE(status)) { return; } 241 242 // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T 243 UVector allPatterns(12, status); 244 if (U_FAILURE(status)) { return; } 245 data.getUniquePatterns(allPatterns, status); 246 if (U_FAILURE(status)) { return; } 247 248 // C++ only: ensure that precomputedMods has room. 249 precomputedModsLength = allPatterns.size(); 250 if (precomputedMods.getCapacity() < precomputedModsLength) { 251 precomputedMods.resize(allPatterns.size(), status); 252 if (U_FAILURE(status)) { return; } 253 } 254 255 for (int32_t i = 0; i < precomputedModsLength; i++) { 256 auto patternString = static_cast<const UChar *>(allPatterns[i]); 257 UnicodeString hello(patternString); 258 CompactModInfo &info = precomputedMods[i]; 259 ParsedPatternInfo patternInfo; 260 PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status); 261 if (U_FAILURE(status)) { return; } 262 buildReference.setPatternInfo(&patternInfo); 263 info.mod = buildReference.createImmutable(status); 264 if (U_FAILURE(status)) { return; } 265 info.numDigits = patternInfo.positive.integerTotal; 266 info.patternString = patternString; 267 } 268} 269 270void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs, 271 UErrorCode &status) const { 272 parent->processQuantity(quantity, micros, status); 273 if (U_FAILURE(status)) { return; } 274 275 // Treat zero as if it had magnitude 0 276 int magnitude; 277 if (quantity.isZero()) { 278 magnitude = 0; 279 micros.rounding.apply(quantity, status); 280 } else { 281 // TODO: Revisit chooseMultiplierAndApply 282 int multiplier = micros.rounding.chooseMultiplierAndApply(quantity, data, status); 283 magnitude = quantity.isZero() ? 0 : quantity.getMagnitude(); 284 magnitude -= multiplier; 285 } 286 287 StandardPlural::Form plural = quantity.getStandardPlural(rules); 288 const UChar *patternString = data.getPattern(magnitude, plural); 289 int numDigits = -1; 290 if (patternString == nullptr) { 291 // Use the default (non-compact) modifier. 292 // No need to take any action. 293 } else if (safe) { 294 // Safe code path. 295 // Java uses a hash set here for O(1) lookup. C++ uses a linear search. 296 // TODO: Benchmark this and maybe change to a binary search or hash table. 297 int32_t i = 0; 298 for (; i < precomputedModsLength; i++) { 299 const CompactModInfo &info = precomputedMods[i]; 300 if (u_strcmp(patternString, info.patternString) == 0) { 301 info.mod->applyToMicros(micros, quantity); 302 numDigits = info.numDigits; 303 break; 304 } 305 } 306 // It should be guaranteed that we found the entry. 307 U_ASSERT(i < precomputedModsLength); 308 } else { 309 // Unsafe code path. 310 // Overwrite the PatternInfo in the existing modMiddle. 311 // C++ Note: Use unsafePatternInfo for proper lifecycle. 312 ParsedPatternInfo &patternInfo = const_cast<CompactHandler *>(this)->unsafePatternInfo; 313 PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status); 314 static_cast<MutablePatternModifier*>(const_cast<Modifier*>(micros.modMiddle)) 315 ->setPatternInfo(&patternInfo); 316 numDigits = patternInfo.positive.integerTotal; 317 } 318 319 // FIXME: Deal with numDigits == 0 (Awaiting a test case) 320 (void)numDigits; 321 322 // We already performed rounding. Do not perform it again. 323 micros.rounding = Rounder::constructPassThrough(); 324} 325 326#endif /* #if !UCONFIG_NO_FORMATTING */ 327