PluralSamples.java revision 7935b1839a081ed19ae0d33029ad3c09632a2caa
19d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish/* 29d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish ******************************************************************************* 39d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * Copyright (C) 2013-2014, International Business Machines Corporation and * 4e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrish * others. All Rights Reserved. * 5e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrish ******************************************************************************* 69d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish */ 79d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishpackage com.ibm.icu.text; 8e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrish 99d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.ArrayList; 109d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.Collection; 119d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.Collections; 129d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.Comparator; 139d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.HashMap; 149d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.HashSet; 159d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.LinkedHashSet; 169d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.List; 179d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.Map; 18e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrishimport java.util.Map.Entry; 19e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrishimport java.util.Set; 209d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport java.util.TreeSet; 219d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 229d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport com.ibm.icu.text.PluralRules.FixedDecimal; 239d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport com.ibm.icu.text.PluralRules.KeywordStatus; 249d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport com.ibm.icu.text.PluralRules.StandardPluralCategories; 259d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishimport com.ibm.icu.util.Output; 269d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 279d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish/** 289d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * @author markdavis 299d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * Refactor samples as first step to moving into CLDR 309d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * 319d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * @internal 329d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * @deprecated This API is ICU internal only. 339d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish */ 349d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish@Deprecated 359d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrishpublic class PluralSamples { 369d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 379d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish private PluralRules pluralRules; 389d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish private final Map<String, List<Double>> _keySamplesMap; 399d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 409d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish /** 419d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * @internal 429d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * @deprecated This API is ICU internal only. 439d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish */ 449d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish @Deprecated 459d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish public final Map<String, Boolean> _keyLimitedMap; 469d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish private final Map<String, Set<FixedDecimal>> _keyFractionSamplesMap; 479d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish private final Set<FixedDecimal> _fractionSamples; 489d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 499d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish /** 509d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * @internal 519d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish * @deprecated This API is ICU internal only. 529d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish */ 539d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish @Deprecated 549d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish public PluralSamples(PluralRules pluralRules) { 559d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish this.pluralRules = pluralRules; 569d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish Set<String> keywords = pluralRules.getKeywords(); 57e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrish // ensure both _keySamplesMap and _keyLimitedMap are initialized. 58e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrish // If this were allowed to vary on a per-call basis, we'd have to recheck and 59e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrish // possibly rebuild the samples cache. Doesn't seem worth it. 60e07896813c99dccd0e27e837fc74b02ef00b7360Jonathan Gerrish // This 'max samples' value only applies to keywords that are unlimited, for 619d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish // other keywords all the matching values are returned. This might be a lot. 629d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish final int MAX_SAMPLES = 3; 639d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 649d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish Map<String, Boolean> temp = new HashMap<String, Boolean>(); 659d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish for (String k : keywords) { 669d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish temp.put(k, pluralRules.isLimited(k)); 679d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish } 689d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish _keyLimitedMap = temp; 699d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 709d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish Map<String, List<Double>> sampleMap = new HashMap<String, List<Double>>(); 719d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish int keywordsRemaining = keywords.size(); 729d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 739d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish int limit = 128; // Math.max(5, getRepeatLimit() * MAX_SAMPLES) * 2; 749d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 759d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish for (int i = 0; keywordsRemaining > 0 && i < limit; ++i) { 769d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish keywordsRemaining = addSimpleSamples(pluralRules, MAX_SAMPLES, sampleMap, keywordsRemaining, i / 2.0); 779d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish } 789d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish // Hack for Celtic 799d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish keywordsRemaining = addSimpleSamples(pluralRules, MAX_SAMPLES, sampleMap, keywordsRemaining, 1000000); 809d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 819d68c9a82b0c22bb716270222ec8174a1da48eafJonathan Gerrish 82 // collect explicit samples 83 Map<String, Set<FixedDecimal>> sampleFractionMap = new HashMap<String, Set<FixedDecimal>>(); 84 Set<FixedDecimal> mentioned = new TreeSet<FixedDecimal>(); 85 // make sure that there is at least one 'other' value 86 Map<String, Set<FixedDecimal>> foundKeywords = new HashMap<String, Set<FixedDecimal>>(); 87 for (FixedDecimal s : mentioned) { 88 String keyword = pluralRules.select(s); 89 addRelation(foundKeywords, keyword, s); 90 } 91 main: 92 if (foundKeywords.size() != keywords.size()) { 93 for (int i = 1; i < 1000; ++i) { 94 boolean done = addIfNotPresent(i, mentioned, foundKeywords); 95 if (done) break main; 96 } 97 // if we are not done, try tenths 98 for (int i = 10; i < 1000; ++i) { 99 boolean done = addIfNotPresent(i/10d, mentioned, foundKeywords); 100 if (done) break main; 101 } 102 System.out.println("Failed to find sample for each keyword: " + foundKeywords + "\n\t" + pluralRules + "\n\t" + mentioned); 103 } 104 mentioned.add(new FixedDecimal(0)); // always there 105 mentioned.add(new FixedDecimal(1)); // always there 106 mentioned.add(new FixedDecimal(2)); // always there 107 mentioned.add(new FixedDecimal(0.1,1)); // always there 108 mentioned.add(new FixedDecimal(1.99,2)); // always there 109 mentioned.addAll(fractions(mentioned)); 110 for (FixedDecimal s : mentioned) { 111 String keyword = pluralRules.select(s); 112 Set<FixedDecimal> list = sampleFractionMap.get(keyword); 113 if (list == null) { 114 list = new LinkedHashSet<FixedDecimal>(); // will be sorted because the iteration is 115 sampleFractionMap.put(keyword, list); 116 } 117 list.add(s); 118 } 119 120 if (keywordsRemaining > 0) { 121 for (String k : keywords) { 122 if (!sampleMap.containsKey(k)) { 123 sampleMap.put(k, Collections.<Double>emptyList()); 124 } 125 if (!sampleFractionMap.containsKey(k)) { 126 sampleFractionMap.put(k, Collections.<FixedDecimal>emptySet()); 127 } 128 } 129 } 130 131 // Make lists immutable so we can return them directly 132 for (Entry<String, List<Double>> entry : sampleMap.entrySet()) { 133 sampleMap.put(entry.getKey(), Collections.unmodifiableList(entry.getValue())); 134 } 135 for (Entry<String, Set<FixedDecimal>> entry : sampleFractionMap.entrySet()) { 136 sampleFractionMap.put(entry.getKey(), Collections.unmodifiableSet(entry.getValue())); 137 } 138 _keySamplesMap = sampleMap; 139 _keyFractionSamplesMap = sampleFractionMap; 140 _fractionSamples = Collections.unmodifiableSet(mentioned); 141 } 142 143 private int addSimpleSamples(PluralRules pluralRules, final int MAX_SAMPLES, Map<String, List<Double>> sampleMap, 144 int keywordsRemaining, double val) { 145 String keyword = pluralRules.select(val); 146 boolean keyIsLimited = _keyLimitedMap.get(keyword); 147 148 List<Double> list = sampleMap.get(keyword); 149 if (list == null) { 150 list = new ArrayList<Double>(MAX_SAMPLES); 151 sampleMap.put(keyword, list); 152 } else if (!keyIsLimited && list.size() == MAX_SAMPLES) { 153 return keywordsRemaining; 154 } 155 list.add(Double.valueOf(val)); 156 157 if (!keyIsLimited && list.size() == MAX_SAMPLES) { 158 --keywordsRemaining; 159 } 160 return keywordsRemaining; 161 } 162 163 private void addRelation(Map<String, Set<FixedDecimal>> foundKeywords, String keyword, FixedDecimal s) { 164 Set<FixedDecimal> set = foundKeywords.get(keyword); 165 if (set == null) { 166 foundKeywords.put(keyword, set = new HashSet<FixedDecimal>()); 167 } 168 set.add(s); 169 } 170 171 private boolean addIfNotPresent(double d, Set<FixedDecimal> mentioned, Map<String, Set<FixedDecimal>> foundKeywords) { 172 FixedDecimal numberInfo = new FixedDecimal(d); 173 String keyword = pluralRules.select(numberInfo); 174 if (!foundKeywords.containsKey(keyword) || keyword.equals("other")) { 175 addRelation(foundKeywords, keyword, numberInfo); 176 mentioned.add(numberInfo); 177 if (keyword.equals("other")) { 178 if (foundKeywords.get("other").size() > 1) { 179 return true; 180 } 181 } 182 } 183 return false; 184 } 185 186 private static final int[] TENS = {1, 10, 100, 1000, 10000, 100000, 1000000}; 187 188 private static final int LIMIT_FRACTION_SAMPLES = 3; 189 190 191 private Set<FixedDecimal> fractions(Set<FixedDecimal> original) { 192 Set<FixedDecimal> toAddTo = new HashSet<FixedDecimal>(); 193 194 Set<Integer> result = new HashSet<Integer>(); 195 for (FixedDecimal base1 : original) { 196 result.add((int)base1.integerValue); 197 } 198 List<Integer> ints = new ArrayList<Integer>(result); 199 Set<String> keywords = new HashSet<String>(); 200 201 for (int j = 0; j < ints.size(); ++j) { 202 Integer base = ints.get(j); 203 String keyword = pluralRules.select(base); 204 if (keywords.contains(keyword)) { 205 continue; 206 } 207 keywords.add(keyword); 208 toAddTo.add(new FixedDecimal(base,1)); // add .0 209 toAddTo.add(new FixedDecimal(base,2)); // add .00 210 Integer fract = getDifferentCategory(ints, keyword); 211 if (fract >= TENS[LIMIT_FRACTION_SAMPLES-1]) { // make sure that we always get the value 212 toAddTo.add(new FixedDecimal(base + "." + fract)); 213 } else { 214 for (int visibleFractions = 1; visibleFractions < LIMIT_FRACTION_SAMPLES; ++visibleFractions) { 215 for (int i = 1; i <= visibleFractions; ++i) { 216 // with visible fractions = 3, and fract = 1, then we should get x.10, 0.01 217 // with visible fractions = 3, and fract = 15, then we should get x.15, x.15 218 if (fract >= TENS[i]) { 219 continue; 220 } 221 toAddTo.add(new FixedDecimal(base + fract/(double)TENS[i], visibleFractions)); 222 } 223 } 224 } 225 } 226 return toAddTo; 227 } 228 229 private Integer getDifferentCategory(List<Integer> ints, String keyword) { 230 for (int i = ints.size() - 1; i >= 0; --i) { 231 Integer other = ints.get(i); 232 String keywordOther = pluralRules.select(other); 233 if (!keywordOther.equals(keyword)) { 234 return other; 235 } 236 } 237 return 37; 238 } 239 240 @SuppressWarnings("unused") 241 private static final Comparator<String> KEYWORD_COMPARATOR = new Comparator<String> () { 242 public int compare(String arg0, String arg1) { 243 StandardPluralCategories a = StandardPluralCategories.forString(arg0); 244 StandardPluralCategories b = StandardPluralCategories.forString(arg1); 245 return a == null 246 ? (b == null ? arg0.compareTo(arg1) : -1) 247 : (b == null ? 1 : a.compareTo(b)); 248 } 249 }; 250 251 /** 252 * @internal 253 * @deprecated This API is ICU internal only. 254 */ 255 @Deprecated 256 public KeywordStatus getStatus(String keyword, int offset, Set<Double> explicits, Output<Double> uniqueValue) { 257 if (uniqueValue != null) { 258 uniqueValue.value = null; 259 } 260 261 if (!pluralRules.getKeywords().contains(keyword)) { 262 return KeywordStatus.INVALID; 263 } 264 Collection<Double> values = pluralRules.getAllKeywordValues(keyword); 265 if (values == null) { 266 return KeywordStatus.UNBOUNDED; 267 } 268 int originalSize = values.size(); 269 270 if (explicits == null) { 271 explicits = Collections.emptySet(); 272 } 273 274 // Quick check on whether there are multiple elements 275 276 if (originalSize > explicits.size()) { 277 if (originalSize == 1) { 278 if (uniqueValue != null) { 279 uniqueValue.value = values.iterator().next(); 280 } 281 return KeywordStatus.UNIQUE; 282 } 283 return KeywordStatus.BOUNDED; 284 } 285 286 // Compute if the quick test is insufficient. 287 288 HashSet<Double> subtractedSet = new HashSet<Double>(values); 289 for (Double explicit : explicits) { 290 subtractedSet.remove(explicit - offset); 291 } 292 if (subtractedSet.size() == 0) { 293 return KeywordStatus.SUPPRESSED; 294 } 295 296 if (uniqueValue != null && subtractedSet.size() == 1) { 297 uniqueValue.value = subtractedSet.iterator().next(); 298 } 299 300 return originalSize == 1 ? KeywordStatus.UNIQUE : KeywordStatus.BOUNDED; 301 } 302 303 Map<String, List<Double>> getKeySamplesMap() { 304 return _keySamplesMap; 305 } 306 307 Map<String, Set<FixedDecimal>> getKeyFractionSamplesMap() { 308 return _keyFractionSamplesMap; 309 } 310 311 Set<FixedDecimal> getFractionSamples() { 312 return _fractionSamples; 313 } 314 315 /** 316 * Returns all the values that trigger this keyword, or null if the number of such 317 * values is unlimited. 318 * 319 * @param keyword the keyword 320 * @return the values that trigger this keyword, or null. The returned collection 321 * is immutable. It will be empty if the keyword is not defined. 322 * @stable ICU 4.8 323 */ 324 325 Collection<Double> getAllKeywordValues(String keyword) { 326 // HACK for now 327 if (!pluralRules.getKeywords().contains(keyword)) { 328 return Collections.<Double>emptyList(); 329 } 330 Collection<Double> result = getKeySamplesMap().get(keyword); 331 332 // We depend on MAX_SAMPLES here. It's possible for a conjunction 333 // of unlimited rules that 'looks' unlimited to return a limited 334 // number of values. There's no bounds to this limited number, in 335 // general, because you can construct arbitrarily complex rules. Since 336 // we always generate 3 samples if a rule is really unlimited, that's 337 // where we put the cutoff. 338 if (result.size() > 2 && !_keyLimitedMap.get(keyword)) { 339 return null; 340 } 341 return result; 342 } 343} 344