1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18// BEGIN android-note 19// The icu implementation used was changed from icu4j to icu4jni. 20// END android-note 21 22package java.text; 23 24import java.security.AccessController; 25import java.security.PrivilegedAction; 26import java.util.Comparator; 27import java.util.Locale; 28import java.util.Vector; 29 30/** 31 * Performs locale-sensitive string comparison. A concrete subclass, 32 * {@link RuleBasedCollator}, allows customization of the collation ordering by 33 * the use of rule sets. 34 * <p> 35 * Following the <a href=http://www.unicode.org>Unicode Consortium</a>'s 36 * specifications for the <a 37 * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation 38 * Algorithm (UCA)</a>, there are 4 different levels of strength used in 39 * comparisons: 40 * <ul> 41 * <li>PRIMARY strength: Typically, this is used to denote differences between 42 * base characters (for example, "a" < "b"). It is the strongest difference. 43 * For example, dictionaries are divided into different sections by base 44 * character. 45 * <li>SECONDARY strength: Accents in the characters are considered secondary 46 * differences (for example, "as" < "às" < "at"). Other differences 47 * between letters can also be considered secondary differences, depending on 48 * the language. A secondary difference is ignored when there is a primary 49 * difference anywhere in the strings. 50 * <li>TERTIARY strength: Upper and lower case differences in characters are 51 * distinguished at tertiary strength (for example, "ao" < "Ao" < 52 * "aò"). In addition, a variant of a letter differs from the base form 53 * on the tertiary strength (such as "A" and "Ⓐ"). Another example is the 54 * difference between large and small Kana. A tertiary difference is ignored 55 * when there is a primary or secondary difference anywhere in the strings. 56 * <li>IDENTICAL strength: When all other strengths are equal, the IDENTICAL 57 * strength is used as a tiebreaker. The Unicode code point values of the NFD 58 * form of each string are compared, just in case there is no difference. For 59 * example, Hebrew cantellation marks are only distinguished at this strength. 60 * This strength should be used sparingly, as only code point value differences 61 * between two strings are an extremely rare occurrence. Using this strength 62 * substantially decreases the performance for both comparison and collation key 63 * generation APIs. This strength also increases the size of the collation key. 64 * </ul> 65 * <p> 66 * This {@code Collator} deals only with two decomposition modes, the canonical 67 * decomposition mode and one that does not use any decomposition. The 68 * compatibility decomposition mode 69 * {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the 70 * canonical decomposition mode is set, {@code Collator} handles un-normalized 71 * text properly, producing the same results as if the text were normalized in 72 * NFD. If canonical decomposition is turned off, it is the user's 73 * responsibility to ensure that all text is already in the appropriate form 74 * before performing a comparison or before getting a {@link CollationKey}. 75 * <p> 76 * <em>Examples:</em> 77 * <blockquote> 78 * 79 * <pre> 80 * // Get the Collator for US English and set its strength to PRIMARY 81 * Collator usCollator = Collator.getInstance(Locale.US); 82 * usCollator.setStrength(Collator.PRIMARY); 83 * if (usCollator.compare("abc", "ABC") == 0) { 84 * System.out.println("Strings are equivalent"); 85 * } 86 * </pre> 87 * 88 * </blockquote> 89 * <p> 90 * The following example shows how to compare two strings using the collator for 91 * the default locale. 92 * <blockquote> 93 * 94 * <pre> 95 * // Compare two strings in the default locale 96 * Collator myCollator = Collator.getInstance(); 97 * myCollator.setDecomposition(Collator.NO_DECOMPOSITION); 98 * if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) { 99 * System.out.println("\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition"); 100 * myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 101 * if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) { 102 * System.out.println("Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition"); 103 * } else { 104 * System.out.println("\u00e0\u0325 is equal to a\u0325\u0300 with decomposition"); 105 * } 106 * } else { 107 * System.out.println("Error: \u00e0\u0325 should be not equal to a\u0325\u0300 without decomposition"); 108 * } 109 * </pre> 110 * 111 * </blockquote> 112 * 113 * @see RuleBasedCollator 114 * @see CollationKey 115 */ 116public abstract class Collator implements Comparator<Object>, Cloneable { 117 118 static final int EQUAL = 0; 119 120 static final int GREATER = 1; 121 122 static final int LESS = -1; 123 124 /** 125 * Constant used to specify the decomposition rule. 126 */ 127 public static final int NO_DECOMPOSITION = 0; 128 129 /** 130 * Constant used to specify the decomposition rule. 131 */ 132 public static final int CANONICAL_DECOMPOSITION = 1; 133 134 /** 135 * Constant used to specify the decomposition rule. This value for 136 * decomposition is not supported. 137 */ 138 public static final int FULL_DECOMPOSITION = 2; 139 140 /** 141 * Constant used to specify the collation strength. 142 */ 143 public static final int PRIMARY = 0; 144 145 /** 146 * Constant used to specify the collation strength. 147 */ 148 public static final int SECONDARY = 1; 149 150 /** 151 * Constant used to specify the collation strength. 152 */ 153 public static final int TERTIARY = 2; 154 155 /** 156 * Constant used to specify the collation strength. 157 */ 158 public static final int IDENTICAL = 3; 159 160 private static int CACHE_SIZE; 161 162 static { 163 // CACHE_SIZE includes key and value, so needs to be double 164 String cacheSize = AccessController 165 .doPrivileged(new PrivilegedAction<String>() { 166 public String run() { 167 return System.getProperty("collator.cache"); //$NON-NLS-1$ 168 } 169 }); 170 if (cacheSize != null) { 171 try { 172 CACHE_SIZE = Integer.parseInt(cacheSize); 173 } catch (NumberFormatException e) { 174 CACHE_SIZE = 6; 175 } 176 } else { 177 CACHE_SIZE = 6; 178 } 179 } 180 181 private static Vector<Collator> cache = new Vector<Collator>(CACHE_SIZE); 182 183 // Wrapper class of ICU4JNI Collator 184 com.ibm.icu4jni.text.Collator icuColl; 185 186 Collator(com.ibm.icu4jni.text.Collator wrapper) { 187 this.icuColl = wrapper; 188 } 189 190 /** 191 * Constructs a new {@code Collator} instance. 192 */ 193 protected Collator() { 194 super(); 195 // BEGIN android-added 196 icuColl = com.ibm.icu4jni.text.Collator.getInstance(Locale.getDefault()); 197 // END android-added 198 } 199 200 /** 201 * Returns a new collator with the same decomposition mode and 202 * strength value as this collator. 203 * 204 * @return a shallow copy of this collator. 205 * @see java.lang.Cloneable 206 */ 207 @Override 208 public Object clone() { 209 try { 210 Collator clone = (Collator) super.clone(); 211 clone.icuColl = (com.ibm.icu4jni.text.Collator) this.icuColl.clone(); 212 return clone; 213 } catch (CloneNotSupportedException e) { 214 throw new AssertionError(e); // android-changed 215 } 216 } 217 218 /** 219 * Compares two objects to determine their relative order. The objects must 220 * be strings. 221 * 222 * @param object1 223 * the first string to compare. 224 * @param object2 225 * the second string to compare. 226 * @return a negative value if {@code object1} is less than {@code object2}, 227 * 0 if they are equal, and a positive value if {@code object1} is 228 * greater than {@code object2}. 229 * @throws ClassCastException 230 * if {@code object1} or {@code object2} is not a {@code String}. 231 */ 232 public int compare(Object object1, Object object2) { 233 return compare((String) object1, (String) object2); 234 } 235 236 /** 237 * Compares two strings to determine their relative order. 238 * 239 * @param string1 240 * the first string to compare. 241 * @param string2 242 * the second string to compare. 243 * @return a negative value if {@code string1} is less than {@code string2}, 244 * 0 if they are equal and a positive value if {@code string1} is 245 * greater than {@code string2}. 246 */ 247 public abstract int compare(String string1, String string2); 248 249 /** 250 * Compares this collator with the specified object and indicates if they 251 * are equal. 252 * 253 * @param object 254 * the object to compare with this object. 255 * @return {@code true} if {@code object} is a {@code Collator} object and 256 * it has the same strength and decomposition values as this 257 * collator; {@code false} otherwise. 258 * @see #hashCode 259 */ 260 @Override 261 public boolean equals(Object object) { 262 if (!(object instanceof Collator)) { 263 return false; 264 } 265 Collator collator = (Collator) object; 266 return this.icuColl == null ? collator.icuColl == null : this.icuColl 267 .equals(collator.icuColl); 268 } 269 270 /** 271 * Compares two strings using the collation rules to determine if they are 272 * equal. 273 * 274 * @param string1 275 * the first string to compare. 276 * @param string2 277 * the second string to compare. 278 * @return {@code true} if {@code string1} and {@code string2} are equal 279 * using the collation rules, false otherwise. 280 */ 281 public boolean equals(String string1, String string2) { 282 return compare(string1, string2) == 0; 283 } 284 285 /** 286 * Gets the list of installed {@link java.util.Locale} objects which support 287 * {@code Collator}. 288 * 289 * @return an array of {@code Locale}. 290 */ 291 public static Locale[] getAvailableLocales() { 292 return com.ibm.icu4jni.text.Collator.getAvailableLocales(); 293 } 294 295 /** 296 * Returns a {@link CollationKey} for the specified string for this collator 297 * with the current decomposition rule and strength value. 298 * 299 * @param string 300 * the source string that is converted into a collation key. 301 * @return the collation key for {@code string}. 302 */ 303 public abstract CollationKey getCollationKey(String string); 304 305 /** 306 * Returns the decomposition rule for this collator. 307 * 308 * @return the decomposition rule, either {@code NO_DECOMPOSITION} or 309 * {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} is 310 * not supported. 311 */ 312 public int getDecomposition() { 313 return decompositionMode_ICU_Java(this.icuColl.getDecomposition()); 314 } 315 316 /** 317 * Returns a {@code Collator} instance which is appropriate for the default 318 * {@code Locale}. 319 * 320 * @return the collator for the default locale. 321 */ 322 public static Collator getInstance() { 323 return getInstance(Locale.getDefault()); 324 } 325 326 /** 327 * Returns a {@code Collator} instance which is appropriate for the 328 * specified {@code Locale}. 329 * 330 * @param locale 331 * the locale. 332 * @return the collator for {@code locale}. 333 */ 334 public static Collator getInstance(Locale locale) { 335 String key = locale.toString(); 336 for (int i = cache.size() - 1; i >= 0; i -= 2) { 337 if (cache.elementAt(i).equals(key)) { 338 return (Collator) (cache.elementAt(i - 1)).clone(); 339 } 340 } 341 342 return new RuleBasedCollator(com.ibm.icu4jni.text.Collator 343 .getInstance(locale)); 344 } 345 346 /** 347 * Returns the strength value for this collator. 348 * 349 * @return the strength value, either PRIMARY, SECONDARY, TERTIARY or 350 * IDENTICAL. 351 */ 352 public int getStrength() { 353 return strength_ICU_Java(this.icuColl.getStrength()); 354 } 355 356 /** 357 * Returns an integer hash code for this collator. 358 * 359 * @return this collator's hash code. 360 * 361 * @see #equals(Object) 362 * @see #equals(String, String) 363 */ 364 @Override 365 public abstract int hashCode(); 366 367 /** 368 * Sets the decomposition rule for this collator. 369 * 370 * @param value 371 * the decomposition rule, either {@code NO_DECOMPOSITION} or 372 * {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} 373 * is not supported. 374 * @throws IllegalArgumentException 375 * if the provided decomposition rule is not valid. This includes 376 * {@code FULL_DECOMPOSITION}. 377 */ 378 public void setDecomposition(int value) { 379 this.icuColl.setDecomposition(decompositionMode_Java_ICU(value)); 380 } 381 382 /** 383 * Sets the strength value for this collator. 384 * 385 * @param value 386 * the strength value, either PRIMARY, SECONDARY, TERTIARY, or 387 * IDENTICAL. 388 * @throws IllegalArgumentException 389 * if the provided strength value is not valid. 390 */ 391 public void setStrength(int value) { 392 this.icuColl.setStrength(strength_Java_ICU(value)); 393 } 394 395 private int decompositionMode_Java_ICU(int mode) { 396 int icuDecomp = mode; 397 switch (mode) { 398 case Collator.CANONICAL_DECOMPOSITION: 399 icuDecomp = com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION; 400 break; 401 case Collator.NO_DECOMPOSITION: 402 icuDecomp = com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION; 403 break; 404 } 405 return icuDecomp; 406 } 407 408 private int decompositionMode_ICU_Java(int mode) { 409 int javaMode = mode; 410 switch (mode) { 411 case com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION: 412 javaMode = Collator.NO_DECOMPOSITION; 413 break; 414 case com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION: 415 javaMode = Collator.CANONICAL_DECOMPOSITION; 416 break; 417 } 418 return javaMode; 419 } 420 421 private int strength_Java_ICU(int value) { 422 int icuValue = value; 423 switch (value) { 424 case Collator.PRIMARY: 425 icuValue = com.ibm.icu4jni.text.Collator.PRIMARY; 426 break; 427 case Collator.SECONDARY: 428 icuValue = com.ibm.icu4jni.text.Collator.SECONDARY; 429 break; 430 case Collator.TERTIARY: 431 icuValue = com.ibm.icu4jni.text.Collator.TERTIARY; 432 break; 433 case Collator.IDENTICAL: 434 icuValue = com.ibm.icu4jni.text.Collator.IDENTICAL; 435 break; 436 } 437 return icuValue; 438 439 } 440 441 private int strength_ICU_Java(int value) { 442 int javaValue = value; 443 switch (value) { 444 case com.ibm.icu4jni.text.Collator.PRIMARY: 445 javaValue = Collator.PRIMARY; 446 break; 447 case com.ibm.icu4jni.text.Collator.SECONDARY: 448 javaValue = Collator.SECONDARY; 449 break; 450 case com.ibm.icu4jni.text.Collator.TERTIARY: 451 javaValue = Collator.TERTIARY; 452 break; 453 case com.ibm.icu4jni.text.Collator.IDENTICAL: 454 javaValue = Collator.IDENTICAL; 455 break; 456 } 457 return javaValue; 458 } 459} 460