1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.text; 19 20import java.util.Comparator; 21import java.util.Locale; 22import libcore.icu.ICU; 23import libcore.icu.RuleBasedCollatorICU; 24 25/** 26 * Performs locale-sensitive string comparison. A concrete subclass, 27 * {@link RuleBasedCollator}, allows customization of the collation ordering by 28 * the use of rule sets. 29 * <p> 30 * Following the <a href=http://www.unicode.org>Unicode Consortium</a>'s 31 * specifications for the <a 32 * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation 33 * Algorithm (UCA)</a>, there are 4 different levels of strength used in 34 * comparisons: 35 * <ul> 36 * <li>PRIMARY strength: Typically, this is used to denote differences between 37 * base characters (for example, "a" < "b"). It is the strongest difference. 38 * For example, dictionaries are divided into different sections by base 39 * character. 40 * <li>SECONDARY strength: Accents in the characters are considered secondary 41 * differences (for example, "as" < "às" < "at"). Other differences 42 * between letters can also be considered secondary differences, depending on 43 * the language. A secondary difference is ignored when there is a primary 44 * difference anywhere in the strings. 45 * <li>TERTIARY strength: Upper and lower case differences in characters are 46 * distinguished at tertiary strength (for example, "ao" < "Ao" < 47 * "aò"). In addition, a variant of a letter differs from the base form 48 * on the tertiary strength (such as "A" and "Ⓐ"). Another example is the 49 * difference between large and small Kana. A tertiary difference is ignored 50 * when there is a primary or secondary difference anywhere in the strings. 51 * <li>IDENTICAL strength: When all other strengths are equal, the IDENTICAL 52 * strength is used as a tiebreaker. The Unicode code point values of the NFD 53 * form of each string are compared, just in case there is no difference. For 54 * example, Hebrew cantellation marks are only distinguished at this strength. 55 * This strength should be used sparingly, as only code point value differences 56 * between two strings are an extremely rare occurrence. Using this strength 57 * substantially decreases the performance for both comparison and collation key 58 * generation APIs. This strength also increases the size of the collation key. 59 * </ul> 60 * <p> 61 * This {@code Collator} deals only with two decomposition modes, the canonical 62 * decomposition mode and one that does not use any decomposition. The 63 * compatibility decomposition mode 64 * {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the 65 * canonical decomposition mode is set, {@code Collator} handles un-normalized 66 * text properly, producing the same results as if the text were normalized in 67 * NFD. If canonical decomposition is turned off, it is the user's 68 * responsibility to ensure that all text is already in the appropriate form 69 * before performing a comparison or before getting a {@link CollationKey}. 70 * <p> 71 * <em>Examples:</em> 72 * <blockquote> 73 * 74 * <pre> 75 * // Get the Collator for US English and set its strength to PRIMARY 76 * Collator usCollator = Collator.getInstance(Locale.US); 77 * usCollator.setStrength(Collator.PRIMARY); 78 * if (usCollator.compare("abc", "ABC") == 0) { 79 * System.out.println("Strings are equivalent"); 80 * } 81 * </pre> 82 * 83 * </blockquote> 84 * <p> 85 * The following example shows how to compare two strings using the collator for 86 * the default locale. 87 * <blockquote> 88 * 89 * <pre> 90 * // Compare two strings in the default locale 91 * Collator myCollator = Collator.getInstance(); 92 * myCollator.setDecomposition(Collator.NO_DECOMPOSITION); 93 * if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) { 94 * System.out.println("\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition"); 95 * myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION); 96 * if (myCollator.compare("\u00e0\u0325", "a\u0325\u0300") != 0) { 97 * System.out.println("Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition"); 98 * } else { 99 * System.out.println("\u00e0\u0325 is equal to a\u0325\u0300 with decomposition"); 100 * } 101 * } else { 102 * System.out.println("Error: \u00e0\u0325 should be not equal to a\u0325\u0300 without decomposition"); 103 * } 104 * </pre> 105 * 106 * </blockquote> 107 * 108 * @see RuleBasedCollator 109 * @see CollationKey 110 */ 111public abstract class Collator implements Comparator<Object>, Cloneable { 112 /** 113 * Constant used to specify the decomposition rule. 114 */ 115 public static final int NO_DECOMPOSITION = 0; 116 117 /** 118 * Constant used to specify the decomposition rule. 119 */ 120 public static final int CANONICAL_DECOMPOSITION = 1; 121 122 /** 123 * Constant used to specify the decomposition rule. This value for 124 * decomposition is not supported. 125 */ 126 public static final int FULL_DECOMPOSITION = 2; 127 128 /** 129 * Constant used to specify the collation strength. 130 */ 131 public static final int PRIMARY = 0; 132 133 /** 134 * Constant used to specify the collation strength. 135 */ 136 public static final int SECONDARY = 1; 137 138 /** 139 * Constant used to specify the collation strength. 140 */ 141 public static final int TERTIARY = 2; 142 143 /** 144 * Constant used to specify the collation strength. 145 */ 146 public static final int IDENTICAL = 3; 147 148 RuleBasedCollatorICU icuColl; 149 150 Collator(RuleBasedCollatorICU icuColl) { 151 this.icuColl = icuColl; 152 } 153 154 /** 155 * Constructs a new {@code Collator} instance. 156 */ 157 protected Collator() { 158 icuColl = new RuleBasedCollatorICU(Locale.getDefault()); 159 } 160 161 /** 162 * Returns a new collator with the same decomposition mode and 163 * strength value as this collator. 164 * 165 * @return a shallow copy of this collator. 166 * @see java.lang.Cloneable 167 */ 168 @Override 169 public Object clone() { 170 try { 171 Collator clone = (Collator) super.clone(); 172 clone.icuColl = (RuleBasedCollatorICU) icuColl.clone(); 173 return clone; 174 } catch (CloneNotSupportedException e) { 175 throw new AssertionError(e); 176 } 177 } 178 179 /** 180 * Compares two objects to determine their relative order. The objects must 181 * be strings. 182 * 183 * @param object1 184 * the first string to compare. 185 * @param object2 186 * the second string to compare. 187 * @return a negative value if {@code object1} is less than {@code object2}, 188 * 0 if they are equal, and a positive value if {@code object1} is 189 * greater than {@code object2}. 190 * @throws ClassCastException 191 * if {@code object1} or {@code object2} is not a {@code String}. 192 */ 193 public int compare(Object object1, Object object2) { 194 return compare((String) object1, (String) object2); 195 } 196 197 /** 198 * Compares two strings to determine their relative order. 199 * 200 * @param string1 201 * the first string to compare. 202 * @param string2 203 * the second string to compare. 204 * @return a negative value if {@code string1} is less than {@code string2}, 205 * 0 if they are equal and a positive value if {@code string1} is 206 * greater than {@code string2}. 207 */ 208 public abstract int compare(String string1, String string2); 209 210 /** 211 * Compares this collator with the specified object and indicates if they 212 * are equal. 213 * 214 * @param object 215 * the object to compare with this object. 216 * @return {@code true} if {@code object} is a {@code Collator} object and 217 * it has the same strength and decomposition values as this 218 * collator; {@code false} otherwise. 219 * @see #hashCode 220 */ 221 @Override 222 public boolean equals(Object object) { 223 if (!(object instanceof Collator)) { 224 return false; 225 } 226 Collator collator = (Collator) object; 227 return icuColl == null ? collator.icuColl == null : icuColl.equals(collator.icuColl); 228 } 229 230 /** 231 * Compares two strings using the collation rules to determine if they are 232 * equal. 233 * 234 * @param string1 235 * the first string to compare. 236 * @param string2 237 * the second string to compare. 238 * @return {@code true} if {@code string1} and {@code string2} are equal 239 * using the collation rules, false otherwise. 240 */ 241 public boolean equals(String string1, String string2) { 242 return compare(string1, string2) == 0; 243 } 244 245 /** 246 * Returns an array of locales for which custom {@code Collator} instances 247 * are available. 248 * <p>Note that Android does not support user-supplied locale service providers. 249 */ 250 public static Locale[] getAvailableLocales() { 251 return ICU.getAvailableCollatorLocales(); 252 } 253 254 /** 255 * Returns a {@link CollationKey} for the specified string for this collator 256 * with the current decomposition rule and strength value. 257 * 258 * @param string 259 * the source string that is converted into a collation key. 260 * @return the collation key for {@code string}. 261 */ 262 public abstract CollationKey getCollationKey(String string); 263 264 /** 265 * Returns the decomposition rule for this collator. 266 * 267 * @return the decomposition rule, either {@code NO_DECOMPOSITION} or 268 * {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} is 269 * not supported. 270 */ 271 public int getDecomposition() { 272 return decompositionMode_ICU_Java(icuColl.getDecomposition()); 273 } 274 275 /** 276 * Returns a {@code Collator} instance which is appropriate for the user's default 277 * {@code Locale}. 278 * See "<a href="../util/Locale.html#default_locale">Be wary of the default locale</a>". 279 */ 280 public static Collator getInstance() { 281 return getInstance(Locale.getDefault()); 282 } 283 284 /** 285 * Returns a {@code Collator} instance which is appropriate for {@code locale}. 286 */ 287 public static Collator getInstance(Locale locale) { 288 if (locale == null) { 289 throw new NullPointerException("locale == null"); 290 } 291 return new RuleBasedCollator(new RuleBasedCollatorICU(locale)); 292 } 293 294 /** 295 * Returns the strength value for this collator. 296 * 297 * @return the strength value, either PRIMARY, SECONDARY, TERTIARY or 298 * IDENTICAL. 299 */ 300 public int getStrength() { 301 return strength_ICU_Java(icuColl.getStrength()); 302 } 303 304 @Override 305 public abstract int hashCode(); 306 307 /** 308 * Sets the decomposition rule for this collator. 309 * 310 * @param value 311 * the decomposition rule, either {@code NO_DECOMPOSITION} or 312 * {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} 313 * is not supported. 314 * @throws IllegalArgumentException 315 * if the provided decomposition rule is not valid. This includes 316 * {@code FULL_DECOMPOSITION}. 317 */ 318 public void setDecomposition(int value) { 319 icuColl.setDecomposition(decompositionMode_Java_ICU(value)); 320 } 321 322 /** 323 * Sets the strength value for this collator. 324 * 325 * @param value 326 * the strength value, either PRIMARY, SECONDARY, TERTIARY, or 327 * IDENTICAL. 328 * @throws IllegalArgumentException 329 * if the provided strength value is not valid. 330 */ 331 public void setStrength(int value) { 332 icuColl.setStrength(strength_Java_ICU(value)); 333 } 334 335 private int decompositionMode_Java_ICU(int mode) { 336 switch (mode) { 337 case Collator.CANONICAL_DECOMPOSITION: 338 return RuleBasedCollatorICU.VALUE_ON; 339 case Collator.NO_DECOMPOSITION: 340 return RuleBasedCollatorICU.VALUE_OFF; 341 } 342 throw new IllegalArgumentException("Bad mode: " + mode); 343 } 344 345 private int decompositionMode_ICU_Java(int mode) { 346 int javaMode = mode; 347 switch (mode) { 348 case RuleBasedCollatorICU.VALUE_OFF: 349 javaMode = Collator.NO_DECOMPOSITION; 350 break; 351 case RuleBasedCollatorICU.VALUE_ON: 352 javaMode = Collator.CANONICAL_DECOMPOSITION; 353 break; 354 } 355 return javaMode; 356 } 357 358 private int strength_Java_ICU(int value) { 359 switch (value) { 360 case Collator.PRIMARY: 361 return RuleBasedCollatorICU.VALUE_PRIMARY; 362 case Collator.SECONDARY: 363 return RuleBasedCollatorICU.VALUE_SECONDARY; 364 case Collator.TERTIARY: 365 return RuleBasedCollatorICU.VALUE_TERTIARY; 366 case Collator.IDENTICAL: 367 return RuleBasedCollatorICU.VALUE_IDENTICAL; 368 } 369 throw new IllegalArgumentException("Bad strength: " + value); 370 } 371 372 private int strength_ICU_Java(int value) { 373 int javaValue = value; 374 switch (value) { 375 case RuleBasedCollatorICU.VALUE_PRIMARY: 376 javaValue = Collator.PRIMARY; 377 break; 378 case RuleBasedCollatorICU.VALUE_SECONDARY: 379 javaValue = Collator.SECONDARY; 380 break; 381 case RuleBasedCollatorICU.VALUE_TERTIARY: 382 javaValue = Collator.TERTIARY; 383 break; 384 case RuleBasedCollatorICU.VALUE_IDENTICAL: 385 javaValue = Collator.IDENTICAL; 386 break; 387 } 388 return javaValue; 389 } 390} 391