1/** 2******************************************************************************* 3* Copyright (C) 1996-2015, International Business Machines Corporation and 4* others. All Rights Reserved. 5******************************************************************************* 6*/ 7package com.ibm.icu.text; 8 9import java.util.Comparator; 10import java.util.Enumeration; 11import java.util.Iterator; 12import java.util.LinkedList; 13import java.util.Locale; 14import java.util.MissingResourceException; 15import java.util.Set; 16 17import com.ibm.icu.impl.ICUDebug; 18import com.ibm.icu.impl.ICUResourceBundle; 19import com.ibm.icu.impl.coll.CollationData; 20import com.ibm.icu.impl.coll.CollationRoot; 21import com.ibm.icu.lang.UCharacter; 22import com.ibm.icu.lang.UProperty; 23import com.ibm.icu.lang.UScript; 24import com.ibm.icu.util.Freezable; 25import com.ibm.icu.util.ICUException; 26import com.ibm.icu.util.ULocale; 27import com.ibm.icu.util.ULocale.Category; 28import com.ibm.icu.util.UResourceBundle; 29import com.ibm.icu.util.VersionInfo; 30 31/** 32* {@icuenhanced java.text.Collator}.{@icu _usage_} 33* 34* <p>Collator performs locale-sensitive string comparison. A concrete 35* subclass, RuleBasedCollator, allows customization of the collation 36* ordering by the use of rule sets.</p> 37* 38* <p>A Collator is thread-safe only when frozen. See {{@link #isFrozen()} and {@link Freezable}. 39* 40* <p>Following the <a href=http://www.unicode.org>Unicode 41* Consortium</a>'s specifications for the 42* <a href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation 43* Algorithm (UCA)</a>, there are 5 different levels of strength used 44* in comparisons: 45* 46* <ul> 47* <li>PRIMARY strength: Typically, this is used to denote differences between 48* base characters (for example, "a" < "b"). 49* It is the strongest difference. For example, dictionaries are divided 50* into different sections by base character. 
* <li>SECONDARY strength: Accents in the characters are considered secondary
*     differences (for example, "as" &lt; "às" &lt; "at"). Other
*     differences
*     between letters can also be considered secondary differences, depending
*     on the language. A secondary difference is ignored when there is a
*     primary difference anywhere in the strings.
* <li>TERTIARY strength: Upper and lower case differences in characters are
*     distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
*     "aò"). In addition, a variant of a letter differs from the base
*     form on the tertiary strength (such as "A" and "Ⓐ"). Another
*     example is the
*     difference between large and small Kana. A tertiary difference is ignored
*     when there is a primary or secondary difference anywhere in the strings.
* <li>QUATERNARY strength: When punctuation is ignored
*     (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
*     Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
*     strength, an additional strength level can
*     be used to distinguish words with and without punctuation (for example,
*     "ab" &lt; "a-b" &lt; "aB").
*     This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
*     difference. The QUATERNARY strength should only be used if ignoring
*     punctuation is required.
* <li>IDENTICAL strength:
*     When all other strengths are equal, the IDENTICAL strength is used as a
*     tiebreaker. The Unicode code point values of the NFD form of each string
*     are compared, just in case there is no difference.
*     For example, Hebrew cantillation marks are only distinguished at this
*     strength. This strength should be used sparingly, as two strings that
*     differ only in their code point values are an extremely rare occurrence.
*     Using this strength substantially decreases the performance for both
*     comparison and collation key generation APIs. This strength also
*     increases the size of the collation key.
83* </ul> 84* 85* Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes, 86* the canonical decomposition mode and one that does not use any decomposition. 87* The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION 88* is not supported here. If the canonical 89* decomposition mode is set, the Collator handles un-normalized text properly, 90* producing the same results as if the text were normalized in NFD. If 91* canonical decomposition is turned off, it is the user's responsibility to 92* ensure that all text is already in the appropriate form before performing 93* a comparison or before getting a CollationKey.</p> 94* 95* <p>For more information about the collation service see the 96* <a href="http://userguide.icu-project.org/collation">User Guide</a>.</p> 97* 98* <p>Examples of use 99* <pre> 100* // Get the Collator for US English and set its strength to PRIMARY 101* Collator usCollator = Collator.getInstance(Locale.US); 102* usCollator.setStrength(Collator.PRIMARY); 103* if (usCollator.compare("abc", "ABC") == 0) { 104* System.out.println("Strings are equivalent"); 105* } 106* 107* The following example shows how to compare two strings using the 108* Collator for the default locale. 
109* 110* // Compare two strings in the default locale 111* Collator myCollator = Collator.getInstance(); 112* myCollator.setDecomposition(NO_DECOMPOSITION); 113* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) { 114* System.out.println("à\u0325 is not equals to a\u0325̀ without decomposition"); 115* myCollator.setDecomposition(CANONICAL_DECOMPOSITION); 116* if (myCollator.compare("à\u0325", "a\u0325̀") != 0) { 117* System.out.println("Error: à\u0325 should be equals to a\u0325̀ with decomposition"); 118* } 119* else { 120* System.out.println("à\u0325 is equals to a\u0325̀ with decomposition"); 121* } 122* } 123* else { 124* System.out.println("Error: à\u0325 should be not equals to a\u0325̀ without decomposition"); 125* } 126* </pre> 127* </p> 128* @see RuleBasedCollator 129* @see CollationKey 130* @author Syn Wee Quek 131* @stable ICU 2.8 132*/ 133public abstract class Collator implements Comparator<Object>, Freezable<Collator>, Cloneable 134{ 135 // public data members --------------------------------------------------- 136 137 /** 138 * Strongest collator strength value. Typically used to denote differences 139 * between base characters. See class documentation for more explanation. 140 * @see #setStrength 141 * @see #getStrength 142 * @stable ICU 2.8 143 */ 144 public final static int PRIMARY = 0; 145 146 /** 147 * Second level collator strength value. 148 * Accents in the characters are considered secondary differences. 149 * Other differences between letters can also be considered secondary 150 * differences, depending on the language. 151 * See class documentation for more explanation. 152 * @see #setStrength 153 * @see #getStrength 154 * @stable ICU 2.8 155 */ 156 public final static int SECONDARY = 1; 157 158 /** 159 * Third level collator strength value. 160 * Upper and lower case differences in characters are distinguished at this 161 * strength level. In addition, a variant of a letter differs from the base 162 * form on the tertiary level. 
163 * See class documentation for more explanation. 164 * @see #setStrength 165 * @see #getStrength 166 * @stable ICU 2.8 167 */ 168 public final static int TERTIARY = 2; 169 170 /** 171 * {@icu} Fourth level collator strength value. 172 * When punctuation is ignored 173 * (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation"> 174 * Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY 175 * strength, an additional strength level can 176 * be used to distinguish words with and without punctuation. 177 * See class documentation for more explanation. 178 * @see #setStrength 179 * @see #getStrength 180 * @stable ICU 2.8 181 */ 182 public final static int QUATERNARY = 3; 183 184 /** 185 * Smallest Collator strength value. When all other strengths are equal, 186 * the IDENTICAL strength is used as a tiebreaker. The Unicode code point 187 * values of the NFD form of each string are compared, just in case there 188 * is no difference. 189 * See class documentation for more explanation. 190 * </p> 191 * <p> 192 * Note this value is different from JDK's 193 * </p> 194 * @stable ICU 2.8 195 */ 196 public final static int IDENTICAL = 15; 197 198 /** 199 * {@icunote} This is for backwards compatibility with Java APIs only. It 200 * should not be used, IDENTICAL should be used instead. ICU's 201 * collation does not support Java's FULL_DECOMPOSITION mode. 202 * @stable ICU 3.4 203 */ 204 public final static int FULL_DECOMPOSITION = IDENTICAL; 205 206 /** 207 * Decomposition mode value. With NO_DECOMPOSITION set, Strings 208 * will not be decomposed for collation. 
This is the default 209 * decomposition setting unless otherwise specified by the locale 210 * used to create the Collator.</p> 211 * 212 * <p><strong>Note</strong> this value is different from the JDK's.</p> 213 * @see #CANONICAL_DECOMPOSITION 214 * @see #getDecomposition 215 * @see #setDecomposition 216 * @stable ICU 2.8 217 */ 218 public final static int NO_DECOMPOSITION = 16; 219 220 /** 221 * Decomposition mode value. With CANONICAL_DECOMPOSITION set, 222 * characters that are canonical variants according to the Unicode standard 223 * will be decomposed for collation.</p> 224 * 225 * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as 226 * described in <a href="http://www.unicode.org/unicode/reports/tr15/"> 227 * Unicode Technical Report #15</a>. 228 * </p> 229 * @see #NO_DECOMPOSITION 230 * @see #getDecomposition 231 * @see #setDecomposition 232 * @stable ICU 2.8 233 */ 234 public final static int CANONICAL_DECOMPOSITION = 17; 235 236 /** 237 * Reordering codes for non-script groups that can be reordered under collation. 238 * 239 * @see #getReorderCodes 240 * @see #setReorderCodes 241 * @see #getEquivalentReorderCodes 242 * @stable ICU 4.8 243 */ 244 public static interface ReorderCodes { 245 /** 246 * A special reordering code that is used to specify the default reordering codes for a locale. 247 * @stable ICU 4.8 248 */ 249 public final static int DEFAULT = -1; // == UScript.INVALID_CODE 250 /** 251 * A special reordering code that is used to specify no reordering codes. 252 * @stable ICU 4.8 253 */ 254 public final static int NONE = UScript.UNKNOWN; 255 /** 256 * A special reordering code that is used to specify all other codes used for reordering except 257 * for the codes listed as ReorderingCodes and those listed explicitly in a reordering. 258 * @stable ICU 4.8 259 */ 260 public final static int OTHERS = UScript.UNKNOWN; 261 /** 262 * Characters with the space property. 263 * This is equivalent to the rule value "space". 
264 * @stable ICU 4.8 265 */ 266 public final static int SPACE = 0x1000; 267 /** 268 * The first entry in the enumeration of reordering groups. This is intended for use in 269 * range checking and enumeration of the reorder codes. 270 * @stable ICU 4.8 271 */ 272 public final static int FIRST = SPACE; 273 /** 274 * Characters with the punctuation property. 275 * This is equivalent to the rule value "punct". 276 * @stable ICU 4.8 277 */ 278 public final static int PUNCTUATION = 0x1001; 279 /** 280 * Characters with the symbol property. 281 * This is equivalent to the rule value "symbol". 282 * @stable ICU 4.8 283 */ 284 public final static int SYMBOL = 0x1002; 285 /** 286 * Characters with the currency property. 287 * This is equivalent to the rule value "currency". 288 * @stable ICU 4.8 289 */ 290 public final static int CURRENCY = 0x1003; 291 /** 292 * Characters with the digit property. 293 * This is equivalent to the rule value "digit". 294 * @stable ICU 4.8 295 */ 296 public final static int DIGIT = 0x1004; 297 /** 298 * The limit of the reorder codes. This is intended for use in range checking 299 * and enumeration of the reorder codes. 300 * @stable ICU 4.8 301 */ 302 public final static int LIMIT = 0x1005; 303 } 304 305 // public methods -------------------------------------------------------- 306 307 /** 308 * Compares the equality of two Collator objects. Collator objects are equal if they have the same 309 * collation (sorting & searching) behavior. 310 * 311 * <p>The base class checks for null and for equal types. 312 * Subclasses should override. 313 * 314 * @param obj the Collator to compare to. 315 * @return true if this Collator has exactly the same collation behavior as obj, false otherwise. 316 * @stable ICU 2.8 317 */ 318 @Override 319 public boolean equals(Object obj) { 320 // Subclasses: Call this method and then add more specific checks. 
321 return this == obj || (obj != null && getClass() == obj.getClass()); 322 } 323 324 // public setters -------------------------------------------------------- 325 326 private void checkNotFrozen() { 327 if (isFrozen()) { 328 throw new UnsupportedOperationException("Attempt to modify frozen Collator"); 329 } 330 } 331 332 /** 333 * Sets this Collator's strength attribute. The strength attribute 334 * determines the minimum level of difference considered significant 335 * during comparison.</p> 336 * 337 * <p>The base class method does nothing. Subclasses should override it if appropriate. 338 * 339 * <p>See the Collator class description for an example of use.</p> 340 * @param newStrength the new strength value. 341 * @see #getStrength 342 * @see #PRIMARY 343 * @see #SECONDARY 344 * @see #TERTIARY 345 * @see #QUATERNARY 346 * @see #IDENTICAL 347 * @throws IllegalArgumentException if the new strength value is not valid. 348 * @stable ICU 2.8 349 */ 350 public void setStrength(int newStrength) 351 { 352 checkNotFrozen(); 353 } 354 355 /** 356 * @return this, for chaining 357 * @internal Used in UnicodeTools 358 * @deprecated This API is ICU internal only. 359 */ 360 @Deprecated 361 public Collator setStrength2(int newStrength) 362 { 363 setStrength(newStrength); 364 return this; 365 } 366 367 /** 368 * Sets the decomposition mode of this Collator. Setting this 369 * decomposition attribute with CANONICAL_DECOMPOSITION allows the 370 * Collator to handle un-normalized text properly, producing the 371 * same results as if the text were normalized. If 372 * NO_DECOMPOSITION is set, it is the user's responsibility to 373 * insure that all text is already in the appropriate form before 374 * a comparison or before getting a CollationKey. 
Adjusting 375 * decomposition mode allows the user to select between faster and 376 * more complete collation behavior.</p> 377 * 378 * <p>Since a great many of the world's languages do not require 379 * text normalization, most locales set NO_DECOMPOSITION as the 380 * default decomposition mode.</p> 381 * 382 * <p>The base class method does nothing. Subclasses should override it if appropriate. 383 * 384 * <p>See getDecomposition for a description of decomposition 385 * mode.</p> 386 * 387 * @param decomposition the new decomposition mode 388 * @see #getDecomposition 389 * @see #NO_DECOMPOSITION 390 * @see #CANONICAL_DECOMPOSITION 391 * @throws IllegalArgumentException If the given value is not a valid 392 * decomposition mode. 393 * @stable ICU 2.8 394 */ 395 public void setDecomposition(int decomposition) 396 { 397 checkNotFrozen(); 398 } 399 400 /** 401 * Sets the reordering codes for this collator. 402 * Collation reordering allows scripts and some other groups of characters 403 * to be moved relative to each other. This reordering is done on top of 404 * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 405 * at the start and/or the end of the collation order. These groups are specified using 406 * UScript codes and {@link Collator.ReorderCodes} entries. 407 * 408 * <p>By default, reordering codes specified for the start of the order are placed in the 409 * order given after several special non-script blocks. These special groups of characters 410 * are space, punctuation, symbol, currency, and digit. These special groups are represented with 411 * {@link Collator.ReorderCodes} entries. Script groups can be intermingled with 412 * these special non-script groups if those special groups are explicitly specified in the reordering. 413 * 414 * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS} 415 * stands for any script that is not explicitly 416 * mentioned in the list of reordering codes given. 
Anything that is after OTHERS 417 * will go at the very end of the reordering in the order given. 418 * 419 * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT} 420 * will reset the reordering for this collator 421 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that 422 * was specified when this collator was created from resource data or from rules. The 423 * DEFAULT code <b>must</b> be the sole code supplied when it is used. 424 * If not, then an {@link IllegalArgumentException} will be thrown. 425 * 426 * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE} 427 * will remove any reordering for this collator. 428 * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 429 * NONE code <b>must</b> be the sole code supplied when it is used. 430 * 431 * @param order the reordering codes to apply to this collator; if this is null or an empty array 432 * then this clears any existing reordering 433 * @see #getReorderCodes 434 * @see #getEquivalentReorderCodes 435 * @see Collator.ReorderCodes 436 * @see UScript 437 * @stable ICU 4.8 438 */ 439 public void setReorderCodes(int... order) 440 { 441 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 442 } 443 444 // public getters -------------------------------------------------------- 445 446 /** 447 * Returns the Collator for the current default locale. 448 * The default locale is determined by java.util.Locale.getDefault(). 449 * @return the Collator for the default locale (for example, en_US) if it 450 * is created successfully. Otherwise if there is no Collator 451 * associated with the current locale, the root collator 452 * will be returned. 
453 * @see java.util.Locale#getDefault() 454 * @see #getInstance(Locale) 455 * @stable ICU 2.8 456 */ 457 public static final Collator getInstance() 458 { 459 return getInstance(ULocale.getDefault()); 460 } 461 462 /** 463 * Clones the collator. 464 * @stable ICU 2.6 465 * @return a clone of this collator. 466 */ 467 public Object clone() throws CloneNotSupportedException { 468 return super.clone(); 469 } 470 471 // begin registry stuff 472 473 /** 474 * A factory used with registerFactory to register multiple collators and provide 475 * display names for them. If standard locale display names are sufficient, 476 * Collator instances may be registered instead. 477 * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses 478 * ULocale instead of Locale. Instead of overriding createCollator(Locale), 479 * new implementations should override createCollator(ULocale). Note that 480 * one of these two methods <b>MUST</b> be overridden or else an infinite 481 * loop will occur. 482 * @stable ICU 2.6 483 */ 484 public static abstract class CollatorFactory { 485 /** 486 * Return true if this factory will be visible. Default is true. 487 * If not visible, the locales supported by this factory will not 488 * be listed by getAvailableLocales. 489 * 490 * @return true if this factory is visible 491 * @stable ICU 2.6 492 */ 493 public boolean visible() { 494 return true; 495 } 496 497 /** 498 * Return an instance of the appropriate collator. If the locale 499 * is not supported, return null. 500 * <b>Note:</b> as of ICU4J 3.2, implementations should override 501 * this method instead of createCollator(Locale). 502 * @param loc the locale for which this collator is to be created. 503 * @return the newly created collator. 504 * @stable ICU 3.2 505 */ 506 public Collator createCollator(ULocale loc) { 507 return createCollator(loc.toLocale()); 508 } 509 510 /** 511 * Return an instance of the appropriate collator. If the locale 512 * is not supported, return null. 
513 * <p><b>Note:</b> as of ICU4J 3.2, implementations should override 514 * createCollator(ULocale) instead of this method, and inherit this 515 * method's implementation. This method is no longer abstract 516 * and instead delegates to createCollator(ULocale). 517 * @param loc the locale for which this collator is to be created. 518 * @return the newly created collator. 519 * @stable ICU 2.6 520 */ 521 public Collator createCollator(Locale loc) { 522 return createCollator(ULocale.forLocale(loc)); 523 } 524 525 /** 526 * Return the name of the collator for the objectLocale, localized for the displayLocale. 527 * If objectLocale is not visible or not defined by the factory, return null. 528 * @param objectLocale the locale identifying the collator 529 * @param displayLocale the locale for which the display name of the collator should be localized 530 * @return the display name 531 * @stable ICU 2.6 532 */ 533 public String getDisplayName(Locale objectLocale, Locale displayLocale) { 534 return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale)); 535 } 536 537 /** 538 * Return the name of the collator for the objectLocale, localized for the displayLocale. 539 * If objectLocale is not visible or not defined by the factory, return null. 540 * @param objectLocale the locale identifying the collator 541 * @param displayLocale the locale for which the display name of the collator should be localized 542 * @return the display name 543 * @stable ICU 3.2 544 */ 545 public String getDisplayName(ULocale objectLocale, ULocale displayLocale) { 546 if (visible()) { 547 Set<String> supported = getSupportedLocaleIDs(); 548 String name = objectLocale.getBaseName(); 549 if (supported.contains(name)) { 550 return objectLocale.getDisplayName(displayLocale); 551 } 552 } 553 return null; 554 } 555 556 /** 557 * Return an unmodifiable collection of the locale names directly 558 * supported by this factory. 559 * 560 * @return the set of supported locale IDs. 
561 * @stable ICU 2.6 562 */ 563 public abstract Set<String> getSupportedLocaleIDs(); 564 565 /** 566 * Empty default constructor. 567 * @stable ICU 2.6 568 */ 569 protected CollatorFactory() { 570 } 571 } 572 573 static abstract class ServiceShim { 574 abstract Collator getInstance(ULocale l); 575 abstract Object registerInstance(Collator c, ULocale l); 576 abstract Object registerFactory(CollatorFactory f); 577 abstract boolean unregister(Object k); 578 abstract Locale[] getAvailableLocales(); // TODO remove 579 abstract ULocale[] getAvailableULocales(); 580 abstract String getDisplayName(ULocale ol, ULocale dl); 581 } 582 583 private static ServiceShim shim; 584 private static ServiceShim getShim() { 585 // Note: this instantiation is safe on loose-memory-model configurations 586 // despite lack of synchronization, since the shim instance has no state-- 587 // it's all in the class init. The worst problem is we might instantiate 588 // two shim instances, but they'll share the same state so that's ok. 589 if (shim == null) { 590 try { 591 Class<?> cls = Class.forName("com.ibm.icu.text.CollatorServiceShim"); 592 shim = (ServiceShim)cls.newInstance(); 593 } 594 catch (MissingResourceException e) 595 { 596 ///CLOVER:OFF 597 throw e; 598 ///CLOVER:ON 599 } 600 catch (Exception e) { 601 ///CLOVER:OFF 602 if(DEBUG){ 603 e.printStackTrace(); 604 } 605 throw new ICUException(e); 606 ///CLOVER:ON 607 } 608 } 609 return shim; 610 } 611 612 /** 613 * Simpler/faster methods for ASCII than ones based on Unicode data. 614 * TODO: There should be code like this somewhere already?? 
615 */ 616 private static final class ASCII { 617 static boolean equalIgnoreCase(CharSequence left, CharSequence right) { 618 int length = left.length(); 619 if (length != right.length()) { return false; } 620 for (int i = 0; i < length; ++i) { 621 char lc = left.charAt(i); 622 char rc = right.charAt(i); 623 if (lc == rc) { continue; } 624 if ('A' <= lc && lc <= 'Z') { 625 if ((lc + 0x20) == rc) { continue; } 626 } else if ('A' <= rc && rc <= 'Z') { 627 if ((rc + 0x20) == lc) { continue; } 628 } 629 return false; 630 } 631 return true; 632 } 633 } 634 635 private static final boolean getYesOrNo(String keyword, String s) { 636 if (ASCII.equalIgnoreCase(s, "yes")) { 637 return true; 638 } 639 if (ASCII.equalIgnoreCase(s, "no")) { 640 return false; 641 } 642 throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s); 643 } 644 645 private static final int getIntValue(String keyword, String s, String... values) { 646 for (int i = 0; i < values.length; ++i) { 647 if (ASCII.equalIgnoreCase(s, values[i])) { 648 return i; 649 } 650 } 651 throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s); 652 } 653 654 private static final int getReorderCode(String keyword, String s) { 655 return Collator.ReorderCodes.FIRST + 656 getIntValue(keyword, s, "space", "punct", "symbol", "currency", "digit"); 657 // Not supporting "others" = UCOL_REORDER_CODE_OTHERS 658 // as a synonym for Zzzz = USCRIPT_UNKNOWN for now: 659 // Avoid introducing synonyms/aliases. 660 } 661 662 /** 663 * Sets collation attributes according to locale keywords. 
See 664 * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings 665 * 666 * Using "alias" keywords and values where defined: 667 * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax 668 * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml 669 */ 670 private static void setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc) { 671 // Check for collation keywords that were already deprecated 672 // before any were supported in createInstance() (except for "collation"). 673 String value = loc.getKeywordValue("colHiraganaQuaternary"); 674 if (value != null) { 675 throw new UnsupportedOperationException("locale keyword kh/colHiraganaQuaternary"); 676 } 677 value = loc.getKeywordValue("variableTop"); 678 if (value != null) { 679 throw new UnsupportedOperationException("locale keyword vt/variableTop"); 680 } 681 // Parse known collation keywords, ignore others. 682 value = loc.getKeywordValue("colStrength"); 683 if (value != null) { 684 // Note: Not supporting typo "quarternary" because it was never supported in locale IDs. 685 int strength = getIntValue("colStrength", value, 686 "primary", "secondary", "tertiary", "quaternary", "identical"); 687 coll.setStrength(strength <= Collator.QUATERNARY ? 
strength : Collator.IDENTICAL); 688 } 689 value = loc.getKeywordValue("colBackwards"); 690 if (value != null) { 691 if (rbc != null) { 692 rbc.setFrenchCollation(getYesOrNo("colBackwards", value)); 693 } else { 694 throw new UnsupportedOperationException( 695 "locale keyword kb/colBackwards only settable for RuleBasedCollator"); 696 } 697 } 698 value = loc.getKeywordValue("colCaseLevel"); 699 if (value != null) { 700 if (rbc != null) { 701 rbc.setCaseLevel(getYesOrNo("colCaseLevel", value)); 702 } else { 703 throw new UnsupportedOperationException( 704 "locale keyword kb/colBackwards only settable for RuleBasedCollator"); 705 } 706 } 707 value = loc.getKeywordValue("colCaseFirst"); 708 if (value != null) { 709 if (rbc != null) { 710 int cf = getIntValue("colCaseFirst", value, "no", "lower", "upper"); 711 if (cf == 0) { 712 rbc.setLowerCaseFirst(false); 713 rbc.setUpperCaseFirst(false); 714 } else if (cf == 1) { 715 rbc.setLowerCaseFirst(true); 716 } else /* cf == 2 */ { 717 rbc.setUpperCaseFirst(true); 718 } 719 } else { 720 throw new UnsupportedOperationException( 721 "locale keyword kf/colCaseFirst only settable for RuleBasedCollator"); 722 } 723 } 724 value = loc.getKeywordValue("colAlternate"); 725 if (value != null) { 726 if (rbc != null) { 727 rbc.setAlternateHandlingShifted( 728 getIntValue("colAlternate", value, "non-ignorable", "shifted") != 0); 729 } else { 730 throw new UnsupportedOperationException( 731 "locale keyword ka/colAlternate only settable for RuleBasedCollator"); 732 } 733 } 734 value = loc.getKeywordValue("colNormalization"); 735 if (value != null) { 736 coll.setDecomposition(getYesOrNo("colNormalization", value) ? 
737 Collator.CANONICAL_DECOMPOSITION : Collator.NO_DECOMPOSITION); 738 } 739 value = loc.getKeywordValue("colNumeric"); 740 if (value != null) { 741 if (rbc != null) { 742 rbc.setNumericCollation(getYesOrNo("colNumeric", value)); 743 } else { 744 throw new UnsupportedOperationException( 745 "locale keyword kn/colNumeric only settable for RuleBasedCollator"); 746 } 747 } 748 value = loc.getKeywordValue("colReorder"); 749 if (value != null) { 750 int[] codes = new int[UScript.CODE_LIMIT + Collator.ReorderCodes.LIMIT - Collator.ReorderCodes.FIRST]; 751 int codesLength = 0; 752 int scriptNameStart = 0; 753 for (;;) { 754 if (codesLength == codes.length) { 755 throw new IllegalArgumentException( 756 "too many script codes for colReorder locale keyword: " + value); 757 } 758 int limit = scriptNameStart; 759 while (limit < value.length() && value.charAt(limit) != '-') { ++limit; } 760 String scriptName = value.substring(scriptNameStart, limit); 761 int code; 762 if (scriptName.length() == 4) { 763 // Strict parsing, accept only 4-letter script codes, not long names. 764 code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptName); 765 } else { 766 code = getReorderCode("colReorder", scriptName); 767 } 768 codes[codesLength++] = code; 769 if (limit == value.length()) { break; } 770 scriptNameStart = limit + 1; 771 } 772 if (codesLength == 0) { 773 throw new IllegalArgumentException("no script codes for colReorder locale keyword"); 774 } 775 int[] args = new int[codesLength]; 776 System.arraycopy(codes, 0, args, 0, codesLength); 777 coll.setReorderCodes(args); 778 } 779 value = loc.getKeywordValue("kv"); 780 if (value != null) { 781 coll.setMaxVariable(getReorderCode("kv", value)); 782 } 783 } 784 785 /** 786 * {@icu} Returns the Collator for the desired locale. 787 * 788 * <p>For some languages, multiple collation types are available; 789 * for example, "de@collation=phonebook". 
790 * Starting with ICU 54, collation attributes can be specified via locale keywords as well, 791 * in the old locale extension syntax ("el@colCaseFirst=upper") 792 * or in language tag syntax ("el-u-kf-upper"). 793 * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>. 794 * 795 * @param locale the desired locale. 796 * @return Collator for the desired locale if it is created successfully. 797 * Otherwise if there is no Collator 798 * associated with the current locale, the root collator will 799 * be returned. 800 * @see java.util.Locale 801 * @see java.util.ResourceBundle 802 * @see #getInstance(Locale) 803 * @see #getInstance() 804 * @stable ICU 3.0 805 */ 806 public static final Collator getInstance(ULocale locale) { 807 // fetching from service cache is faster than instantiation 808 if (locale == null) { 809 locale = ULocale.getDefault(); 810 } 811 Collator coll = getShim().getInstance(locale); 812 if (!locale.getName().equals(locale.getBaseName())) { // any keywords? 813 setAttributesFromKeywords(locale, coll, 814 (coll instanceof RuleBasedCollator) ? (RuleBasedCollator)coll : null); 815 } 816 return coll; 817 } 818 819 /** 820 * Returns the Collator for the desired locale. 821 * 822 * <p>For some languages, multiple collation types are available; 823 * for example, "de-u-co-phonebk". 824 * Starting with ICU 54, collation attributes can be specified via locale keywords as well, 825 * in the old locale extension syntax ("el@colCaseFirst=upper", only with {@link ULocale}) 826 * or in language tag syntax ("el-u-kf-upper"). 827 * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>. 828 * 829 * @param locale the desired locale. 830 * @return Collator for the desired locale if it is created successfully. 831 * Otherwise if there is no Collator 832 * associated with the current locale, the root collator will 833 * be returned. 
834 * @see java.util.Locale 835 * @see java.util.ResourceBundle 836 * @see #getInstance(ULocale) 837 * @see #getInstance() 838 * @stable ICU 2.8 839 */ 840 public static final Collator getInstance(Locale locale) { 841 return getInstance(ULocale.forLocale(locale)); 842 } 843 844 /** 845 * {@icu} Registers a collator as the default collator for the provided locale. The 846 * collator should not be modified after it is registered. 847 * 848 * <p>Because ICU may choose to cache Collator objects internally, this must 849 * be called at application startup, prior to any calls to 850 * Collator.getInstance to avoid undefined behavior. 851 * 852 * @param collator the collator to register 853 * @param locale the locale for which this is the default collator 854 * @return an object that can be used to unregister the registered collator. 855 * 856 * @stable ICU 3.2 857 */ 858 public static final Object registerInstance(Collator collator, ULocale locale) { 859 return getShim().registerInstance(collator, locale); 860 } 861 862 /** 863 * {@icu} Registers a collator factory. 864 * 865 * <p>Because ICU may choose to cache Collator objects internally, this must 866 * be called at application startup, prior to any calls to 867 * Collator.getInstance to avoid undefined behavior. 868 * 869 * @param factory the factory to register 870 * @return an object that can be used to unregister the registered factory. 871 * 872 * @stable ICU 2.6 873 */ 874 public static final Object registerFactory(CollatorFactory factory) { 875 return getShim().registerFactory(factory); 876 } 877 878 /** 879 * {@icu} Unregisters a collator previously registered using registerInstance. 880 * @param registryKey the object previously returned by registerInstance. 881 * @return true if the collator was successfully unregistered. 
882 * @stable ICU 2.6 883 */ 884 public static final boolean unregister(Object registryKey) { 885 if (shim == null) { 886 return false; 887 } 888 return shim.unregister(registryKey); 889 } 890 891 /** 892 * Returns the set of locales, as Locale objects, for which collators 893 * are installed. Note that Locale objects do not support RFC 3066. 894 * @return the list of locales in which collators are installed. 895 * This list includes any that have been registered, in addition to 896 * those that are installed with ICU4J. 897 * @stable ICU 2.4 898 */ 899 public static Locale[] getAvailableLocales() { 900 // TODO make this wrap getAvailableULocales later 901 if (shim == null) { 902 return ICUResourceBundle.getAvailableLocales( 903 ICUResourceBundle.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); 904 } 905 return shim.getAvailableLocales(); 906 } 907 908 /** 909 * {@icu} Returns the set of locales, as ULocale objects, for which collators 910 * are installed. ULocale objects support RFC 3066. 911 * @return the list of locales in which collators are installed. 912 * This list includes any that have been registered, in addition to 913 * those that are installed with ICU4J. 914 * @stable ICU 3.0 915 */ 916 public static final ULocale[] getAvailableULocales() { 917 if (shim == null) { 918 return ICUResourceBundle.getAvailableULocales( 919 ICUResourceBundle.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER); 920 } 921 return shim.getAvailableULocales(); 922 } 923 924 /** 925 * The list of keywords for this service. This must be kept in sync with 926 * the resource data. 927 * @since ICU 3.0 928 */ 929 private static final String[] KEYWORDS = { "collation" }; 930 931 /** 932 * The resource name for this service. Note that this is not the same as 933 * the keyword for this service. 934 * @since ICU 3.0 935 */ 936 private static final String RESOURCE = "collations"; 937 938 /** 939 * The resource bundle base name for this service. 
940 * *since ICU 3.0 941 */ 942 943 private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME; 944 945 /** 946 * {@icu} Returns an array of all possible keywords that are relevant to 947 * collation. At this point, the only recognized keyword for this 948 * service is "collation". 949 * @return an array of valid collation keywords. 950 * @see #getKeywordValues 951 * @stable ICU 3.0 952 */ 953 public static final String[] getKeywords() { 954 return KEYWORDS; 955 } 956 957 /** 958 * {@icu} Given a keyword, returns an array of all values for 959 * that keyword that are currently in use. 960 * @param keyword one of the keywords returned by getKeywords. 961 * @see #getKeywords 962 * @stable ICU 3.0 963 */ 964 public static final String[] getKeywordValues(String keyword) { 965 if (!keyword.equals(KEYWORDS[0])) { 966 throw new IllegalArgumentException("Invalid keyword: " + keyword); 967 } 968 return ICUResourceBundle.getKeywordValues(BASE, RESOURCE); 969 } 970 971 /** 972 * {@icu} Given a key and a locale, returns an array of string values in a preferred 973 * order that would make a difference. These are all and only those values where 974 * the open (creation) of the service with the locale formed from the input locale 975 * plus input keyword and that value has different behavior than creation with the 976 * input locale alone. 977 * @param key one of the keys supported by this service. For now, only 978 * "collation" is supported. 979 * @param locale the locale 980 * @param commonlyUsed if set to true it will return only commonly used values 981 * with the given locale in preferred order. Otherwise, 982 * it will return all the available values for the locale. 983 * @return an array of string values for the given key and the locale. 984 * @stable ICU 4.2 985 */ 986 public static final String[] getKeywordValuesForLocale(String key, ULocale locale, 987 boolean commonlyUsed) { 988 // Note: The parameter commonlyUsed is actually not used. 
989 // The switch is in the method signature for consistency 990 // with other locale services. 991 992 // Read available collation values from collation bundles 993 String baseLoc = locale.getBaseName(); 994 LinkedList<String> values = new LinkedList<String>(); 995 996 UResourceBundle bundle = UResourceBundle.getBundleInstance( 997 ICUResourceBundle.ICU_COLLATION_BASE_NAME, baseLoc); 998 999 String defcoll = null; 1000 while (bundle != null) { 1001 UResourceBundle collations = bundle.get("collations"); 1002 Enumeration<String> collEnum = collations.getKeys(); 1003 while (collEnum.hasMoreElements()) { 1004 String collkey = collEnum.nextElement(); 1005 if (collkey.equals("default")) { 1006 if (defcoll == null) { 1007 // Keep the default 1008 defcoll = collations.getString("default"); 1009 } 1010 } else if (!collkey.startsWith("private-") && !values.contains(collkey)) { 1011 values.add(collkey); 1012 } 1013 } 1014 bundle = ((ICUResourceBundle)bundle).getParent(); 1015 } 1016 // Reordering 1017 Iterator<String> itr = values.iterator(); 1018 String[] result = new String[values.size()]; 1019 result[0] = defcoll; 1020 int idx = 1; 1021 while (itr.hasNext()) { 1022 String collKey = itr.next(); 1023 if (!collKey.equals(defcoll)) { 1024 result[idx++] = collKey; 1025 } 1026 } 1027 return result; 1028 } 1029 1030 /** 1031 * {@icu} Returns the functionally equivalent locale for the given 1032 * requested locale, with respect to given keyword, for the 1033 * collation service. If two locales return the same result, then 1034 * collators instantiated for these locales will behave 1035 * equivalently. The converse is not always true; two collators 1036 * may in fact be equivalent, but return different results, due to 1037 * internal details. The return result has no other meaning than 1038 * that stated above, and implies nothing as to the relationship 1039 * between the two locales. 
This is intended for use by 1040 * applications who wish to cache collators, or otherwise reuse 1041 * collators when possible. The functional equivalent may change 1042 * over time. For more information, please see the <a 1043 * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services"> 1044 * Locales and Services</a> section of the ICU User Guide. 1045 * @param keyword a particular keyword as enumerated by 1046 * getKeywords. 1047 * @param locID The requested locale 1048 * @param isAvailable If non-null, isAvailable[0] will receive and 1049 * output boolean that indicates whether the requested locale was 1050 * 'available' to the collation service. If non-null, isAvailable 1051 * must have length >= 1. 1052 * @return the locale 1053 * @stable ICU 3.0 1054 */ 1055 public static final ULocale getFunctionalEquivalent(String keyword, 1056 ULocale locID, 1057 boolean isAvailable[]) { 1058 return ICUResourceBundle.getFunctionalEquivalent(BASE, ICUResourceBundle.ICU_DATA_CLASS_LOADER, RESOURCE, 1059 keyword, locID, isAvailable, true); 1060 } 1061 1062 /** 1063 * {@icu} Returns the functionally equivalent locale for the given 1064 * requested locale, with respect to given keyword, for the 1065 * collation service. 1066 * @param keyword a particular keyword as enumerated by 1067 * getKeywords. 1068 * @param locID The requested locale 1069 * @return the locale 1070 * @see #getFunctionalEquivalent(String,ULocale,boolean[]) 1071 * @stable ICU 3.0 1072 */ 1073 public static final ULocale getFunctionalEquivalent(String keyword, 1074 ULocale locID) { 1075 return getFunctionalEquivalent(keyword, locID, null); 1076 } 1077 1078 /** 1079 * {@icu} Returns the name of the collator for the objectLocale, localized for the 1080 * displayLocale. 
1081 * @param objectLocale the locale of the collator 1082 * @param displayLocale the locale for the collator's display name 1083 * @return the display name 1084 * @stable ICU 2.6 1085 */ 1086 static public String getDisplayName(Locale objectLocale, Locale displayLocale) { 1087 return getShim().getDisplayName(ULocale.forLocale(objectLocale), 1088 ULocale.forLocale(displayLocale)); 1089 } 1090 1091 /** 1092 * {@icu} Returns the name of the collator for the objectLocale, localized for the 1093 * displayLocale. 1094 * @param objectLocale the locale of the collator 1095 * @param displayLocale the locale for the collator's display name 1096 * @return the display name 1097 * @stable ICU 3.2 1098 */ 1099 static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) { 1100 return getShim().getDisplayName(objectLocale, displayLocale); 1101 } 1102 1103 /** 1104 * {@icu} Returns the name of the collator for the objectLocale, localized for the 1105 * default <code>DISPLAY</code> locale. 1106 * @param objectLocale the locale of the collator 1107 * @return the display name 1108 * @see com.ibm.icu.util.ULocale.Category#DISPLAY 1109 * @stable ICU 2.6 1110 */ 1111 static public String getDisplayName(Locale objectLocale) { 1112 return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault(Category.DISPLAY)); 1113 } 1114 1115 /** 1116 * {@icu} Returns the name of the collator for the objectLocale, localized for the 1117 * default <code>DISPLAY</code> locale. 1118 * @param objectLocale the locale of the collator 1119 * @return the display name 1120 * @see com.ibm.icu.util.ULocale.Category#DISPLAY 1121 * @stable ICU 3.2 1122 */ 1123 static public String getDisplayName(ULocale objectLocale) { 1124 return getShim().getDisplayName(objectLocale, ULocale.getDefault(Category.DISPLAY)); 1125 } 1126 1127 /** 1128 * Returns this Collator's strength attribute. The strength attribute 1129 * determines the minimum level of difference considered significant. 
1130 * </p> 1131 * {@icunote} This can return QUATERNARY strength, which is not supported by the 1132 * JDK version. 1133 * <p> 1134 * See the Collator class description for more details. 1135 * </p> 1136 * <p>The base class method always returns {@link #TERTIARY}. 1137 * Subclasses should override it if appropriate. 1138 * 1139 * @return this Collator's current strength attribute. 1140 * @see #setStrength 1141 * @see #PRIMARY 1142 * @see #SECONDARY 1143 * @see #TERTIARY 1144 * @see #QUATERNARY 1145 * @see #IDENTICAL 1146 * @stable ICU 2.8 1147 */ 1148 public int getStrength() 1149 { 1150 return TERTIARY; 1151 } 1152 1153 /** 1154 * Returns the decomposition mode of this Collator. The decomposition mode 1155 * determines how Unicode composed characters are handled. 1156 * </p> 1157 * <p> 1158 * See the Collator class description for more details. 1159 * </p> 1160 * <p>The base class method always returns {@link #NO_DECOMPOSITION}. 1161 * Subclasses should override it if appropriate. 1162 * 1163 * @return the decomposition mode 1164 * @see #setDecomposition 1165 * @see #NO_DECOMPOSITION 1166 * @see #CANONICAL_DECOMPOSITION 1167 * @stable ICU 2.8 1168 */ 1169 public int getDecomposition() 1170 { 1171 return NO_DECOMPOSITION; 1172 } 1173 1174 // public other methods ------------------------------------------------- 1175 1176 /** 1177 * Compares the equality of two text Strings using 1178 * this Collator's rules, strength and decomposition mode. Convenience method. 1179 * @param source the source string to be compared. 1180 * @param target the target string to be compared. 1181 * @return true if the strings are equal according to the collation 1182 * rules, otherwise false. 1183 * @see #compare 1184 * @throws NullPointerException thrown if either arguments is null. 
1185 * @stable ICU 2.8 1186 */ 1187 public boolean equals(String source, String target) 1188 { 1189 return (compare(source, target) == 0); 1190 } 1191 1192 /** 1193 * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored 1194 * in this collator. 1195 * @return a pointer to a UnicodeSet object containing all the 1196 * code points and sequences that may sort differently than 1197 * in the root collator. 1198 * @stable ICU 2.4 1199 */ 1200 public UnicodeSet getTailoredSet() 1201 { 1202 return new UnicodeSet(0, 0x10FFFF); 1203 } 1204 1205 /** 1206 * Compares the source text String to the target text String according to 1207 * this Collator's rules, strength and decomposition mode. 1208 * Returns an integer less than, 1209 * equal to or greater than zero depending on whether the source String is 1210 * less than, equal to or greater than the target String. See the Collator 1211 * class description for an example of use. 1212 * </p> 1213 * @param source the source String. 1214 * @param target the target String. 1215 * @return Returns an integer value. Value is less than zero if source is 1216 * less than target, value is zero if source and target are equal, 1217 * value is greater than zero if source is greater than target. 1218 * @see CollationKey 1219 * @see #getCollationKey 1220 * @throws NullPointerException thrown if either argument is null. 1221 * @stable ICU 2.8 1222 */ 1223 public abstract int compare(String source, String target); 1224 1225 /** 1226 * Compares the source Object to the target Object. 1227 * </p> 1228 * @param source the source Object. 1229 * @param target the target Object. 1230 * @return Returns an integer value. Value is less than zero if source is 1231 * less than target, value is zero if source and target are equal, 1232 * value is greater than zero if source is greater than target. 1233 * @throws ClassCastException thrown if either arguments cannot be cast to CharSequence. 
1234 * @stable ICU 4.2 1235 */ 1236 public int compare(Object source, Object target) { 1237 return doCompare((CharSequence)source, (CharSequence)target); 1238 } 1239 1240 /** 1241 * Compares two CharSequences. 1242 * The base class just calls compare(left.toString(), right.toString()). 1243 * Subclasses should instead implement this method and have the String API call this method. 1244 * @internal 1245 * @deprecated This API is ICU internal only. 1246 */ 1247 @Deprecated 1248 protected int doCompare(CharSequence left, CharSequence right) { 1249 return compare(left.toString(), right.toString()); 1250 } 1251 1252 /** 1253 * <p> 1254 * Transforms the String into a CollationKey suitable for efficient 1255 * repeated comparison. The resulting key depends on the collator's 1256 * rules, strength and decomposition mode. 1257 * 1258 * <p>Note that collation keys are often less efficient than simply doing comparison. 1259 * For more details, see the ICU User Guide. 1260 * 1261 * <p>See the CollationKey class documentation for more information.</p> 1262 * @param source the string to be transformed into a CollationKey. 1263 * @return the CollationKey for the given String based on this Collator's 1264 * collation rules. If the source String is null, a null 1265 * CollationKey is returned. 1266 * @see CollationKey 1267 * @see #compare(String, String) 1268 * @see #getRawCollationKey 1269 * @stable ICU 2.8 1270 */ 1271 public abstract CollationKey getCollationKey(String source); 1272 1273 /** 1274 * {@icu} Returns the simpler form of a CollationKey for the String source following 1275 * the rules of this Collator and stores the result into the user provided argument 1276 * key. If key has a internal byte array of length that's too small for the result, 1277 * the internal byte array will be grown to the exact required size. 1278 * 1279 * <p>Note that collation keys are often less efficient than simply doing comparison. 1280 * For more details, see the ICU User Guide. 
1281 * 1282 * @param source the text String to be transformed into a RawCollationKey 1283 * @return If key is null, a new instance of RawCollationKey will be 1284 * created and returned, otherwise the user provided key will be 1285 * returned. 1286 * @see #compare(String, String) 1287 * @see #getCollationKey 1288 * @see RawCollationKey 1289 * @stable ICU 2.8 1290 */ 1291 public abstract RawCollationKey getRawCollationKey(String source, 1292 RawCollationKey key); 1293 1294 /** 1295 * {@icu} Sets the variable top to the top of the specified reordering group. 1296 * The variable top determines the highest-sorting character 1297 * which is affected by the alternate handling behavior. 1298 * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. 1299 * 1300 * <p>The base class implementation throws an UnsupportedOperationException. 1301 * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION, 1302 * Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY; 1303 * or Collator.ReorderCodes.DEFAULT to restore the default max variable group 1304 * @return this 1305 * @see #getMaxVariable 1306 * @stable ICU 53 1307 */ 1308 public Collator setMaxVariable(int group) { 1309 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 1310 } 1311 1312 /** 1313 * {@icu} Returns the maximum reordering group whose characters are affected by 1314 * the alternate handling behavior. 1315 * 1316 * <p>The base class implementation returns Collator.ReorderCodes.PUNCTUATION. 1317 * @return the maximum variable reordering group. 1318 * @see #setMaxVariable 1319 * @stable ICU 53 1320 */ 1321 public int getMaxVariable() { 1322 return Collator.ReorderCodes.PUNCTUATION; 1323 } 1324 1325 /** 1326 * {@icu} Sets the variable top to the primary weight of the specified string. 
*
     * <p>Beginning with ICU 53, the variable top is pinned to
     * the top of one of the supported reordering groups,
     * and it must not be beyond the last of those groups.
     * See {@link #setMaxVariable(int)}.
     *
     * @param varTop one or more (if contraction) characters to which the
     *               variable top should be set
     * @return variable top primary weight
     * @exception IllegalArgumentException
     *            is thrown if varTop argument is not a valid variable top element. A variable top element is
     *            invalid when
     *            <ul>
     *            <li>it is a contraction that does not exist in the Collation order
     *            <li>the variable top is beyond
     *                the last reordering group supported by setMaxVariable()
     *            <li>the varTop argument is null or zero in length.
     *            </ul>
     * @see #getVariableTop
     * @see RuleBasedCollator#setAlternateHandlingShifted
     * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
     */
    @Deprecated
    public abstract int setVariableTop(String varTop);

    /**
     * {@icu} Gets the variable top value of a Collator.
     *
     * @return the variable top primary weight
     * @see #getMaxVariable
     * @stable ICU 2.6
     */
    public abstract int getVariableTop();

    /**
     * {@icu} Sets the variable top to the specified primary weight.
     *
     * <p>Beginning with ICU 53, the variable top is pinned to
     * the top of one of the supported reordering groups,
     * and it must not be beyond the last of those groups.
     * See {@link #setMaxVariable(int)}.
     *
     * @param varTop primary weight, as returned by setVariableTop or getVariableTop
     * @see #getVariableTop
     * @see #setVariableTop(String)
     * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
     */
    @Deprecated
    public abstract void setVariableTop(int varTop);

    /**
     * {@icu} Returns the version of this collator object.
1379 * @return the version object associated with this collator 1380 * @stable ICU 2.8 1381 */ 1382 public abstract VersionInfo getVersion(); 1383 1384 /** 1385 * {@icu} Returns the UCA version of this collator object. 1386 * @return the version object associated with this collator 1387 * @stable ICU 2.8 1388 */ 1389 public abstract VersionInfo getUCAVersion(); 1390 1391 /** 1392 * Retrieves the reordering codes for this collator. 1393 * These reordering codes are a combination of UScript codes and ReorderCodes. 1394 * @return a copy of the reordering codes for this collator; 1395 * if none are set then returns an empty array 1396 * @see #setReorderCodes 1397 * @see #getEquivalentReorderCodes 1398 * @see Collator.ReorderCodes 1399 * @see UScript 1400 * @stable ICU 4.8 1401 */ 1402 public int[] getReorderCodes() 1403 { 1404 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 1405 } 1406 1407 /** 1408 * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder 1409 * codes are grouped and must reorder together. 1410 * Beginning with ICU 55, scripts only reorder together if they are primary-equal, 1411 * for example Hiragana and Katakana. 1412 * 1413 * @param reorderCode The reorder code to determine equivalence for. 1414 * @return the set of all reorder codes in the same group as the given reorder code. 1415 * @see #setReorderCodes 1416 * @see #getReorderCodes 1417 * @see Collator.ReorderCodes 1418 * @see UScript 1419 * @stable ICU 4.8 1420 */ 1421 public static int[] getEquivalentReorderCodes(int reorderCode) { 1422 CollationData baseData = CollationRoot.getData(); 1423 return baseData.getEquivalentScripts(reorderCode); 1424 } 1425 1426 1427 // Freezable interface implementation ------------------------------------------------- 1428 1429 /** 1430 * Determines whether the object has been frozen or not. 1431 * 1432 * <p>An unfrozen Collator is mutable and not thread-safe. 
1433 * A frozen Collator is immutable and thread-safe. 1434 * 1435 * @stable ICU 4.8 1436 */ 1437 public boolean isFrozen() { 1438 return false; 1439 } 1440 1441 /** 1442 * Freezes the collator. 1443 * @return the collator itself. 1444 * @stable ICU 4.8 1445 */ 1446 public Collator freeze() { 1447 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 1448 } 1449 1450 /** 1451 * Provides for the clone operation. Any clone is initially unfrozen. 1452 * @stable ICU 4.8 1453 */ 1454 public Collator cloneAsThawed() { 1455 throw new UnsupportedOperationException("Needs to be implemented by the subclass."); 1456 } 1457 1458 /** 1459 * Empty default constructor to make javadocs happy 1460 * @stable ICU 2.4 1461 */ 1462 protected Collator() 1463 { 1464 } 1465 1466 private static final boolean DEBUG = ICUDebug.enabled("collator"); 1467 1468 // -------- BEGIN ULocale boilerplate -------- 1469 1470 /** 1471 * {@icu} Returns the locale that was used to create this object, or null. 1472 * This may may differ from the locale requested at the time of 1473 * this object's creation. For example, if an object is created 1474 * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be 1475 * drawn from <tt>en</tt> (the <i>actual</i> locale), and 1476 * <tt>en_US</tt> may be the most specific locale that exists (the 1477 * <i>valid</i> locale). 1478 * 1479 * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8 1480 * contains a partial preview implementation. The * <i>actual</i> 1481 * locale is returned correctly, but the <i>valid</i> locale is 1482 * not, in most cases. 1483 * 1484 * <p>The base class method always returns {@link ULocale#ROOT}. 1485 * Subclasses should override it if appropriate. 1486 * 1487 * @param type type of information requested, either {@link 1488 * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link 1489 * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}. 
1490 * @return the information specified by <i>type</i>, or null if 1491 * this object was not constructed from locale data. 1492 * @see com.ibm.icu.util.ULocale 1493 * @see com.ibm.icu.util.ULocale#VALID_LOCALE 1494 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE 1495 * @draft ICU 2.8 (retain) 1496 * @provisional This API might change or be removed in a future release. 1497 */ 1498 public ULocale getLocale(ULocale.Type type) { 1499 return ULocale.ROOT; 1500 } 1501 1502 /** 1503 * Set information about the locales that were used to create this 1504 * object. If the object was not constructed from locale data, 1505 * both arguments should be set to null. Otherwise, neither 1506 * should be null. The actual locale must be at the same level or 1507 * less specific than the valid locale. This method is intended 1508 * for use by factories or other entities that create objects of 1509 * this class. 1510 * 1511 * <p>The base class method does nothing. Subclasses should override it if appropriate. 1512 * 1513 * @param valid the most specific locale containing any resource 1514 * data, or null 1515 * @param actual the locale containing data used to construct this 1516 * object, or null 1517 * @see com.ibm.icu.util.ULocale 1518 * @see com.ibm.icu.util.ULocale#VALID_LOCALE 1519 * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE 1520 */ 1521 void setLocale(ULocale valid, ULocale actual) {} 1522 1523 // -------- END ULocale boilerplate -------- 1524} 1525