1/** 2 ******************************************************************************* 3 * Copyright (C) 1996-2016, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7package com.ibm.icu.text; 8 9import java.lang.reflect.InvocationTargetException; 10import java.lang.reflect.Method; 11import java.text.CharacterIterator; 12import java.text.ParseException; 13import java.util.Arrays; 14import java.util.concurrent.locks.Lock; 15import java.util.concurrent.locks.ReentrantLock; 16 17import com.ibm.icu.impl.ClassLoaderUtil; 18import com.ibm.icu.impl.Normalizer2Impl; 19import com.ibm.icu.impl.Normalizer2Impl.ReorderingBuffer; 20import com.ibm.icu.impl.Utility; 21import com.ibm.icu.impl.coll.BOCSU; 22import com.ibm.icu.impl.coll.Collation; 23import com.ibm.icu.impl.coll.CollationCompare; 24import com.ibm.icu.impl.coll.CollationData; 25import com.ibm.icu.impl.coll.CollationFastLatin; 26import com.ibm.icu.impl.coll.CollationIterator; 27import com.ibm.icu.impl.coll.CollationKeys; 28import com.ibm.icu.impl.coll.CollationKeys.SortKeyByteSink; 29import com.ibm.icu.impl.coll.CollationLoader; 30import com.ibm.icu.impl.coll.CollationRoot; 31import com.ibm.icu.impl.coll.CollationSettings; 32import com.ibm.icu.impl.coll.CollationTailoring; 33import com.ibm.icu.impl.coll.ContractionsAndExpansions; 34import com.ibm.icu.impl.coll.FCDUTF16CollationIterator; 35import com.ibm.icu.impl.coll.SharedObject; 36import com.ibm.icu.impl.coll.TailoredSet; 37import com.ibm.icu.impl.coll.UTF16CollationIterator; 38import com.ibm.icu.lang.UScript; 39import com.ibm.icu.util.ULocale; 40import com.ibm.icu.util.VersionInfo; 41 42/** 43 * <p> 44 * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule 45 * sets. 
RuleBasedCollator is designed to be fully compliant with the <a 46 * href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation Algorithm (UCA)</a> and conforms to ISO 14651. 47 * 48 * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link com.ibm.icu.util.Freezable}. 49 * 50 * <p> 51 * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User 52 * Guide</a> for more information about the collation service before using this class. 53 * 54 * <p> 55 * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class 56 * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the 57 * argument locale. If a customized collation ordering or attributes are required, use the RuleBasedCollator(String) 58 * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on the CLDR root collation, while 59 * re-adjusting the attributes and orders of the characters in the specified rule accordingly. 60 * 61 * <p> 62 * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale 63 * is not available, the order eventually falls back to the 64 * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. 65 * 66 * <p> 67 * For information about the collation rule syntax and details about customization, please refer to the <a 68 * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the 69 * User Guide. 70 * 71 * <p> 72 * <strong>Note</strong> that there are some differences between the Collation rule syntax used in Java and ICU4J: 73 * 74 * <ul> 75 * <li>According to the JDK documentation: <br> 76 * <i>Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. 
If this rule is in force when a Thai vowel of the range 77 * \U0E40-\U0E44 precedes a Thai consonant of the range \U0E01-\U0E2E OR a Lao vowel of the range 78 * \U0EC0-\U0EC4 precedes a Lao consonant of the range \U0E81-\U0EAE then the vowel is placed after the 79 * consonant for collation purposes. 80 * <br> 81 * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on. 82 * </i> 83 * <br> 84 * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly 85 * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored.</li> 86 * <li>As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported.</li> 87 * </ul> 88 * <p> 89 * <strong>Examples</strong> 90 * <p> 91 * Creating Customized RuleBasedCollators: <blockquote> 92 * 93 * <pre> 94 * String simple = "& a < b < c < d"; 95 * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple); 96 * 97 * String norwegian = "& a , A < b , B < c , C < d , D < e , E " 98 * + "< f , F < g , G < h , H < i , I < j , " 99 * + "J < k , K < l , L < m , M < n , N < " 100 * + "o , O < p , P < q , Q <r , R <s , S < " 101 * + "t , T < u , U < v , V < w , W < x , X " 102 * + "< y , Y < z , Z < \u00E5 = a\u030A " 103 * + ", \u00C5 = A\u030A ; aa , AA < \u00E6 " 104 * + ", \u00C6 < \u00F8 , \u00D8"; 105 * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian); 106 * </pre> 107 * 108 * </blockquote> 109 * 110 * Concatenating rules to combine <code>Collator</code>s: <blockquote> 111 * 112 * <pre> 113 * // Create an en_US Collator object 114 * RuleBasedCollator en_USCollator = (RuleBasedCollator) 115 * Collator.getInstance(new Locale("en", "US", "")); 116 * // Create a da_DK Collator object 117 * RuleBasedCollator da_DKCollator = (RuleBasedCollator) 118 * Collator.getInstance(new Locale("da", "DK", "")); 119 * // Combine the two 120 * 
// First, get the collation rules from en_USCollator 121 * String en_USRules = en_USCollator.getRules(); 122 * // Second, get the collation rules from da_DKCollator 123 * String da_DKRules = da_DKCollator.getRules(); 124 * RuleBasedCollator newCollator = 125 * new RuleBasedCollator(en_USRules + da_DKRules); 126 * // newCollator has the combined rules 127 * </pre> 128 * 129 * </blockquote> 130 * 131 * Making changes to an existing RuleBasedCollator to create a new <code>Collator</code> object, by appending changes to 132 * the existing rule: <blockquote> 133 * 134 * <pre> 135 * // Create a new Collator object with additional rules 136 * String addRules = "& C < ch, cH, Ch, CH"; 137 * RuleBasedCollator myCollator = 138 * new RuleBasedCollator(en_USCollator.getRules() + addRules); 139 * // myCollator contains the new rules 140 * </pre> 141 * 142 * </blockquote> 143 * 144 * How to change the order of non-spacing accents: <blockquote> 145 * 146 * <pre> 147 * // old rule with main accents 148 * String oldRules = "= \u0301 ; \u0300 ; \u0302 ; \u0308 " 149 * + "; \u0327 ; \u0303 ; \u0304 ; \u0305 " 150 * + "; \u0306 ; \u0307 ; \u0309 ; \u030A " 151 * + "; \u030B ; \u030C ; \u030D ; \u030E " 152 * + "; \u030F ; \u0310 ; \u0311 ; \u0312 " 153 * + "< a , A ; ae, AE ; \u00e6 , \u00c6 " 154 * + "< b , B < c, C < e, E & C < d , D"; 155 * // change the order of accent characters 156 * String addOn = "& \u0300 ; \u0308 ; \u0302"; 157 * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn); 158 * </pre> 159 * 160 * </blockquote> 161 * 162 * Putting in a new primary ordering before the default setting, e.g. 
sort English characters before or after Japanese 163 * characters in the Japanese <code>Collator</code>: <blockquote> 164 * 165 * <pre> 166 * // get en_US Collator rules 167 * RuleBasedCollator en_USCollator 168 * = (RuleBasedCollator)Collator.getInstance(Locale.US); 169 * // add a few Japanese characters to sort before English characters 170 * // suppose the last character before the first base letter 'a' in 171 * // the English collation rule is \u2212 172 * String jaString = "& \u2212 <\u3041, \u3042 <\u3043, " 173 * + "\u3044"; 174 * RuleBasedCollator myJapaneseCollator 175 * = new RuleBasedCollator(en_USCollator.getRules() + jaString); 176 * </pre> 177 * 178 * </blockquote> 179 * <p> 180 * This class is not subclassable 181 * 182 * @author Syn Wee Quek 183 * @stable ICU 2.8 184 */ 185public final class RuleBasedCollator extends Collator { 186 // public constructors --------------------------------------------------- 187 188 /** 189 * <p> 190 * Constructor that takes the argument rules for customization. 191 * The collator will be based on the CLDR root collation, with the 192 * attributes and re-ordering of the characters specified in the argument rules. 193 * <p> 194 * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization"> 195 * Collation Customization</a> for details on the rule syntax. 196 * 197 * @param rules 198 * the collation rules to build the collation table from. 199 * @exception ParseException 200 * and IOException thrown. ParseException thrown when argument rules have an invalid syntax. 201 * IOException thrown when an error occurred while reading internal data. 202 * @stable ICU 2.8 203 */ 204 public RuleBasedCollator(String rules) throws Exception { 205 if (rules == null) { 206 throw new IllegalArgumentException("Collation rules can not be null"); 207 } 208 validLocale = ULocale.ROOT; 209 internalBuildTailoring(rules); 210 } 211 212 /** 213 * Implements from-rule constructors. 
214 * @param rules rule string 215 * @throws Exception 216 */ 217 private final void internalBuildTailoring(String rules) throws Exception { 218 CollationTailoring base = CollationRoot.getRoot(); 219 // Most code using Collator does not need to build a Collator from rules. 220 // By using reflection, most code will not have a static dependency on the builder code. 221 // CollationBuilder builder = new CollationBuilder(base); 222 ClassLoader classLoader = ClassLoaderUtil.getClassLoader(getClass()); 223 CollationTailoring t; 224 try { 225 Class<?> builderClass = classLoader.loadClass("com.ibm.icu.impl.coll.CollationBuilder"); 226 Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base); 227 // builder.parseAndBuild(rules); 228 Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class); 229 t = (CollationTailoring)parseAndBuild.invoke(builder, rules); 230 } catch(InvocationTargetException e) { 231 throw (Exception)e.getTargetException(); 232 } 233 t.actualLocale = null; 234 adoptTailoring(t); 235 } 236 237 // public methods -------------------------------------------------------- 238 239 /** 240 * Clones the RuleBasedCollator 241 * 242 * @return a new instance of this RuleBasedCollator object 243 * @stable ICU 2.8 244 */ 245 @Override 246 public Object clone() throws CloneNotSupportedException { 247 if (isFrozen()) { 248 return this; 249 } 250 return cloneAsThawed(); 251 } 252 253 private final void initMaxExpansions() { 254 synchronized(tailoring) { 255 if (tailoring.maxExpansions == null) { 256 tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data); 257 } 258 } 259 } 260 261 /** 262 * Return a CollationElementIterator for the given String. 
263 * 264 * @see CollationElementIterator 265 * @stable ICU 2.8 266 */ 267 public CollationElementIterator getCollationElementIterator(String source) { 268 initMaxExpansions(); 269 return new CollationElementIterator(source, this); 270 } 271 272 /** 273 * Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity will be 274 * preserved since a new copy will be created for use. 275 * 276 * @see CollationElementIterator 277 * @stable ICU 2.8 278 */ 279 public CollationElementIterator getCollationElementIterator(CharacterIterator source) { 280 initMaxExpansions(); 281 CharacterIterator newsource = (CharacterIterator) source.clone(); 282 return new CollationElementIterator(newsource, this); 283 } 284 285 /** 286 * Return a CollationElementIterator for the given UCharacterIterator. The source iterator's integrity will be 287 * preserved since a new copy will be created for use. 288 * 289 * @see CollationElementIterator 290 * @stable ICU 2.8 291 */ 292 public CollationElementIterator getCollationElementIterator(UCharacterIterator source) { 293 initMaxExpansions(); 294 return new CollationElementIterator(source, this); 295 } 296 297 // Freezable interface implementation ------------------------------------------------- 298 299 /** 300 * Determines whether the object has been frozen or not. 301 * 302 * <p>An unfrozen Collator is mutable and not thread-safe. 303 * A frozen Collator is immutable and thread-safe. 304 * 305 * @stable ICU 4.8 306 */ 307 @Override 308 public boolean isFrozen() { 309 return frozenLock != null; 310 } 311 312 /** 313 * Freezes the collator. 314 * @return the collator itself. 315 * @stable ICU 4.8 316 */ 317 @Override 318 public Collator freeze() { 319 if (!isFrozen()) { 320 frozenLock = new ReentrantLock(); 321 if (collationBuffer == null) { 322 collationBuffer = new CollationBuffer(data); 323 } 324 } 325 return this; 326 } 327 328 /** 329 * Provides for the clone operation. Any clone is initially unfrozen. 
330 * @stable ICU 4.8 331 */ 332 @Override 333 public RuleBasedCollator cloneAsThawed() { 334 try { 335 RuleBasedCollator result = (RuleBasedCollator) super.clone(); 336 // since all collation data in the RuleBasedCollator do not change 337 // we can safely assign the result.fields to this collator 338 // except in cases where we can't 339 result.settings = settings.clone(); 340 result.collationBuffer = null; 341 result.frozenLock = null; 342 return result; 343 } catch (CloneNotSupportedException e) { 344 // Clone is implemented 345 return null; 346 } 347 } 348 349 // public setters -------------------------------------------------------- 350 351 private void checkNotFrozen() { 352 if (isFrozen()) { 353 throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator"); 354 } 355 } 356 357 private final CollationSettings getOwnedSettings() { 358 return settings.copyOnWrite(); 359 } 360 361 private final CollationSettings getDefaultSettings() { 362 return tailoring.settings.readOnly(); 363 } 364 365 /** 366 * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator 367 * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a 368 * correct JIS collation order, distinguishing between Katakana and Hiragana characters. 369 * 370 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 371 * Since ICU 50, this attribute is not settable any more via API functions. 372 * Since CLDR 25/ICU 53, explicit quaternary relations are used 373 * to achieve the same Japanese sort order. 374 * 375 * @param flag 376 * true if Hiragana Quaternary mode is to be on, false otherwise 377 * @see #setHiraganaQuaternaryDefault 378 * @see #isHiraganaQuaternary 379 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 
380 */ 381 @Deprecated 382 public void setHiraganaQuaternary(boolean flag) { 383 checkNotFrozen(); 384 } 385 386 /** 387 * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See 388 * setHiraganaQuaternary(boolean) for more details. 389 * 390 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 391 * Since ICU 50, this attribute is not settable any more via API functions. 392 * Since CLDR 25/ICU 53, explicit quaternary relations are used 393 * to achieve the same Japanese sort order. 394 * 395 * @see #setHiraganaQuaternary(boolean) 396 * @see #isHiraganaQuaternary 397 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 398 */ 399 @Deprecated 400 public void setHiraganaQuaternaryDefault() { 401 checkNotFrozen(); 402 } 403 404 /** 405 * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The 406 * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case 407 * characters first. 408 * 409 * @param upperfirst 410 * true to sort uppercase characters before lowercase characters, false to sort lowercase characters 411 * before uppercase characters 412 * @see #isLowerCaseFirst 413 * @see #isUpperCaseFirst 414 * @see #setLowerCaseFirst 415 * @see #setCaseFirstDefault 416 * @stable ICU 2.8 417 */ 418 public void setUpperCaseFirst(boolean upperfirst) { 419 checkNotFrozen(); 420 if (upperfirst == isUpperCaseFirst()) { return; } 421 CollationSettings ownedSettings = getOwnedSettings(); 422 ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0); 423 setFastLatinOptions(ownedSettings); 424 } 425 426 /** 427 * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The 428 * default mode is false. 
If true is set, the RuleBasedCollator will sort lower cased characters before the upper 429 * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences. 430 * 431 * @param lowerfirst 432 * true for sorting lower cased characters before upper cased characters, false to ignore case 433 * preferences. 434 * @see #isLowerCaseFirst 435 * @see #isUpperCaseFirst 436 * @see #setUpperCaseFirst 437 * @see #setCaseFirstDefault 438 * @stable ICU 2.8 439 */ 440 public void setLowerCaseFirst(boolean lowerfirst) { 441 checkNotFrozen(); 442 if (lowerfirst == isLowerCaseFirst()) { return; } 443 CollationSettings ownedSettings = getOwnedSettings(); 444 ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0); 445 setFastLatinOptions(ownedSettings); 446 } 447 448 /** 449 * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See 450 * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details. 451 * 452 * @see #isLowerCaseFirst 453 * @see #isUpperCaseFirst 454 * @see #setLowerCaseFirst(boolean) 455 * @see #setUpperCaseFirst(boolean) 456 * @stable ICU 2.8 457 */ 458 public final void setCaseFirstDefault() { 459 checkNotFrozen(); 460 CollationSettings defaultSettings = getDefaultSettings(); 461 if(settings.readOnly() == defaultSettings) { return; } 462 CollationSettings ownedSettings = getOwnedSettings(); 463 ownedSettings.setCaseFirstDefault(defaultSettings.options); 464 setFastLatinOptions(ownedSettings); 465 } 466 467 /** 468 * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See 469 * setAlternateHandling(boolean) for more details. 
470 * 471 * @see #setAlternateHandlingShifted(boolean) 472 * @see #isAlternateHandlingShifted() 473 * @stable ICU 2.8 474 */ 475 public void setAlternateHandlingDefault() { 476 checkNotFrozen(); 477 CollationSettings defaultSettings = getDefaultSettings(); 478 if(settings.readOnly() == defaultSettings) { return; } 479 CollationSettings ownedSettings = getOwnedSettings(); 480 ownedSettings.setAlternateHandlingDefault(defaultSettings.options); 481 setFastLatinOptions(ownedSettings); 482 } 483 484 /** 485 * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See 486 * setCaseLevel(boolean) for more details. 487 * 488 * @see #setCaseLevel(boolean) 489 * @see #isCaseLevel 490 * @stable ICU 2.8 491 */ 492 public void setCaseLevelDefault() { 493 checkNotFrozen(); 494 CollationSettings defaultSettings = getDefaultSettings(); 495 if(settings.readOnly() == defaultSettings) { return; } 496 CollationSettings ownedSettings = getOwnedSettings(); 497 ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options); 498 setFastLatinOptions(ownedSettings); 499 } 500 501 /** 502 * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. See 503 * setDecomposition(int) for more details. 504 * 505 * @see #getDecomposition 506 * @see #setDecomposition(int) 507 * @stable ICU 2.8 508 */ 509 public void setDecompositionDefault() { 510 checkNotFrozen(); 511 CollationSettings defaultSettings = getDefaultSettings(); 512 if(settings.readOnly() == defaultSettings) { return; } 513 CollationSettings ownedSettings = getOwnedSettings(); 514 ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options); 515 setFastLatinOptions(ownedSettings); 516 } 517 518 /** 519 * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See 520 * setFrenchCollation(boolean) for more details. 
521 * 522 * @see #isFrenchCollation 523 * @see #setFrenchCollation(boolean) 524 * @stable ICU 2.8 525 */ 526 public void setFrenchCollationDefault() { 527 checkNotFrozen(); 528 CollationSettings defaultSettings = getDefaultSettings(); 529 if(settings.readOnly() == defaultSettings) { return; } 530 CollationSettings ownedSettings = getOwnedSettings(); 531 ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options); 532 setFastLatinOptions(ownedSettings); 533 } 534 535 /** 536 * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. See 537 * setStrength(int) for more details. 538 * 539 * @see #setStrength(int) 540 * @see #getStrength 541 * @stable ICU 2.8 542 */ 543 public void setStrengthDefault() { 544 checkNotFrozen(); 545 CollationSettings defaultSettings = getDefaultSettings(); 546 if(settings.readOnly() == defaultSettings) { return; } 547 CollationSettings ownedSettings = getOwnedSettings(); 548 ownedSettings.setStrengthDefault(defaultSettings.options); 549 setFastLatinOptions(ownedSettings); 550 } 551 552 /** 553 * Method to set numeric collation to its default value. 554 * 555 * @see #getNumericCollation 556 * @see #setNumericCollation 557 * @stable ICU 2.8 558 */ 559 public void setNumericCollationDefault() { 560 checkNotFrozen(); 561 CollationSettings defaultSettings = getDefaultSettings(); 562 if(settings.readOnly() == defaultSettings) { return; } 563 CollationSettings ownedSettings = getOwnedSettings(); 564 ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options); 565 setFastLatinOptions(ownedSettings); 566 } 567 568 /** 569 * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false, 570 * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted 571 * backwards. 
See the section on <a href="http://userguide.icu-project.org/collation/architecture"> 572 * French collation</a> for more information. 573 * 574 * @param flag 575 * true to set the French collation on, false to set it off 576 * @stable ICU 2.8 577 * @see #isFrenchCollation 578 * @see #setFrenchCollationDefault 579 */ 580 public void setFrenchCollation(boolean flag) { 581 checkNotFrozen(); 582 if(flag == isFrenchCollation()) { return; } 583 CollationSettings ownedSettings = getOwnedSettings(); 584 ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag); 585 setFastLatinOptions(ownedSettings); 586 } 587 588 /** 589 * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition 590 * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This 591 * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false, 592 * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all 593 * the code points with non-ignorable primary weights in the same way. If the mode is set to true, the behavior 594 * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the 595 * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order. 596 * 597 * @param shifted 598 * true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior. 
599 * @see #isAlternateHandlingShifted 600 * @see #setAlternateHandlingDefault 601 * @stable ICU 2.8 602 */ 603 public void setAlternateHandlingShifted(boolean shifted) { 604 checkNotFrozen(); 605 if(shifted == isAlternateHandlingShifted()) { return; } 606 CollationSettings ownedSettings = getOwnedSettings(); 607 ownedSettings.setAlternateHandlingShifted(shifted); 608 setFastLatinOptions(ownedSettings); 609 } 610 611 /** 612 * <p> 613 * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known 614 * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level 615 * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value 616 * is false, which means the case level is not generated. The contents of the case level are affected by the case 617 * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable 618 * case level. 619 * <p> 620 * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case 621 * level</a> for more information. 622 * 623 * @param flag 624 * true if case level sorting is required, false otherwise 625 * @stable ICU 2.8 626 * @see #setCaseLevelDefault 627 * @see #isCaseLevel 628 */ 629 public void setCaseLevel(boolean flag) { 630 checkNotFrozen(); 631 if(flag == isCaseLevel()) { return; } 632 CollationSettings ownedSettings = getOwnedSettings(); 633 ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag); 634 setFastLatinOptions(ownedSettings); 635 } 636 637 /** 638 * Sets the decomposition mode of this Collator. Setting this 639 * decomposition attribute with CANONICAL_DECOMPOSITION allows the 640 * Collator to handle un-normalized text properly, producing the 641 * same results as if the text were normalized. 
If 642 * NO_DECOMPOSITION is set, it is the user's responsibility to 643 * insure that all text is already in the appropriate form before 644 * a comparison or before getting a CollationKey. Adjusting 645 * decomposition mode allows the user to select between faster and 646 * more complete collation behavior. 647 * 648 * <p>Since a great many of the world's languages do not require 649 * text normalization, most locales set NO_DECOMPOSITION as the 650 * default decomposition mode. 651 * 652 * The default decompositon mode for the Collator is 653 * NO_DECOMPOSITON, unless specified otherwise by the locale used 654 * to create the Collator. 655 * 656 * <p>See getDecomposition for a description of decomposition 657 * mode. 658 * 659 * @param decomposition the new decomposition mode 660 * @see #getDecomposition 661 * @see #NO_DECOMPOSITION 662 * @see #CANONICAL_DECOMPOSITION 663 * @throws IllegalArgumentException If the given value is not a valid 664 * decomposition mode. 665 * @stable ICU 2.8 666 */ 667 @Override 668 public void setDecomposition(int decomposition) 669 { 670 checkNotFrozen(); 671 boolean flag; 672 switch(decomposition) { 673 case NO_DECOMPOSITION: 674 flag = false; 675 break; 676 case CANONICAL_DECOMPOSITION: 677 flag = true; 678 break; 679 default: 680 throw new IllegalArgumentException("Wrong decomposition mode."); 681 } 682 if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; } 683 CollationSettings ownedSettings = getOwnedSettings(); 684 ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag); 685 setFastLatinOptions(ownedSettings); 686 } 687 688 /** 689 * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference 690 * considered significant during comparison. 691 * 692 * <p>See the Collator class description for an example of use. 693 * 694 * @param newStrength 695 * the new strength value. 
696 * @see #getStrength 697 * @see #setStrengthDefault 698 * @see #PRIMARY 699 * @see #SECONDARY 700 * @see #TERTIARY 701 * @see #QUATERNARY 702 * @see #IDENTICAL 703 * @exception IllegalArgumentException 704 * If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL. 705 * @stable ICU 2.8 706 */ 707 @Override 708 public void setStrength(int newStrength) { 709 checkNotFrozen(); 710 if(newStrength == getStrength()) { return; } 711 CollationSettings ownedSettings = getOwnedSettings(); 712 ownedSettings.setStrength(newStrength); 713 setFastLatinOptions(ownedSettings); 714 } 715 716 /** 717 * {@icu} Sets the variable top to the top of the specified reordering group. 718 * The variable top determines the highest-sorting character 719 * which is affected by the alternate handling behavior. 720 * If that attribute is set to NON_IGNORABLE, then the variable top has no effect. 721 * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION, 722 * Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY; 723 * or Collator.ReorderCodes.DEFAULT to restore the default max variable group 724 * @return this 725 * @see #getMaxVariable 726 * @stable ICU 53 727 */ 728 @Override 729 public RuleBasedCollator setMaxVariable(int group) { 730 // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1. 
731 int value; 732 if(group == Collator.ReorderCodes.DEFAULT) { 733 value = -1; // UCOL_DEFAULT 734 } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) { 735 value = group - Collator.ReorderCodes.FIRST; 736 } else { 737 throw new IllegalArgumentException("illegal max variable group " + group); 738 } 739 int oldValue = settings.readOnly().getMaxVariable(); 740 if(value == oldValue) { 741 return this; 742 } 743 CollationSettings defaultSettings = getDefaultSettings(); 744 if(settings.readOnly() == defaultSettings) { 745 if(value < 0) { // UCOL_DEFAULT 746 return this; 747 } 748 } 749 CollationSettings ownedSettings = getOwnedSettings(); 750 751 if(group == Collator.ReorderCodes.DEFAULT) { 752 group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable(); 753 } 754 long varTop = data.getLastPrimaryForGroup(group); 755 assert(varTop != 0); 756 ownedSettings.setMaxVariable(value, defaultSettings.options); 757 ownedSettings.variableTop = varTop; 758 setFastLatinOptions(ownedSettings); 759 return this; 760 } 761 762 /** 763 * {@icu} Returns the maximum reordering group whose characters are affected by 764 * the alternate handling behavior. 765 * @return the maximum variable reordering group. 766 * @see #setMaxVariable 767 * @stable ICU 53 768 */ 769 @Override 770 public int getMaxVariable() { 771 return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable(); 772 } 773 774 /** 775 * {@icu} Sets the variable top to the primary weight of the specified string. 776 * 777 * <p>Beginning with ICU 53, the variable top is pinned to 778 * the top of one of the supported reordering groups, 779 * and it must not be beyond the last of those groups. 780 * See {@link #setMaxVariable(int)}. 
781 * 782 * @param varTop 783 * one or more (if contraction) characters to which the variable top should be set 784 * @return variable top primary weight 785 * @exception IllegalArgumentException 786 * is thrown if varTop argument is not a valid variable top element. A variable top element is 787 * invalid when 788 * <ul> 789 * <li>it is a contraction that does not exist in the Collation order 790 * <li>the variable top is beyond 791 * the last reordering group supported by setMaxVariable() 792 * <li>when the varTop argument is null or zero in length. 793 * </ul> 794 * @see #getVariableTop 795 * @see RuleBasedCollator#setAlternateHandlingShifted 796 * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead. 797 */ 798 @Override 799 @Deprecated 800 public int setVariableTop(String varTop) { 801 checkNotFrozen(); 802 if (varTop == null || varTop.length() == 0) { 803 throw new IllegalArgumentException("Variable top argument string can not be null or zero in length."); 804 } 805 boolean numeric = settings.readOnly().isNumeric(); 806 long ce1, ce2; 807 if(settings.readOnly().dontCheckFCD()) { 808 UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0); 809 ce1 = ci.nextCE(); 810 ce2 = ci.nextCE(); 811 } else { 812 FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0); 813 ce1 = ci.nextCE(); 814 ce2 = ci.nextCE(); 815 } 816 if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) { 817 throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element"); 818 } 819 internalSetVariableTop(ce1 >>> 32); 820 return (int)settings.readOnly().variableTop; 821 } 822 823 /** 824 * {@icu} Sets the variable top to the specified primary weight. 825 * 826 * <p>Beginning with ICU 53, the variable top is pinned to 827 * the top of one of the supported reordering groups, 828 * and it must not be beyond the last of those groups. 829 * See {@link #setMaxVariable(int)}. 
830 * 831 * @param varTop primary weight, as returned by setVariableTop or getVariableTop 832 * @see #getVariableTop 833 * @see #setVariableTop(String) 834 * @deprecated ICU 53 Call setMaxVariable() instead. 835 */ 836 @Override 837 @Deprecated 838 public void setVariableTop(int varTop) { 839 checkNotFrozen(); 840 internalSetVariableTop(varTop & 0xffffffffL); 841 } 842 843 private void internalSetVariableTop(long varTop) { 844 if(varTop != settings.readOnly().variableTop) { 845 // Pin the variable top to the end of the reordering group which contains it. 846 // Only a few special groups are supported. 847 int group = data.getGroupForPrimary(varTop); 848 if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) { 849 throw new IllegalArgumentException("The variable top must be a primary weight in " + 850 "the space/punctuation/symbols/currency symbols range"); 851 } 852 long v = data.getLastPrimaryForGroup(group); 853 assert(v != 0 && v >= varTop); 854 varTop = v; 855 if(varTop != settings.readOnly().variableTop) { 856 CollationSettings ownedSettings = getOwnedSettings(); 857 ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST, 858 getDefaultSettings().options); 859 ownedSettings.variableTop = varTop; 860 setFastLatinOptions(ownedSettings); 861 } 862 } 863 } 864 865 /** 866 * {@icu} When numeric collation is turned on, this Collator makes 867 * substrings of digits sort according to their numeric values. 868 * 869 * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest 870 * digit substring that can be treated as a single unit is 871 * 254 digits (not counting leading zeros). If a digit substring is 872 * longer than that, the digits beyond the limit will be treated as a 873 * separate digit substring. 874 * 875 * <p>A "digit" in this sense is a code point with General_Category=Nd, 876 * which does not include circled numbers, roman numerals, etc. 
877 * Only a contiguous digit substring is considered, that is, 878 * non-negative integers without separators. 879 * There is no support for plus/minus signs, decimals, exponents, etc. 880 * 881 * @param flag 882 * true to turn numeric collation on and false to turn it off 883 * @see #getNumericCollation 884 * @see #setNumericCollationDefault 885 * @stable ICU 2.8 886 */ 887 public void setNumericCollation(boolean flag) { 888 checkNotFrozen(); 889 // sort substrings of digits as numbers 890 if(flag == getNumericCollation()) { return; } 891 CollationSettings ownedSettings = getOwnedSettings(); 892 ownedSettings.setFlag(CollationSettings.NUMERIC, flag); 893 setFastLatinOptions(ownedSettings); 894 } 895 896 /** 897 * {@inheritDoc} 898 * 899 * @param order the reordering codes to apply to this collator; if this is null or an empty array 900 * then this clears any existing reordering 901 * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts) 902 * @see #getReorderCodes 903 * @see Collator#getEquivalentReorderCodes 904 * @see Collator.ReorderCodes 905 * @see UScript 906 * @stable ICU 4.8 907 */ 908 @Override 909 public void setReorderCodes(int... order) { 910 checkNotFrozen(); 911 int length = (order != null) ? order.length : 0; 912 if(length == 1 && order[0] == ReorderCodes.NONE) { 913 length = 0; 914 } 915 if(length == 0 ? 
916 settings.readOnly().reorderCodes.length == 0 : 917 Arrays.equals(order, settings.readOnly().reorderCodes)) { 918 return; 919 } 920 CollationSettings defaultSettings = getDefaultSettings(); 921 if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) { 922 if(settings.readOnly() != defaultSettings) { 923 CollationSettings ownedSettings = getOwnedSettings(); 924 ownedSettings.copyReorderingFrom(defaultSettings); 925 setFastLatinOptions(ownedSettings); 926 } 927 return; 928 } 929 CollationSettings ownedSettings = getOwnedSettings(); 930 if(length == 0) { 931 ownedSettings.resetReordering(); 932 } else { 933 ownedSettings.setReordering(data, order.clone()); 934 } 935 setFastLatinOptions(ownedSettings); 936 } 937 938 private void setFastLatinOptions(CollationSettings ownedSettings) { 939 ownedSettings.fastLatinOptions = CollationFastLatin.getOptions( 940 data, ownedSettings, ownedSettings.fastLatinPrimaries); 941 } 942 943 // public getters -------------------------------------------------------- 944 945 /** 946 * Gets the collation tailoring rules for this RuleBasedCollator. 947 * Equivalent to String getRules(false). 948 * 949 * @return the collation tailoring rules 950 * @see #getRules(boolean) 951 * @stable ICU 2.8 952 */ 953 public String getRules() { 954 return tailoring.getRules(); 955 } 956 957 /** 958 * Returns current rules. 959 * The argument defines whether full rules (root collation + tailored) rules are returned 960 * or just the tailoring. 961 * 962 * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order. 963 * They are almost never used or useful at runtime and can be removed from the data. 964 * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide: 965 * Collation Customization, Building on Existing Locales</a> 966 * 967 * <p>{@link #getRules()} should normally be used instead. 
968 * @param fullrules 969 * true if the rules that defines the full set of collation order is required, otherwise false for 970 * returning only the tailored rules 971 * @return the current rules that defines this Collator. 972 * @see #getRules() 973 * @stable ICU 2.6 974 */ 975 public String getRules(boolean fullrules) { 976 if (!fullrules) { 977 return tailoring.getRules(); 978 } 979 return CollationLoader.getRootRules() + tailoring.getRules(); 980 } 981 982 /** 983 * Get a UnicodeSet that contains all the characters and sequences tailored in this collator. 984 * 985 * @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently 986 * than in the root collator. 987 * @stable ICU 2.4 988 */ 989 @Override 990 public UnicodeSet getTailoredSet() { 991 UnicodeSet tailored = new UnicodeSet(); 992 if(data.base != null) { 993 new TailoredSet(tailored).forData(data); 994 } 995 return tailored; 996 } 997 998 /** 999 * Gets unicode sets containing contractions and/or expansions of a collator 1000 * 1001 * @param contractions 1002 * if not null, set to contain contractions 1003 * @param expansions 1004 * if not null, set to contain expansions 1005 * @param addPrefixes 1006 * add the prefix contextual elements to contractions 1007 * @throws Exception 1008 * Throws an exception if any errors occurs. 1009 * @stable ICU 3.4 1010 */ 1011 public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes) 1012 throws Exception { 1013 if (contractions != null) { 1014 contractions.clear(); 1015 } 1016 if (expansions != null) { 1017 expansions.clear(); 1018 } 1019 new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data); 1020 } 1021 1022 /** 1023 * Adds the contractions that start with character c to the set. 1024 * Ignores prefixes. Used by AlphabeticIndex. 1025 * @internal 1026 * @deprecated This API is ICU internal only. 
1027 */ 1028 void internalAddContractions(int c, UnicodeSet set) { 1029 new ContractionsAndExpansions(set, null, null, false).forCodePoint(data, c); 1030 } 1031 1032 /** 1033 * <p> 1034 * Get a Collation key for the argument String source from this RuleBasedCollator. 1035 * <p> 1036 * General recommendation: <br> 1037 * If comparison are to be done to the same String multiple times, it would be more efficient to generate 1038 * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If the each 1039 * Strings are compared to only once, using the method RuleBasedCollator.compare(String, String) will have a better 1040 * performance. 1041 * <p> 1042 * See the class documentation for an explanation about CollationKeys. 1043 * 1044 * @param source 1045 * the text String to be transformed into a collation key. 1046 * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source 1047 * String is null, a null CollationKey is returned. 1048 * @see CollationKey 1049 * @see #compare(String, String) 1050 * @see #getRawCollationKey 1051 * @stable ICU 2.8 1052 */ 1053 @Override 1054 public CollationKey getCollationKey(String source) { 1055 if (source == null) { 1056 return null; 1057 } 1058 CollationBuffer buffer = null; 1059 try { 1060 buffer = getCollationBuffer(); 1061 return getCollationKey(source, buffer); 1062 } finally { 1063 releaseCollationBuffer(buffer); 1064 } 1065 } 1066 1067 private CollationKey getCollationKey(String source, CollationBuffer buffer) { 1068 buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer); 1069 return new CollationKey(source, buffer.rawCollationKey); 1070 } 1071 1072 /** 1073 * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the 1074 * result into the user provided argument key. 
If key has a internal byte array of length that's too small for the 1075 * result, the internal byte array will be grown to the exact required size. 1076 * 1077 * @param source the text String to be transformed into a RawCollationKey 1078 * @param key output RawCollationKey to store results 1079 * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user 1080 * provided key will be returned. 1081 * @see #getCollationKey 1082 * @see #compare(String, String) 1083 * @see RawCollationKey 1084 * @stable ICU 2.8 1085 */ 1086 @Override 1087 public RawCollationKey getRawCollationKey(String source, RawCollationKey key) { 1088 if (source == null) { 1089 return null; 1090 } 1091 CollationBuffer buffer = null; 1092 try { 1093 buffer = getCollationBuffer(); 1094 return getRawCollationKey(source, key, buffer); 1095 } finally { 1096 releaseCollationBuffer(buffer); 1097 } 1098 } 1099 1100 private static final class CollationKeyByteSink extends SortKeyByteSink { 1101 CollationKeyByteSink(RawCollationKey key) { 1102 super(key.bytes); 1103 key_ = key; 1104 } 1105 1106 @Override 1107 protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) { 1108 // n > 0 && appended_ > capacity_ 1109 if (Resize(n, length)) { 1110 System.arraycopy(bytes, start, buffer_, length, n); 1111 } 1112 } 1113 1114 @Override 1115 protected boolean Resize(int appendCapacity, int length) { 1116 int newCapacity = 2 * buffer_.length; 1117 int altCapacity = length + 2 * appendCapacity; 1118 if (newCapacity < altCapacity) { 1119 newCapacity = altCapacity; 1120 } 1121 if (newCapacity < 200) { 1122 newCapacity = 200; 1123 } 1124 // Do not call key_.ensureCapacity(newCapacity) because we do not 1125 // keep key_.size in sync with appended_. 1126 // We only set it when we are done. 
1127 byte[] newBytes = new byte[newCapacity]; 1128 System.arraycopy(buffer_, 0, newBytes, 0, length); 1129 buffer_ = key_.bytes = newBytes; 1130 return true; 1131 } 1132 1133 private RawCollationKey key_; 1134 } 1135 1136 private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) { 1137 if (key == null) { 1138 key = new RawCollationKey(simpleKeyLengthEstimate(source)); 1139 } else if (key.bytes == null) { 1140 key.bytes = new byte[simpleKeyLengthEstimate(source)]; 1141 } 1142 CollationKeyByteSink sink = new CollationKeyByteSink(key); 1143 writeSortKey(source, sink, buffer); 1144 key.size = sink.NumberOfBytesAppended(); 1145 return key; 1146 } 1147 1148 private int simpleKeyLengthEstimate(CharSequence source) { 1149 return 2 * source.length() + 10; 1150 } 1151 1152 private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) { 1153 boolean numeric = settings.readOnly().isNumeric(); 1154 if(settings.readOnly().dontCheckFCD()) { 1155 buffer.leftUTF16CollIter.setText(numeric, s, 0); 1156 CollationKeys.writeSortKeyUpToQuaternary( 1157 buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(), 1158 sink, Collation.PRIMARY_LEVEL, 1159 CollationKeys.SIMPLE_LEVEL_FALLBACK, true); 1160 } else { 1161 buffer.leftFCDUTF16Iter.setText(numeric, s, 0); 1162 CollationKeys.writeSortKeyUpToQuaternary( 1163 buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(), 1164 sink, Collation.PRIMARY_LEVEL, 1165 CollationKeys.SIMPLE_LEVEL_FALLBACK, true); 1166 } 1167 if(settings.readOnly().getStrength() == IDENTICAL) { 1168 writeIdenticalLevel(s, sink); 1169 } 1170 sink.Append(Collation.TERMINATOR_BYTE); 1171 } 1172 1173 private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) { 1174 // NFD quick check 1175 int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null); 1176 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 1177 // Sync the ByteArrayWrapper size with the 
key length. 1178 sink.key_.size = sink.NumberOfBytesAppended(); 1179 int prev = 0; 1180 if(nfdQCYesLimit != 0) { 1181 prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_); 1182 } 1183 // Is there non-NFD text? 1184 if(nfdQCYesLimit < s.length()) { 1185 int destLengthEstimate = s.length() - nfdQCYesLimit; 1186 StringBuilder nfd = new StringBuilder(); 1187 data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate); 1188 BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_); 1189 } 1190 // Sync the key with the buffer again which got bytes appended and may have been reallocated. 1191 sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size); 1192 } 1193 1194 /** 1195 * Returns the CEs for the string. 1196 * @param str the string 1197 * @internal for tests & tools 1198 * @deprecated This API is ICU internal only. 1199 */ 1200 @Deprecated 1201 public long[] internalGetCEs(CharSequence str) { 1202 CollationBuffer buffer = null; 1203 try { 1204 buffer = getCollationBuffer(); 1205 boolean numeric = settings.readOnly().isNumeric(); 1206 CollationIterator iter; 1207 if(settings.readOnly().dontCheckFCD()) { 1208 buffer.leftUTF16CollIter.setText(numeric, str, 0); 1209 iter = buffer.leftUTF16CollIter; 1210 } else { 1211 buffer.leftFCDUTF16Iter.setText(numeric, str, 0); 1212 iter = buffer.leftFCDUTF16Iter; 1213 } 1214 int length = iter.fetchCEs() - 1; 1215 assert length >= 0 && iter.getCE(length) == Collation.NO_CE; 1216 long[] ces = new long[length]; 1217 System.arraycopy(iter.getCEs(), 0, ces, 0, length); 1218 return ces; 1219 } finally { 1220 releaseCollationBuffer(buffer); 1221 } 1222 } 1223 1224 /** 1225 * Returns this Collator's strength attribute. The strength attribute 1226 * determines the minimum level of difference considered significant. 1227 * 1228 * <p>{@icunote} This can return QUATERNARY strength, which is not supported by the 1229 * JDK version. 
1230 * 1231 * <p>See the Collator class description for more details. 1232 * 1233 * @return this Collator's current strength attribute. 1234 * @see #setStrength 1235 * @see #PRIMARY 1236 * @see #SECONDARY 1237 * @see #TERTIARY 1238 * @see #QUATERNARY 1239 * @see #IDENTICAL 1240 * @stable ICU 2.8 1241 */ 1242 @Override 1243 public int getStrength() { 1244 return settings.readOnly().getStrength(); 1245 } 1246 1247 /** 1248 * Returns the decomposition mode of this Collator. The decomposition mode 1249 * determines how Unicode composed characters are handled. 1250 * 1251 * <p>See the Collator class description for more details. 1252 * 1253 * @return the decomposition mode 1254 * @see #setDecomposition 1255 * @see #NO_DECOMPOSITION 1256 * @see #CANONICAL_DECOMPOSITION 1257 * @stable ICU 2.8 1258 */ 1259 @Override 1260 public int getDecomposition() { 1261 return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ? 1262 CANONICAL_DECOMPOSITION : NO_DECOMPOSITION; 1263 } 1264 1265 /** 1266 * Return true if an uppercase character is sorted before the corresponding lowercase character. See 1267 * setCaseFirst(boolean) for details. 1268 * 1269 * @see #setUpperCaseFirst 1270 * @see #setLowerCaseFirst 1271 * @see #isLowerCaseFirst 1272 * @see #setCaseFirstDefault 1273 * @return true if upper cased characters are sorted before lower cased characters, false otherwise 1274 * @stable ICU 2.8 1275 */ 1276 public boolean isUpperCaseFirst() { 1277 return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK); 1278 } 1279 1280 /** 1281 * Return true if a lowercase character is sorted before the corresponding uppercase character. See 1282 * setCaseFirst(boolean) for details. 
1283 * 1284 * @see #setUpperCaseFirst 1285 * @see #setLowerCaseFirst 1286 * @see #isUpperCaseFirst 1287 * @see #setCaseFirstDefault 1288 * @return true lower cased characters are sorted before upper cased characters, false otherwise 1289 * @stable ICU 2.8 1290 */ 1291 public boolean isLowerCaseFirst() { 1292 return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST); 1293 } 1294 1295 /** 1296 * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true, 1297 * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the 1298 * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more 1299 * details. 1300 * 1301 * @return true or false 1302 * @see #setAlternateHandlingShifted(boolean) 1303 * @see #setAlternateHandlingDefault 1304 * @stable ICU 2.8 1305 */ 1306 public boolean isAlternateHandlingShifted() { 1307 return settings.readOnly().getAlternateHandling(); 1308 } 1309 1310 /** 1311 * Checks if case level is set to true. See setCaseLevel(boolean) for details. 1312 * 1313 * @return the case level mode 1314 * @see #setCaseLevelDefault 1315 * @see #isCaseLevel 1316 * @see #setCaseLevel(boolean) 1317 * @stable ICU 2.8 1318 */ 1319 public boolean isCaseLevel() { 1320 return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0; 1321 } 1322 1323 /** 1324 * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details. 1325 * 1326 * @return true if French Collation is set to true, false otherwise 1327 * @see #setFrenchCollation(boolean) 1328 * @see #setFrenchCollationDefault 1329 * @stable ICU 2.8 1330 */ 1331 public boolean isFrenchCollation() { 1332 return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0; 1333 } 1334 1335 /** 1336 * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details. 
1337 * 1338 * <p>This attribute was an implementation detail of the CLDR Japanese tailoring. 1339 * Since ICU 50, this attribute is not settable any more via API functions. 1340 * Since CLDR 25/ICU 53, explicit quaternary relations are used 1341 * to achieve the same Japanese sort order. 1342 * 1343 * @return false 1344 * @see #setHiraganaQuaternaryDefault 1345 * @see #setHiraganaQuaternary(boolean) 1346 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 1347 */ 1348 @Deprecated 1349 public boolean isHiraganaQuaternary() { 1350 return false; 1351 } 1352 1353 /** 1354 * {@icu} Gets the variable top value of a Collator. 1355 * 1356 * @return the variable top primary weight 1357 * @see #getMaxVariable 1358 * @stable ICU 2.6 1359 */ 1360 @Override 1361 public int getVariableTop() { 1362 return (int)settings.readOnly().variableTop; 1363 } 1364 1365 /** 1366 * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a 1367 * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2' 1368 * 1369 * @see #setNumericCollation 1370 * @see #setNumericCollationDefault 1371 * @return true if numeric collation is turned on, false otherwise 1372 * @stable ICU 2.8 1373 */ 1374 public boolean getNumericCollation() { 1375 return (settings.readOnly().options & CollationSettings.NUMERIC) != 0; 1376 } 1377 1378 /** 1379 * Retrieves the reordering codes for this collator. 1380 * These reordering codes are a combination of UScript codes and ReorderCodes. 
1381 * @return a copy of the reordering codes for this collator; 1382 * if none are set then returns an empty array 1383 * @see #setReorderCodes 1384 * @see Collator#getEquivalentReorderCodes 1385 * @stable ICU 4.8 1386 */ 1387 @Override 1388 public int[] getReorderCodes() { 1389 return settings.readOnly().reorderCodes.clone(); 1390 } 1391 1392 // public other methods ------------------------------------------------- 1393 1394 /** 1395 * {@inheritDoc} 1396 * @stable ICU 2.8 1397 */ 1398 @Override 1399 public boolean equals(Object obj) { 1400 if (this == obj) { 1401 return true; 1402 } 1403 if (!super.equals(obj)) { 1404 return false; 1405 } 1406 RuleBasedCollator o = (RuleBasedCollator) obj; 1407 if(!settings.readOnly().equals(o.settings.readOnly())) { return false; } 1408 if(data == o.data) { return true; } 1409 boolean thisIsRoot = data.base == null; 1410 boolean otherIsRoot = o.data.base == null; 1411 assert(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be == 1412 if(thisIsRoot != otherIsRoot) { return false; } 1413 String theseRules = tailoring.getRules(); 1414 String otherRules = o.tailoring.getRules(); 1415 if((thisIsRoot || theseRules.length() != 0) && 1416 (otherIsRoot || otherRules.length() != 0)) { 1417 // Shortcut: If both collators have valid rule strings, then compare those. 1418 if(theseRules.equals(otherRules)) { return true; } 1419 } 1420 // Different rule strings can result in the same or equivalent tailoring. 1421 // The rule strings are optional in ICU resource bundles, although included by default. 1422 // cloneBinary() drops the rule string. 
1423 UnicodeSet thisTailored = getTailoredSet(); 1424 UnicodeSet otherTailored = o.getTailoredSet(); 1425 if(!thisTailored.equals(otherTailored)) { return false; } 1426 // For completeness, we should compare all of the mappings; 1427 // or we should create a list of strings, sort it with one collator, 1428 // and check if both collators compare adjacent strings the same 1429 // (order & strength, down to quaternary); or similar. 1430 // Testing equality of collators seems unusual. 1431 return true; 1432 } 1433 1434 /** 1435 * Generates a unique hash code for this RuleBasedCollator. 1436 * 1437 * @return the unique hash code for this Collator 1438 * @stable ICU 2.8 1439 */ 1440 @Override 1441 public int hashCode() { 1442 int h = settings.readOnly().hashCode(); 1443 if(data.base == null) { return h; } // root collator 1444 // Do not rely on the rule string, see comments in operator==(). 1445 UnicodeSet set = getTailoredSet(); 1446 UnicodeSetIterator iter = new UnicodeSetIterator(set); 1447 while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) { 1448 h ^= data.getCE32(iter.codepoint); 1449 } 1450 return h; 1451 } 1452 1453 /** 1454 * Compares the source text String to the target text String according to the collation rules, strength and 1455 * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero 1456 * depending on whether the source String is less than, equal to or greater than the target String. See the Collator 1457 * class description for an example of use. 1458 * <p> 1459 * General recommendation: <br> 1460 * If comparison are to be done to the same String multiple times, it would be more efficient to generate 1461 * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. 
If speed 1462 * performance is critical and object instantiation is to be reduced, further optimization may be achieved by 1463 * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method 1464 * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey 1465 * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key 1466 * comparisons. If the each Strings are compared to only once, using the method RuleBasedCollator.compare(String, 1467 * String) will have a better performance. 1468 * 1469 * @param source 1470 * the source text String. 1471 * @param target 1472 * the target text String. 1473 * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source 1474 * and target are equal, value is greater than zero if source is greater than target. 1475 * @see CollationKey 1476 * @see #getCollationKey 1477 * @stable ICU 2.8 1478 */ 1479 @Override 1480 public int compare(String source, String target) { 1481 return doCompare(source, target); 1482 } 1483 1484 /** 1485 * Abstract iterator for identical-level string comparisons. 1486 * Returns FCD code points and handles temporary switching to NFD. 1487 * 1488 * <p>As with CollationIterator, 1489 * Java NFDIterator instances are partially constructed and cached, 1490 * and completed when reset for use. 1491 * C++ NFDIterator instances are stack-allocated. 1492 */ 1493 private static abstract class NFDIterator { 1494 /** 1495 * Partial constructor, must call reset(). 1496 */ 1497 NFDIterator() {} 1498 final void reset() { 1499 index = -1; 1500 } 1501 1502 /** 1503 * Returns the next code point from the internal normalization buffer, 1504 * or else the next text code point. 1505 * Returns -1 at the end of the text. 
1506 */ 1507 final int nextCodePoint() { 1508 if(index >= 0) { 1509 if(index == decomp.length()) { 1510 index = -1; 1511 } else { 1512 int c = Character.codePointAt(decomp, index); 1513 index += Character.charCount(c); 1514 return c; 1515 } 1516 } 1517 return nextRawCodePoint(); 1518 } 1519 /** 1520 * @param nfcImpl 1521 * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint() 1522 * @return the first code point in c's decomposition, 1523 * or c itself if it was decomposed already or if it does not decompose 1524 */ 1525 final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) { 1526 if(index >= 0) { return c; } 1527 decomp = nfcImpl.getDecomposition(c); 1528 if(decomp == null) { return c; } 1529 c = Character.codePointAt(decomp, 0); 1530 index = Character.charCount(c); 1531 return c; 1532 } 1533 1534 /** 1535 * Returns the next text code point in FCD order. 1536 * Returns -1 at the end of the text. 1537 */ 1538 protected abstract int nextRawCodePoint(); 1539 1540 private String decomp; 1541 private int index; 1542 } 1543 1544 private static class UTF16NFDIterator extends NFDIterator { 1545 UTF16NFDIterator() {} 1546 void setText(CharSequence seq, int start) { 1547 reset(); 1548 s = seq; 1549 pos = start; 1550 } 1551 1552 @Override 1553 protected int nextRawCodePoint() { 1554 if(pos == s.length()) { return Collation.SENTINEL_CP; } 1555 int c = Character.codePointAt(s, pos); 1556 pos += Character.charCount(c); 1557 return c; 1558 } 1559 1560 protected CharSequence s; 1561 protected int pos; 1562 } 1563 1564 private static final class FCDUTF16NFDIterator extends UTF16NFDIterator { 1565 FCDUTF16NFDIterator() {} 1566 void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) { 1567 reset(); 1568 int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null); 1569 if(spanLimit == seq.length()) { 1570 s = seq; 1571 pos = start; 1572 } else { 1573 if(str == null) { 1574 str = new StringBuilder(); 1575 } else { 1576 
str.setLength(0); 1577 } 1578 str.append(seq, start, spanLimit); 1579 ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start); 1580 nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer); 1581 s = str; 1582 pos = 0; 1583 } 1584 } 1585 1586 private StringBuilder str; 1587 } 1588 1589 private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) { 1590 for(;;) { 1591 // Fetch the next FCD code point from each string. 1592 int leftCp = left.nextCodePoint(); 1593 int rightCp = right.nextCodePoint(); 1594 if(leftCp == rightCp) { 1595 if(leftCp < 0) { break; } 1596 continue; 1597 } 1598 // If they are different, then decompose each and compare again. 1599 if(leftCp < 0) { 1600 leftCp = -2; // end of string 1601 } else if(leftCp == 0xfffe) { 1602 leftCp = -1; // U+FFFE: merge separator 1603 } else { 1604 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp); 1605 } 1606 if(rightCp < 0) { 1607 rightCp = -2; // end of string 1608 } else if(rightCp == 0xfffe) { 1609 rightCp = -1; // U+FFFE: merge separator 1610 } else { 1611 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp); 1612 } 1613 if(leftCp < rightCp) { return Collation.LESS; } 1614 if(leftCp > rightCp) { return Collation.GREATER; } 1615 } 1616 return Collation.EQUAL; 1617 } 1618 1619 /** 1620 * Compares two CharSequences. 1621 * @internal 1622 * @deprecated This API is ICU internal only. 1623 */ 1624 @Override 1625 @Deprecated 1626 protected int doCompare(CharSequence left, CharSequence right) { 1627 if(left == right) { 1628 return Collation.EQUAL; 1629 } 1630 1631 // Identical-prefix test. 
1632 int equalPrefixLength = 0; 1633 for(;;) { 1634 if(equalPrefixLength == left.length()) { 1635 if(equalPrefixLength == right.length()) { return Collation.EQUAL; } 1636 break; 1637 } else if(equalPrefixLength == right.length() || 1638 left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) { 1639 break; 1640 } 1641 ++equalPrefixLength; 1642 } 1643 1644 CollationSettings roSettings = settings.readOnly(); 1645 boolean numeric = roSettings.isNumeric(); 1646 if(equalPrefixLength > 0) { 1647 if((equalPrefixLength != left.length() && 1648 data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) || 1649 (equalPrefixLength != right.length() && 1650 data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) { 1651 // Identical prefix: Back up to the start of a contraction or reordering sequence. 1652 while(--equalPrefixLength > 0 && 1653 data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {} 1654 } 1655 // Notes: 1656 // - A longer string can compare equal to a prefix of it if only ignorables follow. 1657 // - With a backward level, a longer string can compare less-than a prefix of it. 1658 1659 // Pass the actual start of each string into the CollationIterators, 1660 // plus the equalPrefixLength position, 1661 // so that prefix matches back into the equal prefix work. 
1662 } 1663 1664 int result; 1665 int fastLatinOptions = roSettings.fastLatinOptions; 1666 if(fastLatinOptions >= 0 && 1667 (equalPrefixLength == left.length() || 1668 left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) && 1669 (equalPrefixLength == right.length() || 1670 right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) { 1671 result = CollationFastLatin.compareUTF16(data.fastLatinTable, 1672 roSettings.fastLatinPrimaries, 1673 fastLatinOptions, 1674 left, right, equalPrefixLength); 1675 } else { 1676 result = CollationFastLatin.BAIL_OUT_RESULT; 1677 } 1678 1679 if(result == CollationFastLatin.BAIL_OUT_RESULT) { 1680 CollationBuffer buffer = null; 1681 try { 1682 buffer = getCollationBuffer(); 1683 if(roSettings.dontCheckFCD()) { 1684 buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength); 1685 buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength); 1686 result = CollationCompare.compareUpToQuaternary( 1687 buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings); 1688 } else { 1689 buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength); 1690 buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength); 1691 result = CollationCompare.compareUpToQuaternary( 1692 buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings); 1693 } 1694 } finally { 1695 releaseCollationBuffer(buffer); 1696 } 1697 } 1698 if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) { 1699 return result; 1700 } 1701 1702 CollationBuffer buffer = null; 1703 try { 1704 buffer = getCollationBuffer(); 1705 // Compare identical level. 
1706 Normalizer2Impl nfcImpl = data.nfcImpl; 1707 if(roSettings.dontCheckFCD()) { 1708 buffer.leftUTF16NFDIter.setText(left, equalPrefixLength); 1709 buffer.rightUTF16NFDIter.setText(right, equalPrefixLength); 1710 return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter); 1711 } else { 1712 buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength); 1713 buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength); 1714 return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter); 1715 } 1716 } finally { 1717 releaseCollationBuffer(buffer); 1718 } 1719 } 1720 1721 // package private constructors ------------------------------------------ 1722 1723 RuleBasedCollator(CollationTailoring t, ULocale vl) { 1724 data = t.data; 1725 settings = t.settings.clone(); 1726 tailoring = t; 1727 validLocale = vl; 1728 actualLocaleIsSameAsValid = false; 1729 } 1730 1731 private void adoptTailoring(CollationTailoring t) { 1732 assert(settings == null && data == null && tailoring == null); 1733 data = t.data; 1734 settings = t.settings.clone(); 1735 tailoring = t; 1736 validLocale = t.actualLocale; 1737 actualLocaleIsSameAsValid = false; 1738 } 1739 1740 // package private methods ----------------------------------------------- 1741 1742 /** 1743 * Tests whether a character is "unsafe" for use as a collation starting point. 1744 * 1745 * @param c code point or code unit 1746 * @return true if c is unsafe 1747 * @see CollationElementIterator#setOffset(int) 1748 */ 1749 final boolean isUnsafe(int c) { 1750 return data.isUnsafeBackward(c, settings.readOnly().isNumeric()); 1751 } 1752 1753 /** 1754 * Frozen state of the collator. 
1755 */ 1756 private Lock frozenLock; 1757 1758 private static final class CollationBuffer { 1759 private CollationBuffer(CollationData data) { 1760 leftUTF16CollIter = new UTF16CollationIterator(data); 1761 rightUTF16CollIter = new UTF16CollationIterator(data); 1762 leftFCDUTF16Iter = new FCDUTF16CollationIterator(data); 1763 rightFCDUTF16Iter = new FCDUTF16CollationIterator(data); 1764 leftUTF16NFDIter = new UTF16NFDIterator(); 1765 rightUTF16NFDIter = new UTF16NFDIterator(); 1766 leftFCDUTF16NFDIter = new FCDUTF16NFDIterator(); 1767 rightFCDUTF16NFDIter = new FCDUTF16NFDIterator(); 1768 } 1769 1770 UTF16CollationIterator leftUTF16CollIter; 1771 UTF16CollationIterator rightUTF16CollIter; 1772 FCDUTF16CollationIterator leftFCDUTF16Iter; 1773 FCDUTF16CollationIterator rightFCDUTF16Iter; 1774 1775 UTF16NFDIterator leftUTF16NFDIter; 1776 UTF16NFDIterator rightUTF16NFDIter; 1777 FCDUTF16NFDIterator leftFCDUTF16NFDIter; 1778 FCDUTF16NFDIterator rightFCDUTF16NFDIter; 1779 1780 RawCollationKey rawCollationKey; 1781 } 1782 1783 /** 1784 * Get the version of this collator object. 1785 * 1786 * @return the version object associated with this collator 1787 * @stable ICU 2.8 1788 */ 1789 @Override 1790 public VersionInfo getVersion() { 1791 int version = tailoring.version; 1792 int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor(); 1793 return VersionInfo.getInstance( 1794 (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4), 1795 ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff)); 1796 } 1797 1798 /** 1799 * Get the UCA version of this collator object. 1800 * 1801 * @return the version object associated with this collator 1802 * @stable ICU 2.8 1803 */ 1804 @Override 1805 public VersionInfo getUCAVersion() { 1806 VersionInfo v = getVersion(); 1807 // Note: This is tied to how the current implementation encodes the UCA version 1808 // in the overall getVersion(). 
1809 // Alternatively, we could load the root collator and get at lower-level data from there. 1810 // Either way, it will reflect the input collator's UCA version only 1811 // if it is a known implementation. 1812 // (C++ comment) It would be cleaner to make this a virtual Collator method. 1813 // (In Java, it is virtual.) 1814 return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0); 1815 } 1816 1817 private CollationBuffer collationBuffer; 1818 1819 private final CollationBuffer getCollationBuffer() { 1820 if (isFrozen()) { 1821 frozenLock.lock(); 1822 } else if (collationBuffer == null) { 1823 collationBuffer = new CollationBuffer(data); 1824 } 1825 return collationBuffer; 1826 } 1827 1828 private final void releaseCollationBuffer(CollationBuffer buffer) { 1829 if (isFrozen()) { 1830 frozenLock.unlock(); 1831 } 1832 } 1833 1834 /** 1835 * {@inheritDoc} 1836 * @draft ICU 53 (retain) 1837 * @provisional This API might change or be removed in a future release. 1838 */ 1839 @Override 1840 public ULocale getLocale(ULocale.Type type) { 1841 if (type == ULocale.ACTUAL_LOCALE) { 1842 return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale; 1843 } else if(type == ULocale.VALID_LOCALE) { 1844 return validLocale; 1845 } else { 1846 throw new IllegalArgumentException("unknown ULocale.Type " + type); 1847 } 1848 } 1849 1850 /** 1851 * {@inheritDoc} 1852 */ 1853 @Override 1854 void setLocale(ULocale valid, ULocale actual) { 1855 // This method is called 1856 // by other protected functions that checks and makes sure that 1857 // valid and actual are not null before passing 1858 assert (valid == null) == (actual == null); 1859 // Another check we could do is that the actual locale is at 1860 // the same level or less specific than the valid locale. 1861 // TODO: Starting with Java 7, use Objects.equals(a, b). 
1862 if(Utility.objectEquals(actual, tailoring.actualLocale)) { 1863 actualLocaleIsSameAsValid = false; 1864 } else { 1865 assert(Utility.objectEquals(actual, valid)); 1866 actualLocaleIsSameAsValid = true; 1867 } 1868 // Do not modify tailoring.actualLocale: 1869 // We cannot be sure that that would be thread-safe. 1870 validLocale = valid; 1871 } 1872 1873 CollationData data; 1874 SharedObject.Reference<CollationSettings> settings; // reference-counted 1875 CollationTailoring tailoring; // C++: reference-counted 1876 private ULocale validLocale; 1877 // Note: No need in Java to track which attributes have been set explicitly. 1878 // int or EnumSet explicitlySetAttributes; 1879 1880 private boolean actualLocaleIsSameAsValid; 1881} 1882