1/**
2*******************************************************************************
3* Copyright (C) 1996-2015, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*/
7package com.ibm.icu.text;
8
9import java.util.Comparator;
10import java.util.Enumeration;
11import java.util.Iterator;
12import java.util.LinkedList;
13import java.util.Locale;
14import java.util.MissingResourceException;
15import java.util.Set;
16
17import com.ibm.icu.impl.ICUDebug;
18import com.ibm.icu.impl.ICUResourceBundle;
19import com.ibm.icu.impl.coll.CollationData;
20import com.ibm.icu.impl.coll.CollationRoot;
21import com.ibm.icu.lang.UCharacter;
22import com.ibm.icu.lang.UProperty;
23import com.ibm.icu.lang.UScript;
24import com.ibm.icu.util.Freezable;
25import com.ibm.icu.util.ICUException;
26import com.ibm.icu.util.ULocale;
27import com.ibm.icu.util.ULocale.Category;
28import com.ibm.icu.util.UResourceBundle;
29import com.ibm.icu.util.VersionInfo;
30
31/**
32* {@icuenhanced java.text.Collator}.{@icu _usage_}
33*
34* <p>Collator performs locale-sensitive string comparison. A concrete
35* subclass, RuleBasedCollator, allows customization of the collation
36* ordering by the use of rule sets.</p>
37*
* <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link Freezable}.
39*
* <p>Following the <a href="http://www.unicode.org">Unicode
41* Consortium</a>'s specifications for the
42* <a href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation
43* Algorithm (UCA)</a>, there are 5 different levels of strength used
44* in comparisons:
45*
46* <ul>
47* <li>PRIMARY strength: Typically, this is used to denote differences between
48*     base characters (for example, "a" &lt; "b").
49*     It is the strongest difference. For example, dictionaries are divided
50*     into different sections by base character.
51* <li>SECONDARY strength: Accents in the characters are considered secondary
52*     differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other
53*     differences
54*     between letters can also be considered secondary differences, depending
55*     on the language. A secondary difference is ignored when there is a
56*     primary difference anywhere in the strings.
57* <li>TERTIARY strength: Upper and lower case differences in characters are
58*     distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
59*     "a&ograve;"). In addition, a variant of a letter differs from the base
60*     form on the tertiary strength (such as "A" and "&#9398;"). Another
61*     example is the
62*     difference between large and small Kana. A tertiary difference is ignored
63*     when there is a primary or secondary difference anywhere in the strings.
64* <li>QUATERNARY strength: When punctuation is ignored
65*     (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
66*     Ignoring Punctuations in the User Guide</a>) at PRIMARY to TERTIARY
67*     strength, an additional strength level can
68*     be used to distinguish words with and without punctuation (for example,
69*     "ab" &lt; "a-b" &lt; "aB").
70*     This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
71*     difference. The QUATERNARY strength should only be used if ignoring
72*     punctuation is required.
73* <li>IDENTICAL strength:
74*     When all other strengths are equal, the IDENTICAL strength is used as a
75*     tiebreaker. The Unicode code point values of the NFD form of each string
76*     are compared, just in case there is no difference.
*     For example, Hebrew cantillation marks are only distinguished at this
78*     strength. This strength should be used sparingly, as only code point
79*     value differences between two strings is an extremely rare occurrence.
80*     Using this strength substantially decreases the performance for both
81*     comparison and collation key generation APIs. This strength also
82*     increases the size of the collation key.
83* </ul>
84*
* <p>Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
86* the canonical decomposition mode and one that does not use any decomposition.
87* The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
88* is not supported here. If the canonical
89* decomposition mode is set, the Collator handles un-normalized text properly,
90* producing the same results as if the text were normalized in NFD. If
91* canonical decomposition is turned off, it is the user's responsibility to
92* ensure that all text is already in the appropriate form before performing
93* a comparison or before getting a CollationKey.</p>
94*
95* <p>For more information about the collation service see the
96* <a href="http://userguide.icu-project.org/collation">User Guide</a>.</p>
97*
98* <p>Examples of use
99* <pre>
100* // Get the Collator for US English and set its strength to PRIMARY
101* Collator usCollator = Collator.getInstance(Locale.US);
102* usCollator.setStrength(Collator.PRIMARY);
103* if (usCollator.compare("abc", "ABC") == 0) {
104*     System.out.println("Strings are equivalent");
105* }
106*
107* The following example shows how to compare two strings using the
108* Collator for the default locale.
109*
110* // Compare two strings in the default locale
111* Collator myCollator = Collator.getInstance();
112* myCollator.setDecomposition(NO_DECOMPOSITION);
113* if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
114*     System.out.println("&agrave;&#92;u0325 is not equals to a&#92;u0325&#768; without decomposition");
115*     myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
116*     if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
117*         System.out.println("Error: &agrave;&#92;u0325 should be equals to a&#92;u0325&#768; with decomposition");
118*     }
119*     else {
120*         System.out.println("&agrave;&#92;u0325 is equals to a&#92;u0325&#768; with decomposition");
121*     }
122* }
123* else {
124*     System.out.println("Error: &agrave;&#92;u0325 should be not equals to a&#92;u0325&#768; without decomposition");
125* }
126* </pre>
127* </p>
128* @see RuleBasedCollator
129* @see CollationKey
130* @author Syn Wee Quek
131* @stable ICU 2.8
132*/
133public abstract class Collator implements Comparator<Object>, Freezable<Collator>, Cloneable
134{
135    // public data members ---------------------------------------------------
136
137    /**
138     * Strongest collator strength value. Typically used to denote differences
139     * between base characters. See class documentation for more explanation.
140     * @see #setStrength
141     * @see #getStrength
142     * @stable ICU 2.8
143     */
144    public final static int PRIMARY = 0;
145
146    /**
147     * Second level collator strength value.
148     * Accents in the characters are considered secondary differences.
149     * Other differences between letters can also be considered secondary
150     * differences, depending on the language.
151     * See class documentation for more explanation.
152     * @see #setStrength
153     * @see #getStrength
154     * @stable ICU 2.8
155     */
156    public final static int SECONDARY = 1;
157
158    /**
159     * Third level collator strength value.
160     * Upper and lower case differences in characters are distinguished at this
161     * strength level. In addition, a variant of a letter differs from the base
162     * form on the tertiary level.
163     * See class documentation for more explanation.
164     * @see #setStrength
165     * @see #getStrength
166     * @stable ICU 2.8
167     */
168    public final static int TERTIARY = 2;
169
170    /**
171     * {@icu} Fourth level collator strength value.
172     * When punctuation is ignored
173     * (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
174     * Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
175     * strength, an additional strength level can
176     * be used to distinguish words with and without punctuation.
177     * See class documentation for more explanation.
178     * @see #setStrength
179     * @see #getStrength
180     * @stable ICU 2.8
181     */
182    public final static int QUATERNARY = 3;
183
184    /**
185     * Smallest Collator strength value. When all other strengths are equal,
186     * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
187     * values of the NFD form of each string are compared, just in case there
188     * is no difference.
189     * See class documentation for more explanation.
190     * </p>
191     * <p>
192     * Note this value is different from JDK's
193     * </p>
194     * @stable ICU 2.8
195     */
196    public final static int IDENTICAL = 15;
197
198    /**
199     * {@icunote} This is for backwards compatibility with Java APIs only.  It
200     * should not be used, IDENTICAL should be used instead.  ICU's
201     * collation does not support Java's FULL_DECOMPOSITION mode.
202     * @stable ICU 3.4
203     */
204    public final static int FULL_DECOMPOSITION = IDENTICAL;
205
206    /**
207     * Decomposition mode value. With NO_DECOMPOSITION set, Strings
208     * will not be decomposed for collation. This is the default
209     * decomposition setting unless otherwise specified by the locale
210     * used to create the Collator.</p>
211     *
212     * <p><strong>Note</strong> this value is different from the JDK's.</p>
213     * @see #CANONICAL_DECOMPOSITION
214     * @see #getDecomposition
215     * @see #setDecomposition
216     * @stable ICU 2.8
217     */
218    public final static int NO_DECOMPOSITION = 16;
219
220    /**
221     * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
222     * characters that are canonical variants according to the Unicode standard
223     * will be decomposed for collation.</p>
224     *
225     * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
226     * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
227     * Unicode Technical Report #15</a>.
228     * </p>
229     * @see #NO_DECOMPOSITION
230     * @see #getDecomposition
231     * @see #setDecomposition
232     * @stable ICU 2.8
233     */
234    public final static int CANONICAL_DECOMPOSITION = 17;
235
236    /**
237     * Reordering codes for non-script groups that can be reordered under collation.
238     *
239     * @see #getReorderCodes
240     * @see #setReorderCodes
241     * @see #getEquivalentReorderCodes
242     * @stable ICU 4.8
243     */
244    public static interface ReorderCodes {
245        /**
246         * A special reordering code that is used to specify the default reordering codes for a locale.
247         * @stable ICU 4.8
248         */
249        public final static int DEFAULT          = -1;  // == UScript.INVALID_CODE
250        /**
251         * A special reordering code that is used to specify no reordering codes.
252         * @stable ICU 4.8
253         */
254        public final static int NONE          = UScript.UNKNOWN;
255        /**
256         * A special reordering code that is used to specify all other codes used for reordering except
257         * for the codes listed as ReorderingCodes and those listed explicitly in a reordering.
258         * @stable ICU 4.8
259         */
260        public final static int OTHERS          = UScript.UNKNOWN;
261        /**
262         * Characters with the space property.
263         * This is equivalent to the rule value "space".
264         * @stable ICU 4.8
265         */
266        public final static int SPACE          = 0x1000;
267        /**
268         * The first entry in the enumeration of reordering groups. This is intended for use in
269         * range checking and enumeration of the reorder codes.
270         * @stable ICU 4.8
271         */
272        public final static int FIRST          = SPACE;
273        /**
274         * Characters with the punctuation property.
275         * This is equivalent to the rule value "punct".
276         * @stable ICU 4.8
277         */
278        public final static int PUNCTUATION    = 0x1001;
279        /**
280         * Characters with the symbol property.
281         * This is equivalent to the rule value "symbol".
282         * @stable ICU 4.8
283         */
284        public final static int SYMBOL         = 0x1002;
285        /**
286         * Characters with the currency property.
287         * This is equivalent to the rule value "currency".
288         * @stable ICU 4.8
289         */
290        public final static int CURRENCY       = 0x1003;
291        /**
292         * Characters with the digit property.
293         * This is equivalent to the rule value "digit".
294         * @stable ICU 4.8
295         */
296        public final static int DIGIT          = 0x1004;
297        /**
298         * The limit of the reorder codes. This is intended for use in range checking
299         * and enumeration of the reorder codes.
300         * @stable ICU 4.8
301         */
302        public final static int LIMIT          = 0x1005;
303    }
304
305    // public methods --------------------------------------------------------
306
307    /**
308     * Compares the equality of two Collator objects. Collator objects are equal if they have the same
309     * collation (sorting & searching) behavior.
310     *
311     * <p>The base class checks for null and for equal types.
312     * Subclasses should override.
313     *
314     * @param obj the Collator to compare to.
315     * @return true if this Collator has exactly the same collation behavior as obj, false otherwise.
316     * @stable ICU 2.8
317     */
318    @Override
319    public boolean equals(Object obj) {
320        // Subclasses: Call this method and then add more specific checks.
321        return this == obj || (obj != null && getClass() == obj.getClass());
322    }
323
324    // public setters --------------------------------------------------------
325
326    private void checkNotFrozen() {
327        if (isFrozen()) {
328            throw new UnsupportedOperationException("Attempt to modify frozen Collator");
329        }
330    }
331
332    /**
333     * Sets this Collator's strength attribute. The strength attribute
334     * determines the minimum level of difference considered significant
335     * during comparison.</p>
336     *
337     * <p>The base class method does nothing. Subclasses should override it if appropriate.
338     *
339     * <p>See the Collator class description for an example of use.</p>
340     * @param newStrength the new strength value.
341     * @see #getStrength
342     * @see #PRIMARY
343     * @see #SECONDARY
344     * @see #TERTIARY
345     * @see #QUATERNARY
346     * @see #IDENTICAL
347     * @throws IllegalArgumentException if the new strength value is not valid.
348     * @stable ICU 2.8
349     */
350    public void setStrength(int newStrength)
351    {
352        checkNotFrozen();
353    }
354
355    /**
356     * @return this, for chaining
357     * @internal Used in UnicodeTools
358     * @deprecated This API is ICU internal only.
359     */
360    @Deprecated
361    public Collator setStrength2(int newStrength)
362    {
363        setStrength(newStrength);
364        return this;
365    }
366
367    /**
368     * Sets the decomposition mode of this Collator.  Setting this
369     * decomposition attribute with CANONICAL_DECOMPOSITION allows the
370     * Collator to handle un-normalized text properly, producing the
371     * same results as if the text were normalized. If
372     * NO_DECOMPOSITION is set, it is the user's responsibility to
373     * insure that all text is already in the appropriate form before
374     * a comparison or before getting a CollationKey. Adjusting
375     * decomposition mode allows the user to select between faster and
376     * more complete collation behavior.</p>
377     *
378     * <p>Since a great many of the world's languages do not require
379     * text normalization, most locales set NO_DECOMPOSITION as the
380     * default decomposition mode.</p>
381     *
382     * <p>The base class method does nothing. Subclasses should override it if appropriate.
383     *
384     * <p>See getDecomposition for a description of decomposition
385     * mode.</p>
386     *
387     * @param decomposition the new decomposition mode
388     * @see #getDecomposition
389     * @see #NO_DECOMPOSITION
390     * @see #CANONICAL_DECOMPOSITION
391     * @throws IllegalArgumentException If the given value is not a valid
392     *            decomposition mode.
393     * @stable ICU 2.8
394     */
395    public void setDecomposition(int decomposition)
396    {
397        checkNotFrozen();
398    }
399
400    /**
401     * Sets the reordering codes for this collator.
402     * Collation reordering allows scripts and some other groups of characters
403     * to be moved relative to each other. This reordering is done on top of
404     * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
405     * at the start and/or the end of the collation order. These groups are specified using
406     * UScript codes and {@link Collator.ReorderCodes} entries.
407     *
408     * <p>By default, reordering codes specified for the start of the order are placed in the
409     * order given after several special non-script blocks. These special groups of characters
410     * are space, punctuation, symbol, currency, and digit. These special groups are represented with
411     * {@link Collator.ReorderCodes} entries. Script groups can be intermingled with
412     * these special non-script groups if those special groups are explicitly specified in the reordering.
413     *
414     * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS}
415     * stands for any script that is not explicitly
416     * mentioned in the list of reordering codes given. Anything that is after OTHERS
417     * will go at the very end of the reordering in the order given.
418     *
419     * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT}
420     * will reset the reordering for this collator
421     * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
422     * was specified when this collator was created from resource data or from rules. The
423     * DEFAULT code <b>must</b> be the sole code supplied when it is used.
424     * If not, then an {@link IllegalArgumentException} will be thrown.
425     *
426     * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE}
427     * will remove any reordering for this collator.
428     * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
429     * NONE code <b>must</b> be the sole code supplied when it is used.
430     *
431     * @param order the reordering codes to apply to this collator; if this is null or an empty array
432     * then this clears any existing reordering
433     * @see #getReorderCodes
434     * @see #getEquivalentReorderCodes
435     * @see Collator.ReorderCodes
436     * @see UScript
437     * @stable ICU 4.8
438     */
439    public void setReorderCodes(int... order)
440    {
441        throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
442    }
443
444    // public getters --------------------------------------------------------
445
446    /**
447     * Returns the Collator for the current default locale.
448     * The default locale is determined by java.util.Locale.getDefault().
449     * @return the Collator for the default locale (for example, en_US) if it
450     *         is created successfully. Otherwise if there is no Collator
451     *         associated with the current locale, the root collator
452     *         will be returned.
453     * @see java.util.Locale#getDefault()
454     * @see #getInstance(Locale)
455     * @stable ICU 2.8
456     */
457    public static final Collator getInstance()
458    {
459        return getInstance(ULocale.getDefault());
460    }
461
462    /**
463     * Clones the collator.
464     * @stable ICU 2.6
465     * @return a clone of this collator.
466     */
467    public Object clone() throws CloneNotSupportedException {
468        return super.clone();
469    }
470
471    // begin registry stuff
472
473    /**
474     * A factory used with registerFactory to register multiple collators and provide
475     * display names for them.  If standard locale display names are sufficient,
476     * Collator instances may be registered instead.
477     * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
478     * ULocale instead of Locale.  Instead of overriding createCollator(Locale),
479     * new implementations should override createCollator(ULocale).  Note that
480     * one of these two methods <b>MUST</b> be overridden or else an infinite
481     * loop will occur.
482     * @stable ICU 2.6
483     */
484    public static abstract class CollatorFactory {
485        /**
486         * Return true if this factory will be visible.  Default is true.
487         * If not visible, the locales supported by this factory will not
488         * be listed by getAvailableLocales.
489         *
490         * @return true if this factory is visible
491         * @stable ICU 2.6
492         */
493        public boolean visible() {
494            return true;
495        }
496
497        /**
498         * Return an instance of the appropriate collator.  If the locale
499         * is not supported, return null.
500         * <b>Note:</b> as of ICU4J 3.2, implementations should override
501         * this method instead of createCollator(Locale).
502         * @param loc the locale for which this collator is to be created.
503         * @return the newly created collator.
504         * @stable ICU 3.2
505         */
506        public Collator createCollator(ULocale loc) {
507            return createCollator(loc.toLocale());
508        }
509
510        /**
511         * Return an instance of the appropriate collator.  If the locale
512         * is not supported, return null.
513         * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
514         * createCollator(ULocale) instead of this method, and inherit this
515         * method's implementation.  This method is no longer abstract
516         * and instead delegates to createCollator(ULocale).
517         * @param loc the locale for which this collator is to be created.
518         * @return the newly created collator.
519         * @stable ICU 2.6
520         */
521         public Collator createCollator(Locale loc) {
522            return createCollator(ULocale.forLocale(loc));
523        }
524
525        /**
526         * Return the name of the collator for the objectLocale, localized for the displayLocale.
527         * If objectLocale is not visible or not defined by the factory, return null.
528         * @param objectLocale the locale identifying the collator
529         * @param displayLocale the locale for which the display name of the collator should be localized
530         * @return the display name
531         * @stable ICU 2.6
532         */
533        public String getDisplayName(Locale objectLocale, Locale displayLocale) {
534            return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
535        }
536
537        /**
538         * Return the name of the collator for the objectLocale, localized for the displayLocale.
539         * If objectLocale is not visible or not defined by the factory, return null.
540         * @param objectLocale the locale identifying the collator
541         * @param displayLocale the locale for which the display name of the collator should be localized
542         * @return the display name
543         * @stable ICU 3.2
544         */
545        public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
546            if (visible()) {
547                Set<String> supported = getSupportedLocaleIDs();
548                String name = objectLocale.getBaseName();
549                if (supported.contains(name)) {
550                    return objectLocale.getDisplayName(displayLocale);
551                }
552            }
553            return null;
554        }
555
556        /**
557         * Return an unmodifiable collection of the locale names directly
558         * supported by this factory.
559         *
560         * @return the set of supported locale IDs.
561         * @stable ICU 2.6
562         */
563        public abstract Set<String> getSupportedLocaleIDs();
564
565        /**
566         * Empty default constructor.
567         * @stable ICU 2.6
568         */
569        protected CollatorFactory() {
570        }
571    }
572
573    static abstract class ServiceShim {
574        abstract Collator getInstance(ULocale l);
575        abstract Object registerInstance(Collator c, ULocale l);
576        abstract Object registerFactory(CollatorFactory f);
577        abstract boolean unregister(Object k);
578        abstract Locale[] getAvailableLocales(); // TODO remove
579        abstract ULocale[] getAvailableULocales();
580        abstract String getDisplayName(ULocale ol, ULocale dl);
581    }
582
583    private static ServiceShim shim;
584    private static ServiceShim getShim() {
585        // Note: this instantiation is safe on loose-memory-model configurations
586        // despite lack of synchronization, since the shim instance has no state--
587        // it's all in the class init.  The worst problem is we might instantiate
588        // two shim instances, but they'll share the same state so that's ok.
589        if (shim == null) {
590            try {
591                Class<?> cls = Class.forName("com.ibm.icu.text.CollatorServiceShim");
592                shim = (ServiceShim)cls.newInstance();
593            }
594            catch (MissingResourceException e)
595            {
596                ///CLOVER:OFF
597                throw e;
598                ///CLOVER:ON
599            }
600            catch (Exception e) {
601                ///CLOVER:OFF
602                if(DEBUG){
603                    e.printStackTrace();
604                }
605                throw new ICUException(e);
606                ///CLOVER:ON
607            }
608        }
609        return shim;
610    }
611
612    /**
613     * Simpler/faster methods for ASCII than ones based on Unicode data.
614     * TODO: There should be code like this somewhere already??
615     */
616    private static final class ASCII {
617        static boolean equalIgnoreCase(CharSequence left, CharSequence right) {
618            int length = left.length();
619            if (length != right.length()) { return false; }
620            for (int i = 0; i < length; ++i) {
621                char lc = left.charAt(i);
622                char rc = right.charAt(i);
623                if (lc == rc) { continue; }
624                if ('A' <= lc && lc <= 'Z') {
625                    if ((lc + 0x20) == rc) { continue; }
626                } else if ('A' <= rc && rc <= 'Z') {
627                    if ((rc + 0x20) == lc) { continue; }
628                }
629                return false;
630            }
631            return true;
632        }
633    }
634
635    private static final boolean getYesOrNo(String keyword, String s) {
636        if (ASCII.equalIgnoreCase(s, "yes")) {
637            return true;
638        }
639        if (ASCII.equalIgnoreCase(s, "no")) {
640            return false;
641        }
642        throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
643    }
644
645    private static final int getIntValue(String keyword, String s, String... values) {
646        for (int i = 0; i < values.length; ++i) {
647            if (ASCII.equalIgnoreCase(s, values[i])) {
648                return i;
649            }
650        }
651        throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
652    }
653
654    private static final int getReorderCode(String keyword, String s) {
655        return Collator.ReorderCodes.FIRST +
656                getIntValue(keyword, s, "space", "punct", "symbol", "currency", "digit");
657        // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
658        // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
659        // Avoid introducing synonyms/aliases.
660    }
661
662    /**
663     * Sets collation attributes according to locale keywords. See
664     * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
665     *
666     * Using "alias" keywords and values where defined:
667     * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
668     * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
669     */
670    private static void setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc) {
671        // Check for collation keywords that were already deprecated
672        // before any were supported in createInstance() (except for "collation").
673        String value = loc.getKeywordValue("colHiraganaQuaternary");
674        if (value != null) {
675            throw new UnsupportedOperationException("locale keyword kh/colHiraganaQuaternary");
676        }
677        value = loc.getKeywordValue("variableTop");
678        if (value != null) {
679            throw new UnsupportedOperationException("locale keyword vt/variableTop");
680        }
681        // Parse known collation keywords, ignore others.
682        value = loc.getKeywordValue("colStrength");
683        if (value != null) {
684            // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
685            int strength = getIntValue("colStrength", value,
686                    "primary", "secondary", "tertiary", "quaternary", "identical");
687            coll.setStrength(strength <= Collator.QUATERNARY ? strength : Collator.IDENTICAL);
688        }
689        value = loc.getKeywordValue("colBackwards");
690        if (value != null) {
691            if (rbc != null) {
692                rbc.setFrenchCollation(getYesOrNo("colBackwards", value));
693            } else {
694                throw new UnsupportedOperationException(
695                        "locale keyword kb/colBackwards only settable for RuleBasedCollator");
696            }
697        }
698        value = loc.getKeywordValue("colCaseLevel");
699        if (value != null) {
700            if (rbc != null) {
701                rbc.setCaseLevel(getYesOrNo("colCaseLevel", value));
702            } else {
703                throw new UnsupportedOperationException(
704                        "locale keyword kb/colBackwards only settable for RuleBasedCollator");
705            }
706        }
707        value = loc.getKeywordValue("colCaseFirst");
708        if (value != null) {
709            if (rbc != null) {
710                int cf = getIntValue("colCaseFirst", value, "no", "lower", "upper");
711                if (cf == 0) {
712                    rbc.setLowerCaseFirst(false);
713                    rbc.setUpperCaseFirst(false);
714                } else if (cf == 1) {
715                    rbc.setLowerCaseFirst(true);
716                } else /* cf == 2 */ {
717                    rbc.setUpperCaseFirst(true);
718                }
719            } else {
720                throw new UnsupportedOperationException(
721                        "locale keyword kf/colCaseFirst only settable for RuleBasedCollator");
722            }
723        }
724        value = loc.getKeywordValue("colAlternate");
725        if (value != null) {
726            if (rbc != null) {
727                rbc.setAlternateHandlingShifted(
728                        getIntValue("colAlternate", value, "non-ignorable", "shifted") != 0);
729            } else {
730                throw new UnsupportedOperationException(
731                        "locale keyword ka/colAlternate only settable for RuleBasedCollator");
732            }
733        }
734        value = loc.getKeywordValue("colNormalization");
735        if (value != null) {
736            coll.setDecomposition(getYesOrNo("colNormalization", value) ?
737                    Collator.CANONICAL_DECOMPOSITION : Collator.NO_DECOMPOSITION);
738        }
739        value = loc.getKeywordValue("colNumeric");
740        if (value != null) {
741            if (rbc != null) {
742                rbc.setNumericCollation(getYesOrNo("colNumeric", value));
743            } else {
744                throw new UnsupportedOperationException(
745                        "locale keyword kn/colNumeric only settable for RuleBasedCollator");
746            }
747        }
748        value = loc.getKeywordValue("colReorder");
749        if (value != null) {
750            int[] codes = new int[UScript.CODE_LIMIT + Collator.ReorderCodes.LIMIT - Collator.ReorderCodes.FIRST];
751            int codesLength = 0;
752            int scriptNameStart = 0;
753            for (;;) {
754                if (codesLength == codes.length) {
755                    throw new IllegalArgumentException(
756                            "too many script codes for colReorder locale keyword: " + value);
757                }
758                int limit = scriptNameStart;
759                while (limit < value.length() && value.charAt(limit) != '-') { ++limit; }
760                String scriptName = value.substring(scriptNameStart, limit);
761                int code;
762                if (scriptName.length() == 4) {
763                    // Strict parsing, accept only 4-letter script codes, not long names.
764                    code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptName);
765                } else {
766                    code = getReorderCode("colReorder", scriptName);
767                }
768                codes[codesLength++] = code;
769                if (limit == value.length()) { break; }
770                scriptNameStart = limit + 1;
771            }
772            if (codesLength == 0) {
773                throw new IllegalArgumentException("no script codes for colReorder locale keyword");
774            }
775            int[] args = new int[codesLength];
776            System.arraycopy(codes, 0, args, 0, codesLength);
777            coll.setReorderCodes(args);
778        }
779        value = loc.getKeywordValue("kv");
780        if (value != null) {
781            coll.setMaxVariable(getReorderCode("kv", value));
782        }
783    }
784
785    /**
786     * {@icu} Returns the Collator for the desired locale.
787     *
788     * <p>For some languages, multiple collation types are available;
789     * for example, "de@collation=phonebook".
790     * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
791     * in the old locale extension syntax ("el@colCaseFirst=upper")
792     * or in language tag syntax ("el-u-kf-upper").
793     * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
794     *
795     * @param locale the desired locale.
796     * @return Collator for the desired locale if it is created successfully.
797     *         Otherwise if there is no Collator
798     *         associated with the current locale, the root collator will
799     *         be returned.
800     * @see java.util.Locale
801     * @see java.util.ResourceBundle
802     * @see #getInstance(Locale)
803     * @see #getInstance()
804     * @stable ICU 3.0
805     */
806    public static final Collator getInstance(ULocale locale) {
807        // fetching from service cache is faster than instantiation
808        if (locale == null) {
809            locale = ULocale.getDefault();
810        }
811        Collator coll = getShim().getInstance(locale);
812        if (!locale.getName().equals(locale.getBaseName())) {  // any keywords?
813            setAttributesFromKeywords(locale, coll,
814                    (coll instanceof RuleBasedCollator) ? (RuleBasedCollator)coll : null);
815        }
816        return coll;
817    }
818
819    /**
820     * Returns the Collator for the desired locale.
821     *
822     * <p>For some languages, multiple collation types are available;
823     * for example, "de-u-co-phonebk".
824     * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
825     * in the old locale extension syntax ("el@colCaseFirst=upper", only with {@link ULocale})
826     * or in language tag syntax ("el-u-kf-upper").
827     * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
828     *
829     * @param locale the desired locale.
830     * @return Collator for the desired locale if it is created successfully.
831     *         Otherwise if there is no Collator
832     *         associated with the current locale, the root collator will
833     *         be returned.
834     * @see java.util.Locale
835     * @see java.util.ResourceBundle
836     * @see #getInstance(ULocale)
837     * @see #getInstance()
838     * @stable ICU 2.8
839     */
840    public static final Collator getInstance(Locale locale) {
841        return getInstance(ULocale.forLocale(locale));
842    }
843
844    /**
845     * {@icu} Registers a collator as the default collator for the provided locale.  The
846     * collator should not be modified after it is registered.
847     *
848     * <p>Because ICU may choose to cache Collator objects internally, this must
849     * be called at application startup, prior to any calls to
850     * Collator.getInstance to avoid undefined behavior.
851     *
852     * @param collator the collator to register
853     * @param locale the locale for which this is the default collator
854     * @return an object that can be used to unregister the registered collator.
855     *
856     * @stable ICU 3.2
857     */
858    public static final Object registerInstance(Collator collator, ULocale locale) {
859        return getShim().registerInstance(collator, locale);
860    }
861
862    /**
863     * {@icu} Registers a collator factory.
864     *
865     * <p>Because ICU may choose to cache Collator objects internally, this must
866     * be called at application startup, prior to any calls to
867     * Collator.getInstance to avoid undefined behavior.
868     *
869     * @param factory the factory to register
870     * @return an object that can be used to unregister the registered factory.
871     *
872     * @stable ICU 2.6
873     */
874    public static final Object registerFactory(CollatorFactory factory) {
875        return getShim().registerFactory(factory);
876    }
877
878    /**
879     * {@icu} Unregisters a collator previously registered using registerInstance.
880     * @param registryKey the object previously returned by registerInstance.
881     * @return true if the collator was successfully unregistered.
882     * @stable ICU 2.6
883     */
884    public static final boolean unregister(Object registryKey) {
885        if (shim == null) {
886            return false;
887        }
888        return shim.unregister(registryKey);
889    }
890
891    /**
892     * Returns the set of locales, as Locale objects, for which collators
893     * are installed.  Note that Locale objects do not support RFC 3066.
894     * @return the list of locales in which collators are installed.
895     * This list includes any that have been registered, in addition to
896     * those that are installed with ICU4J.
897     * @stable ICU 2.4
898     */
899    public static Locale[] getAvailableLocales() {
900        // TODO make this wrap getAvailableULocales later
901        if (shim == null) {
902            return ICUResourceBundle.getAvailableLocales(
903                ICUResourceBundle.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
904        }
905        return shim.getAvailableLocales();
906    }
907
908    /**
909     * {@icu} Returns the set of locales, as ULocale objects, for which collators
910     * are installed.  ULocale objects support RFC 3066.
911     * @return the list of locales in which collators are installed.
912     * This list includes any that have been registered, in addition to
913     * those that are installed with ICU4J.
914     * @stable ICU 3.0
915     */
916    public static final ULocale[] getAvailableULocales() {
917        if (shim == null) {
918            return ICUResourceBundle.getAvailableULocales(
919                ICUResourceBundle.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
920        }
921        return shim.getAvailableULocales();
922    }
923
    /**
     * The list of keywords for this service.  This must be kept in sync with
     * the resource data.  Element 0 ("collation") is the only keyword that
     * {@link #getKeywordValues} accepts.
     * @since ICU 3.0
     */
    private static final String[] KEYWORDS = { "collation" };
930
    /**
     * The resource name for this service.  Note that this is not the same as
     * the keyword for this service: the resource is named "collations", while
     * the keyword is "collation".
     * @since ICU 3.0
     */
    private static final String RESOURCE = "collations";
937
    /**
     * The resource bundle base name for this service.
     * @since ICU 3.0
     */

    private static final String BASE = ICUResourceBundle.ICU_COLLATION_BASE_NAME;
944
945    /**
946     * {@icu} Returns an array of all possible keywords that are relevant to
947     * collation. At this point, the only recognized keyword for this
948     * service is "collation".
949     * @return an array of valid collation keywords.
950     * @see #getKeywordValues
951     * @stable ICU 3.0
952     */
953    public static final String[] getKeywords() {
954        return KEYWORDS;
955    }
956
957    /**
958     * {@icu} Given a keyword, returns an array of all values for
959     * that keyword that are currently in use.
960     * @param keyword one of the keywords returned by getKeywords.
961     * @see #getKeywords
962     * @stable ICU 3.0
963     */
964    public static final String[] getKeywordValues(String keyword) {
965        if (!keyword.equals(KEYWORDS[0])) {
966            throw new IllegalArgumentException("Invalid keyword: " + keyword);
967        }
968        return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
969    }
970
971    /**
972     * {@icu} Given a key and a locale, returns an array of string values in a preferred
973     * order that would make a difference. These are all and only those values where
974     * the open (creation) of the service with the locale formed from the input locale
975     * plus input keyword and that value has different behavior than creation with the
976     * input locale alone.
977     * @param key           one of the keys supported by this service.  For now, only
978     *                      "collation" is supported.
979     * @param locale        the locale
980     * @param commonlyUsed  if set to true it will return only commonly used values
981     *                      with the given locale in preferred order.  Otherwise,
982     *                      it will return all the available values for the locale.
983     * @return an array of string values for the given key and the locale.
984     * @stable ICU 4.2
985     */
986    public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
987                                                           boolean commonlyUsed) {
988        // Note: The parameter commonlyUsed is actually not used.
989        // The switch is in the method signature for consistency
990        // with other locale services.
991
992        // Read available collation values from collation bundles
993        String baseLoc = locale.getBaseName();
994        LinkedList<String> values = new LinkedList<String>();
995
996        UResourceBundle bundle = UResourceBundle.getBundleInstance(
997                ICUResourceBundle.ICU_COLLATION_BASE_NAME, baseLoc);
998
999        String defcoll = null;
1000        while (bundle != null) {
1001            UResourceBundle collations = bundle.get("collations");
1002            Enumeration<String> collEnum = collations.getKeys();
1003            while (collEnum.hasMoreElements()) {
1004                String collkey = collEnum.nextElement();
1005                if (collkey.equals("default")) {
1006                    if (defcoll == null) {
1007                        // Keep the default
1008                        defcoll = collations.getString("default");
1009                    }
1010                } else if (!collkey.startsWith("private-") && !values.contains(collkey)) {
1011                    values.add(collkey);
1012                }
1013            }
1014            bundle = ((ICUResourceBundle)bundle).getParent();
1015        }
1016        // Reordering
1017        Iterator<String> itr = values.iterator();
1018        String[] result = new String[values.size()];
1019        result[0] = defcoll;
1020        int idx = 1;
1021        while (itr.hasNext()) {
1022            String collKey = itr.next();
1023            if (!collKey.equals(defcoll)) {
1024                result[idx++] = collKey;
1025            }
1026        }
1027        return result;
1028    }
1029
1030    /**
1031     * {@icu} Returns the functionally equivalent locale for the given
1032     * requested locale, with respect to given keyword, for the
1033     * collation service.  If two locales return the same result, then
1034     * collators instantiated for these locales will behave
1035     * equivalently.  The converse is not always true; two collators
1036     * may in fact be equivalent, but return different results, due to
1037     * internal details.  The return result has no other meaning than
1038     * that stated above, and implies nothing as to the relationship
1039     * between the two locales.  This is intended for use by
1040     * applications who wish to cache collators, or otherwise reuse
1041     * collators when possible.  The functional equivalent may change
1042     * over time.  For more information, please see the <a
1043     * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
1044     * Locales and Services</a> section of the ICU User Guide.
1045     * @param keyword a particular keyword as enumerated by
1046     * getKeywords.
1047     * @param locID The requested locale
1048     * @param isAvailable If non-null, isAvailable[0] will receive and
1049     * output boolean that indicates whether the requested locale was
1050     * 'available' to the collation service. If non-null, isAvailable
1051     * must have length >= 1.
1052     * @return the locale
1053     * @stable ICU 3.0
1054     */
1055    public static final ULocale getFunctionalEquivalent(String keyword,
1056                                                        ULocale locID,
1057                                                        boolean isAvailable[]) {
1058        return ICUResourceBundle.getFunctionalEquivalent(BASE, ICUResourceBundle.ICU_DATA_CLASS_LOADER, RESOURCE,
1059                                                         keyword, locID, isAvailable, true);
1060    }
1061
1062    /**
1063     * {@icu} Returns the functionally equivalent locale for the given
1064     * requested locale, with respect to given keyword, for the
1065     * collation service.
1066     * @param keyword a particular keyword as enumerated by
1067     * getKeywords.
1068     * @param locID The requested locale
1069     * @return the locale
1070     * @see #getFunctionalEquivalent(String,ULocale,boolean[])
1071     * @stable ICU 3.0
1072     */
1073    public static final ULocale getFunctionalEquivalent(String keyword,
1074                                                        ULocale locID) {
1075        return getFunctionalEquivalent(keyword, locID, null);
1076    }
1077
1078    /**
1079     * {@icu} Returns the name of the collator for the objectLocale, localized for the
1080     * displayLocale.
1081     * @param objectLocale the locale of the collator
1082     * @param displayLocale the locale for the collator's display name
1083     * @return the display name
1084     * @stable ICU 2.6
1085     */
1086    static public String getDisplayName(Locale objectLocale, Locale displayLocale) {
1087        return getShim().getDisplayName(ULocale.forLocale(objectLocale),
1088                                        ULocale.forLocale(displayLocale));
1089    }
1090
1091    /**
1092     * {@icu} Returns the name of the collator for the objectLocale, localized for the
1093     * displayLocale.
1094     * @param objectLocale the locale of the collator
1095     * @param displayLocale the locale for the collator's display name
1096     * @return the display name
1097     * @stable ICU 3.2
1098     */
1099    static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
1100        return getShim().getDisplayName(objectLocale, displayLocale);
1101    }
1102
1103    /**
1104     * {@icu} Returns the name of the collator for the objectLocale, localized for the
1105     * default <code>DISPLAY</code> locale.
1106     * @param objectLocale the locale of the collator
1107     * @return the display name
1108     * @see com.ibm.icu.util.ULocale.Category#DISPLAY
1109     * @stable ICU 2.6
1110     */
1111    static public String getDisplayName(Locale objectLocale) {
1112        return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault(Category.DISPLAY));
1113    }
1114
1115    /**
1116     * {@icu} Returns the name of the collator for the objectLocale, localized for the
1117     * default <code>DISPLAY</code> locale.
1118     * @param objectLocale the locale of the collator
1119     * @return the display name
1120     * @see com.ibm.icu.util.ULocale.Category#DISPLAY
1121     * @stable ICU 3.2
1122     */
1123    static public String getDisplayName(ULocale objectLocale) {
1124        return getShim().getDisplayName(objectLocale, ULocale.getDefault(Category.DISPLAY));
1125    }
1126
1127    /**
1128     * Returns this Collator's strength attribute. The strength attribute
1129     * determines the minimum level of difference considered significant.
     * <p>
1131     * {@icunote} This can return QUATERNARY strength, which is not supported by the
1132     * JDK version.
1133     * <p>
1134     * See the Collator class description for more details.
1135     * </p>
1136     * <p>The base class method always returns {@link #TERTIARY}.
1137     * Subclasses should override it if appropriate.
1138     *
1139     * @return this Collator's current strength attribute.
1140     * @see #setStrength
1141     * @see #PRIMARY
1142     * @see #SECONDARY
1143     * @see #TERTIARY
1144     * @see #QUATERNARY
1145     * @see #IDENTICAL
1146     * @stable ICU 2.8
1147     */
1148    public int getStrength()
1149    {
1150        return TERTIARY;
1151    }
1152
1153    /**
1154     * Returns the decomposition mode of this Collator. The decomposition mode
1155     * determines how Unicode composed characters are handled.
     *
1157     * <p>
1158     * See the Collator class description for more details.
1159     * </p>
1160     * <p>The base class method always returns {@link #NO_DECOMPOSITION}.
1161     * Subclasses should override it if appropriate.
1162     *
1163     * @return the decomposition mode
1164     * @see #setDecomposition
1165     * @see #NO_DECOMPOSITION
1166     * @see #CANONICAL_DECOMPOSITION
1167     * @stable ICU 2.8
1168     */
1169    public int getDecomposition()
1170    {
1171        return NO_DECOMPOSITION;
1172    }
1173
1174    // public other methods -------------------------------------------------
1175
1176    /**
1177     * Compares the equality of two text Strings using
1178     * this Collator's rules, strength and decomposition mode.  Convenience method.
1179     * @param source the source string to be compared.
1180     * @param target the target string to be compared.
1181     * @return true if the strings are equal according to the collation
1182     *         rules, otherwise false.
1183     * @see #compare
     * @throws NullPointerException thrown if either argument is null.
1185     * @stable ICU 2.8
1186     */
1187    public boolean equals(String source, String target)
1188    {
1189        return (compare(source, target) == 0);
1190    }
1191
1192    /**
1193     * {@icu} Returns a UnicodeSet that contains all the characters and sequences tailored
1194     * in this collator.
     * @return a UnicodeSet object containing all the
1196     *         code points and sequences that may sort differently than
1197     *         in the root collator.
1198     * @stable ICU 2.4
1199     */
1200    public UnicodeSet getTailoredSet()
1201    {
1202        return new UnicodeSet(0, 0x10FFFF);
1203    }
1204
    /**
     * Compares the source text String to the target text String according to
     * this Collator's rules, strength and decomposition mode.
     * Returns an integer less than,
     * equal to or greater than zero depending on whether the source String is
     * less than, equal to or greater than the target String. See the Collator
     * class description for an example of use.
     *
     * @param source the source String.
     * @param target the target String.
     * @return an integer value: less than zero if source is less than target,
     *         zero if source and target are equal, greater than zero if source
     *         is greater than target.
     * @see CollationKey
     * @see #getCollationKey
     * @throws NullPointerException thrown if either argument is null.
     * @stable ICU 2.8
     */
    public abstract int compare(String source, String target);
1224
1225    /**
1226     * Compares the source Object to the target Object.
     *
1228     * @param source the source Object.
1229     * @param target the target Object.
1230     * @return Returns an integer value. Value is less than zero if source is
1231     *         less than target, value is zero if source and target are equal,
1232     *         value is greater than zero if source is greater than target.
     * @throws ClassCastException thrown if either argument cannot be cast to CharSequence.
1234     * @stable ICU 4.2
1235     */
1236    public int compare(Object source, Object target) {
1237        return doCompare((CharSequence)source, (CharSequence)target);
1238    }
1239
1240    /**
1241     * Compares two CharSequences.
1242     * The base class just calls compare(left.toString(), right.toString()).
1243     * Subclasses should instead implement this method and have the String API call this method.
1244     * @internal
1245     * @deprecated This API is ICU internal only.
1246     */
1247    @Deprecated
1248    protected int doCompare(CharSequence left, CharSequence right) {
1249        return compare(left.toString(), right.toString());
1250    }
1251
    /**
     * Transforms the String into a CollationKey suitable for efficient
     * repeated comparison.  The resulting key depends on the collator's
     * rules, strength and decomposition mode.
     *
     * <p>Note that collation keys are often less efficient than simply doing comparison.
     * For more details, see the ICU User Guide.
     *
     * <p>See the CollationKey class documentation for more information.
     *
     * @param source the string to be transformed into a CollationKey.
     * @return the CollationKey for the given String based on this Collator's
     *         collation rules. If the source String is null, a null
     *         CollationKey is returned.
     * @see CollationKey
     * @see #compare(String, String)
     * @see #getRawCollationKey
     * @stable ICU 2.8
     */
    public abstract CollationKey getCollationKey(String source);
1272
    /**
     * {@icu} Returns the simpler form of a CollationKey for the String source following
     * the rules of this Collator and stores the result into the user provided argument
     * key.  If key has an internal byte array of length that's too small for the result,
     * the internal byte array will be grown to the exact required size.
     *
     * <p>Note that collation keys are often less efficient than simply doing comparison.
     * For more details, see the ICU User Guide.
     *
     * @param source the text String to be transformed into a RawCollationKey
     * @param key the RawCollationKey that receives the result, or null to have
     *        a new instance created
     * @return If key is null, a new instance of RawCollationKey will be
     *         created and returned, otherwise the user provided key will be
     *         returned.
     * @see #compare(String, String)
     * @see #getCollationKey
     * @see RawCollationKey
     * @stable ICU 2.8
     */
    public abstract RawCollationKey getRawCollationKey(String source,
                                                       RawCollationKey key);
1293
1294    /**
1295     * {@icu} Sets the variable top to the top of the specified reordering group.
1296     * The variable top determines the highest-sorting character
1297     * which is affected by the alternate handling behavior.
1298     * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
1299     *
1300     * <p>The base class implementation throws an UnsupportedOperationException.
1301     * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
1302     *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
1303     *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
1304     * @return this
1305     * @see #getMaxVariable
1306     * @stable ICU 53
1307     */
1308    public Collator setMaxVariable(int group) {
1309        throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1310    }
1311
1312    /**
1313     * {@icu} Returns the maximum reordering group whose characters are affected by
1314     * the alternate handling behavior.
1315     *
1316     * <p>The base class implementation returns Collator.ReorderCodes.PUNCTUATION.
1317     * @return the maximum variable reordering group.
1318     * @see #setMaxVariable
1319     * @stable ICU 53
1320     */
1321    public int getMaxVariable() {
1322        return Collator.ReorderCodes.PUNCTUATION;
1323    }
1324
    /**
     * {@icu} Sets the variable top to the primary weight of the specified string.
     *
     * <p>Beginning with ICU 53, the variable top is pinned to
     * the top of one of the supported reordering groups,
     * and it must not be beyond the last of those groups.
     * See {@link #setMaxVariable(int)}.
     *
     * @param varTop one or more (if contraction) characters to which the
     *               variable top should be set
     * @return the variable top primary weight
     * @exception IllegalArgumentException
     *                is thrown if the varTop argument is not a valid variable top element.
     *                A variable top element is invalid when
     *                <ul>
     *                <li>it is a contraction that does not exist in the Collation order
     *                <li>the variable top is beyond
     *                    the last reordering group supported by setMaxVariable()
     *                <li>the varTop argument is null or zero in length.
     *                </ul>
     * @see #getVariableTop
     * @see RuleBasedCollator#setAlternateHandlingShifted
     * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
     */
    @Deprecated
    public abstract int setVariableTop(String varTop);
1351
    /**
     * {@icu} Returns the variable top value of a Collator.
     *
     * @return the variable top primary weight
     * @see #getMaxVariable
     * @stable ICU 2.6
     */
    public abstract int getVariableTop();
1360
    /**
     * {@icu} Sets the variable top to the specified primary weight.
     *
     * <p>Beginning with ICU 53, the variable top is pinned to
     * the top of one of the supported reordering groups,
     * and it must not be beyond the last of those groups.
     * See {@link #setMaxVariable(int)}.
     *
     * @param varTop primary weight, as returned by setVariableTop or getVariableTop
     * @see #getVariableTop
     * @see #setVariableTop(String)
     * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
     */
    @Deprecated
    public abstract void setVariableTop(int varTop);
1376
    /**
     * {@icu} Returns the version of this collator object.
     * @return the VersionInfo object associated with this collator
     * @stable ICU 2.8
     */
    public abstract VersionInfo getVersion();
1383
    /**
     * {@icu} Returns the UCA version of this collator object.
     * @return the UCA version, as a VersionInfo object, associated with this collator
     * @stable ICU 2.8
     */
    public abstract VersionInfo getUCAVersion();
1390
1391    /**
1392     * Retrieves the reordering codes for this collator.
1393     * These reordering codes are a combination of UScript codes and ReorderCodes.
1394     * @return a copy of the reordering codes for this collator;
1395     * if none are set then returns an empty array
1396     * @see #setReorderCodes
1397     * @see #getEquivalentReorderCodes
1398     * @see Collator.ReorderCodes
1399     * @see UScript
1400     * @stable ICU 4.8
1401     */
1402    public int[] getReorderCodes()
1403    {
1404        throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1405    }
1406
1407    /**
1408     * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
1409     * codes are grouped and must reorder together.
1410     * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
1411     * for example Hiragana and Katakana.
1412     *
1413     * @param reorderCode The reorder code to determine equivalence for.
1414     * @return the set of all reorder codes in the same group as the given reorder code.
1415     * @see #setReorderCodes
1416     * @see #getReorderCodes
1417     * @see Collator.ReorderCodes
1418     * @see UScript
1419     * @stable ICU 4.8
1420     */
1421    public static int[] getEquivalentReorderCodes(int reorderCode) {
1422        CollationData baseData = CollationRoot.getData();
1423        return baseData.getEquivalentScripts(reorderCode);
1424    }
1425
1426
1427    // Freezable interface implementation -------------------------------------------------
1428
1429    /**
1430     * Determines whether the object has been frozen or not.
1431     *
1432     * <p>An unfrozen Collator is mutable and not thread-safe.
1433     * A frozen Collator is immutable and thread-safe.
1434     *
1435     * @stable ICU 4.8
1436     */
1437    public boolean isFrozen() {
1438        return false;
1439    }
1440
1441    /**
1442     * Freezes the collator.
1443     * @return the collator itself.
1444     * @stable ICU 4.8
1445     */
1446    public Collator freeze() {
1447        throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1448    }
1449
1450    /**
1451     * Provides for the clone operation. Any clone is initially unfrozen.
1452     * @stable ICU 4.8
1453     */
1454    public Collator cloneAsThawed() {
1455        throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
1456    }
1457
1458    /**
1459     * Empty default constructor to make javadocs happy
1460     * @stable ICU 2.4
1461     */
1462    protected Collator()
1463    {
1464    }
1465
    // Debug switch, initialized once from ICUDebug.enabled("collator").
    private static final boolean DEBUG = ICUDebug.enabled("collator");
1467
1468    // -------- BEGIN ULocale boilerplate --------
1469
1470    /**
1471     * {@icu} Returns the locale that was used to create this object, or null.
1472     * This may may differ from the locale requested at the time of
1473     * this object's creation.  For example, if an object is created
1474     * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
1475     * drawn from <tt>en</tt> (the <i>actual</i> locale), and
1476     * <tt>en_US</tt> may be the most specific locale that exists (the
1477     * <i>valid</i> locale).
1478     *
1479     * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
1480     * contains a partial preview implementation.  The * <i>actual</i>
1481     * locale is returned correctly, but the <i>valid</i> locale is
1482     * not, in most cases.
1483     *
1484     * <p>The base class method always returns {@link ULocale#ROOT}.
1485     * Subclasses should override it if appropriate.
1486     *
1487     * @param type type of information requested, either {@link
1488     * com.ibm.icu.util.ULocale#VALID_LOCALE} or {@link
1489     * com.ibm.icu.util.ULocale#ACTUAL_LOCALE}.
1490     * @return the information specified by <i>type</i>, or null if
1491     * this object was not constructed from locale data.
1492     * @see com.ibm.icu.util.ULocale
1493     * @see com.ibm.icu.util.ULocale#VALID_LOCALE
1494     * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
1495     * @draft ICU 2.8 (retain)
1496     * @provisional This API might change or be removed in a future release.
1497     */
1498    public ULocale getLocale(ULocale.Type type) {
1499        return ULocale.ROOT;
1500    }
1501
1502    /**
1503     * Set information about the locales that were used to create this
1504     * object.  If the object was not constructed from locale data,
1505     * both arguments should be set to null.  Otherwise, neither
1506     * should be null.  The actual locale must be at the same level or
1507     * less specific than the valid locale.  This method is intended
1508     * for use by factories or other entities that create objects of
1509     * this class.
1510     *
1511     * <p>The base class method does nothing. Subclasses should override it if appropriate.
1512     *
1513     * @param valid the most specific locale containing any resource
1514     * data, or null
1515     * @param actual the locale containing data used to construct this
1516     * object, or null
1517     * @see com.ibm.icu.util.ULocale
1518     * @see com.ibm.icu.util.ULocale#VALID_LOCALE
1519     * @see com.ibm.icu.util.ULocale#ACTUAL_LOCALE
1520     */
1521    void setLocale(ULocale valid, ULocale actual) {}
1522
1523    // -------- END ULocale boilerplate --------
1524}
1525