1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package java.text;
19
20import java.util.Comparator;
21import java.util.Locale;
22import libcore.icu.ICU;
23import libcore.icu.RuleBasedCollatorICU;
24
25/**
26 * Performs locale-sensitive string comparison. A concrete subclass,
27 * {@link RuleBasedCollator}, allows customization of the collation ordering by
28 * the use of rule sets.
29 * <p>
 * Following the <a href="http://www.unicode.org">Unicode Consortium</a>'s
31 * specifications for the <a
32 * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation
33 * Algorithm (UCA)</a>, there are 4 different levels of strength used in
34 * comparisons:
35 * <ul>
36 * <li>PRIMARY strength: Typically, this is used to denote differences between
37 * base characters (for example, "a" &lt; "b"). It is the strongest difference.
38 * For example, dictionaries are divided into different sections by base
39 * character.
40 * <li>SECONDARY strength: Accents in the characters are considered secondary
41 * differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other differences
42 * between letters can also be considered secondary differences, depending on
43 * the language. A secondary difference is ignored when there is a primary
44 * difference anywhere in the strings.
45 * <li>TERTIARY strength: Upper and lower case differences in characters are
46 * distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
47 * "a&ograve;"). In addition, a variant of a letter differs from the base form
48 * on the tertiary strength (such as "A" and "&#9398;"). Another example is the
49 * difference between large and small Kana. A tertiary difference is ignored
50 * when there is a primary or secondary difference anywhere in the strings.
 * <li>IDENTICAL strength: When all other strengths are equal, the IDENTICAL
 * strength is used as a tiebreaker. The Unicode code point values of the NFD
 * form of each string are compared if no difference was found at the other
 * levels. For example, Hebrew cantillation marks are only distinguished at
 * this strength. This strength should be used sparingly, as strings that
 * differ only in their code point values are extremely rare. Using this
 * strength substantially decreases the performance of both the comparison and
 * the collation key generation APIs. This strength also increases the size of
 * the collation key.
59 * </ul>
60 * <p>
61 * This {@code Collator} deals only with two decomposition modes, the canonical
62 * decomposition mode and one that does not use any decomposition. The
63 * compatibility decomposition mode
64 * {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the
65 * canonical decomposition mode is set, {@code Collator} handles un-normalized
66 * text properly, producing the same results as if the text were normalized in
67 * NFD. If canonical decomposition is turned off, it is the user's
68 * responsibility to ensure that all text is already in the appropriate form
69 * before performing a comparison or before getting a {@link CollationKey}.
70 * <p>
71 * <em>Examples:</em>
72 * <blockquote>
73 *
74 * <pre>
75 * // Get the Collator for US English and set its strength to PRIMARY
76 * Collator usCollator = Collator.getInstance(Locale.US);
77 * usCollator.setStrength(Collator.PRIMARY);
78 * if (usCollator.compare(&quot;abc&quot;, &quot;ABC&quot;) == 0) {
79 *     System.out.println(&quot;Strings are equivalent&quot;);
80 * }
81 * </pre>
82 *
83 * </blockquote>
84 * <p>
85 * The following example shows how to compare two strings using the collator for
86 * the default locale.
87 * <blockquote>
88 *
89 * <pre>
90 * // Compare two strings in the default locale
91 * Collator myCollator = Collator.getInstance();
92 * myCollator.setDecomposition(Collator.NO_DECOMPOSITION);
93 * if (myCollator.compare(&quot;\u00e0\u0325&quot;, &quot;a\u0325\u0300&quot;) != 0) {
94 *     System.out.println(&quot;\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition&quot;);
95 *     myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
96 *     if (myCollator.compare(&quot;\u00e0\u0325&quot;, &quot;a\u0325\u0300&quot;) != 0) {
97 *         System.out.println(&quot;Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition&quot;);
98 *     } else {
99 *         System.out.println(&quot;\u00e0\u0325 is equal to a\u0325\u0300 with decomposition&quot;);
100 *     }
101 * } else {
 *     System.out.println(&quot;Error: \u00e0\u0325 should not be equal to a\u0325\u0300 without decomposition&quot;);
103 * }
104 * </pre>
105 *
106 * </blockquote>
107 *
108 * @see RuleBasedCollator
109 * @see CollationKey
110 */
111public abstract class Collator implements Comparator<Object>, Cloneable {
112    /**
113     * Constant used to specify the decomposition rule.
114     */
115    public static final int NO_DECOMPOSITION = 0;
116
117    /**
118     * Constant used to specify the decomposition rule.
119     */
120    public static final int CANONICAL_DECOMPOSITION = 1;
121
122    /**
123     * Constant used to specify the decomposition rule. This value for
124     * decomposition is not supported.
125     */
126    public static final int FULL_DECOMPOSITION = 2;
127
128    /**
129     * Constant used to specify the collation strength.
130     */
131    public static final int PRIMARY = 0;
132
133    /**
134     * Constant used to specify the collation strength.
135     */
136    public static final int SECONDARY = 1;
137
138    /**
139     * Constant used to specify the collation strength.
140     */
141    public static final int TERTIARY = 2;
142
143    /**
144     * Constant used to specify the collation strength.
145     */
146    public static final int IDENTICAL = 3;
147
148    RuleBasedCollatorICU icuColl;
149
150    Collator(RuleBasedCollatorICU icuColl) {
151        this.icuColl = icuColl;
152    }
153
154    /**
155     * Constructs a new {@code Collator} instance.
156     */
157    protected Collator() {
158        icuColl = new RuleBasedCollatorICU(Locale.getDefault());
159    }
160
161    /**
162     * Returns a new collator with the same decomposition mode and
163     * strength value as this collator.
164     *
165     * @return a shallow copy of this collator.
166     * @see java.lang.Cloneable
167     */
168    @Override
169    public Object clone() {
170        try {
171            Collator clone = (Collator) super.clone();
172            clone.icuColl = (RuleBasedCollatorICU) icuColl.clone();
173            return clone;
174        } catch (CloneNotSupportedException e) {
175            throw new AssertionError(e);
176        }
177    }
178
179    /**
180     * Compares two objects to determine their relative order. The objects must
181     * be strings.
182     *
183     * @param object1
184     *            the first string to compare.
185     * @param object2
186     *            the second string to compare.
187     * @return a negative value if {@code object1} is less than {@code object2},
188     *         0 if they are equal, and a positive value if {@code object1} is
189     *         greater than {@code object2}.
190     * @throws ClassCastException
191     *         if {@code object1} or {@code object2} is not a {@code String}.
192     */
193    public int compare(Object object1, Object object2) {
194        return compare((String) object1, (String) object2);
195    }
196
197    /**
198     * Compares two strings to determine their relative order.
199     *
200     * @param string1
201     *            the first string to compare.
202     * @param string2
203     *            the second string to compare.
204     * @return a negative value if {@code string1} is less than {@code string2},
205     *         0 if they are equal and a positive value if {@code string1} is
206     *         greater than {@code string2}.
207     */
208    public abstract int compare(String string1, String string2);
209
210    /**
211     * Compares this collator with the specified object and indicates if they
212     * are equal.
213     *
214     * @param object
215     *            the object to compare with this object.
216     * @return {@code true} if {@code object} is a {@code Collator} object and
217     *         it has the same strength and decomposition values as this
218     *         collator; {@code false} otherwise.
219     * @see #hashCode
220     */
221    @Override
222    public boolean equals(Object object) {
223        if (!(object instanceof Collator)) {
224            return false;
225        }
226        Collator collator = (Collator) object;
227        return icuColl == null ? collator.icuColl == null : icuColl.equals(collator.icuColl);
228    }
229
230    /**
231     * Compares two strings using the collation rules to determine if they are
232     * equal.
233     *
234     * @param string1
235     *            the first string to compare.
236     * @param string2
237     *            the second string to compare.
238     * @return {@code true} if {@code string1} and {@code string2} are equal
239     *         using the collation rules, false otherwise.
240     */
241    public boolean equals(String string1, String string2) {
242        return compare(string1, string2) == 0;
243    }
244
245    /**
246     * Returns an array of locales for which custom {@code Collator} instances
247     * are available.
248     * <p>Note that Android does not support user-supplied locale service providers.
249     */
250    public static Locale[] getAvailableLocales() {
251        return ICU.getAvailableCollatorLocales();
252    }
253
254    /**
255     * Returns a {@link CollationKey} for the specified string for this collator
256     * with the current decomposition rule and strength value.
257     *
258     * @param string
259     *            the source string that is converted into a collation key.
260     * @return the collation key for {@code string}.
261     */
262    public abstract CollationKey getCollationKey(String string);
263
264    /**
265     * Returns the decomposition rule for this collator.
266     *
267     * @return the decomposition rule, either {@code NO_DECOMPOSITION} or
268     *         {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} is
269     *         not supported.
270     */
271    public int getDecomposition() {
272        return decompositionMode_ICU_Java(icuColl.getDecomposition());
273    }
274
275    /**
276     * Returns a {@code Collator} instance which is appropriate for the user's default
277     * {@code Locale}.
278     * See "<a href="../util/Locale.html#default_locale">Be wary of the default locale</a>".
279     */
280    public static Collator getInstance() {
281        return getInstance(Locale.getDefault());
282    }
283
284    /**
285     * Returns a {@code Collator} instance which is appropriate for {@code locale}.
286     */
287    public static Collator getInstance(Locale locale) {
288        if (locale == null) {
289            throw new NullPointerException("locale == null");
290        }
291        return new RuleBasedCollator(new RuleBasedCollatorICU(locale));
292    }
293
294    /**
295     * Returns the strength value for this collator.
296     *
297     * @return the strength value, either PRIMARY, SECONDARY, TERTIARY or
298     *         IDENTICAL.
299     */
300    public int getStrength() {
301        return strength_ICU_Java(icuColl.getStrength());
302    }
303
304    @Override
305    public abstract int hashCode();
306
307    /**
308     * Sets the decomposition rule for this collator.
309     *
310     * @param value
311     *            the decomposition rule, either {@code NO_DECOMPOSITION} or
312     *            {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION}
313     *            is not supported.
314     * @throws IllegalArgumentException
315     *            if the provided decomposition rule is not valid. This includes
316     *            {@code FULL_DECOMPOSITION}.
317     */
318    public void setDecomposition(int value) {
319        icuColl.setDecomposition(decompositionMode_Java_ICU(value));
320    }
321
322    /**
323     * Sets the strength value for this collator.
324     *
325     * @param value
326     *            the strength value, either PRIMARY, SECONDARY, TERTIARY, or
327     *            IDENTICAL.
328     * @throws IllegalArgumentException
329     *            if the provided strength value is not valid.
330     */
331    public void setStrength(int value) {
332        icuColl.setStrength(strength_Java_ICU(value));
333    }
334
335    private int decompositionMode_Java_ICU(int mode) {
336        switch (mode) {
337        case Collator.CANONICAL_DECOMPOSITION:
338            return RuleBasedCollatorICU.VALUE_ON;
339        case Collator.NO_DECOMPOSITION:
340            return RuleBasedCollatorICU.VALUE_OFF;
341        }
342        throw new IllegalArgumentException("Bad mode: " + mode);
343    }
344
345    private int decompositionMode_ICU_Java(int mode) {
346        int javaMode = mode;
347        switch (mode) {
348        case RuleBasedCollatorICU.VALUE_OFF:
349            javaMode = Collator.NO_DECOMPOSITION;
350            break;
351        case RuleBasedCollatorICU.VALUE_ON:
352            javaMode = Collator.CANONICAL_DECOMPOSITION;
353            break;
354        }
355        return javaMode;
356    }
357
358    private int strength_Java_ICU(int value) {
359        switch (value) {
360        case Collator.PRIMARY:
361            return RuleBasedCollatorICU.VALUE_PRIMARY;
362        case Collator.SECONDARY:
363            return RuleBasedCollatorICU.VALUE_SECONDARY;
364        case Collator.TERTIARY:
365            return RuleBasedCollatorICU.VALUE_TERTIARY;
366        case Collator.IDENTICAL:
367            return RuleBasedCollatorICU.VALUE_IDENTICAL;
368        }
369        throw new IllegalArgumentException("Bad strength: " + value);
370    }
371
372    private int strength_ICU_Java(int value) {
373        int javaValue = value;
374        switch (value) {
375        case RuleBasedCollatorICU.VALUE_PRIMARY:
376            javaValue = Collator.PRIMARY;
377            break;
378        case RuleBasedCollatorICU.VALUE_SECONDARY:
379            javaValue = Collator.SECONDARY;
380            break;
381        case RuleBasedCollatorICU.VALUE_TERTIARY:
382            javaValue = Collator.TERTIARY;
383            break;
384        case RuleBasedCollatorICU.VALUE_IDENTICAL:
385            javaValue = Collator.IDENTICAL;
386            break;
387        }
388        return javaValue;
389    }
390}
391