1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18// BEGIN android-note
19// The icu implementation used was changed from icu4j to icu4jni.
20// END android-note
21
22package java.text;
23
24import java.security.AccessController;
25import java.security.PrivilegedAction;
26import java.util.Comparator;
27import java.util.Locale;
28import java.util.Vector;
29
30/**
31 * Performs locale-sensitive string comparison. A concrete subclass,
32 * {@link RuleBasedCollator}, allows customization of the collation ordering by
33 * the use of rule sets.
34 * <p>
35 * Following the <a href=http://www.unicode.org>Unicode Consortium</a>'s
36 * specifications for the <a
37 * href="http://www.unicode.org/unicode/reports/tr10/"> Unicode Collation
38 * Algorithm (UCA)</a>, there are 4 different levels of strength used in
39 * comparisons:
40 * <ul>
41 * <li>PRIMARY strength: Typically, this is used to denote differences between
42 * base characters (for example, "a" &lt; "b"). It is the strongest difference.
43 * For example, dictionaries are divided into different sections by base
44 * character.
45 * <li>SECONDARY strength: Accents in the characters are considered secondary
46 * differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other differences
47 * between letters can also be considered secondary differences, depending on
48 * the language. A secondary difference is ignored when there is a primary
49 * difference anywhere in the strings.
50 * <li>TERTIARY strength: Upper and lower case differences in characters are
51 * distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
52 * "a&ograve;"). In addition, a variant of a letter differs from the base form
53 * on the tertiary strength (such as "A" and "&#9398;"). Another example is the
54 * difference between large and small Kana. A tertiary difference is ignored
55 * when there is a primary or secondary difference anywhere in the strings.
56 * <li>IDENTICAL strength: When all other strengths are equal, the IDENTICAL
57 * strength is used as a tiebreaker. The Unicode code point values of the NFD
58 * form of each string are compared, just in case there is no difference. For
59 * example, Hebrew cantellation marks are only distinguished at this strength.
60 * This strength should be used sparingly, as only code point value differences
61 * between two strings are an extremely rare occurrence. Using this strength
62 * substantially decreases the performance for both comparison and collation key
63 * generation APIs. This strength also increases the size of the collation key.
64 * </ul>
65 * <p>
66 * This {@code Collator} deals only with two decomposition modes, the canonical
67 * decomposition mode and one that does not use any decomposition. The
68 * compatibility decomposition mode
69 * {@code java.text.Collator.FULL_DECOMPOSITION} is not supported here. If the
70 * canonical decomposition mode is set, {@code Collator} handles un-normalized
71 * text properly, producing the same results as if the text were normalized in
72 * NFD. If canonical decomposition is turned off, it is the user's
73 * responsibility to ensure that all text is already in the appropriate form
74 * before performing a comparison or before getting a {@link CollationKey}.
75 * <p>
76 * <em>Examples:</em>
77 * <blockquote>
78 *
79 * <pre>
80 * // Get the Collator for US English and set its strength to PRIMARY
81 * Collator usCollator = Collator.getInstance(Locale.US);
82 * usCollator.setStrength(Collator.PRIMARY);
83 * if (usCollator.compare(&quot;abc&quot;, &quot;ABC&quot;) == 0) {
84 *     System.out.println(&quot;Strings are equivalent&quot;);
85 * }
86 * </pre>
87 *
88 * </blockquote>
89 * <p>
90 * The following example shows how to compare two strings using the collator for
91 * the default locale.
92 * <blockquote>
93 *
94 * <pre>
95 * // Compare two strings in the default locale
96 * Collator myCollator = Collator.getInstance();
97 * myCollator.setDecomposition(Collator.NO_DECOMPOSITION);
98 * if (myCollator.compare(&quot;\u00e0\u0325&quot;, &quot;a\u0325\u0300&quot;) != 0) {
99 *     System.out.println(&quot;\u00e0\u0325 is not equal to a\u0325\u0300 without decomposition&quot;);
100 *     myCollator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
101 *     if (myCollator.compare(&quot;\u00e0\u0325&quot;, &quot;a\u0325\u0300&quot;) != 0) {
102 *         System.out.println(&quot;Error: \u00e0\u0325 should be equal to a\u0325\u0300 with decomposition&quot;);
103 *     } else {
104 *         System.out.println(&quot;\u00e0\u0325 is equal to a\u0325\u0300 with decomposition&quot;);
105 *     }
106 * } else {
107 *     System.out.println(&quot;Error: \u00e0\u0325 should be not equal to a\u0325\u0300 without decomposition&quot;);
108 * }
109 * </pre>
110 *
111 * </blockquote>
112 *
113 * @see RuleBasedCollator
114 * @see CollationKey
115 */
116public abstract class Collator implements Comparator<Object>, Cloneable {
117
118    static final int EQUAL = 0;
119
120    static final int GREATER = 1;
121
122    static final int LESS = -1;
123
124    /**
125     * Constant used to specify the decomposition rule.
126     */
127    public static final int NO_DECOMPOSITION = 0;
128
129    /**
130     * Constant used to specify the decomposition rule.
131     */
132    public static final int CANONICAL_DECOMPOSITION = 1;
133
134    /**
135     * Constant used to specify the decomposition rule. This value for
136     * decomposition is not supported.
137     */
138    public static final int FULL_DECOMPOSITION = 2;
139
140    /**
141     * Constant used to specify the collation strength.
142     */
143    public static final int PRIMARY = 0;
144
145    /**
146     * Constant used to specify the collation strength.
147     */
148    public static final int SECONDARY = 1;
149
150    /**
151     * Constant used to specify the collation strength.
152     */
153    public static final int TERTIARY = 2;
154
155    /**
156     * Constant used to specify the collation strength.
157     */
158    public static final int IDENTICAL = 3;
159
160    private static int CACHE_SIZE;
161
162    static {
163        // CACHE_SIZE includes key and value, so needs to be double
164        String cacheSize = AccessController
165                .doPrivileged(new PrivilegedAction<String>() {
166                    public String run() {
167                        return System.getProperty("collator.cache"); //$NON-NLS-1$
168                    }
169                });
170        if (cacheSize != null) {
171            try {
172                CACHE_SIZE = Integer.parseInt(cacheSize);
173            } catch (NumberFormatException e) {
174                CACHE_SIZE = 6;
175            }
176        } else {
177            CACHE_SIZE = 6;
178        }
179    }
180
181    private static Vector<Collator> cache = new Vector<Collator>(CACHE_SIZE);
182
183    // Wrapper class of ICU4JNI Collator
184    com.ibm.icu4jni.text.Collator icuColl;
185
186    Collator(com.ibm.icu4jni.text.Collator wrapper) {
187        this.icuColl = wrapper;
188    }
189
190    /**
191     * Constructs a new {@code Collator} instance.
192     */
193    protected Collator() {
194        super();
195        // BEGIN android-added
196        icuColl = com.ibm.icu4jni.text.Collator.getInstance(Locale.getDefault());
197        // END android-added
198    }
199
200    /**
201     * Returns a new collator with the same decomposition mode and
202     * strength value as this collator.
203     *
204     * @return a shallow copy of this collator.
205     * @see java.lang.Cloneable
206     */
207    @Override
208    public Object clone() {
209        try {
210            Collator clone = (Collator) super.clone();
211            clone.icuColl = (com.ibm.icu4jni.text.Collator) this.icuColl.clone();
212            return clone;
213        } catch (CloneNotSupportedException e) {
214            throw new AssertionError(e); // android-changed
215        }
216    }
217
218    /**
219     * Compares two objects to determine their relative order. The objects must
220     * be strings.
221     *
222     * @param object1
223     *            the first string to compare.
224     * @param object2
225     *            the second string to compare.
226     * @return a negative value if {@code object1} is less than {@code object2},
227     *         0 if they are equal, and a positive value if {@code object1} is
228     *         greater than {@code object2}.
229     * @throws ClassCastException
230     *         if {@code object1} or {@code object2} is not a {@code String}.
231     */
232    public int compare(Object object1, Object object2) {
233        return compare((String) object1, (String) object2);
234    }
235
236    /**
237     * Compares two strings to determine their relative order.
238     *
239     * @param string1
240     *            the first string to compare.
241     * @param string2
242     *            the second string to compare.
243     * @return a negative value if {@code string1} is less than {@code string2},
244     *         0 if they are equal and a positive value if {@code string1} is
245     *         greater than {@code string2}.
246     */
247    public abstract int compare(String string1, String string2);
248
249    /**
250     * Compares this collator with the specified object and indicates if they
251     * are equal.
252     *
253     * @param object
254     *            the object to compare with this object.
255     * @return {@code true} if {@code object} is a {@code Collator} object and
256     *         it has the same strength and decomposition values as this
257     *         collator; {@code false} otherwise.
258     * @see #hashCode
259     */
260    @Override
261    public boolean equals(Object object) {
262        if (!(object instanceof Collator)) {
263            return false;
264        }
265        Collator collator = (Collator) object;
266        return this.icuColl == null ? collator.icuColl == null : this.icuColl
267                .equals(collator.icuColl);
268    }
269
270    /**
271     * Compares two strings using the collation rules to determine if they are
272     * equal.
273     *
274     * @param string1
275     *            the first string to compare.
276     * @param string2
277     *            the second string to compare.
278     * @return {@code true} if {@code string1} and {@code string2} are equal
279     *         using the collation rules, false otherwise.
280     */
281    public boolean equals(String string1, String string2) {
282        return compare(string1, string2) == 0;
283    }
284
285    /**
286     * Gets the list of installed {@link java.util.Locale} objects which support
287     * {@code Collator}.
288     *
289     * @return an array of {@code Locale}.
290     */
291    public static Locale[] getAvailableLocales() {
292        return com.ibm.icu4jni.text.Collator.getAvailableLocales();
293    }
294
295    /**
296     * Returns a {@link CollationKey} for the specified string for this collator
297     * with the current decomposition rule and strength value.
298     *
299     * @param string
300     *            the source string that is converted into a collation key.
301     * @return the collation key for {@code string}.
302     */
303    public abstract CollationKey getCollationKey(String string);
304
305    /**
306     * Returns the decomposition rule for this collator.
307     *
308     * @return the decomposition rule, either {@code NO_DECOMPOSITION} or
309     *         {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION} is
310     *         not supported.
311     */
312    public int getDecomposition() {
313        return decompositionMode_ICU_Java(this.icuColl.getDecomposition());
314    }
315
316    /**
317     * Returns a {@code Collator} instance which is appropriate for the default
318     * {@code Locale}.
319     *
320     * @return the collator for the default locale.
321     */
322    public static Collator getInstance() {
323        return getInstance(Locale.getDefault());
324    }
325
326    /**
327     * Returns a {@code Collator} instance which is appropriate for the
328     * specified {@code Locale}.
329     *
330     * @param locale
331     *            the locale.
332     * @return the collator for {@code locale}.
333     */
334    public static Collator getInstance(Locale locale) {
335        String key = locale.toString();
336        for (int i = cache.size() - 1; i >= 0; i -= 2) {
337            if (cache.elementAt(i).equals(key)) {
338                return (Collator) (cache.elementAt(i - 1)).clone();
339            }
340        }
341
342        return new RuleBasedCollator(com.ibm.icu4jni.text.Collator
343                .getInstance(locale));
344    }
345
346    /**
347     * Returns the strength value for this collator.
348     *
349     * @return the strength value, either PRIMARY, SECONDARY, TERTIARY or
350     *         IDENTICAL.
351     */
352    public int getStrength() {
353        return strength_ICU_Java(this.icuColl.getStrength());
354    }
355
356    /**
357     * Returns an integer hash code for this collator.
358     *
359     * @return this collator's hash code.
360     *
361     * @see #equals(Object)
362     * @see #equals(String, String)
363     */
364    @Override
365    public abstract int hashCode();
366
367    /**
368     * Sets the decomposition rule for this collator.
369     *
370     * @param value
371     *            the decomposition rule, either {@code NO_DECOMPOSITION} or
372     *            {@code CANONICAL_DECOMPOSITION}. {@code FULL_DECOMPOSITION}
373     *            is not supported.
374     * @throws IllegalArgumentException
375     *            if the provided decomposition rule is not valid. This includes
376     *            {@code FULL_DECOMPOSITION}.
377     */
378    public void setDecomposition(int value) {
379        this.icuColl.setDecomposition(decompositionMode_Java_ICU(value));
380    }
381
382    /**
383     * Sets the strength value for this collator.
384     *
385     * @param value
386     *            the strength value, either PRIMARY, SECONDARY, TERTIARY, or
387     *            IDENTICAL.
388     * @throws IllegalArgumentException
389     *            if the provided strength value is not valid.
390     */
391    public void setStrength(int value) {
392        this.icuColl.setStrength(strength_Java_ICU(value));
393    }
394
395    private int decompositionMode_Java_ICU(int mode) {
396        int icuDecomp = mode;
397        switch (mode) {
398            case Collator.CANONICAL_DECOMPOSITION:
399                icuDecomp = com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION;
400                break;
401            case Collator.NO_DECOMPOSITION:
402                icuDecomp = com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION;
403                break;
404        }
405        return icuDecomp;
406    }
407
408    private int decompositionMode_ICU_Java(int mode) {
409        int javaMode = mode;
410        switch (mode) {
411            case com.ibm.icu4jni.text.Collator.NO_DECOMPOSITION:
412                javaMode = Collator.NO_DECOMPOSITION;
413                break;
414            case com.ibm.icu4jni.text.Collator.CANONICAL_DECOMPOSITION:
415                javaMode = Collator.CANONICAL_DECOMPOSITION;
416                break;
417        }
418        return javaMode;
419    }
420
421    private int strength_Java_ICU(int value) {
422        int icuValue = value;
423        switch (value) {
424            case Collator.PRIMARY:
425                icuValue = com.ibm.icu4jni.text.Collator.PRIMARY;
426                break;
427            case Collator.SECONDARY:
428                icuValue = com.ibm.icu4jni.text.Collator.SECONDARY;
429                break;
430            case Collator.TERTIARY:
431                icuValue = com.ibm.icu4jni.text.Collator.TERTIARY;
432                break;
433            case Collator.IDENTICAL:
434                icuValue = com.ibm.icu4jni.text.Collator.IDENTICAL;
435                break;
436        }
437        return icuValue;
438
439    }
440
441    private int strength_ICU_Java(int value) {
442        int javaValue = value;
443        switch (value) {
444            case com.ibm.icu4jni.text.Collator.PRIMARY:
445                javaValue = Collator.PRIMARY;
446                break;
447            case com.ibm.icu4jni.text.Collator.SECONDARY:
448                javaValue = Collator.SECONDARY;
449                break;
450            case com.ibm.icu4jni.text.Collator.TERTIARY:
451                javaValue = Collator.TERTIARY;
452                break;
453            case com.ibm.icu4jni.text.Collator.IDENTICAL:
454                javaValue = Collator.IDENTICAL;
455                break;
456        }
457        return javaValue;
458    }
459}
460