1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4*******************************************************************************
5*
6*   Copyright (C) 1996-2016, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*
11* CollationLoader.java, ported from ucol_res.cpp
12*
13* created by: Markus W. Scherer
14*/
15
16package com.ibm.icu.impl.coll;
17
18import java.io.IOException;
19import java.nio.ByteBuffer;
20import java.util.MissingResourceException;
21
22import com.ibm.icu.impl.ICUData;
23import com.ibm.icu.impl.ICUResourceBundle;
24import com.ibm.icu.util.ICUUncheckedIOException;
25import com.ibm.icu.util.Output;
26import com.ibm.icu.util.ULocale;
27import com.ibm.icu.util.UResourceBundle;
28
29/**
30 * Convenience string denoting the Collation data tree
31 */
32public final class CollationLoader {
33
34    // not implemented, all methods are static
35    private CollationLoader() {
36    }
37
38    private static volatile String rootRules = null;
39
40    private static void loadRootRules() {
41        if (rootRules != null) {
42            return;
43        }
44        synchronized(CollationLoader.class) {
45            if (rootRules == null) {
46                UResourceBundle rootBundle = UResourceBundle.getBundleInstance(
47                        ICUData.ICU_COLLATION_BASE_NAME, ULocale.ROOT);
48                rootRules = rootBundle.getString("UCARules");
49            }
50        }
51    }
52
53    // C++: static void appendRootRules(UnicodeString &s)
54    public static String getRootRules() {
55        loadRootRules();
56        return rootRules;
57    }
58
59    /**
60     * Simpler/faster methods for ASCII than ones based on Unicode data.
61     * TODO: There should be code like this somewhere already??
62     */
63    private static final class ASCII {
64        static String toLowerCase(String s) {
65            for (int i = 0; i < s.length(); ++i) {
66                char c = s.charAt(i);
67                if ('A' <= c && c <= 'Z') {
68                    StringBuilder sb = new StringBuilder(s.length());
69                    sb.append(s, 0, i).append((char)(c + 0x20));
70                    while (++i < s.length()) {
71                        c = s.charAt(i);
72                        if ('A' <= c && c <= 'Z') { c = (char)(c + 0x20); }
73                        sb.append(c);
74                    }
75                    return sb.toString();
76                }
77            }
78            return s;
79        }
80    }
81
82    static String loadRules(ULocale locale, String collationType) {
83        UResourceBundle bundle = UResourceBundle.getBundleInstance(
84                ICUData.ICU_COLLATION_BASE_NAME, locale);
85        UResourceBundle data = ((ICUResourceBundle)bundle).getWithFallback(
86                "collations/" + ASCII.toLowerCase(collationType));
87        String rules = data.getString("Sequence");
88        return rules;
89    }
90
91    private static final UResourceBundle findWithFallback(UResourceBundle table, String entryName) {
92        return ((ICUResourceBundle)table).findWithFallback(entryName);
93    }
94
95    public static CollationTailoring loadTailoring(ULocale locale, Output<ULocale> outValidLocale) {
96
97        // Java porting note: ICU4J getWithFallback/getStringWithFallback currently does not
98        // work well when alias table is involved in a resource path, unless full path is specified.
99        // For now, collation resources does not contain such data, so the code below should work fine.
100
101        CollationTailoring root = CollationRoot.getRoot();
102        String localeName = locale.getName();
103        if (localeName.length() == 0 || localeName.equals("root")) {
104            outValidLocale.value = ULocale.ROOT;
105            return root;
106        }
107
108        UResourceBundle bundle = null;
109        try {
110            bundle = ICUResourceBundle.getBundleInstance(
111                    ICUData.ICU_COLLATION_BASE_NAME, locale,
112                    ICUResourceBundle.OpenType.LOCALE_ROOT);
113        } catch (MissingResourceException e) {
114            outValidLocale.value = ULocale.ROOT;
115            return root;
116        }
117
118        ULocale validLocale = bundle.getULocale();
119        // Normalize the root locale. See
120        // http://bugs.icu-project.org/trac/ticket/10715
121        String validLocaleName = validLocale.getName();
122        if (validLocaleName.length() == 0 || validLocaleName.equals("root")) {
123            validLocale = ULocale.ROOT;
124        }
125        outValidLocale.value = validLocale;
126
127        // There are zero or more tailorings in the collations table.
128        UResourceBundle collations;
129        try {
130            collations = bundle.get("collations");
131            if (collations == null) {
132                return root;
133            }
134        } catch(MissingResourceException ignored) {
135            return root;
136        }
137
138        // Fetch the collation type from the locale ID and the default type from the data.
139        String type = locale.getKeywordValue("collation");
140        String defaultType = "standard";
141
142        String defT = ((ICUResourceBundle)collations).findStringWithFallback("default");
143        if (defT != null) {
144            defaultType = defT;
145        }
146
147        if (type == null || type.equals("default")) {
148            type = defaultType;
149        } else {
150            type = ASCII.toLowerCase(type);
151        }
152
153        // Load the collations/type tailoring, with type fallback.
154
155        // Java porting note: typeFallback is used for setting U_USING_DEFAULT_WARNING in
156        // ICU4C, but not used by ICU4J
157
158        // boolean typeFallback = false;
159        UResourceBundle data = findWithFallback(collations, type);
160        if (data == null &&
161                type.length() > 6 && type.startsWith("search")) {
162            // fall back from something like "searchjl" to "search"
163            // typeFallback = true;
164            type = "search";
165            data = findWithFallback(collations, type);
166        }
167
168        if (data == null && !type.equals(defaultType)) {
169            // fall back to the default type
170            // typeFallback = true;
171            type = defaultType;
172            data = findWithFallback(collations, type);
173        }
174
175        if (data == null && !type.equals("standard")) {
176            // fall back to the "standard" type
177            // typeFallback = true;
178            type = "standard";
179            data = findWithFallback(collations, type);
180        }
181
182        if (data == null) {
183            return root;
184        }
185
186        // Is this the same as the root collator? If so, then use that instead.
187        ULocale actualLocale = data.getULocale();
188        // http://bugs.icu-project.org/trac/ticket/10715 ICUResourceBundle(root).getULocale() != ULocale.ROOT
189        // Therefore not just if (actualLocale.equals(ULocale.ROOT) && type.equals("standard")) {
190        String actualLocaleName = actualLocale.getName();
191        if (actualLocaleName.length() == 0 || actualLocaleName.equals("root")) {
192            actualLocale = ULocale.ROOT;
193            if (type.equals("standard")) {
194                return root;
195            }
196        }
197
198        CollationTailoring t = new CollationTailoring(root.settings);
199        t.actualLocale = actualLocale;
200
201        // deserialize
202        UResourceBundle binary = data.get("%%CollationBin");
203        ByteBuffer inBytes = binary.getBinary();
204        try {
205            CollationDataReader.read(root, inBytes, t);
206        } catch (IOException e) {
207            throw new ICUUncheckedIOException("Failed to load collation tailoring data for locale:"
208                    + actualLocale + " type:" + type, e);
209        }
210
211        // Try to fetch the optional rules string.
212        try {
213            t.setRulesResource(data.get("Sequence"));
214        } catch(MissingResourceException ignored) {
215        }
216
217        // Set the collation types on the informational locales,
218        // except when they match the default types (for brevity and backwards compatibility).
219        // For the valid locale, suppress the default type.
220        if (!type.equals(defaultType)) {
221            outValidLocale.value = validLocale.setKeywordValue("collation", type);
222        }
223
224        // For the actual locale, suppress the default type *according to the actual locale*.
225        // For example, zh has default=pinyin and contains all of the Chinese tailorings.
226        // zh_Hant has default=stroke but has no other data.
227        // For the valid locale "zh_Hant" we need to suppress stroke.
228        // For the actual locale "zh" we need to suppress pinyin instead.
229        if (!actualLocale.equals(validLocale)) {
230            // Opening a bundle for the actual locale should always succeed.
231            UResourceBundle actualBundle = UResourceBundle.getBundleInstance(
232                    ICUData.ICU_COLLATION_BASE_NAME, actualLocale);
233            defT = ((ICUResourceBundle)actualBundle).findStringWithFallback("collations/default");
234            if (defT != null) {
235                defaultType = defT;
236            }
237        }
238
239        if (!type.equals(defaultType)) {
240            t.actualLocale = t.actualLocale.setKeywordValue("collation", type);
241        }
242
243        // if (typeFallback) {
244        //     ICU4C implementation sets U_USING_DEFAULT_WARNING here
245        // }
246
247        return t;
248    }
249}
250