1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 2015-2016, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10package android.icu.impl.locale;
11
12import java.util.Arrays;
13import java.util.EnumSet;
14import java.util.HashSet;
15import java.util.Set;
16import java.util.regex.Pattern;
17
18import android.icu.impl.ValidIdentifiers;
19import android.icu.impl.ValidIdentifiers.Datasubtype;
20import android.icu.impl.ValidIdentifiers.Datatype;
21import android.icu.impl.locale.KeyTypeData.ValueType;
22import android.icu.util.IllformedLocaleException;
23import android.icu.util.Output;
24import android.icu.util.ULocale;
25
26/**
27 * @author markdavis
28 * @hide Only a subset of ICU is exposed in Android
29 *
30 */
31public class LocaleValidityChecker {
32    private final Set<Datasubtype> datasubtypes;
33    private final boolean allowsDeprecated;
34    public static class Where {
35        public Datatype fieldFailure;
36        public String codeFailure;
37
38        public boolean set(Datatype datatype, String code) {
39            fieldFailure = datatype;
40            codeFailure = code;
41            return false;
42        }
43        @Override
44        public String toString() {
45            return fieldFailure == null ? "OK" : "{" + fieldFailure + ", " + codeFailure + "}";
46        }
47    }
48
49    public LocaleValidityChecker(Set<Datasubtype> datasubtypes) {
50        this.datasubtypes = EnumSet.copyOf(datasubtypes);
51        allowsDeprecated = datasubtypes.contains(Datasubtype.deprecated);
52    }
53
54    public LocaleValidityChecker(Datasubtype... datasubtypes) {
55        this.datasubtypes = EnumSet.copyOf(Arrays.asList(datasubtypes));
56        allowsDeprecated = this.datasubtypes.contains(Datasubtype.deprecated);
57    }
58
59    /**
60     * @return the datasubtypes
61     */
62    public Set<Datasubtype> getDatasubtypes() {
63        return EnumSet.copyOf(datasubtypes);
64    }
65
66    static Pattern SEPARATOR = Pattern.compile("[-_]");
67
68    @SuppressWarnings("unused")
69    private static final Pattern VALID_X = Pattern.compile("[a-zA-Z0-9]{2,8}(-[a-zA-Z0-9]{2,8})*");
70
71    public boolean isValid(ULocale locale, Where where) {
72        where.set(null, null);
73        final String language = locale.getLanguage();
74        final String script = locale.getScript();
75        final String region = locale.getCountry();
76        final String variantString = locale.getVariant();
77        final Set<Character> extensionKeys = locale.getExtensionKeys();
78        //        if (language.isEmpty()) {
79        //            // the only case where this is valid is if there is only an 'x' extension string
80        //            if (!script.isEmpty() || !region.isEmpty() || variantString.isEmpty()
81        //                    || extensionKeys.size() != 1 || !extensionKeys.contains('x')) {
82        //                return where.set(Datatype.x, "Null language only with x-...");
83        //            }
84        //            return true; // for x string, wellformedness = valid
85        //        }
86        if (!isValid(Datatype.language, language, where)) {
87            // special case x
88            if (language.equals("x")) {
89                where.set(null, null); // for x, well-formed == valid
90                return true;
91            }
92            return false;
93        }
94        if (!isValid(Datatype.script, script, where)) return false;
95        if (!isValid(Datatype.region, region, where)) return false;
96        if (!variantString.isEmpty()) {
97            for (String variant : SEPARATOR.split(variantString)) {
98                if (!isValid(Datatype.variant, variant, where)) return false;
99            }
100        }
101        for (Character c : extensionKeys) {
102            try {
103                Datatype datatype = Datatype.valueOf(c+"");
104                switch (datatype) {
105                case x:
106                    return true; // if it is syntactic (checked by ULocale) it is valid
107                case t:
108                case u:
109                    if (!isValidU(locale, datatype, locale.getExtension(c), where)) return false;
110                    break;
111                }
112            } catch (Exception e) {
113                return where.set(Datatype.illegal, c+"");
114            }
115        }
116        return true;
117    }
118
119    // TODO combine this with the KeyTypeData.SpecialType, and get it from the type, not the key
120    enum SpecialCase {
121        normal, anything, reorder, codepoints, subdivision, rgKey;
122        static SpecialCase get(String key) {
123            if (key.equals("kr")) {
124                return reorder;
125            } else if (key.equals("vt")) {
126                return codepoints;
127            } else if (key.equals("sd")) {
128                return subdivision;
129            } else if (key.equals("rg")) {
130                return rgKey;
131            } else if (key.equals("x0")) {
132                return anything;
133            } else {
134                return normal;
135            }
136        }
137    }
138
139    /**
140     * @param locale
141     * @param datatype
142     * @param extension
143     * @param where
144     * @return
145     */
146    private boolean isValidU(ULocale locale, Datatype datatype, String extensionString, Where where) {
147        String key = "";
148        int typeCount = 0;
149        ValueType valueType = null;
150        SpecialCase specialCase = null;
151        StringBuilder prefix = new StringBuilder();
152        Set<String> seen = new HashSet<String>();
153
154        StringBuilder tBuffer = datatype == Datatype.t ? new StringBuilder() : null;
155
156        // TODO: is empty -u- valid?
157
158        for (String subtag : SEPARATOR.split(extensionString)) {
159            if (subtag.length() == 2
160                    && (tBuffer == null || subtag.charAt(1) <= '9')) {
161                // if we have accumulated a t buffer, check that first
162                if (tBuffer != null) {
163                    // Check t buffer. Empty after 't' is ok.
164                    if (tBuffer.length() != 0 && !isValidLocale(tBuffer.toString(),where)) {
165                        return false;
166                    }
167                    tBuffer = null;
168                }
169                key = KeyTypeData.toBcpKey(subtag);
170                if (key == null) {
171                    return where.set(datatype, subtag);
172                }
173                if (!allowsDeprecated && KeyTypeData.isDeprecated(key)) {
174                    return where.set(datatype, key);
175                }
176                valueType = KeyTypeData.getValueType(key);
177                specialCase = SpecialCase.get(key);
178                typeCount = 0;
179            } else if (tBuffer != null) {
180                if (tBuffer.length() != 0) {
181                    tBuffer.append('-');
182                }
183                tBuffer.append(subtag);
184            } else {
185                ++typeCount;
186                switch (valueType) {
187                case single:
188                    if (typeCount > 1) {
189                        return where.set(datatype, key+"-"+subtag);
190                    }
191                    break;
192                case incremental:
193                    if (typeCount == 1) {
194                        prefix.setLength(0);
195                        prefix.append(subtag);
196                    } else {
197                        prefix.append('-').append(subtag);
198                        subtag = prefix.toString();
199                    }
200                    break;
201                case multiple:
202                    if (typeCount == 1) {
203                        seen.clear();
204                    }
205                    break;
206                }
207                switch (specialCase) {
208                case anything:
209                    continue;
210                case codepoints:
211                    try {
212                        if (Integer.parseInt(subtag,16) > 0x10FFFF) {
213                            return where.set(datatype, key+"-"+subtag);
214                        }
215                    } catch (NumberFormatException e) {
216                        return where.set(datatype, key+"-"+subtag);
217                    }
218                    continue;
219                case reorder:
220                    boolean newlyAdded = seen.add(subtag.equals("zzzz") ? "others" : subtag);
221                    if (!newlyAdded || !isScriptReorder(subtag)) {
222                        return where.set(datatype, key+"-"+subtag);
223                    }
224                    continue;
225                case subdivision:
226                    if (!isSubdivision(locale, subtag)) {
227                        return where.set(datatype, key+"-"+subtag);
228                    }
229                    continue;
230                case rgKey:
231                    if (subtag.length() < 6 || !subtag.endsWith("zzzz")) {
232                        return where.set(datatype, subtag);
233                    }
234                    if (!isValid(Datatype.region, subtag.substring(0,subtag.length()-4), where)) {
235                        return false;
236                    }
237                    continue;
238                }
239
240                // en-u-sd-usca
241                // en-US-u-sd-usca
242                Output<Boolean> isKnownKey = new Output<Boolean>();
243                Output<Boolean> isSpecialType = new Output<Boolean>();
244                String type = KeyTypeData.toBcpType(key, subtag, isKnownKey, isSpecialType);
245                if (type == null) {
246                    return where.set(datatype, key+"-"+subtag);
247                }
248                if (!allowsDeprecated && KeyTypeData.isDeprecated(key, subtag)) {
249                    return where.set(datatype, key+"-"+subtag);
250                }
251            }
252        }
253        // Check t buffer. Empty after 't' is ok.
254        if (tBuffer != null && tBuffer.length() != 0 && !isValidLocale(tBuffer.toString(),where)) {
255            return false;
256        }
257        return true;
258    }
259
260    /**
261     * @param locale
262     * @param subtag
263     * @return
264     */
265    private boolean isSubdivision(ULocale locale, String subtag) {
266        // First check if the subtag is valid
267        if (subtag.length() < 3) {
268            return false;
269        }
270        String region = subtag.substring(0, subtag.charAt(0) <= '9' ? 3 : 2);
271        String subdivision = subtag.substring(region.length());
272        if (ValidIdentifiers.isValid(Datatype.subdivision, datasubtypes, region, subdivision) == null) {
273            return false;
274        }
275        // Then check for consistency with the locale's region
276        String localeRegion = locale.getCountry();
277        if (localeRegion.isEmpty()) {
278            ULocale max = ULocale.addLikelySubtags(locale);
279            localeRegion = max.getCountry();
280        }
281        if (!region.equalsIgnoreCase(localeRegion)) {
282            return false;
283        }
284        return true;
285    }
286
287    static final Set<String> REORDERING_INCLUDE = new HashSet<String>(Arrays.asList("space", "punct", "symbol", "currency", "digit", "others", "zzzz"));
288    static final Set<String> REORDERING_EXCLUDE = new HashSet<String>(Arrays.asList("zinh", "zyyy"));
289    static final Set<Datasubtype> REGULAR_ONLY = EnumSet.of(Datasubtype.regular);
290    /**
291     * @param subtag
292     * @return
293     */
294    private boolean isScriptReorder(String subtag) {
295        subtag = AsciiUtil.toLowerString(subtag);
296        if (REORDERING_INCLUDE.contains(subtag)) {
297            return true;
298        } else if (REORDERING_EXCLUDE.contains(subtag)) {
299            return false;
300        }
301        return ValidIdentifiers.isValid(Datatype.script, REGULAR_ONLY, subtag) != null;
302        //        space, punct, symbol, currency, digit - core groups of characters below 'a'
303        //        any script code except Common and Inherited.
304        //      sc ; Zinh                             ; Inherited                        ; Qaai
305        //      sc ; Zyyy                             ; Common
306        //        Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are are always reordered with Hiragana.
307        //        others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others.        return false;
308    }
309
310    /**
311     * @param extensionString
312     * @param where
313     * @return
314     */
315    private boolean isValidLocale(String extensionString, Where where) {
316        try {
317            ULocale locale = new ULocale.Builder().setLanguageTag(extensionString).build();
318            return isValid(locale, where);
319        } catch (IllformedLocaleException e) {
320            int startIndex = e.getErrorIndex();
321            String[] list = SEPARATOR.split(extensionString.substring(startIndex));
322            return where.set(Datatype.t, list[0]);
323        } catch (Exception e) {
324            return where.set(Datatype.t, e.getMessage());
325        }
326    }
327
328    /**
329     * @param language
330     * @param language2
331     * @return
332     */
333    private boolean isValid(Datatype datatype, String code, Where where) {
334        return code.isEmpty() ? true :
335            ValidIdentifiers.isValid(datatype, datasubtypes, code) != null ? true :
336                where == null ? false
337                        : where.set(datatype, code);
338    }
339}
340