// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2015-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertpackage com.ibm.icu.impl;
1087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
1187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport java.util.Collections;
1287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport java.util.EnumMap;
1387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport java.util.HashMap;
1487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport java.util.HashSet;
1587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport java.util.Map;
1687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport java.util.Map.Entry;
1787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport java.util.Set;
1887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
1987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport com.ibm.icu.impl.locale.AsciiUtil;
2087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport com.ibm.icu.util.UResourceBundle;
2187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertimport com.ibm.icu.util.UResourceBundleIterator;
2287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
/**
 * Validity data for identifier codes (languages, regions, scripts, subdivisions, ...),
 * read from the {@code idValidity} table of the {@code supplementalData} resource bundle.
 *
 * @author markdavis
 */
2787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertpublic class ValidIdentifiers {
2887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
2987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    public enum Datatype {
3087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        currency,
3187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        language,
3287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        region,
3387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        script,
3487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        subdivision,
3587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        unit,
3687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        variant,
3787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        u,
3887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        t,
3987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        x,
4087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        illegal
4187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    }
4287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
4387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    public enum Datasubtype {
4487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        deprecated,
4587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        private_use,
4687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        regular,
4787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        special,
4887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        unknown,
4987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        macroregion,
5087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    }
5187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
5287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    public static class ValiditySet {
5387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        public final Set<String> regularData;
5487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        public final Map<String,Set<String>> subdivisionData;
5587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        public ValiditySet(Set<String> plainData, boolean makeMap) {
5687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (makeMap) {
5787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                HashMap<String,Set<String>> _subdivisionData = new HashMap<String,Set<String>>();
5887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                for (String s : plainData) {
5987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    int pos = s.indexOf('-'); // read v28 data also
6087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    int pos2 = pos+1;
6187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    if (pos < 0) {
6287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        pos2 = pos = s.charAt(0) < 'A' ? 3 : 2;
6387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    }
6487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    final String key = s.substring(0, pos);
6587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    final String subdivision = s.substring(pos2);
6687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
6787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    Set<String> oldSet = _subdivisionData.get(key);
6887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    if (oldSet == null) {
6987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        _subdivisionData.put(key, oldSet = new HashSet<String>());
7087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    }
7187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    oldSet.add(subdivision);
7287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                }
7387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                this.regularData = null;
7487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                HashMap<String,Set<String>> _subdivisionData2 = new HashMap<String,Set<String>>();
7587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                // protect the sets
7687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                for (Entry<String, Set<String>> e : _subdivisionData.entrySet()) {
7787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    Set<String> value = e.getValue();
7887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    // optimize a bit by using singleton
7987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    Set<String> set = value.size() == 1 ? Collections.singleton(value.iterator().next())
8087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                            : Collections.unmodifiableSet(value);
8187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    _subdivisionData2.put(e.getKey(), set);
8287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                }
8387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
8487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                this.subdivisionData = Collections.unmodifiableMap(_subdivisionData2);
8587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            } else {
8687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                this.regularData = Collections.unmodifiableSet(plainData);
8787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                this.subdivisionData = null;
8887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
8987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
9087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
9187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        public boolean contains(String code) {
9287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (regularData != null) {
9387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                return regularData.contains(code);
9487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            } else {
9587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                int pos = code.indexOf('-');
9687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                String key = code.substring(0,pos);
9787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                final String value = code.substring(pos+1);
9887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                return contains(key, value);
9987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
10087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
10187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
10287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        public boolean contains(String key, String value) {
10387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            Set<String> oldSet = subdivisionData.get(key);
10487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            return oldSet != null && oldSet.contains(value);
10587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
10687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
10787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        @Override
10887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        public String toString() {
10987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (regularData != null) {
11087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                return regularData.toString();
11187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            } else {
11287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                return subdivisionData.toString();
11387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
11487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
11587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    }
11687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
11787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    private static class ValidityData {
11887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        static final Map<Datatype,Map<Datasubtype,ValiditySet>> data;
11987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        static {
12087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            Map<Datatype, Map<Datasubtype, ValiditySet>> _data = new EnumMap<Datatype,Map<Datasubtype,ValiditySet>>(Datatype.class);
12187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            UResourceBundle suppData = UResourceBundle.getBundleInstance(
1222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert                    ICUData.ICU_BASE_NAME,
12387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    "supplementalData",
12487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    ICUResourceBundle.ICU_DATA_CLASS_LOADER);
12587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            UResourceBundle validityInfo = suppData.get("idValidity");
12687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            for(UResourceBundleIterator datatypeIterator = validityInfo.getIterator();
12787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    datatypeIterator.hasNext();) {
12887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                UResourceBundle datatype = datatypeIterator.next();
12987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                String rawKey = datatype.getKey();
13087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                Datatype key = Datatype.valueOf(rawKey);
13187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                Map<Datasubtype,ValiditySet> values = new EnumMap<Datasubtype,ValiditySet>(Datasubtype.class);
13287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                for(UResourceBundleIterator datasubtypeIterator = datatype.getIterator();
13387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        datasubtypeIterator.hasNext();) {
13487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    UResourceBundle datasubtype = datasubtypeIterator.next();
13587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    String rawsubkey = datasubtype.getKey();
13687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    Datasubtype subkey = Datasubtype.valueOf(rawsubkey);
13787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    // handle single value specially
13887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    Set<String> subvalues = new HashSet<String>();
13987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    if (datasubtype.getType() == UResourceBundle.STRING) {
14087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        addRange(datasubtype.getString(), subvalues);
14187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    } else {
14287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        for (String string : datasubtype.getStringArray()) {
14387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                            addRange(string, subvalues);
14487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        }
14587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    }
14687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    values.put(subkey, new ValiditySet(subvalues, key == Datatype.subdivision));
14787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                }
14887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                _data.put(key, Collections.unmodifiableMap(values));
14987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
15087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            data = Collections.unmodifiableMap(_data);
15187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
15287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        private static void addRange(String string, Set<String> subvalues) {
15387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            string = AsciiUtil.toLowerString(string);
15487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            int pos = string.indexOf('~');
15587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            if (pos < 0) {
15687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                subvalues.add(string);
15787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            } else {
15887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                StringRange.expand(string.substring(0,pos), string.substring(pos+1), false, subvalues);
15987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
16087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
16187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    }
16287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
16387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    public static Map<Datatype, Map<Datasubtype, ValiditySet>> getData() {
16487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        return ValidityData.data;
16587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    }
16687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
16787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    /**
16887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     * Returns the Datasubtype containing the code, or null if there is none.
16987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert     */
17087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    public static Datasubtype isValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code) {
17187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        Map<Datasubtype, ValiditySet> subtable = ValidityData.data.get(datatype);
17287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        if (subtable != null) {
17387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            for (Datasubtype datasubtype : datasubtypes) {
17487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                ValiditySet validitySet = subtable.get(datasubtype);
17587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if (validitySet != null) {
17687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    if (validitySet.contains(AsciiUtil.toLowerString(code))) {
17787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        return datasubtype;
17887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    }
17987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                }
18087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
18187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
18287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        return null;
18387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    }
18487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert
18587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    public static Datasubtype isValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code, String value) {
18687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        Map<Datasubtype, ValiditySet> subtable = ValidityData.data.get(datatype);
18787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        if (subtable != null) {
18887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            code = AsciiUtil.toLowerString(code);
18987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            value = AsciiUtil.toLowerString(value);
19087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            for (Datasubtype datasubtype : datasubtypes) {
19187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                ValiditySet validitySet = subtable.get(datasubtype);
19287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                if (validitySet != null) {
19387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    if (validitySet.contains(code, value)) {
19487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                        return datasubtype;
19587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                    }
19687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert                }
19787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert            }
19887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        }
19987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert        return null;
20087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert    }
20187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert}
202