// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2015-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.ibm.icu.impl.locale.AsciiUtil;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.UResourceBundleIterator;

2387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert/** 2487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert * @author markdavis 2587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert * 2687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert */ 2787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubertpublic class ValidIdentifiers { 2887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 2987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public enum Datatype { 3087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert currency, 3187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert language, 3287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert region, 3387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert script, 3487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert subdivision, 3587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert unit, 3687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert variant, 3787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert u, 3887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert t, 3987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert x, 4087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert illegal 4187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 4287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 4387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public enum Datasubtype { 4487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert deprecated, 4587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert private_use, 4687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert regular, 4787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert special, 4887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert unknown, 4987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert macroregion, 5087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 5187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 5287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 
public static class ValiditySet { 5387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public final Set<String> regularData; 5487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public final Map<String,Set<String>> subdivisionData; 5587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public ValiditySet(Set<String> plainData, boolean makeMap) { 5687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (makeMap) { 5787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert HashMap<String,Set<String>> _subdivisionData = new HashMap<String,Set<String>>(); 5887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert for (String s : plainData) { 5987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert int pos = s.indexOf('-'); // read v28 data also 6087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert int pos2 = pos+1; 6187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (pos < 0) { 6287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert pos2 = pos = s.charAt(0) < 'A' ? 
3 : 2; 6387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 6487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert final String key = s.substring(0, pos); 6587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert final String subdivision = s.substring(pos2); 6687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 6787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Set<String> oldSet = _subdivisionData.get(key); 6887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (oldSet == null) { 6987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert _subdivisionData.put(key, oldSet = new HashSet<String>()); 7087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 7187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert oldSet.add(subdivision); 7287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 7387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert this.regularData = null; 7487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert HashMap<String,Set<String>> _subdivisionData2 = new HashMap<String,Set<String>>(); 7587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert // protect the sets 7687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert for (Entry<String, Set<String>> e : _subdivisionData.entrySet()) { 7787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Set<String> value = e.getValue(); 7887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert // optimize a bit by using singleton 7987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Set<String> set = value.size() == 1 ? 
Collections.singleton(value.iterator().next()) 8087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert : Collections.unmodifiableSet(value); 8187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert _subdivisionData2.put(e.getKey(), set); 8287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 8387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 8487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert this.subdivisionData = Collections.unmodifiableMap(_subdivisionData2); 8587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } else { 8687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert this.regularData = Collections.unmodifiableSet(plainData); 8787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert this.subdivisionData = null; 8887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 8987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 9087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 9187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public boolean contains(String code) { 9287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (regularData != null) { 9387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return regularData.contains(code); 9487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } else { 9587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert int pos = code.indexOf('-'); 9687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert String key = code.substring(0,pos); 9787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert final String value = code.substring(pos+1); 9887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return contains(key, value); 9987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 10087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 10187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 10287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public boolean contains(String key, String value) { 10387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 
Set<String> oldSet = subdivisionData.get(key); 10487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return oldSet != null && oldSet.contains(value); 10587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 10687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 10787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert @Override 10887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public String toString() { 10987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (regularData != null) { 11087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return regularData.toString(); 11187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } else { 11287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return subdivisionData.toString(); 11387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 11487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 11587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 11687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 11787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert private static class ValidityData { 11887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert static final Map<Datatype,Map<Datasubtype,ValiditySet>> data; 11987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert static { 12087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Map<Datatype, Map<Datasubtype, ValiditySet>> _data = new EnumMap<Datatype,Map<Datasubtype,ValiditySet>>(Datatype.class); 12187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert UResourceBundle suppData = UResourceBundle.getBundleInstance( 1222d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert ICUData.ICU_BASE_NAME, 12387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert "supplementalData", 12487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert ICUResourceBundle.ICU_DATA_CLASS_LOADER); 12587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert UResourceBundle validityInfo = suppData.get("idValidity"); 
12687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert for(UResourceBundleIterator datatypeIterator = validityInfo.getIterator(); 12787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert datatypeIterator.hasNext();) { 12887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert UResourceBundle datatype = datatypeIterator.next(); 12987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert String rawKey = datatype.getKey(); 13087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Datatype key = Datatype.valueOf(rawKey); 13187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Map<Datasubtype,ValiditySet> values = new EnumMap<Datasubtype,ValiditySet>(Datasubtype.class); 13287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert for(UResourceBundleIterator datasubtypeIterator = datatype.getIterator(); 13387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert datasubtypeIterator.hasNext();) { 13487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert UResourceBundle datasubtype = datasubtypeIterator.next(); 13587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert String rawsubkey = datasubtype.getKey(); 13687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Datasubtype subkey = Datasubtype.valueOf(rawsubkey); 13787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert // handle single value specially 13887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Set<String> subvalues = new HashSet<String>(); 13987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (datasubtype.getType() == UResourceBundle.STRING) { 14087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert addRange(datasubtype.getString(), subvalues); 14187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } else { 14287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert for (String string : datasubtype.getStringArray()) { 14387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert addRange(string, subvalues); 14487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 
14587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 14687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert values.put(subkey, new ValiditySet(subvalues, key == Datatype.subdivision)); 14787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 14887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert _data.put(key, Collections.unmodifiableMap(values)); 14987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 15087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert data = Collections.unmodifiableMap(_data); 15187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 15287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert private static void addRange(String string, Set<String> subvalues) { 15387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert string = AsciiUtil.toLowerString(string); 15487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert int pos = string.indexOf('~'); 15587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (pos < 0) { 15687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert subvalues.add(string); 15787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } else { 15887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert StringRange.expand(string.substring(0,pos), string.substring(pos+1), false, subvalues); 15987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 16087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 16187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 16287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 16387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public static Map<Datatype, Map<Datasubtype, ValiditySet>> getData() { 16487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return ValidityData.data; 16587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 16687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 16787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert /** 16887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert * Returns the 
Datasubtype containing the code, or null if there is none. 16987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert */ 17087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public static Datasubtype isValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code) { 17187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Map<Datasubtype, ValiditySet> subtable = ValidityData.data.get(datatype); 17287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (subtable != null) { 17387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert for (Datasubtype datasubtype : datasubtypes) { 17487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert ValiditySet validitySet = subtable.get(datasubtype); 17587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (validitySet != null) { 17687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (validitySet.contains(AsciiUtil.toLowerString(code))) { 17787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return datasubtype; 17887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 17987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 18087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 18187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 18287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return null; 18387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 18487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert 18587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert public static Datasubtype isValid(Datatype datatype, Set<Datasubtype> datasubtypes, String code, String value) { 18687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert Map<Datasubtype, ValiditySet> subtable = ValidityData.data.get(datatype); 18787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (subtable != null) { 18887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert code = AsciiUtil.toLowerString(code); 18987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert value = 
AsciiUtil.toLowerString(value); 19087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert for (Datasubtype datasubtype : datasubtypes) { 19187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert ValiditySet validitySet = subtable.get(datasubtype); 19287255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (validitySet != null) { 19387255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert if (validitySet.contains(code, value)) { 19487255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return datasubtype; 19587255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 19687255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 19787255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 19887255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 19987255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert return null; 20087255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert } 20187255a3fc79cc94374b5b8adc76a86e251ac7d3eFredrik Roubert} 202