12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 3bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert/* 4bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert ******************************************************************************* 5bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * Copyright (C) 2010-2011, International Business Machines Corporation and * 6bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * others. All Rights Reserved. * 7bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert ******************************************************************************* 8bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert */ 9bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertpackage com.ibm.icu.impl.locale; 10bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 11bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.ArrayList; 12bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.Collections; 13bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.HashMap; 14bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.List; 15bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.Map; 16bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.Set; 17bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 18bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertpublic class LanguageTag { 19bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert private static final boolean JDKIMPL = false; 20bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 21bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 22bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // static fields 23bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 
/** Separator between subtags; used when tokenizing a tag and when re-joining subtags. */
public static final String SEP = "-";

/** Singleton subtag that introduces a private use sequence (the "x" of the privateuse production). */
public static final String PRIVATEUSE = "x";

/**
 * The "und" language subtag.
 * Declared final, like the sibling constants, so that the shared value cannot be
 * reassigned at run time.
 * NOTE(review): presumably emitted when a tag has no language subtag - confirm against callers.
 */
public static final String UNDETERMINED = "und";

/**
 * Marker subtag "lvariant".
 * NOTE(review): presumably prefixes ill-formed variant subtags that are carried inside the
 * private use sequence - confirm against parseLocale.
 */
public static final String PRIVUSE_VARIANT_PREFIX = "lvariant";

//
// Language subtag fields
//
private String _language = "";      // language subtag
private String _script = "";        // script subtag
private String _region = "";        // region subtag
private String _privateuse = "";    // privateuse

// The list fields start out as the shared immutable empty list; the parsers replace
// them with a fresh ArrayList before adding the first element.
private List<String> _extlangs = Collections.emptyList();   // extlang subtags
private List<String> _variants = Collections.emptyList();   // variant subtags
private List<String> _extensions = Collections.emptyList(); // extensions

// Map of grandfathered tags to their preferred mappings, from
// http://www.ietf.org/rfc/rfc5646.txt
// Each value is the whole {tag, preferred} pair, so index 1 is the preferred form.
private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
    new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();

static {
    // grandfathered = irregular           ; non-redundant tags registered
    //               / regular             ; during the RFC 3066 era
    //
    // irregular     = "en-GB-oed"         ; irregular tags do not match
    //               / "i-ami"             ; the 'langtag' production and
    //               / "i-bnn"             ; would not otherwise be
    //               / "i-default"         ; considered 'well-formed'
    //               / "i-enochian"        ; These tags are all valid,
    //               / "i-hak"             ; but most are deprecated
    //               / "i-klingon"         ; in favor of more modern
    //               / "i-lux"             ; subtags or subtag
    //               / "i-mingo"           ; combination
    //               / "i-navajo"
    //               / "i-pwn"
    //               / "i-tao"
    //               / "i-tay"
    //               / "i-tsu"
    //               / "sgn-BE-FR"
    //               / "sgn-BE-NL"
    //               / "sgn-CH-DE"
    //
    // regular       = "art-lojban"        ; these tags match the 'langtag'
    //               / "cel-gaulish"       ; production, but their subtags
    //               / "no-bok"            ; are not extended language
    //               / "no-nyn"            ; or variant subtags: their meaning
    //               / "zh-guoyu"          ; is defined by their registration
    //               / "zh-hakka"          ; and all of these are deprecated
    //               / "zh-min"            ; in favor of a more modern
    //               / "zh-min-nan"        ; subtag or sequence of subtags
    //               / "zh-xiang"

    final String[][] entries = {
        //{"tag",         "preferred"},
        {"art-lojban",    "jbo"},
        {"cel-gaulish",   "xtg-x-cel-gaulish"},   // fallback
        {"en-GB-oed",     "en-GB-x-oed"},         // fallback
        {"i-ami",         "ami"},
        {"i-bnn",         "bnn"},
        {"i-default",     "en-x-i-default"},      // fallback
        {"i-enochian",    "und-x-i-enochian"},    // fallback
        {"i-hak",         "hak"},
        {"i-klingon",     "tlh"},
        {"i-lux",         "lb"},
        {"i-mingo",       "see-x-i-mingo"},       // fallback
        {"i-navajo",      "nv"},
        {"i-pwn",         "pwn"},
        {"i-tao",         "tao"},
        {"i-tay",         "tay"},
        {"i-tsu",         "tsu"},
        {"no-bok",        "nb"},
        {"no-nyn",        "nn"},
        {"sgn-BE-FR",     "sfb"},
        {"sgn-BE-NL",     "vgt"},
        {"sgn-CH-DE",     "sgg"},
        {"zh-guoyu",      "cmn"},
        {"zh-hakka",      "hak"},
        {"zh-min",        "nan-x-zh-min"},        // fallback
        {"zh-min-nan",    "nan"},
        {"zh-xiang",      "hsn"},
    };
    for (String[] e : entries) {
        GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
    }
}

/** Not instantiable from outside; instances are created by the static parse methods. */
private LanguageTag() {
}
114bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 115bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert /* 116bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * BNF in RFC5464 117bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 118bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * Language-Tag = langtag ; normal language tags 119bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / privateuse ; private use tag 120bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / grandfathered ; grandfathered tags 121bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 122bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 123bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * langtag = language 124bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * ["-" script] 125bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * ["-" region] 126bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * *("-" variant) 127bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * *("-" extension) 128bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * ["-" privateuse] 129bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 130bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * language = 2*3ALPHA ; shortest ISO 639 code 131bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * ["-" extlang] ; sometimes followed by 132bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * ; extended language subtags 133bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / 4ALPHA ; or reserved for future use 134bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / 5*8ALPHA ; or registered language subtag 135bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 136bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * extlang = 3ALPHA ; selected ISO 639 codes 137bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * *2("-" 3ALPHA) ; permanently reserved 
138bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 139bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * script = 4ALPHA ; ISO 15924 code 140bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 141bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * region = 2ALPHA ; ISO 3166-1 code 142bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / 3DIGIT ; UN M.49 code 143bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 144bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * variant = 5*8alphanum ; registered variants 145bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / (DIGIT 3alphanum) 146bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 147bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * extension = singleton 1*("-" (2*8alphanum)) 148bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 149bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * ; Single alphanumerics 150bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * ; "x" reserved for private use 151bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * singleton = DIGIT ; 0 - 9 152bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / %x41-57 ; A - W 153bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / %x59-5A ; Y - Z 154bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / %x61-77 ; a - w 155bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * / %x79-7A ; y - z 156bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 157bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * privateuse = "x" 1*("-" (1*8alphanum)) 158bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert * 159bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert */ 160bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static LanguageTag parse(String languageTag, ParseStatus sts) { 161bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (sts == null) { 162bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 
sts = new ParseStatus(); 163bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 164bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts.reset(); 165bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 166bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 167bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert StringTokenIterator itr; 168bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 169bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // Check if the tag is grandfathered 170bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag)); 171bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (gfmap != null) { 172bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // use preferred mapping 173bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr = new StringTokenIterator(gfmap[1], SEP); 174bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 175bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr = new StringTokenIterator(languageTag, SEP); 176bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 177bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 178bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert LanguageTag tag = new LanguageTag(); 179bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 180bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // langtag must start with either language or privateuse 181bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (tag.parseLanguage(itr, sts)) { 182bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag.parseExtlangs(itr, sts); 183bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag.parseScript(itr, sts); 184bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag.parseRegion(itr, sts); 185bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag.parseVariants(itr, sts); 186bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik 
Roubert tag.parseExtensions(itr, sts); 187bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 188bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag.parsePrivateuse(itr, sts); 189bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 190bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (!itr.isDone() && !sts.isError()) { 191bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String s = itr.current(); 192bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._errorIndex = itr.currentStart(); 193bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (s.length() == 0) { 194bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._errorMsg = "Empty subtag"; 195bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 196bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._errorMsg = "Invalid subtag: " + s; 197bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 198bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 199bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 200bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return tag; 201bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 202bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 203bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 204bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // Language subtag parsers 205bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 206bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 207bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert private boolean parseLanguage(StringTokenIterator itr, ParseStatus sts) { 208bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (itr.isDone() || sts.isError()) { 209bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return false; 210bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 211bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 212bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert boolean 
found = false; 213bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 214bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String s = itr.current(); 215bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (isLanguage(s)) { 216bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert found = true; 217bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _language = s; 218bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._parseLength = itr.currentEnd(); 219bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 220bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 221bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 222bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return found; 223bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 224bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 225bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) { 226bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (itr.isDone() || sts.isError()) { 227bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return false; 228bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 229bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 230bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert boolean found = false; 231bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 232bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert while (!itr.isDone()) { 233bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String s = itr.current(); 234bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (!isExtlang(s)) { 235bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 236bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 237bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert found = true; 238bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (_extlangs.isEmpty()) { 
239bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _extlangs = new ArrayList<String>(3); 240bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 241bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _extlangs.add(s); 242bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._parseLength = itr.currentEnd(); 243bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 244bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 245bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (_extlangs.size() == 3) { 246bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // Maximum 3 extlangs 247bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 248bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 249bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 250bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 251bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return found; 252bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 253bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 254bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert private boolean parseScript(StringTokenIterator itr, ParseStatus sts) { 255bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (itr.isDone() || sts.isError()) { 256bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return false; 257bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 258bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 259bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert boolean found = false; 260bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 261bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String s = itr.current(); 262bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (isScript(s)) { 263bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert found = true; 264bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _script = s; 265bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 
sts._parseLength = itr.currentEnd(); 266bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 267bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 268bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 269bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return found; 270bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 271bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 272bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) { 273bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (itr.isDone() || sts.isError()) { 274bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return false; 275bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 276bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 277bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert boolean found = false; 278bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 279bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String s = itr.current(); 280bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (isRegion(s)) { 281bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert found = true; 282bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _region = s; 283bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._parseLength = itr.currentEnd(); 284bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 285bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 286bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 287bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return found; 288bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 289bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 290bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) { 291bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (itr.isDone() || sts.isError()) { 
292bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return false; 293bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 294bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 295bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert boolean found = false; 296bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 297bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert while (!itr.isDone()) { 298bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String s = itr.current(); 299bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (!isVariant(s)) { 300bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 301bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 302bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert found = true; 303bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (_variants.isEmpty()) { 304bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _variants = new ArrayList<String>(3); 305bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 306bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _variants.add(s); 307bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._parseLength = itr.currentEnd(); 308bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 309bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 310bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 311bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return found; 312bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 313bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 314bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) { 315bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (itr.isDone() || sts.isError()) { 316bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return false; 317bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 318bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik 
Roubert 319bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert boolean found = false; 320bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 321bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert while (!itr.isDone()) { 322bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String s = itr.current(); 323bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (isExtensionSingleton(s)) { 324bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert int start = itr.currentStart(); 325bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String singleton = s; 326bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert StringBuilder sb = new StringBuilder(singleton); 327bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 328bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 329bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert while (!itr.isDone()) { 330bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert s = itr.current(); 331bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (isExtensionSubtag(s)) { 332bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sb.append(SEP).append(s); 333bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._parseLength = itr.currentEnd(); 334bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 335bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 336bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 337bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 338bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 339bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 340bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (sts._parseLength <= start) { 341bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._errorIndex = start; 342bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._errorMsg = "Incomplete extension '" + singleton + "'"; 343bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 
344bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 345bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 346bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (_extensions.size() == 0) { 347bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _extensions = new ArrayList<String>(4); 348bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 349bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _extensions.add(sb.toString()); 350bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert found = true; 351bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 352bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 353bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 354bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 355bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return found; 356bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 357bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 358bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { 359bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (itr.isDone() || sts.isError()) { 360bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return false; 361bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 362bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 363bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert boolean found = false; 364bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 365bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String s = itr.current(); 366bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (isPrivateusePrefix(s)) { 367bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert int start = itr.currentStart(); 368bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert StringBuilder sb = new StringBuilder(s); 369bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 
370bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 371bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert while (!itr.isDone()) { 372bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert s = itr.current(); 373bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (!isPrivateuseSubtag(s)) { 374bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 375bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 376bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sb.append(SEP).append(s); 377bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._parseLength = itr.currentEnd(); 378bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 379bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert itr.next(); 380bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 381bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 382bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (sts._parseLength <= start) { 383bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // need at least 1 private subtag 384bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._errorIndex = start; 385bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert sts._errorMsg = "Incomplete privateuse"; 386bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 387bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert _privateuse = sb.toString(); 388bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert found = true; 389bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 390bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 391bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 392bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return found; 393bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 394bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 395bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions 
localeExtensions) { 396bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert LanguageTag tag = new LanguageTag(); 397bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 398bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String language = baseLocale.getLanguage(); 399bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String script = baseLocale.getScript(); 400bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String region = baseLocale.getRegion(); 401bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String variant = baseLocale.getVariant(); 402bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 403bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert boolean hasSubtag = false; 404bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 405bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String privuseVar = null; // store ill-formed variant subtags 406bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 407bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (language.length() > 0 && isLanguage(language)) { 408bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // Convert a deprecated language code used by Java to 409bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // a new code 410bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (language.equals("iw")) { 411bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert language = "he"; 412bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else if (language.equals("ji")) { 413bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert language = "yi"; 414bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else if (language.equals("in")) { 415bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert language = "id"; 416bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 417bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag._language = language; 418bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 
419bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 420bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (script.length() > 0 && isScript(script)) { 421bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag._script = canonicalizeScript(script); 422bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert hasSubtag = true; 423bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 424bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 425bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (region.length() > 0 && isRegion(region)) { 426bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag._region = canonicalizeRegion(region); 427bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert hasSubtag = true; 428bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 429bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 430bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (JDKIMPL) { 431bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // Special handling for no_NO_NY - use nn_NO for language tag 432bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (tag._language.equals("no") && tag._region.equals("NO") && variant.equals("NY")) { 433bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag._language = "nn"; 434bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert variant = ""; 435bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 436bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 437bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 438bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (variant.length() > 0) { 439bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert List<String> variants = null; 440bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP); 441bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert while (!varitr.isDone()) { 442bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik 
Roubert String var = varitr.current(); 443bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (!isVariant(var)) { 444bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 445bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 446bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (variants == null) { 447bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert variants = new ArrayList<String>(); 448bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 449bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (JDKIMPL) { 450bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert variants.add(var); // Do not canonicalize! 451bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 452bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert variants.add(canonicalizeVariant(var)); 453bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 454bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert varitr.next(); 455bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 456bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (variants != null) { 457bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag._variants = variants; 458bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert hasSubtag = true; 459bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 460bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (!varitr.isDone()) { 461bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // ill-formed variant subtags 462bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert StringBuilder buf = new StringBuilder(); 463bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert while (!varitr.isDone()) { 464bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String prvv = varitr.current(); 465bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (!isPrivateuseSubtag(prvv)) { 466bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // cannot use private use subtag - truncated 
467bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert break; 468bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 469bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (buf.length() > 0) { 470bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert buf.append(SEP); 471bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 472bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (!JDKIMPL) { 473bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert prvv = AsciiUtil.toLowerString(prvv); 474bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 475bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert buf.append(prvv); 476bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert varitr.next(); 477bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 478bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (buf.length() > 0) { 479bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert privuseVar = buf.toString(); 480bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 481bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 482bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 483bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 484bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert List<String> extensions = null; 485bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert String privateuse = null; 486bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 487bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert Set<Character> locextKeys = localeExtensions.getKeys(); 488bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert for (Character locextKey : locextKeys) { 489bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert Extension ext = localeExtensions.getExtension(locextKey); 490bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (isPrivateusePrefixChar(locextKey.charValue())) { 491bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert privateuse = ext.getValue(); 
492bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 493bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (extensions == null) { 494bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert extensions = new ArrayList<String>(); 495bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 496bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert extensions.add(locextKey.toString() + SEP + ext.getValue()); 497bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 498bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 499bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 500bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (extensions != null) { 501bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag._extensions = extensions; 502bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert hasSubtag = true; 503bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 504bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 505bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // append ill-formed variant subtags to private use 506bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (privuseVar != null) { 507bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (privateuse == null) { 508bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert privateuse = PRIVUSE_VARIANT_PREFIX + SEP + privuseVar; 509bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } else { 510bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert privateuse = privateuse + SEP + PRIVUSE_VARIANT_PREFIX + SEP + privuseVar.replace(BaseLocale.SEP, SEP); 511bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 512bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 513bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 514bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (privateuse != null) { 515bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag._privateuse = privateuse; 
516bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 517bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 518bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (tag._language.length() == 0 && (hasSubtag || privateuse == null)) { 519bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // use lang "und" when 1) no language is available AND 520bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 2) any of other subtags other than private use are available or 521bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // no private use tag is available 522bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert tag._language = UNDETERMINED; 523bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 524bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 525bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return tag; 526bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 527bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 528bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 529bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // Getter methods for language subtag fields 530bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 531bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 532bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public String getLanguage() { 533bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return _language; 534bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 535bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 536bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public List<String> getExtlangs() { 537bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return Collections.unmodifiableList(_extlangs); 538bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 539bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 540bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public String getScript() { 
541bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return _script; 542bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 543bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 544bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public String getRegion() { 545bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return _region; 546bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 547bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 548bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public List<String> getVariants() { 549bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return Collections.unmodifiableList(_variants); 550bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 551bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 552bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public List<String> getExtensions() { 553bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return Collections.unmodifiableList(_extensions); 554bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 555bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 556bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public String getPrivateuse() { 557bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return _privateuse; 558bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 559bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 560bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 561bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // Language subtag syntax checking methods 562bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 563bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 564bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isLanguage(String s) { 565bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // language = 2*3ALPHA ; shortest ISO 639 code 566bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // ["-" extlang] ; sometimes 
followed by 567bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // ; extended language subtags 568bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // / 4ALPHA ; or reserved for future use 569bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // / 5*8ALPHA ; or registered language subtag 570bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s); 571bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 572bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 573bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isExtlang(String s) { 574bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // extlang = 3ALPHA ; selected ISO 639 codes 575bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // *2("-" 3ALPHA) ; permanently reserved 576bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return (s.length() == 3) && AsciiUtil.isAlphaString(s); 577bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 578bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 579bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isScript(String s) { 580bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // script = 4ALPHA ; ISO 15924 code 581bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return (s.length() == 4) && AsciiUtil.isAlphaString(s); 582bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 583bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 584bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isRegion(String s) { 585bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // region = 2ALPHA ; ISO 3166-1 code 586bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // / 3DIGIT ; UN M.49 code 587bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return ((s.length() == 2) && AsciiUtil.isAlphaString(s)) 588bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert || 
((s.length() == 3) && AsciiUtil.isNumericString(s)); 589bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 590bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 591bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isVariant(String s) { 592bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // variant = 5*8alphanum ; registered variants 593bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // / (DIGIT 3alphanum) 594bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert int len = s.length(); 595bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (len >= 5 && len <= 8) { 596bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.isAlphaNumericString(s); 597bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 598bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert if (len == 4) { 599bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.isNumeric(s.charAt(0)) 600bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert && AsciiUtil.isAlphaNumeric(s.charAt(1)) 601bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert && AsciiUtil.isAlphaNumeric(s.charAt(2)) 602bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert && AsciiUtil.isAlphaNumeric(s.charAt(3)); 603bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 604bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return false; 605bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 606bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 607bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isExtensionSingleton(String s) { 608bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // singleton = DIGIT ; 0 - 9 609bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // / %x41-57 ; A - W 610bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // / %x59-5A ; Y - Z 611bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // / %x61-77 ; a - w 
612bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // / %x79-7A ; y - z 613bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 614bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return (s.length() == 1) 615bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert && AsciiUtil.isAlphaString(s) 616bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); 617bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 618bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 619bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isExtensionSingletonChar(char c) { 620bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return isExtensionSingleton(String.valueOf(c)); 621bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 622bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 623bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isExtensionSubtag(String s) { 624bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // extension = singleton 1*("-" (2*8alphanum)) 625bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); 626bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 627bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 628bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isPrivateusePrefix(String s) { 629bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // privateuse = "x" 1*("-" (1*8alphanum)) 630bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return (s.length() == 1) 631bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert && AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); 632bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 633bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 634bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isPrivateusePrefixChar(char c) { 
635bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return (AsciiUtil.caseIgnoreMatch(PRIVATEUSE, String.valueOf(c))); 636bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 637bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 638bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static boolean isPrivateuseSubtag(String s) { 639bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // privateuse = "x" 1*("-" (1*8alphanum)) 640bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); 641bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 642bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 643bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 644bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // Language subtag canonicalization methods 645bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert // 646bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 647bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizeLanguage(String s) { 648bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toLowerString(s); 649bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 650bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 651bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizeExtlang(String s) { 652bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toLowerString(s); 653bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 654bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 655bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizeScript(String s) { 656bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toTitleString(s); 657bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 658bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 
659bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizeRegion(String s) { 660bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toUpperString(s); 661bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 662bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 663bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizeVariant(String s) { 664bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toLowerString(s); 665bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 666bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 667bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizeExtension(String s) { 668bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toLowerString(s); 669bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 670bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 671bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizeExtensionSingleton(String s) { 672bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toLowerString(s); 673bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 674bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 675bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizeExtensionSubtag(String s) { 676bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toLowerString(s); 677bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 678bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 679bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert public static String canonicalizePrivateuse(String s) { 680bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert return AsciiUtil.toLowerString(s); 681bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert } 682bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert 
/** Returns s as converted by AsciiUtil.toLowerString. */
public static String canonicalizePrivateuseSubtag(String s) {
    return AsciiUtil.toLowerString(s);
}

/**
 * Renders the stored subtags as a single string joined with {@link #SEP}.
 *
 * <p>When a language subtag is present the order is: language, extlangs,
 * script, region, variants, extensions.  The private use string, if any,
 * is appended last (and is the only output when no language is stored).
 *
 * @return the assembled tag; empty when neither a language nor a private
 *         use string is stored
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();

    if (_language.length() > 0) {
        sb.append(_language);

        for (String extlang : _extlangs) {
            sb.append(SEP).append(extlang);
        }

        if (_script.length() > 0) {
            sb.append(SEP).append(_script);
        }

        if (_region.length() > 0) {
            sb.append(SEP).append(_region);
        }

        // Iterate the variant list here; walking _extlangs a second time
        // would drop every variant and repeat the extlangs after the region.
        for (String variant : _variants) {
            sb.append(SEP).append(variant);
        }

        for (String extension : _extensions) {
            sb.append(SEP).append(extension);
        }
    }

    if (_privateuse.length() > 0) {
        // separator only when something precedes the private use part
        if (sb.length() > 0) {
            sb.append(SEP);
        }
        sb.append(_privateuse);
    }

    return sb.toString();
}
}