// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 *******************************************************************************
 * Copyright (C) 2010-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
9bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertpackage com.ibm.icu.impl.locale;
10bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
11bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.ArrayList;
12bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.Collections;
13bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.HashMap;
14bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.List;
15bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.Map;
16bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertimport java.util.Set;
17bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
18bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertpublic class LanguageTag {
19bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private static final boolean JDKIMPL = false;
20bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
21bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
22bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    // static fields
23bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
24bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static final String SEP = "-";
25bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static final String PRIVATEUSE = "x";
26bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String UNDETERMINED = "und";
27bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static final String PRIVUSE_VARIANT_PREFIX = "lvariant";
28bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
29bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
30bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    // Language subtag fields
31bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
32bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private String _language = "";      // language subtag
33bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private String _script = "";        // script subtag
34bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private String _region = "";        // region subtag
35bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private String _privateuse = "";    // privateuse
36bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
37bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private List<String> _extlangs = Collections.emptyList();   // extlang subtags
38bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private List<String> _variants = Collections.emptyList();   // variant subtags
39bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private List<String> _extensions = Collections.emptyList(); // extensions
40bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
// Grandfathered tags and their preferred mappings, taken from
// http://www.ietf.org/rfc/rfc5646.txt
// Keys compare case-insensitively; each value is the full {tag, preferred} pair,
// so index 1 of a looked-up value is the replacement tag.
private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
    new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();

static {
    // The grammar these tags come from:
    //
    // grandfathered = irregular           ; non-redundant tags registered
    //               / regular             ; during the RFC 3066 era
    //
    // irregular     = "en-GB-oed"         ; irregular tags do not match
    //               / "i-ami"             ; the 'langtag' production and
    //               / "i-bnn"             ; would not otherwise be
    //               / "i-default"         ; considered 'well-formed'
    //               / "i-enochian"        ; These tags are all valid,
    //               / "i-hak"             ; but most are deprecated
    //               / "i-klingon"         ; in favor of more modern
    //               / "i-lux"             ; subtags or subtag
    //               / "i-mingo"           ; combination
    //               / "i-navajo"
    //               / "i-pwn"
    //               / "i-tao"
    //               / "i-tay"
    //               / "i-tsu"
    //               / "sgn-BE-FR"
    //               / "sgn-BE-NL"
    //               / "sgn-CH-DE"
    //
    // regular       = "art-lojban"        ; these tags match the 'langtag'
    //               / "cel-gaulish"       ; production, but their subtags
    //               / "no-bok"            ; are not extended language
    //               / "no-nyn"            ; or variant subtags: their meaning
    //               / "zh-guoyu"          ; is defined by their registration
    //               / "zh-hakka"          ; and all of these are deprecated
    //               / "zh-min"            ; in favor of a more modern
    //               / "zh-min-nan"        ; subtag or sequence of subtags
    //               / "zh-xiang"

    // Rows marked "fallback" map to a tag that keeps the original spelling
    // inside a private use ("-x-") sequence.
    final String[][] table = {
      //{"tag",         "preferred"},
        {"art-lojban",  "jbo"},
        {"cel-gaulish", "xtg-x-cel-gaulish"},   // fallback
        {"en-GB-oed",   "en-GB-x-oed"},         // fallback
        {"i-ami",       "ami"},
        {"i-bnn",       "bnn"},
        {"i-default",   "en-x-i-default"},      // fallback
        {"i-enochian",  "und-x-i-enochian"},    // fallback
        {"i-hak",       "hak"},
        {"i-klingon",   "tlh"},
        {"i-lux",       "lb"},
        {"i-mingo",     "see-x-i-mingo"},       // fallback
        {"i-navajo",    "nv"},
        {"i-pwn",       "pwn"},
        {"i-tao",       "tao"},
        {"i-tay",       "tay"},
        {"i-tsu",       "tsu"},
        {"no-bok",      "nb"},
        {"no-nyn",      "nn"},
        {"sgn-BE-FR",   "sfb"},
        {"sgn-BE-NL",   "vgt"},
        {"sgn-CH-DE",   "sgg"},
        {"zh-guoyu",    "cmn"},
        {"zh-hakka",    "hak"},
        {"zh-min",      "nan-x-zh-min"},        // fallback
        {"zh-min-nan",  "nan"},
        {"zh-xiang",    "hsn"},
    };
    for (int row = 0; row < table.length; row++) {
        String[] pair = table[row];
        AsciiUtil.CaseInsensitiveKey key = new AsciiUtil.CaseInsensitiveKey(pair[0]);
        GRANDFATHERED.put(key, pair);
    }
}
111bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
/**
 * Private so that instances are only created inside this class; {@code parse}
 * builds one and fills it in.
 */
private LanguageTag() {
    // Nothing to do: every field keeps its declared initial value.
}
114bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
115bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    /*
116bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * BNF in RFC5464
117bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
118bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * Language-Tag  = langtag             ; normal language tags
119bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / privateuse          ; private use tag
120bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / grandfathered       ; grandfathered tags
121bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
122bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
123bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * langtag       = language
124bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                 ["-" script]
125bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                 ["-" region]
126bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                 *("-" variant)
127bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                 *("-" extension)
128bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                 ["-" privateuse]
129bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
130bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * language      = 2*3ALPHA            ; shortest ISO 639 code
131bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                 ["-" extlang]       ; sometimes followed by
132bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                                     ; extended language subtags
133bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / 4ALPHA              ; or reserved for future use
134bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / 5*8ALPHA            ; or registered language subtag
135bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
136bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * extlang       = 3ALPHA              ; selected ISO 639 codes
137bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                 *2("-" 3ALPHA)      ; permanently reserved
138bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
139bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * script        = 4ALPHA              ; ISO 15924 code
140bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
141bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * region        = 2ALPHA              ; ISO 3166-1 code
142bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / 3DIGIT              ; UN M.49 code
143bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
144bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * variant       = 5*8alphanum         ; registered variants
145bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / (DIGIT 3alphanum)
146bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
147bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * extension     = singleton 1*("-" (2*8alphanum))
148bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
149bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                                     ; Single alphanumerics
150bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *                                     ; "x" reserved for private use
151bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * singleton     = DIGIT               ; 0 - 9
152bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / %x41-57             ; A - W
153bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / %x59-5A             ; Y - Z
154bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / %x61-77             ; a - w
155bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *               / %x79-7A             ; y - z
156bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
157bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     * privateuse    = "x" 1*("-" (1*8alphanum))
158bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     *
159bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert     */
160bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static LanguageTag parse(String languageTag, ParseStatus sts) {
161bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (sts == null) {
162bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sts = new ParseStatus();
163bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        } else {
164bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sts.reset();
165bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
166bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
167bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        StringTokenIterator itr;
168bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
169bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // Check if the tag is grandfathered
170bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
171bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (gfmap != null) {
172bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // use preferred mapping
173bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            itr = new StringTokenIterator(gfmap[1], SEP);
174bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        } else {
175bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            itr = new StringTokenIterator(languageTag, SEP);
176bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
177bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
178bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        LanguageTag tag = new LanguageTag();
179bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
180bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // langtag must start with either language or privateuse
181bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (tag.parseLanguage(itr, sts)) {
182bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag.parseExtlangs(itr, sts);
183bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag.parseScript(itr, sts);
184bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag.parseRegion(itr, sts);
185bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag.parseVariants(itr, sts);
186bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag.parseExtensions(itr, sts);
187bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
188bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        tag.parsePrivateuse(itr, sts);
189bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
190bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (!itr.isDone() && !sts.isError()) {
191bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            String s = itr.current();
192bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sts._errorIndex = itr.currentStart();
193bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (s.length() == 0) {
194bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sts._errorMsg = "Empty subtag";
195bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            } else {
196bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sts._errorMsg = "Invalid subtag: " + s;
197bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
198bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
199bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
200bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return tag;
201bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
202bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
203bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
204bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    // Language subtag parsers
205bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
206bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
207bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private boolean parseLanguage(StringTokenIterator itr, ParseStatus sts) {
208bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (itr.isDone() || sts.isError()) {
209bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return false;
210bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
211bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
212bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        boolean found = false;
213bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
214bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String s = itr.current();
215bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (isLanguage(s)) {
216bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            found = true;
217bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            _language = s;
218bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sts._parseLength = itr.currentEnd();
219bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            itr.next();
220bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
221bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
222bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return found;
223bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
224bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
225bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) {
226bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (itr.isDone() || sts.isError()) {
227bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return false;
228bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
229bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
230bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        boolean found = false;
231bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
232bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        while (!itr.isDone()) {
233bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            String s = itr.current();
234bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (!isExtlang(s)) {
235bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                break;
236bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
237bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            found = true;
238bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (_extlangs.isEmpty()) {
239bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                _extlangs = new ArrayList<String>(3);
240bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
241bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            _extlangs.add(s);
242bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sts._parseLength = itr.currentEnd();
243bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            itr.next();
244bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
245bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (_extlangs.size() == 3) {
246bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // Maximum 3 extlangs
247bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                break;
248bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
249bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
250bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
251bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return found;
252bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
253bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
254bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private boolean parseScript(StringTokenIterator itr, ParseStatus sts) {
255bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (itr.isDone() || sts.isError()) {
256bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return false;
257bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
258bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
259bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        boolean found = false;
260bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
261bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String s = itr.current();
262bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (isScript(s)) {
263bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            found = true;
264bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            _script = s;
265bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sts._parseLength = itr.currentEnd();
266bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            itr.next();
267bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
268bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
269bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return found;
270bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
271bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
272bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) {
273bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (itr.isDone() || sts.isError()) {
274bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return false;
275bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
276bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
277bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        boolean found = false;
278bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
279bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String s = itr.current();
280bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (isRegion(s)) {
281bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            found = true;
282bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            _region = s;
283bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sts._parseLength = itr.currentEnd();
284bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            itr.next();
285bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
286bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
287bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return found;
288bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
289bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
290bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) {
291bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (itr.isDone() || sts.isError()) {
292bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return false;
293bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
294bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
295bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        boolean found = false;
296bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
297bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        while (!itr.isDone()) {
298bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            String s = itr.current();
299bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (!isVariant(s)) {
300bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                break;
301bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
302bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            found = true;
303bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (_variants.isEmpty()) {
304bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                _variants = new ArrayList<String>(3);
305bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
306bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            _variants.add(s);
307bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sts._parseLength = itr.currentEnd();
308bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            itr.next();
309bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
310bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
311bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return found;
312bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
313bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
314bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) {
315bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (itr.isDone() || sts.isError()) {
316bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return false;
317bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
318bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
319bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        boolean found = false;
320bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
321bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        while (!itr.isDone()) {
322bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            String s = itr.current();
323bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (isExtensionSingleton(s)) {
324bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                int start = itr.currentStart();
325bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                String singleton = s;
326bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                StringBuilder sb = new StringBuilder(singleton);
327bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
328bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                itr.next();
329bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                while (!itr.isDone()) {
330bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    s = itr.current();
331bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (isExtensionSubtag(s)) {
332bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        sb.append(SEP).append(s);
333bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        sts._parseLength = itr.currentEnd();
334bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    } else {
335bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        break;
336bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    }
337bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    itr.next();
338bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
339bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
340bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (sts._parseLength <= start) {
341bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    sts._errorIndex = start;
342bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    sts._errorMsg = "Incomplete extension '" + singleton + "'";
343bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    break;
344bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
345bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
346bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (_extensions.size() == 0) {
347bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    _extensions = new ArrayList<String>(4);
348bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
349bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                _extensions.add(sb.toString());
350bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                found = true;
351bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            } else {
352bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                break;
353bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
354bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
355bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return found;
356bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
357bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
358bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) {
359bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (itr.isDone() || sts.isError()) {
360bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return false;
361bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
362bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
363bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        boolean found = false;
364bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
365bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String s = itr.current();
366bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (isPrivateusePrefix(s)) {
367bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            int start = itr.currentStart();
368bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            StringBuilder sb = new StringBuilder(s);
369bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
370bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            itr.next();
371bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            while (!itr.isDone()) {
372bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                s = itr.current();
373bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (!isPrivateuseSubtag(s)) {
374bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    break;
375bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
376bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sb.append(SEP).append(s);
377bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sts._parseLength = itr.currentEnd();
378bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
379bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                itr.next();
380bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
381bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
382bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (sts._parseLength <= start) {
383bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // need at least 1 private subtag
384bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sts._errorIndex = start;
385bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sts._errorMsg = "Incomplete privateuse";
386bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            } else {
387bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                _privateuse = sb.toString();
388bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                found = true;
389bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
390bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
391bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
392bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return found;
393bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
394bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
395bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) {
396bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        LanguageTag tag = new LanguageTag();
397bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
398bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String language = baseLocale.getLanguage();
399bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String script = baseLocale.getScript();
400bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String region = baseLocale.getRegion();
401bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String variant = baseLocale.getVariant();
402bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
403bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        boolean hasSubtag = false;
404bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
405bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String privuseVar = null;   // store ill-formed variant subtags
406bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
407bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (language.length() > 0 && isLanguage(language)) {
408bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // Convert a deprecated language code used by Java to
409bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // a new code
410bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (language.equals("iw")) {
411bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                language = "he";
412bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            } else if (language.equals("ji")) {
413bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                language = "yi";
414bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            } else if (language.equals("in")) {
415bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                language = "id";
416bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
417bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag._language = language;
418bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
419bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
420bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (script.length() > 0 && isScript(script)) {
421bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag._script = canonicalizeScript(script);
422bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            hasSubtag = true;
423bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
424bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
425bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (region.length() > 0 && isRegion(region)) {
426bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag._region = canonicalizeRegion(region);
427bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            hasSubtag = true;
428bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
429bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
430bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (JDKIMPL) {
431bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // Special handling for no_NO_NY - use nn_NO for language tag
432bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (tag._language.equals("no") && tag._region.equals("NO") && variant.equals("NY")) {
433bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                tag._language = "nn";
434bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                variant = "";
435bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
436bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
437bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
438bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (variant.length() > 0) {
439bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            List<String> variants = null;
440bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP);
441bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            while (!varitr.isDone()) {
442bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                String var = varitr.current();
443bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (!isVariant(var)) {
444bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    break;
445bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
446bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (variants == null) {
447bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    variants = new ArrayList<String>();
448bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
449bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (JDKIMPL) {
450bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    variants.add(var);  // Do not canonicalize!
451bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                } else {
452bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    variants.add(canonicalizeVariant(var));
453bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
454bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                varitr.next();
455bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
456bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (variants != null) {
457bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                tag._variants = variants;
458bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                hasSubtag = true;
459bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
460bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (!varitr.isDone()) {
461bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                // ill-formed variant subtags
462bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                StringBuilder buf = new StringBuilder();
463bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                while (!varitr.isDone()) {
464bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    String prvv = varitr.current();
465bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (!isPrivateuseSubtag(prvv)) {
466bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        // cannot use private use subtag - truncated
467bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        break;
468bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    }
469bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (buf.length() > 0) {
470bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        buf.append(SEP);
471bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    }
472bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    if (!JDKIMPL) {
473bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                        prvv = AsciiUtil.toLowerString(prvv);
474bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    }
475bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    buf.append(prvv);
476bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    varitr.next();
477bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
478bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (buf.length() > 0) {
479bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    privuseVar = buf.toString();
480bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
481bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
482bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
483bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
484bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        List<String> extensions = null;
485bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        String privateuse = null;
486bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
487bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        Set<Character> locextKeys = localeExtensions.getKeys();
488bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        for (Character locextKey : locextKeys) {
489bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            Extension ext = localeExtensions.getExtension(locextKey);
490bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (isPrivateusePrefixChar(locextKey.charValue())) {
491bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                privateuse = ext.getValue();
492bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            } else {
493bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                if (extensions == null) {
494bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    extensions = new ArrayList<String>();
495bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                }
496bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                extensions.add(locextKey.toString() + SEP + ext.getValue());
497bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
498bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
499bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
500bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (extensions != null) {
501bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag._extensions = extensions;
502bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            hasSubtag = true;
503bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
504bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
505bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // append ill-formed variant subtags to private use
506bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (privuseVar != null) {
507bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (privateuse == null) {
508bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                privateuse = PRIVUSE_VARIANT_PREFIX + SEP + privuseVar;
509bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            } else {
510bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                privateuse = privateuse + SEP + PRIVUSE_VARIANT_PREFIX + SEP + privuseVar.replace(BaseLocale.SEP, SEP);
511bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
512bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
513bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
514bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (privateuse != null) {
515bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag._privateuse = privateuse;
516bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
517bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
518bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (tag._language.length() == 0 && (hasSubtag || privateuse == null)) {
519bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // use lang "und" when 1) no language is available AND
520bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // 2) any of other subtags other than private use are available or
521bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            // no private use tag is available
522bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            tag._language = UNDETERMINED;
523bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
524bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
525bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return tag;
526bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
527bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
528bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
529bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    // Getter methods for language subtag fields
530bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
531bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
532bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public String getLanguage() {
533bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return _language;
534bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
535bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
536bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public List<String> getExtlangs() {
537bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return Collections.unmodifiableList(_extlangs);
538bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
539bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
540bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public String getScript() {
541bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return _script;
542bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
543bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
544bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public String getRegion() {
545bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return _region;
546bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
547bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
548bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public List<String> getVariants() {
549bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return Collections.unmodifiableList(_variants);
550bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
551bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
552bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public List<String> getExtensions() {
553bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return Collections.unmodifiableList(_extensions);
554bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
555bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
556bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public String getPrivateuse() {
557bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return _privateuse;
558bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
559bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
560bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
561bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    // Language subtag syntax checking methods
562bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
563bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
564bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isLanguage(String s) {
565bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // language      = 2*3ALPHA            ; shortest ISO 639 code
566bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //                 ["-" extlang]       ; sometimes followed by
567bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //                                     ;   extended language subtags
568bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //               / 4ALPHA              ; or reserved for future use
569bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //               / 5*8ALPHA            ; or registered language subtag
570bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s);
571bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
572bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
573bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isExtlang(String s) {
574bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // extlang       = 3ALPHA              ; selected ISO 639 codes
575bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //                 *2("-" 3ALPHA)      ; permanently reserved
576bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return (s.length() == 3) && AsciiUtil.isAlphaString(s);
577bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
578bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
579bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isScript(String s) {
580bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // script        = 4ALPHA              ; ISO 15924 code
581bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return (s.length() == 4) && AsciiUtil.isAlphaString(s);
582bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
583bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
584bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isRegion(String s) {
585bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // region        = 2ALPHA              ; ISO 3166-1 code
586bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //               / 3DIGIT              ; UN M.49 code
587bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return ((s.length() == 2) && AsciiUtil.isAlphaString(s))
588bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                || ((s.length() == 3) && AsciiUtil.isNumericString(s));
589bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
590bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
591bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isVariant(String s) {
592bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // variant       = 5*8alphanum         ; registered variants
593bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //               / (DIGIT 3alphanum)
594bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        int len = s.length();
595bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (len >= 5 && len <= 8) {
596bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return AsciiUtil.isAlphaNumericString(s);
597bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
598bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (len == 4) {
599bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            return AsciiUtil.isNumeric(s.charAt(0))
600bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    && AsciiUtil.isAlphaNumeric(s.charAt(1))
601bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    && AsciiUtil.isAlphaNumeric(s.charAt(2))
602bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                    && AsciiUtil.isAlphaNumeric(s.charAt(3));
603bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
604bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return false;
605bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
606bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
607bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isExtensionSingleton(String s) {
608bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // singleton     = DIGIT               ; 0 - 9
609bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //               / %x41-57             ; A - W
610bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //               / %x59-5A             ; Y - Z
611bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //               / %x61-77             ; a - w
612bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        //               / %x79-7A             ; y - z
613bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
614bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return (s.length() == 1)
615bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                && AsciiUtil.isAlphaString(s)
616bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s);
617bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
618bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
619bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isExtensionSingletonChar(char c) {
620bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return isExtensionSingleton(String.valueOf(c));
621bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
622bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
623bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isExtensionSubtag(String s) {
624bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // extension     = singleton 1*("-" (2*8alphanum))
625bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);
626bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
627bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
628bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isPrivateusePrefix(String s) {
629bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // privateuse    = "x" 1*("-" (1*8alphanum))
630bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return (s.length() == 1)
631bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                && AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s);
632bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
633bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
634bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isPrivateusePrefixChar(char c) {
635bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return (AsciiUtil.caseIgnoreMatch(PRIVATEUSE, String.valueOf(c)));
636bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
637bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
638bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static boolean isPrivateuseSubtag(String s) {
639bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        // privateuse    = "x" 1*("-" (1*8alphanum))
640bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);
641bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
642bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
643bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
644bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    // Language subtag canonicalization methods
645bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    //
646bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
647bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizeLanguage(String s) {
648bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toLowerString(s);
649bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
650bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
651bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizeExtlang(String s) {
652bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toLowerString(s);
653bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
654bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
655bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizeScript(String s) {
656bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toTitleString(s);
657bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
658bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
659bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizeRegion(String s) {
660bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toUpperString(s);
661bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
662bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
663bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizeVariant(String s) {
664bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toLowerString(s);
665bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
666bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
667bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizeExtension(String s) {
668bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toLowerString(s);
669bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
670bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
671bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizeExtensionSingleton(String s) {
672bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toLowerString(s);
673bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
674bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
675bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizeExtensionSubtag(String s) {
676bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toLowerString(s);
677bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
678bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
679bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizePrivateuse(String s) {
680bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toLowerString(s);
681bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
682bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
683bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public static String canonicalizePrivateuseSubtag(String s) {
684bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return AsciiUtil.toLowerString(s);
685bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
686bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
687bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    public String toString() {
688bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        StringBuilder sb = new StringBuilder();
689bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
690bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (_language.length() > 0) {
691bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sb.append(_language);
692bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
693bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            for (String extlang : _extlangs) {
694bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sb.append(SEP).append(extlang);
695bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
696bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
697bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (_script.length() > 0) {
698bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sb.append(SEP).append(_script);
699bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
700bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
701bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (_region.length() > 0) {
702bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sb.append(SEP).append(_region);
703bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
704bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
705bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            for (String variant : _extlangs) {
706bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sb.append(SEP).append(variant);
707bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
708bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
709bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            for (String extension : _extensions) {
710bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sb.append(SEP).append(extension);
711bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
712bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
713bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        if (_privateuse.length() > 0) {
714bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            if (sb.length() > 0) {
715bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert                sb.append(SEP);
716bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            }
717bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert            sb.append(_privateuse);
718bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        }
719bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
720bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert        return sb.toString();
721bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert    }
722bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert}
723