/* GENERATED SOURCE. DO NOT MODIFY. */ // © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html#License /* ****************************************************************************** * Copyright (C) 2003-2011, International Business Machines Corporation and * * others. All Rights Reserved. * ****************************************************************************** */ package android.icu.impl; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; import android.icu.impl.locale.AsciiUtil; /** * Utility class to parse and normalize locale ids (including POSIX style) * @hide Only a subset of ICU is exposed in Android */ public final class LocaleIDParser { /** * Char array representing the locale ID. */ private char[] id; /** * Current position in {@link #id} (while parsing). */ private int index; /** * Temporary buffer for parsed sections of data. */ private StringBuilder buffer; // um, don't handle POSIX ids unless we request it. why not? well... because. private boolean canonicalize; private boolean hadCountry; // used when canonicalizing Map keywords; String baseName; /** * Parsing constants. */ private static final char KEYWORD_SEPARATOR = '@'; private static final char HYPHEN = '-'; private static final char KEYWORD_ASSIGN = '='; private static final char COMMA = ','; private static final char ITEM_SEPARATOR = ';'; private static final char DOT = '.'; private static final char UNDERSCORE = '_'; public LocaleIDParser(String localeID) { this(localeID, false); } public LocaleIDParser(String localeID, boolean canonicalize) { id = localeID.toCharArray(); index = 0; buffer = new StringBuilder(id.length + 5); this.canonicalize = canonicalize; } private void reset() { index = 0; buffer = new StringBuilder(id.length + 5); } // utilities for working on text in the buffer /** * Append c to the buffer. */ private void append(char c) { buffer.append(c); } private void addSeparator() { append(UNDERSCORE); } /** * Returns the text in the buffer from start to blen as a String. */ private String getString(int start) { return buffer.substring(start); } /** * Set the length of the buffer to pos, then append the string. */ private void set(int pos, String s) { buffer.delete(pos, buffer.length()); buffer.insert(pos, s); } /** * Append the string to the buffer. */ private void append(String s) { buffer.append(s); } // utilities for parsing text out of the id /** * Character to indicate no more text is available in the id. */ private static final char DONE = '\uffff'; /** * Returns the character at index in the id, and advance index. The returned character * is DONE if index was at the limit of the buffer. The index is advanced regardless * so that decrementing the index will always 'unget' the last character returned. */ private char next() { if (index == id.length) { index++; return DONE; } return id[index++]; } /** * Advance index until the next terminator or id separator, and leave it there. */ private void skipUntilTerminatorOrIDSeparator() { while (!isTerminatorOrIDSeparator(next())); --index; } /** * Returns true if the character at index in the id is a terminator. */ private boolean atTerminator() { return index >= id.length || isTerminator(id[index]); } /** * Returns true if the character is a terminator (keyword separator, dot, or DONE). * Dot is a terminator because of the POSIX form, where dot precedes the codepage. */ private boolean isTerminator(char c) { // always terminate at DOT, even if not handling POSIX. It's an error... return c == KEYWORD_SEPARATOR || c == DONE || c == DOT; } /** * Returns true if the character is a terminator or id separator. */ private boolean isTerminatorOrIDSeparator(char c) { return c == UNDERSCORE || c == HYPHEN || isTerminator(c); } /** * Returns true if the start of the buffer has an experimental or private language * prefix, the pattern '[ixIX][-_].' shows the syntax checked. */ private boolean haveExperimentalLanguagePrefix() { if (id.length > 2) { char c = id[1]; if (c == HYPHEN || c == UNDERSCORE) { c = id[0]; return c == 'x' || c == 'X' || c == 'i' || c == 'I'; } } return false; } /** * Returns true if a value separator occurs at or after index. */ private boolean haveKeywordAssign() { // assume it is safe to start from index for (int i = index; i < id.length; ++i) { if (id[i] == KEYWORD_ASSIGN) { return true; } } return false; } /** * Advance index past language, and accumulate normalized language code in buffer. * Index must be at 0 when this is called. Index is left at a terminator or id * separator. Returns the start of the language code in the buffer. */ private int parseLanguage() { int startLength = buffer.length(); if (haveExperimentalLanguagePrefix()) { append(AsciiUtil.toLower(id[0])); append(HYPHEN); index = 2; } char c; while(!isTerminatorOrIDSeparator(c = next())) { append(AsciiUtil.toLower(c)); } --index; // unget if (buffer.length() - startLength == 3) { String lang = LocaleIDs.threeToTwoLetterLanguage(getString(0)); if (lang != null) { set(0, lang); } } return 0; } /** * Advance index past language. Index must be at 0 when this is called. Index * is left at a terminator or id separator. */ private void skipLanguage() { if (haveExperimentalLanguagePrefix()) { index = 2; } skipUntilTerminatorOrIDSeparator(); } /** * Advance index past script, and accumulate normalized script in buffer. * Index must be immediately after the language. * If the item at this position is not a script (is not four characters * long) leave index and buffer unchanged. Otherwise index is left at * a terminator or id separator. Returns the start of the script code * in the buffer (this may be equal to the buffer length, if there is no * script). */ private int parseScript() { if (!atTerminator()) { int oldIndex = index; // save original index ++index; int oldBlen = buffer.length(); // get before append hyphen, if we truncate everything is undone char c; boolean firstPass = true; while(!isTerminatorOrIDSeparator(c = next()) && AsciiUtil.isAlpha(c)) { if (firstPass) { addSeparator(); append(AsciiUtil.toUpper(c)); firstPass = false; } else { append(AsciiUtil.toLower(c)); } } --index; // unget /* If it's not exactly 4 characters long, then it's not a script. */ if (index - oldIndex != 5) { // +1 to account for separator index = oldIndex; buffer.delete(oldBlen, buffer.length()); } else { oldBlen++; // index past hyphen, for clients who want to extract just the script } return oldBlen; } return buffer.length(); } /** * Advance index past script. * Index must be immediately after the language and IDSeparator. * If the item at this position is not a script (is not four characters * long) leave index. Otherwise index is left at a terminator or * id separator. */ private void skipScript() { if (!atTerminator()) { int oldIndex = index; ++index; char c; while (!isTerminatorOrIDSeparator(c = next()) && AsciiUtil.isAlpha(c)); --index; if (index - oldIndex != 5) { // +1 to account for separator index = oldIndex; } } } /** * Advance index past country, and accumulate normalized country in buffer. * Index must be immediately after the script (if there is one, else language) * and IDSeparator. Return the start of the country code in the buffer. */ private int parseCountry() { if (!atTerminator()) { int oldIndex = index; ++index; int oldBlen = buffer.length(); char c; boolean firstPass = true; while (!isTerminatorOrIDSeparator(c = next())) { if (firstPass) { // first, add hyphen hadCountry = true; // we have a country, let variant parsing know addSeparator(); ++oldBlen; // increment past hyphen firstPass = false; } append(AsciiUtil.toUpper(c)); } --index; // unget int charsAppended = buffer.length() - oldBlen; if (charsAppended == 0) { // Do nothing. } else if (charsAppended < 2 || charsAppended > 3) { // It's not a country, so return index and blen to // their previous values. index = oldIndex; --oldBlen; buffer.delete(oldBlen, buffer.length()); hadCountry = false; } else if (charsAppended == 3) { String region = LocaleIDs.threeToTwoLetterRegion(getString(oldBlen)); if (region != null) { set(oldBlen, region); } } return oldBlen; } return buffer.length(); } /** * Advance index past country. * Index must be immediately after the script (if there is one, else language) * and IDSeparator. */ private void skipCountry() { if (!atTerminator()) { if (id[index] == UNDERSCORE || id[index] == HYPHEN) { ++index; } /* * Save the index point after the separator, since the format * requires two separators if the country is not present. */ int oldIndex = index; skipUntilTerminatorOrIDSeparator(); int charsSkipped = index - oldIndex; if (charsSkipped < 2 || charsSkipped > 3) { index = oldIndex; } } } /** * Advance index past variant, and accumulate normalized variant in buffer. This ignores * the codepage information from POSIX ids. Index must be immediately after the country * or script. Index is left at the keyword separator or at the end of the text. Return * the start of the variant code in the buffer. * * In standard form, we can have the following forms: * ll__VVVV * ll_CC_VVVV * ll_Ssss_VVVV * ll_Ssss_CC_VVVV * * This also handles POSIX ids, which can have the following forms (pppp is code page id): * ll_CC.pppp --> ll_CC * ll_CC.pppp@VVVV --> ll_CC_VVVV * ll_CC@VVVV --> ll_CC_VVVV * * We identify this use of '@' in POSIX ids by looking for an '=' following * the '@'. If there is one, we consider '@' to start a keyword list, instead of * being part of a POSIX id. * * Note: since it was decided that we want an option to not handle POSIX ids, this * becomes a bit more complex. */ private int parseVariant() { int oldBlen = buffer.length(); boolean start = true; boolean needSeparator = true; boolean skipping = false; char c; boolean firstPass = true; while ((c = next()) != DONE) { if (c == DOT) { start = false; skipping = true; } else if (c == KEYWORD_SEPARATOR) { if (haveKeywordAssign()) { break; } skipping = false; start = false; needSeparator = true; // add another underscore if we have more text } else if (start) { start = false; if (c != UNDERSCORE && c != HYPHEN) { index--; } } else if (!skipping) { if (needSeparator) { needSeparator = false; if (firstPass && !hadCountry) { // no country, we'll need two addSeparator(); ++oldBlen; // for sure } addSeparator(); if (firstPass) { // only for the first separator ++oldBlen; firstPass = false; } } c = AsciiUtil.toUpper(c); if (c == HYPHEN || c == COMMA) { c = UNDERSCORE; } append(c); } } --index; // unget return oldBlen; } // no need for skipvariant, to get the keywords we'll just scan directly for // the keyword separator /** * Returns the normalized language id, or the empty string. */ public String getLanguage() { reset(); return getString(parseLanguage()); } /** * Returns the normalized script id, or the empty string. */ public String getScript() { reset(); skipLanguage(); return getString(parseScript()); } /** * return the normalized country id, or the empty string. */ public String getCountry() { reset(); skipLanguage(); skipScript(); return getString(parseCountry()); } /** * Returns the normalized variant id, or the empty string. */ public String getVariant() { reset(); skipLanguage(); skipScript(); skipCountry(); return getString(parseVariant()); } /** * Returns the language, script, country, and variant as separate strings. */ public String[] getLanguageScriptCountryVariant() { reset(); return new String[] { getString(parseLanguage()), getString(parseScript()), getString(parseCountry()), getString(parseVariant()) }; } public void setBaseName(String baseName) { this.baseName = baseName; } public void parseBaseName() { if (baseName != null) { set(0, baseName); } else { reset(); parseLanguage(); parseScript(); parseCountry(); parseVariant(); // catch unwanted trailing underscore after country if there was no variant int len = buffer.length(); if (len > 0 && buffer.charAt(len - 1) == UNDERSCORE) { buffer.deleteCharAt(len - 1); } } } /** * Returns the normalized base form of the locale id. The base * form does not include keywords. */ public String getBaseName() { if (baseName != null) { return baseName; } parseBaseName(); return getString(0); } /** * Returns the normalized full form of the locale id. The full * form includes keywords if they are present. */ public String getName() { parseBaseName(); parseKeywords(); return getString(0); } // keyword utilities /** * If we have keywords, advance index to the start of the keywords and return true, * otherwise return false. */ private boolean setToKeywordStart() { for (int i = index; i < id.length; ++i) { if (id[i] == KEYWORD_SEPARATOR) { if (canonicalize) { for (int j = ++i; j < id.length; ++j) { // increment i past separator for return if (id[j] == KEYWORD_ASSIGN) { index = i; return true; } } } else { if (++i < id.length) { index = i; return true; } } break; } } return false; } private static boolean isDoneOrKeywordAssign(char c) { return c == DONE || c == KEYWORD_ASSIGN; } private static boolean isDoneOrItemSeparator(char c) { return c == DONE || c == ITEM_SEPARATOR; } private String getKeyword() { int start = index; while (!isDoneOrKeywordAssign(next())) { } --index; return AsciiUtil.toLowerString(new String(id, start, index-start).trim()); } private String getValue() { int start = index; while (!isDoneOrItemSeparator(next())) { } --index; return new String(id, start, index-start).trim(); // leave case alone } private Comparator getKeyComparator() { final Comparator comp = new Comparator() { @Override public int compare(String lhs, String rhs) { return lhs.compareTo(rhs); } }; return comp; } /** * Returns a map of the keywords and values, or null if there are none. */ public Map getKeywordMap() { if (keywords == null) { TreeMap m = null; if (setToKeywordStart()) { // trim spaces and convert to lower case, both keywords and values. do { String key = getKeyword(); if (key.length() == 0) { break; } char c = next(); if (c != KEYWORD_ASSIGN) { // throw new IllegalArgumentException("key '" + key + "' missing a value."); if (c == DONE) { break; } else { continue; } } String value = getValue(); if (value.length() == 0) { // throw new IllegalArgumentException("key '" + key + "' missing a value."); continue; } if (m == null) { m = new TreeMap(getKeyComparator()); } else if (m.containsKey(key)) { // throw new IllegalArgumentException("key '" + key + "' already has a value."); continue; } m.put(key, value); } while (next() == ITEM_SEPARATOR); } keywords = m != null ? m : Collections.emptyMap(); } return keywords; } /** * Parse the keywords and return start of the string in the buffer. */ private int parseKeywords() { int oldBlen = buffer.length(); Map m = getKeywordMap(); if (!m.isEmpty()) { boolean first = true; for (Map.Entry e : m.entrySet()) { append(first ? KEYWORD_SEPARATOR : ITEM_SEPARATOR); first = false; append(e.getKey()); append(KEYWORD_ASSIGN); append(e.getValue()); } if (first == false) { ++oldBlen; } } return oldBlen; } /** * Returns an iterator over the keywords, or null if we have an empty map. */ public Iterator getKeywords() { Map m = getKeywordMap(); return m.isEmpty() ? null : m.keySet().iterator(); } /** * Returns the value for the named keyword, or null if the keyword is not * present. */ public String getKeywordValue(String keywordName) { Map m = getKeywordMap(); return m.isEmpty() ? null : m.get(AsciiUtil.toLowerString(keywordName.trim())); } /** * Set the keyword value only if it is not already set to something else. */ public void defaultKeywordValue(String keywordName, String value) { setKeywordValue(keywordName, value, false); } /** * Set the value for the named keyword, or unset it if value is null. If * keywordName itself is null, unset all keywords. If keywordName is not null, * value must not be null. */ public void setKeywordValue(String keywordName, String value) { setKeywordValue(keywordName, value, true); } /** * Set the value for the named keyword, or unset it if value is null. If * keywordName itself is null, unset all keywords. If keywordName is not null, * value must not be null. If reset is true, ignore any previous value for * the keyword, otherwise do not change the keyword (including removal of * one or all keywords). */ private void setKeywordValue(String keywordName, String value, boolean reset) { if (keywordName == null) { if (reset) { // force new map, ignore value keywords = Collections.emptyMap(); } } else { keywordName = AsciiUtil.toLowerString(keywordName.trim()); if (keywordName.length() == 0) { throw new IllegalArgumentException("keyword must not be empty"); } if (value != null) { value = value.trim(); if (value.length() == 0) { throw new IllegalArgumentException("value must not be empty"); } } Map m = getKeywordMap(); if (m.isEmpty()) { // it is EMPTY_MAP if (value != null) { // force new map keywords = new TreeMap(getKeyComparator()); keywords.put(keywordName, value.trim()); } } else { if (reset || !m.containsKey(keywordName)) { if (value != null) { m.put(keywordName, value); } else { m.remove(keywordName); if (m.isEmpty()) { // force new map keywords = Collections.emptyMap(); } } } } } } }