ICU.java revision a94266074c7b82720fd2cecfb37ab8da85f1b296
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package libcore.icu;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import libcore.util.BasicLruCache;

/**
 * Makes ICU data accessible to Java.
 */
public final class ICU {
    /** Cache of best date/time patterns, keyed on {@code skeleton + "\t" + languageTag}. */
    private static final BasicLruCache<String, String> CACHED_PATTERNS =
            new BasicLruCache<String, String>(8);

    /** Lazily populated result of {@link #getAvailableLocalesNative()}, converted to locales. */
    private static Locale[] availableLocalesCache;

    /** Lazily populated result of {@link #getISOCountriesNative()}. */
    private static String[] isoCountries;

    /** Lazily populated result of {@link #getISOLanguagesNative()}. */
    private static String[] isoLanguages;

    /**
     * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
     * The returned array is a copy, so callers may modify it freely.
     */
    public static String[] getISOLanguages() {
        if (isoLanguages == null) {
            isoLanguages = getISOLanguagesNative();
        }
        return isoLanguages.clone();
    }

    /**
     * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
     * The returned array is a copy, so callers may modify it freely.
     */
    public static String[] getISOCountries() {
        if (isoCountries == null) {
            isoCountries = getISOCountriesNative();
        }
        return isoCountries.clone();
    }

    /**
     * Converts a language tag to a {@code Locale} by asking ICU for the equivalent
     * ICU locale ID and then parsing that ID with {@link #localeFromIcuLocaleId}.
     *
     * @param languageTag the language tag to convert
     * @param strict passed through unchanged to the native conversion
     * @return the parsed locale, or {@code null} if the native conversion returned {@code null}
     */
    public static Locale forLanguageTag(String languageTag, boolean strict) {
        final String icuLocaleId = localeForLanguageTag(languageTag, strict);
        if (icuLocaleId == null) {
            // TODO: We should probably return "und" here. From what I can tell,
            // this happens only when the language in the languageTag is bad.
            // Investigate this a bit more.
            return null;
        }

        return localeFromIcuLocaleId(icuLocaleId);
    }

    // Indices into the four-element output array of parseLangScriptRegionAndVariants.
    private static final int IDX_LANGUAGE = 0;
    private static final int IDX_SCRIPT = 1;
    private static final int IDX_REGION = 2;
    private static final int IDX_VARIANT = 3;

    /*
     * Parse the {Language, Script, Region, Variant*} section of the ICU locale
     * ID. This is the bit that appears before the keyword separator "@". The general
     * structure is a series of ASCII alphanumeric strings (subtags)
     * separated by underscores.
     *
     * Each subtag is interpreted according to its position in the list of subtags
     * AND its length (groan...). The various cases are explained in comments
     * below.
     *
     * Only the slots that are recognised are written; the caller is expected to
     * pre-fill outputArray (indexed by the IDX_* constants) with defaults.
     */
    private static void parseLangScriptRegionAndVariants(String string,
            String[] outputArray) {
        final int first = string.indexOf('_');
        final int second = string.indexOf('_', first + 1);
        final int third = string.indexOf('_', second + 1);

        if (first == -1) {
            // No separators at all: the whole string is the language.
            outputArray[IDX_LANGUAGE] = string;
        } else if (second == -1) {
            // Language and country ("ja_JP") OR
            // Language and script ("en_Latn") OR
            // Language and variant ("en_POSIX").

            outputArray[IDX_LANGUAGE] = string.substring(0, first);
            final String secondString = string.substring(first + 1);

            if (secondString.length() == 4) {
                // 4 Letter ISO script code.
                outputArray[IDX_SCRIPT] = secondString;
            } else if (secondString.length() == 2 || secondString.length() == 3) {
                // 2 or 3 Letter region code.
                outputArray[IDX_REGION] = secondString;
            } else {
                // If we're here, the length of the second half is 0, 1, or at least 5.
                // Assume that ICU won't hand us malformed tags, and therefore
                // assume the rest of the string is a series of variant tags.
                outputArray[IDX_VARIANT] = secondString;
            }
        } else if (third == -1) {
            // Language and country and variant ("ja_JP_TRADITIONAL") OR
            // Language and script and variant ("en_Latn_POSIX") OR
            // Language and script and region ("en_Latn_US"). OR
            // Language and variant with multiple subtags ("en_POSIX_XISOP")

            outputArray[IDX_LANGUAGE] = string.substring(0, first);
            final String secondString = string.substring(first + 1, second);
            final String thirdString = string.substring(second + 1);

            if (secondString.length() == 4) {
                // The second subtag is a script.
                outputArray[IDX_SCRIPT] = secondString;

                // The third subtag can be either a region or a variant, depending
                // on its length.
                if (thirdString.length() == 2 || thirdString.length() == 3 ||
                        thirdString.isEmpty()) {
                    outputArray[IDX_REGION] = thirdString;
                } else {
                    outputArray[IDX_VARIANT] = thirdString;
                }
            } else if (secondString.isEmpty() ||
                    secondString.length() == 2 || secondString.length() == 3) {
                // The second string is a region, and the third a variant.
                outputArray[IDX_REGION] = secondString;
                outputArray[IDX_VARIANT] = thirdString;
            } else {
                // Variant with multiple subtags.
                outputArray[IDX_VARIANT] = string.substring(first + 1);
            }
        } else {
            // Language, script, region and variant with 1 or more subtags
            // ("en_Latn_US_POSIX") OR
            // Language, region and variant with 2 or more subtags
            // (en_US_POSIX_VARIANT).
            outputArray[IDX_LANGUAGE] = string.substring(0, first);
            final String secondString = string.substring(first + 1, second);
            if (secondString.length() == 4) {
                outputArray[IDX_SCRIPT] = secondString;
                outputArray[IDX_REGION] = string.substring(second + 1, third);
                outputArray[IDX_VARIANT] = string.substring(third + 1);
            } else {
                outputArray[IDX_REGION] = secondString;
                outputArray[IDX_VARIANT] = string.substring(second + 1);
            }
        }
    }

    /**
     * Returns the appropriate {@code Locale} given a {@code String} of the form returned
     * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
     * this method can parse strings that {@code Locale.toString} won't produce.
     * Used to remove duplication.
     *
     * <p>Anything after the first {@code '@'} is treated as a semicolon-separated list of
     * {@code key=value} keywords. Items that contain no {@code '='} are ignored.
     *
     * @param localeId the ICU locale ID to parse; must not be {@code null}
     * @return a locale built from the parsed language, region, variant, script,
     *         unicode attributes, unicode keywords and extensions
     */
    public static Locale localeFromIcuLocaleId(String localeId) {
        // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
        final int extensionsIndex = localeId.indexOf('@');

        // Type-safe empty collections (rather than the raw EMPTY_MAP / EMPTY_SET constants)
        // so that these assignments are checked by the compiler.
        Map<Character, String> extensionsMap = Collections.emptyMap();
        Map<String, String> unicodeKeywordsMap = Collections.emptyMap();
        Set<String> unicodeAttributeSet = Collections.emptySet();

        if (extensionsIndex != -1) {
            extensionsMap = new HashMap<Character, String>();
            unicodeKeywordsMap = new HashMap<String, String>();
            unicodeAttributeSet = new HashSet<String>();

            // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
            // containing all "keywords" it could parse. An ICU keyword is a key-value pair
            // separated by an "=" (ULOC_KEYWORD_ASSIGN).
            //
            // Each keyword item can be one of three things :
            // - A unicode extension attribute list: In this case the item key is "attribute"
            //   and the value is a hyphen separated list of unicode attributes.
            // - A unicode extension keyword: In this case, the item key will be larger than
            //   1 char in length, and the value will be the unicode extension value.
            // - A BCP-47 extension subtag: In this case, the item key will be exactly one
            //   char in length, and the value will be a sequence of unparsed subtags that
            //   represent the extension.
            //
            // Note that this implies that unicode extension keywords are "promoted"
            // to the same namespace as the top level extension subtags and their values.
            // There can't be any collisions in practice because the BCP-47 spec imposes
            // restrictions on their lengths.
            final String extensionsString = localeId.substring(extensionsIndex + 1);
            final String[] extensions = extensionsString.split(";");
            for (String extension : extensions) {
                // This is the special key for the unicode attributes
                if (extension.startsWith("attribute=")) {
                    String unicodeAttributeValues = extension.substring("attribute=".length());
                    for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
                        unicodeAttributeSet.add(unicodeAttribute);
                    }
                } else {
                    final int separatorIndex = extension.indexOf('=');

                    if (separatorIndex == -1) {
                        // Not a key=value pair (for example the empty item produced by a
                        // trailing "@"). There is nothing to record, and substring(0, -1)
                        // below would throw, so skip it.
                        continue;
                    }

                    if (separatorIndex == 1) {
                        // This is a BCP-47 extension subtag.
                        final String value = extension.substring(2);
                        final char extensionId = extension.charAt(0);

                        extensionsMap.put(extensionId, value);
                    } else {
                        // This is a unicode extension keyword.
                        unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
                                extension.substring(separatorIndex + 1));
                    }
                }
            }
        }

        // Every field defaults to the empty string; the parser only overwrites what it finds.
        final String[] outputArray = new String[] { "", "", "", "" };
        if (extensionsIndex == -1) {
            parseLangScriptRegionAndVariants(localeId, outputArray);
        } else {
            parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
                    outputArray);
        }

        return new Locale(outputArray[IDX_LANGUAGE], outputArray[IDX_REGION],
                outputArray[IDX_VARIANT], outputArray[IDX_SCRIPT],
                unicodeAttributeSet, unicodeKeywordsMap, extensionsMap,
                true /* has validated fields */);
    }

    /**
     * Converts an array of ICU locale IDs to an array of distinct {@code Locale}s,
     * preserving the order in which each distinct locale was first seen.
     */
    public static Locale[] localesFromStrings(String[] localeNames) {
        // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
        // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
        // both so that we never need to convert back when talking to it.
        LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
        for (String localeName : localeNames) {
            set.add(localeFromIcuLocaleId(localeName));
        }
        return set.toArray(new Locale[set.size()]);
    }

    /**
     * Returns the locales reported by ICU, either freshly converted or from our cache.
     * The returned array is a copy, so callers may modify it freely.
     */
    public static Locale[] getAvailableLocales() {
        if (availableLocalesCache == null) {
            availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
        }
        return availableLocalesCache.clone();
    }

    public static Locale[] getAvailableBreakIteratorLocales() {
        return localesFromStrings(getAvailableBreakIteratorLocalesNative());
    }

    public static Locale[] getAvailableCalendarLocales() {
        return localesFromStrings(getAvailableCalendarLocalesNative());
    }

    public static Locale[] getAvailableCollatorLocales() {
        return localesFromStrings(getAvailableCollatorLocalesNative());
    }

    public static Locale[] getAvailableDateFormatLocales() {
        return localesFromStrings(getAvailableDateFormatLocalesNative());
    }

    /** Same set as {@link #getAvailableDateFormatLocales()}. */
    public static Locale[] getAvailableDateFormatSymbolsLocales() {
        return getAvailableDateFormatLocales();
    }

    /** Same set as {@link #getAvailableNumberFormatLocales()}. */
    public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
        return getAvailableNumberFormatLocales();
    }

    public static Locale[] getAvailableNumberFormatLocales() {
        return localesFromStrings(getAvailableNumberFormatLocalesNative());
    }

    /**
     * Returns the best date/time pattern for the given skeleton and locale, consulting
     * {@code CACHED_PATTERNS} before calling into native code. Access to the cache is
     * guarded by synchronizing on the cache itself.
     */
    public static String getBestDateTimePattern(String skeleton, Locale locale) {
        String languageTag = locale.toLanguageTag();
        String key = skeleton + "\t" + languageTag;
        synchronized (CACHED_PATTERNS) {
            String pattern = CACHED_PATTERNS.get(key);
            if (pattern == null) {
                pattern = getBestDateTimePatternNative(skeleton, languageTag);
                CACHED_PATTERNS.put(key, pattern);
            }
            return pattern;
        }
    }

    private static native String getBestDateTimePatternNative(String skeleton, String languageTag);

    /**
     * Returns the order in which day, month and year fields first appear in the given
     * date pattern, as a three-element array containing {@code 'd'}, {@code 'M'} and
     * {@code 'y'}. Both {@code 'L'} and {@code 'M'} count as the month and are reported
     * as {@code 'M'}. Slots for fields that never appear are left as {@code '\u0000'}.
     *
     * <p>The era specifier {@code 'G'}, quoted text, doubled quotes, spaces and
     * punctuation are ignored.
     *
     * @param pattern the date pattern to inspect
     * @return a new three-element array describing the field order
     * @throws IllegalArgumentException if the pattern contains any other unquoted ASCII
     *         letter, or an unterminated quote
     */
    public static char[] getDateFormatOrder(String pattern) {
        char[] result = new char[3];
        int resultIndex = 0;
        boolean sawDay = false;
        boolean sawMonth = false;
        boolean sawYear = false;

        for (int i = 0; i < pattern.length(); ++i) {
            char ch = pattern.charAt(i);
            if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
                // Only the first occurrence of each field contributes to the order.
                if (ch == 'd' && !sawDay) {
                    result[resultIndex++] = 'd';
                    sawDay = true;
                } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
                    result[resultIndex++] = 'M';
                    sawMonth = true;
                } else if ((ch == 'y') && !sawYear) {
                    result[resultIndex++] = 'y';
                    sawYear = true;
                }
            } else if (ch == 'G') {
                // Ignore the era specifier, if present.
            } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
                throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
            } else if (ch == '\'') {
                if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
                    // A doubled quote is an escaped literal quote; step onto the second
                    // quote and let the loop increment move past it.
                    ++i;
                } else {
                    // Jump to the closing quote. The loop's own increment then moves to the
                    // character after it; advancing here as well would silently skip that
                    // character (for example the 'M' in "d'x'M").
                    i = pattern.indexOf('\'', i + 1);
                    if (i == -1) {
                        throw new IllegalArgumentException("Bad quoting in " + pattern);
                    }
                }
            } else {
                // Ignore spaces and punctuation.
            }
        }
        return result;
    }

    /**
     * Returns the version of the CLDR data in use, such as "22.1.1".
     */
    public static native String getCldrVersion();

    /**
     * Returns the icu4c version in use, such as "50.1.1".
     */
    public static native String getIcuVersion();

    /**
     * Returns the Unicode version our ICU supports, such as "6.2".
     */
    public static native String getUnicodeVersion();

    // --- Case mapping.

    public static String toLowerCase(String s, Locale locale) {
        return toLowerCase(s, locale.toLanguageTag());
    }

    private static native String toLowerCase(String s, String languageTag);

    public static String toUpperCase(String s, Locale locale) {
        return toUpperCase(s, locale.toLanguageTag());
    }

    private static native String toUpperCase(String s, String languageTag);

    // --- Errors.

    // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
    public static final int U_ZERO_ERROR = 0;
    public static final int U_INVALID_CHAR_FOUND = 10;
    public static final int U_TRUNCATED_CHAR_FOUND = 11;
    public static final int U_ILLEGAL_CHAR_FOUND = 12;
    public static final int U_BUFFER_OVERFLOW_ERROR = 15;

    /**
     * Returns true if {@code error} is strictly greater than {@link #U_ZERO_ERROR}.
     */
    public static boolean U_FAILURE(int error) {
        return error > U_ZERO_ERROR;
    }

    // --- Native methods accessing ICU's database.

    private static native String[] getAvailableBreakIteratorLocalesNative();
    private static native String[] getAvailableCalendarLocalesNative();
    private static native String[] getAvailableCollatorLocalesNative();
    private static native String[] getAvailableDateFormatLocalesNative();
    private static native String[] getAvailableLocalesNative();
    private static native String[] getAvailableNumberFormatLocalesNative();

    public static native String[] getAvailableCurrencyCodes();
    public static native String getCurrencyCode(String countryCode);

    public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
        return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
    }

    private static native String getCurrencyDisplayName(String languageTag, String currencyCode);

    public static native int getCurrencyFractionDigits(String currencyCode);
    public static native int getCurrencyNumericCode(String currencyCode);

    public static String getCurrencySymbol(Locale locale, String currencyCode) {
        return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
    }

    private static native String getCurrencySymbol(String languageTag, String currencyCode);

    public static String getDisplayCountry(Locale targetLocale, Locale locale) {
        return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
    }

    private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);

    public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
        return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
    }

    private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);

    public static String getDisplayVariant(Locale targetLocale, Locale locale) {
        return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
    }

    private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);

    public static String getDisplayScript(Locale targetLocale, Locale locale) {
        return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
    }

    private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);

    public static String getISO3Country(Locale locale) {
        return getISO3CountryNative(locale.toLanguageTag());
    }

    private static native String getISO3CountryNative(String languageTag);

    public static String getISO3Language(Locale locale) {
        return getISO3LanguageNative(locale.toLanguageTag());
    }

    private static native String getISO3LanguageNative(String languageTag);

    public static native String addLikelySubtags(String locale);
    public static native String getScript(String locale);

    private static native String[] getISOLanguagesNative();
    private static native String[] getISOCountriesNative();

    private static native String localeForLanguageTag(String languageTag, boolean strict);

    static native boolean initLocaleDataNative(String locale, LocaleData result);

    public static native void setDefaultLocale(String languageTag);
    public static native String getDefaultLocale();
}