ICU.java revision df624c1cc36dc17e4051d1100a3400aeb4252511
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package libcore.icu;
18
19import java.util.Collections;
20import java.util.HashMap;
21import java.util.HashSet;
22import java.util.LinkedHashSet;
23import java.util.Locale;
24import java.util.Map;
25import java.util.Set;
26import libcore.util.BasicLruCache;
27
/**
 * Makes ICU data accessible to Java.
 */
31public final class ICU {
  // Cache used by getBestDateTimePattern; keys are skeleton + "\t" + language tag and
  // values are the patterns returned by ICU. Accesses are guarded by synchronizing on
  // the cache object itself.
  // NOTE(review): the constructor argument 8 is presumably the maximum entry count --
  // confirm against BasicLruCache.
  private static final BasicLruCache<String, String> CACHED_PATTERNS =
      new BasicLruCache<String, String>(8);

  // Lazily filled by getAvailableLocales(); null until the first call.
  private static Locale[] availableLocalesCache;

  // Lazily filled by getISOCountries(); null until the first call.
  private static String[] isoCountries;

  // Lazily filled by getISOLanguages(); null until the first call.
  private static String[] isoLanguages;
40
41  /**
42   * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
43   */
44  public static String[] getISOLanguages() {
45    if (isoLanguages == null) {
46      isoLanguages = getISOLanguagesNative();
47    }
48    return isoLanguages.clone();
49  }
50
51  /**
52   * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
53   */
54  public static String[] getISOCountries() {
55    if (isoCountries == null) {
56      isoCountries = getISOCountriesNative();
57    }
58    return isoCountries.clone();
59  }
60
  // Slots of the four-element output array filled in by
  // parseLangScriptRegionAndVariants and read back by localeFromIcuLocaleId.
  private static final int IDX_LANGUAGE = 0;
  private static final int IDX_SCRIPT = 1;
  private static final int IDX_REGION = 2;
  private static final int IDX_VARIANT = 3;
65
66  /*
67   * Parse the {Language, Script, Region, Variant*} section of the ICU locale
68   * ID. This is the bit that appears before the keyword separate "@". The general
69   * structure is a series of ASCII alphanumeric strings (subtags)
70   * separated by underscores.
71   *
72   * Each subtag is interpreted according to its position in the list of subtags
73   * AND its length (groan...). The various cases are explained in comments
74   * below.
75   */
76  private static void parseLangScriptRegionAndVariants(String string,
77          String[] outputArray) {
78    final int first = string.indexOf('_');
79    final int second = string.indexOf('_', first + 1);
80    final int third = string.indexOf('_', second + 1);
81
82    if (first == -1) {
83      outputArray[IDX_LANGUAGE] = string;
84    } else if (second == -1) {
85      // Language and country ("ja_JP") OR
86      // Language and script ("en_Latn") OR
87      // Language and variant ("en_POSIX").
88
89      outputArray[IDX_LANGUAGE] = string.substring(0, first);
90      final String secondString = string.substring(first + 1);
91
92      if (secondString.length() == 4) {
93          // 4 Letter ISO script code.
94          outputArray[IDX_SCRIPT] = secondString;
95      } else if (secondString.length() == 2 || secondString.length() == 3) {
96          // 2 or 3 Letter region code.
97          outputArray[IDX_REGION] = secondString;
98      } else {
99          // If we're here, the length of the second half is either 1 or greater
100          // than 5. Assume that ICU won't hand us malformed tags, and therefore
101          // assume the rest of the string is a series of variant tags.
102          outputArray[IDX_VARIANT] = secondString;
103      }
104    } else if (third == -1) {
105      // Language and country and variant ("ja_JP_TRADITIONAL") OR
106      // Language and script and variant ("en_Latn_POSIX") OR
107      // Language and script and region ("en_Latn_US"). OR
108      // Language and variant with multiple subtags ("en_POSIX_XISOP")
109
110      outputArray[IDX_LANGUAGE] = string.substring(0, first);
111      final String secondString = string.substring(first + 1, second);
112      final String thirdString = string.substring(second + 1);
113
114      if (secondString.length() == 4) {
115          // The second subtag is a script.
116          outputArray[IDX_SCRIPT] = secondString;
117
118          // The third subtag can be either a region or a variant, depending
119          // on its length.
120          if (thirdString.length() == 2 || thirdString.length() == 3 ||
121                  thirdString.isEmpty()) {
122              outputArray[IDX_REGION] = thirdString;
123          } else {
124              outputArray[IDX_VARIANT] = thirdString;
125          }
126      } else if (secondString.isEmpty() ||
127              secondString.length() == 2 || secondString.length() == 3) {
128          // The second string is a region, and the third a variant.
129          outputArray[IDX_REGION] = secondString;
130          outputArray[IDX_VARIANT] = thirdString;
131      } else {
132          // Variant with multiple subtags.
133          outputArray[IDX_VARIANT] = string.substring(first + 1);
134      }
135    } else {
136      // Language, script, region and variant with 1 or more subtags
137      // ("en_Latn_US_POSIX") OR
138      // Language, region and variant with 2 or more subtags
139      // (en_US_POSIX_VARIANT).
140      outputArray[IDX_LANGUAGE] = string.substring(0, first);
141      final String secondString = string.substring(first + 1, second);
142      if (secondString.length() == 4) {
143          outputArray[IDX_SCRIPT] = secondString;
144          outputArray[IDX_REGION] = string.substring(second + 1, third);
145          outputArray[IDX_VARIANT] = string.substring(third + 1);
146      } else {
147          outputArray[IDX_REGION] = secondString;
148          outputArray[IDX_VARIANT] = string.substring(second + 1);
149      }
150    }
151  }
152
153  /**
154   * Returns the appropriate {@code Locale} given a {@code String} of the form returned
155   * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
156   * this method can parse strings that {@code Locale.toString} won't produce.
157   * Used to remove duplication.
158   */
159  public static Locale localeFromIcuLocaleId(String localeId) {
160    // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
161    final int extensionsIndex = localeId.indexOf('@');
162
163    Map<Character, String> extensionsMap = Collections.EMPTY_MAP;
164    Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP;
165    Set<String> unicodeAttributeSet = Collections.EMPTY_SET;
166
167    if (extensionsIndex != -1) {
168      extensionsMap = new HashMap<Character, String>();
169      unicodeKeywordsMap = new HashMap<String, String>();
170      unicodeAttributeSet = new HashSet<String>();
171
172      // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
173      // containing all "keywords" it could parse. An ICU keyword is a key-value pair
174      // separated by an "=" (ULOC_KEYWORD_ASSIGN).
175      //
176      // Each keyword item can be one of three things :
177      // - A unicode extension attribute list: In this case the item key is "attribute"
178      //   and the value is a hyphen separated list of unicode attributes.
179      // - A unicode extension keyword: In this case, the item key will be larger than
180      //   1 char in length, and the value will be the unicode extension value.
181      // - A BCP-47 extension subtag: In this case, the item key will be exactly one
182      //   char in length, and the value will be a sequence of unparsed subtags that
183      //   represent the extension.
184      //
185      // Note that this implies that unicode extension keywords are "promoted" to
186      // to the same namespace as the top level extension subtags and their values.
187      // There can't be any collisions in practice because the BCP-47 spec imposes
188      // restrictions on their lengths.
189      final String extensionsString = localeId.substring(extensionsIndex + 1);
190      final String[] extensions = extensionsString.split(";");
191      for (String extension : extensions) {
192        // This is the special key for the unicode attributes
193        if (extension.startsWith("attribute=")) {
194          String unicodeAttributeValues = extension.substring("attribute=".length());
195          for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
196            unicodeAttributeSet.add(unicodeAttribute);
197          }
198        } else {
199          final int separatorIndex = extension.indexOf('=');
200
201          if (separatorIndex == 1) {
202            // This is a BCP-47 extension subtag.
203            final String value = extension.substring(2);
204            final char extensionId = extension.charAt(0);
205
206            extensionsMap.put(extensionId, value);
207          } else {
208            // This is a unicode extension keyword.
209            unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
210            extension.substring(separatorIndex + 1));
211          }
212        }
213      }
214    }
215
216    final String[] outputArray = new String[] { "", "", "", "" };
217    if (extensionsIndex == -1) {
218      parseLangScriptRegionAndVariants(localeId, outputArray);
219    } else {
220      parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
221          outputArray);
222    }
223
224    return new Locale(outputArray[IDX_LANGUAGE], outputArray[IDX_REGION],
225        outputArray[IDX_VARIANT], outputArray[IDX_SCRIPT],
226        unicodeAttributeSet, unicodeKeywordsMap, extensionsMap,
227        true /* has validated fields */);
228  }
229
230  public static Locale[] localesFromStrings(String[] localeNames) {
231    // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
232    // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
233    // both so that we never need to convert back when talking to it.
234    LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
235    for (String localeName : localeNames) {
236      set.add(localeFromIcuLocaleId(localeName));
237    }
238    return set.toArray(new Locale[set.size()]);
239  }
240
241  public static Locale[] getAvailableLocales() {
242    if (availableLocalesCache == null) {
243      availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
244    }
245    return availableLocalesCache.clone();
246  }
247
  /**
   * Returns the locales reported by {@code getAvailableBreakIteratorLocalesNative},
   * converted and de-duplicated by {@code localesFromStrings}. Not cached.
   */
  public static Locale[] getAvailableBreakIteratorLocales() {
    return localesFromStrings(getAvailableBreakIteratorLocalesNative());
  }

  /**
   * Returns the locales reported by {@code getAvailableCalendarLocalesNative},
   * converted and de-duplicated by {@code localesFromStrings}. Not cached.
   */
  public static Locale[] getAvailableCalendarLocales() {
    return localesFromStrings(getAvailableCalendarLocalesNative());
  }

  /**
   * Returns the locales reported by {@code getAvailableCollatorLocalesNative},
   * converted and de-duplicated by {@code localesFromStrings}. Not cached.
   */
  public static Locale[] getAvailableCollatorLocales() {
    return localesFromStrings(getAvailableCollatorLocalesNative());
  }

  /**
   * Returns the locales reported by {@code getAvailableDateFormatLocalesNative},
   * converted and de-duplicated by {@code localesFromStrings}. Not cached.
   */
  public static Locale[] getAvailableDateFormatLocales() {
    return localesFromStrings(getAvailableDateFormatLocalesNative());
  }

  /**
   * Returns the same result as {@link #getAvailableDateFormatLocales()}.
   */
  public static Locale[] getAvailableDateFormatSymbolsLocales() {
    return getAvailableDateFormatLocales();
  }

  /**
   * Returns the same result as {@link #getAvailableNumberFormatLocales()}.
   */
  public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
    return getAvailableNumberFormatLocales();
  }

  /**
   * Returns the locales reported by {@code getAvailableNumberFormatLocalesNative},
   * converted and de-duplicated by {@code localesFromStrings}. Not cached.
   */
  public static Locale[] getAvailableNumberFormatLocales() {
    return localesFromStrings(getAvailableNumberFormatLocalesNative());
  }
275
276  public static String getBestDateTimePattern(String skeleton, Locale locale) {
277    String languageTag = locale.toLanguageTag();
278    String key = skeleton + "\t" + languageTag;
279    synchronized (CACHED_PATTERNS) {
280      String pattern = CACHED_PATTERNS.get(key);
281      if (pattern == null) {
282        pattern = getBestDateTimePatternNative(skeleton, languageTag);
283        CACHED_PATTERNS.put(key, pattern);
284      }
285      return pattern;
286    }
287  }
288
289  private static native String getBestDateTimePatternNative(String skeleton, String languageTag);
290
291  public static char[] getDateFormatOrder(String pattern) {
292    char[] result = new char[3];
293    int resultIndex = 0;
294    boolean sawDay = false;
295    boolean sawMonth = false;
296    boolean sawYear = false;
297
298    for (int i = 0; i < pattern.length(); ++i) {
299      char ch = pattern.charAt(i);
300      if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
301        if (ch == 'd' && !sawDay) {
302          result[resultIndex++] = 'd';
303          sawDay = true;
304        } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
305          result[resultIndex++] = 'M';
306          sawMonth = true;
307        } else if ((ch == 'y') && !sawYear) {
308          result[resultIndex++] = 'y';
309          sawYear = true;
310        }
311      } else if (ch == 'G') {
312        // Ignore the era specifier, if present.
313      } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
314        throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
315      } else if (ch == '\'') {
316        if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
317          ++i;
318        } else {
319          i = pattern.indexOf('\'', i + 1);
320          if (i == -1) {
321            throw new IllegalArgumentException("Bad quoting in " + pattern);
322          }
323          ++i;
324        }
325      } else {
326        // Ignore spaces and punctuation.
327      }
328    }
329    return result;
330  }
331
  /**
   * Returns the version of the CLDR data in use, such as "22.1.1".
   * Implemented in native code.
   */
  public static native String getCldrVersion();

  /**
   * Returns the icu4c version in use, such as "50.1.1".
   * Implemented in native code.
   */
  public static native String getIcuVersion();

  /**
   * Returns the Unicode version our ICU supports, such as "6.2".
   * Implemented in native code.
   */
  public static native String getUnicodeVersion();
346
  // --- Case mapping.

  /**
   * Lower-cases {@code s} by passing it, together with {@code locale}'s language tag,
   * to the native overload.
   */
  public static String toLowerCase(String s, Locale locale) {
    return toLowerCase(s, locale.toLanguageTag());
  }

  // Native implementation; the locale is identified by its language tag.
  private static native String toLowerCase(String s, String languageTag);

  /**
   * Upper-cases {@code s} by passing it, together with {@code locale}'s language tag,
   * to the native overload.
   */
  public static String toUpperCase(String s, Locale locale) {
    return toUpperCase(s, locale.toLanguageTag());
  }

  // Native implementation; the locale is identified by its language tag.
  private static native String toUpperCase(String s, String languageTag);
360
  // --- Errors.

  // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
  // NOTE(review): the numeric values presumably mirror ICU's UErrorCode enum -- confirm
  // against utypes.h before changing any of them.
  public static final int U_ZERO_ERROR = 0;
  public static final int U_INVALID_CHAR_FOUND = 10;
  public static final int U_TRUNCATED_CHAR_FOUND = 11;
  public static final int U_ILLEGAL_CHAR_FOUND = 12;
  public static final int U_BUFFER_OVERFLOW_ERROR = 15;

  /**
   * Returns true if {@code error} is strictly greater than {@link #U_ZERO_ERROR};
   * zero and negative codes are not treated as failures.
   */
  public static boolean U_FAILURE(int error) {
    return error > U_ZERO_ERROR;
  }
373
  // --- Native methods accessing ICU's database.

  // Each of these returns locale names as strings; the public getAvailable*Locales
  // methods feed the result through localesFromStrings.
  private static native String[] getAvailableBreakIteratorLocalesNative();
  private static native String[] getAvailableCalendarLocalesNative();
  private static native String[] getAvailableCollatorLocalesNative();
  private static native String[] getAvailableDateFormatLocalesNative();
  private static native String[] getAvailableLocalesNative();
  private static native String[] getAvailableNumberFormatLocalesNative();
382
  // Currency lookups implemented in native code.
  public static native String[] getAvailableCurrencyCodes();
  public static native String getCurrencyCode(String countryCode);

  /**
   * Returns the result of the native currency display-name lookup for
   * {@code currencyCode}, with {@code locale} passed as its language tag.
   */
  public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
    return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
  }

  // Native implementation; the locale is identified by its language tag.
  private static native String getCurrencyDisplayName(String languageTag, String currencyCode);

  public static native int getCurrencyFractionDigits(String currencyCode);
  public static native int getCurrencyNumericCode(String currencyCode);

  /**
   * Returns the result of the native currency symbol lookup for {@code currencyCode},
   * with {@code locale} passed as its language tag.
   */
  public static String getCurrencySymbol(Locale locale, String currencyCode) {
    return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
  }

  // Native implementation; the locale is identified by its language tag.
  private static native String getCurrencySymbol(String languageTag, String currencyCode);
400
  // The four getDisplay* methods below share one shape: both locales are converted to
  // language tags and handed to the matching native method, with targetLocale's tag
  // first and locale's tag second.
  // NOTE(review): presumably targetLocale is the locale being described and locale is
  // the language of the returned text -- confirm against the native implementation.

  /** Delegates to {@code getDisplayCountryNative} with both locales as language tags. */
  public static String getDisplayCountry(Locale targetLocale, Locale locale) {
    return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
  }

  private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);

  /** Delegates to {@code getDisplayLanguageNative} with both locales as language tags. */
  public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
    return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
  }

  private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);

  /** Delegates to {@code getDisplayVariantNative} with both locales as language tags. */
  public static String getDisplayVariant(Locale targetLocale, Locale locale) {
    return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
  }

  private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);

  /** Delegates to {@code getDisplayScriptNative} with both locales as language tags. */
  public static String getDisplayScript(Locale targetLocale, Locale locale) {
    return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
  }

  private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);
424
  // Native lookups that take the locale as a language tag.
  public static native String getISO3Country(String languageTag);

  public static native String getISO3Language(String languageTag);

  // NOTE(review): these two name their parameter "locale" rather than "languageTag";
  // confirm whether an ICU locale name (en_US) or a BCP-47 tag (en-US) is expected.
  public static native String addLikelySubtags(String locale);
  public static native String getScript(String locale);

  // Back the lazily cached getISOLanguages() / getISOCountries().
  private static native String[] getISOLanguagesNative();
  private static native String[] getISOCountriesNative();

  // Package-private native hook that takes a LocaleData to populate.
  // NOTE(review): the boolean result presumably signals success -- confirm.
  static native boolean initLocaleDataNative(String languageTag, LocaleData result);

  /**
   * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US
   */
  public static native void setDefaultLocale(String languageTag);

  /**
   * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US.
   */
  public static native String getDefaultLocale();
446}
447