ICU.java revision a94266074c7b82720fd2cecfb37ab8da85f1b296
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package libcore.icu;
18
19import java.util.Collections;
20import java.util.HashMap;
21import java.util.HashSet;
22import java.util.LinkedHashSet;
23import java.util.Locale;
24import java.util.Map;
25import java.util.Set;
26import libcore.util.BasicLruCache;
27
28/**
29 * Makes ICU data accessible to Java.
30 */
31public final class ICU {
  /**
   * Cache of patterns computed by {@code getBestDateTimePattern}. Keys are the
   * skeleton and the locale's language tag joined by a tab character; that
   * method synchronizes on this object while reading and writing it.
   */
  private static final BasicLruCache<String, String> CACHED_PATTERNS =
      new BasicLruCache<String, String>(8);

  /** Lazily filled by {@code getAvailableLocales()}, which only hands out clones. */
  private static Locale[] availableLocalesCache;

  /** Lazily filled by {@code getISOCountries()}, which only hands out clones. */
  private static String[] isoCountries;

  /** Lazily filled by {@code getISOLanguages()}, which only hands out clones. */
  private static String[] isoLanguages;
40
41  /**
42   * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
43   */
44  public static String[] getISOLanguages() {
45    if (isoLanguages == null) {
46      isoLanguages = getISOLanguagesNative();
47    }
48    return isoLanguages.clone();
49  }
50
51  /**
52   * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
53   */
54  public static String[] getISOCountries() {
55    if (isoCountries == null) {
56      isoCountries = getISOCountriesNative();
57    }
58    return isoCountries.clone();
59  }
60
61  public static Locale forLanguageTag(String languageTag, boolean strict) {
62    final String icuLocaleId = localeForLanguageTag(languageTag, strict);
63    if (icuLocaleId == null) {
64      // TODO: We should probably return "und" here. From what I can tell,
65      // this happens only when the language in the languageTag is bad.
66      // Investigate this a bit more.
67      return null;
68    }
69
70    return localeFromIcuLocaleId(icuLocaleId);
71  }
72
73  private static final int IDX_LANGUAGE = 0;
74  private static final int IDX_SCRIPT = 1;
75  private static final int IDX_REGION = 2;
76  private static final int IDX_VARIANT = 3;
77
78  /*
79   * Parse the {Language, Script, Region, Variant*} section of the ICU locale
80   * ID. This is the bit that appears before the keyword separate "@". The general
81   * structure is a series of ASCII alphanumeric strings (subtags)
82   * separated by underscores.
83   *
84   * Each subtag is interpreted according to its position in the list of subtags
85   * AND its length (groan...). The various cases are explained in comments
86   * below.
87   */
88  private static void parseLangScriptRegionAndVariants(String string,
89          String[] outputArray) {
90    final int first = string.indexOf('_');
91    final int second = string.indexOf('_', first + 1);
92    final int third = string.indexOf('_', second + 1);
93
94    if (first == -1) {
95      outputArray[IDX_LANGUAGE] = string;
96    } else if (second == -1) {
97      // Language and country ("ja_JP") OR
98      // Language and script ("en_Latn") OR
99      // Language and variant ("en_POSIX").
100
101      outputArray[IDX_LANGUAGE] = string.substring(0, first);
102      final String secondString = string.substring(first + 1);
103
104      if (secondString.length() == 4) {
105          // 4 Letter ISO script code.
106          outputArray[IDX_SCRIPT] = secondString;
107      } else if (secondString.length() == 2 || secondString.length() == 3) {
108          // 2 or 3 Letter region code.
109          outputArray[IDX_REGION] = secondString;
110      } else {
111          // If we're here, the length of the second half is either 1 or greater
112          // than 5. Assume that ICU won't hand us malformed tags, and therefore
113          // assume the rest of the string is a series of variant tags.
114          outputArray[IDX_VARIANT] = secondString;
115      }
116    } else if (third == -1) {
117      // Language and country and variant ("ja_JP_TRADITIONAL") OR
118      // Language and script and variant ("en_Latn_POSIX") OR
119      // Language and script and region ("en_Latn_US"). OR
120      // Language and variant with multiple subtags ("en_POSIX_XISOP")
121
122      outputArray[IDX_LANGUAGE] = string.substring(0, first);
123      final String secondString = string.substring(first + 1, second);
124      final String thirdString = string.substring(second + 1);
125
126      if (secondString.length() == 4) {
127          // The second subtag is a script.
128          outputArray[IDX_SCRIPT] = secondString;
129
130          // The third subtag can be either a region or a variant, depending
131          // on its length.
132          if (thirdString.length() == 2 || thirdString.length() == 3 ||
133                  thirdString.isEmpty()) {
134              outputArray[IDX_REGION] = thirdString;
135          } else {
136              outputArray[IDX_VARIANT] = thirdString;
137          }
138      } else if (secondString.isEmpty() ||
139              secondString.length() == 2 || secondString.length() == 3) {
140          // The second string is a region, and the third a variant.
141          outputArray[IDX_REGION] = secondString;
142          outputArray[IDX_VARIANT] = thirdString;
143      } else {
144          // Variant with multiple subtags.
145          outputArray[IDX_VARIANT] = string.substring(first + 1);
146      }
147    } else {
148      // Language, script, region and variant with 1 or more subtags
149      // ("en_Latn_US_POSIX") OR
150      // Language, region and variant with 2 or more subtags
151      // (en_US_POSIX_VARIANT).
152      outputArray[IDX_LANGUAGE] = string.substring(0, first);
153      final String secondString = string.substring(first + 1, second);
154      if (secondString.length() == 4) {
155          outputArray[IDX_SCRIPT] = secondString;
156          outputArray[IDX_REGION] = string.substring(second + 1, third);
157          outputArray[IDX_VARIANT] = string.substring(third + 1);
158      } else {
159          outputArray[IDX_REGION] = secondString;
160          outputArray[IDX_VARIANT] = string.substring(second + 1);
161      }
162    }
163  }
164
165  /**
166   * Returns the appropriate {@code Locale} given a {@code String} of the form returned
167   * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
168   * this method can parse strings that {@code Locale.toString} won't produce.
169   * Used to remove duplication.
170   */
171  public static Locale localeFromIcuLocaleId(String localeId) {
172    // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
173    final int extensionsIndex = localeId.indexOf('@');
174
175    Map<Character, String> extensionsMap = Collections.EMPTY_MAP;
176    Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP;
177    Set<String> unicodeAttributeSet = Collections.EMPTY_SET;
178
179    if (extensionsIndex != -1) {
180      extensionsMap = new HashMap<Character, String>();
181      unicodeKeywordsMap = new HashMap<String, String>();
182      unicodeAttributeSet = new HashSet<String>();
183
184      // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
185      // containing all "keywords" it could parse. An ICU keyword is a key-value pair
186      // separated by an "=" (ULOC_KEYWORD_ASSIGN).
187      //
188      // Each keyword item can be one of three things :
189      // - A unicode extension attribute list: In this case the item key is "attribute"
190      //   and the value is a hyphen separated list of unicode attributes.
191      // - A unicode extension keyword: In this case, the item key will be larger than
192      //   1 char in length, and the value will be the unicode extension value.
193      // - A BCP-47 extension subtag: In this case, the item key will be exactly one
194      //   char in length, and the value will be a sequence of unparsed subtags that
195      //   represent the extension.
196      //
197      // Note that this implies that unicode extension keywords are "promoted" to
198      // to the same namespace as the top level extension subtags and their values.
199      // There can't be any collisions in practice because the BCP-47 spec imposes
200      // restrictions on their lengths.
201      final String extensionsString = localeId.substring(extensionsIndex + 1);
202      final String[] extensions = extensionsString.split(";");
203      for (String extension : extensions) {
204        // This is the special key for the unicode attributes
205        if (extension.startsWith("attribute=")) {
206          String unicodeAttributeValues = extension.substring("attribute=".length());
207          for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
208            unicodeAttributeSet.add(unicodeAttribute);
209          }
210        } else {
211          final int separatorIndex = extension.indexOf('=');
212
213          if (separatorIndex == 1) {
214            // This is a BCP-47 extension subtag.
215            final String value = extension.substring(2);
216            final char extensionId = extension.charAt(0);
217
218            extensionsMap.put(extensionId, value);
219          } else {
220            // This is a unicode extension keyword.
221            unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
222            extension.substring(separatorIndex + 1));
223          }
224        }
225      }
226    }
227
228    final String[] outputArray = new String[] { "", "", "", "" };
229    if (extensionsIndex == -1) {
230      parseLangScriptRegionAndVariants(localeId, outputArray);
231    } else {
232      parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
233          outputArray);
234    }
235
236    return new Locale(outputArray[IDX_LANGUAGE], outputArray[IDX_REGION],
237        outputArray[IDX_VARIANT], outputArray[IDX_SCRIPT],
238        unicodeAttributeSet, unicodeKeywordsMap, extensionsMap,
239        true /* has validated fields */);
240  }
241
242  public static Locale[] localesFromStrings(String[] localeNames) {
243    // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
244    // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
245    // both so that we never need to convert back when talking to it.
246    LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
247    for (String localeName : localeNames) {
248      set.add(localeFromIcuLocaleId(localeName));
249    }
250    return set.toArray(new Locale[set.size()]);
251  }
252
253  public static Locale[] getAvailableLocales() {
254    if (availableLocalesCache == null) {
255      availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
256    }
257    return availableLocalesCache.clone();
258  }
259
260  public static Locale[] getAvailableBreakIteratorLocales() {
261    return localesFromStrings(getAvailableBreakIteratorLocalesNative());
262  }
263
264  public static Locale[] getAvailableCalendarLocales() {
265    return localesFromStrings(getAvailableCalendarLocalesNative());
266  }
267
268  public static Locale[] getAvailableCollatorLocales() {
269    return localesFromStrings(getAvailableCollatorLocalesNative());
270  }
271
272  public static Locale[] getAvailableDateFormatLocales() {
273    return localesFromStrings(getAvailableDateFormatLocalesNative());
274  }
275
276  public static Locale[] getAvailableDateFormatSymbolsLocales() {
277    return getAvailableDateFormatLocales();
278  }
279
280  public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
281    return getAvailableNumberFormatLocales();
282  }
283
284  public static Locale[] getAvailableNumberFormatLocales() {
285    return localesFromStrings(getAvailableNumberFormatLocalesNative());
286  }
287
288  public static String getBestDateTimePattern(String skeleton, Locale locale) {
289    String languageTag = locale.toLanguageTag();
290    String key = skeleton + "\t" + languageTag;
291    synchronized (CACHED_PATTERNS) {
292      String pattern = CACHED_PATTERNS.get(key);
293      if (pattern == null) {
294        pattern = getBestDateTimePatternNative(skeleton, languageTag);
295        CACHED_PATTERNS.put(key, pattern);
296      }
297      return pattern;
298    }
299  }
300
301  private static native String getBestDateTimePatternNative(String skeleton, String languageTag);
302
303  public static char[] getDateFormatOrder(String pattern) {
304    char[] result = new char[3];
305    int resultIndex = 0;
306    boolean sawDay = false;
307    boolean sawMonth = false;
308    boolean sawYear = false;
309
310    for (int i = 0; i < pattern.length(); ++i) {
311      char ch = pattern.charAt(i);
312      if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
313        if (ch == 'd' && !sawDay) {
314          result[resultIndex++] = 'd';
315          sawDay = true;
316        } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
317          result[resultIndex++] = 'M';
318          sawMonth = true;
319        } else if ((ch == 'y') && !sawYear) {
320          result[resultIndex++] = 'y';
321          sawYear = true;
322        }
323      } else if (ch == 'G') {
324        // Ignore the era specifier, if present.
325      } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
326        throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
327      } else if (ch == '\'') {
328        if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
329          ++i;
330        } else {
331          i = pattern.indexOf('\'', i + 1);
332          if (i == -1) {
333            throw new IllegalArgumentException("Bad quoting in " + pattern);
334          }
335          ++i;
336        }
337      } else {
338        // Ignore spaces and punctuation.
339      }
340    }
341    return result;
342  }
343
  /**
   * Returns the version of the CLDR data in use, such as "22.1.1".
   * Implemented in native code.
   */
  public static native String getCldrVersion();

  /**
   * Returns the icu4c version in use, such as "50.1.1".
   * Implemented in native code.
   */
  public static native String getIcuVersion();

  /**
   * Returns the Unicode version our ICU supports, such as "6.2".
   * Implemented in native code.
   */
  public static native String getUnicodeVersion();
358
359  // --- Case mapping.
360
361  public static String toLowerCase(String s, Locale locale) {
362    return toLowerCase(s, locale.toLanguageTag());
363  }
364
365  private static native String toLowerCase(String s, String languageTag);
366
367  public static String toUpperCase(String s, Locale locale) {
368    return toUpperCase(s, locale.toLanguageTag());
369  }
370
371  private static native String toUpperCase(String s, String languageTag);
372
373  // --- Errors.
374
375  // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
376  public static final int U_ZERO_ERROR = 0;
377  public static final int U_INVALID_CHAR_FOUND = 10;
378  public static final int U_TRUNCATED_CHAR_FOUND = 11;
379  public static final int U_ILLEGAL_CHAR_FOUND = 12;
380  public static final int U_BUFFER_OVERFLOW_ERROR = 15;
381
382  public static boolean U_FAILURE(int error) {
383    return error > U_ZERO_ERROR;
384  }
385
386  // --- Native methods accessing ICU's database.
387
  // Native sources of locale names. Each result is passed to
  // localesFromStrings() by the public getAvailable*Locales() method of the
  // matching name, which parses the names as ICU locale IDs.
  private static native String[] getAvailableBreakIteratorLocalesNative();
  private static native String[] getAvailableCalendarLocalesNative();
  private static native String[] getAvailableCollatorLocalesNative();
  private static native String[] getAvailableDateFormatLocalesNative();
  private static native String[] getAvailableLocalesNative();
  private static native String[] getAvailableNumberFormatLocalesNative();
394
  // Currency lookups implemented in native code.
  // NOTE(review): presumably these deal in ISO 4217 currency codes and ISO 3166
  // country codes -- confirm against the native implementation.
  public static native String[] getAvailableCurrencyCodes();
  public static native String getCurrencyCode(String countryCode);
397
398  public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
399    return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
400  }
401
402  private static native String getCurrencyDisplayName(String languageTag, String currencyCode);
403
  // Per-currency numeric properties, implemented in native code.
  // NOTE(review): behavior for an unknown currencyCode is not visible from
  // this file -- confirm against the native implementation.
  public static native int getCurrencyFractionDigits(String currencyCode);
  public static native int getCurrencyNumericCode(String currencyCode);
406
407  public static String getCurrencySymbol(Locale locale, String currencyCode) {
408    return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
409  }
410
411  private static native String getCurrencySymbol(String languageTag, String currencyCode);
412
413  public static String getDisplayCountry(Locale targetLocale, Locale locale) {
414    return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
415  }
416
417  private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);
418
419  public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
420    return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
421  }
422
423  private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);
424
425  public static String getDisplayVariant(Locale targetLocale, Locale locale) {
426    return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
427  }
428
429  private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);
430
431  public static String getDisplayScript(Locale targetLocale, Locale locale) {
432    return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
433  }
434
435  private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);
436
437  public static String getISO3Country(Locale locale) {
438    return getISO3CountryNative(locale.toLanguageTag());
439  }
440
441  private static native String getISO3CountryNative(String languageTag);
442
443  public static String getISO3Language(Locale locale) {
444    return getISO3LanguageNative(locale.toLanguageTag());
445  }
446
447  private static native String getISO3LanguageNative(String languageTag);
448
  // Locale-manipulation helpers implemented in native code.
  // NOTE(review): the expected format of the "locale" argument (ICU locale ID
  // vs. language tag) is not visible from this file -- confirm before use.
  public static native String addLikelySubtags(String locale);
  public static native String getScript(String locale);

  // Uncached sources for getISOLanguages() and getISOCountries().
  private static native String[] getISOLanguagesNative();
  private static native String[] getISOCountriesNative();

  // Converts a language tag to an ICU locale ID; forLanguageTag() treats a
  // null result as "no locale" and returns null itself.
  private static native String localeForLanguageTag(String languageTag, boolean strict);

  // Package-private: fills in the given LocaleData for the named locale.
  // NOTE(review): presumably the boolean reports success -- confirm against
  // the native implementation.
  static native boolean initLocaleDataNative(String locale, LocaleData result);

  // Accessors for the default locale held by the native layer.
  public static native void setDefaultLocale(String languageTag);
  public static native String getDefaultLocale();
461}
462