1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package libcore.icu;
18
19import java.util.Collections;
20import java.util.HashMap;
21import java.util.HashSet;
22import java.util.LinkedHashSet;
23import java.util.Locale;
24import java.util.Map;
25import java.util.Map.Entry;
26import java.util.Set;
27import libcore.util.BasicLruCache;
28
29/**
30 * Makes ICU data accessible to Java.
31 */
32public final class ICU {
  // Cache of best date/time patterns, keyed by the "skeleton<TAB>languageTag" string
  // built in getBestDateTimePattern. It is constructed with an argument of 8
  // (presumably the maximum number of entries; confirm against BasicLruCache).
  // Every access in this file happens while synchronized on the cache object.
  private static final BasicLruCache<String, String> CACHED_PATTERNS =
      new BasicLruCache<String, String>(8);

  // Filled in lazily by getAvailableLocales(); null until the first call.
  private static Locale[] availableLocalesCache;

  // Filled in lazily by getISOCountries(); null until the first call.
  private static String[] isoCountries;

  // Filled in lazily by getISOLanguages(); null until the first call.
  private static String[] isoLanguages;
41
42  /**
43   * Returns an array of two-letter ISO 639-1 language codes, either from ICU or our cache.
44   */
45  public static String[] getISOLanguages() {
46    if (isoLanguages == null) {
47      isoLanguages = getISOLanguagesNative();
48    }
49    return isoLanguages.clone();
50  }
51
52  /**
53   * Returns an array of two-letter ISO 3166 country codes, either from ICU or our cache.
54   */
55  public static String[] getISOCountries() {
56    if (isoCountries == null) {
57      isoCountries = getISOCountriesNative();
58    }
59    return isoCountries.clone();
60  }
61
62  private static final int IDX_LANGUAGE = 0;
63  private static final int IDX_SCRIPT = 1;
64  private static final int IDX_REGION = 2;
65  private static final int IDX_VARIANT = 3;
66
67  /*
68   * Parse the {Language, Script, Region, Variant*} section of the ICU locale
69   * ID. This is the bit that appears before the keyword separate "@". The general
70   * structure is a series of ASCII alphanumeric strings (subtags)
71   * separated by underscores.
72   *
73   * Each subtag is interpreted according to its position in the list of subtags
74   * AND its length (groan...). The various cases are explained in comments
75   * below.
76   */
77  private static void parseLangScriptRegionAndVariants(String string,
78          String[] outputArray) {
79    final int first = string.indexOf('_');
80    final int second = string.indexOf('_', first + 1);
81    final int third = string.indexOf('_', second + 1);
82
83    if (first == -1) {
84      outputArray[IDX_LANGUAGE] = string;
85    } else if (second == -1) {
86      // Language and country ("ja_JP") OR
87      // Language and script ("en_Latn") OR
88      // Language and variant ("en_POSIX").
89
90      outputArray[IDX_LANGUAGE] = string.substring(0, first);
91      final String secondString = string.substring(first + 1);
92
93      if (secondString.length() == 4) {
94          // 4 Letter ISO script code.
95          outputArray[IDX_SCRIPT] = secondString;
96      } else if (secondString.length() == 2 || secondString.length() == 3) {
97          // 2 or 3 Letter region code.
98          outputArray[IDX_REGION] = secondString;
99      } else {
100          // If we're here, the length of the second half is either 1 or greater
101          // than 5. Assume that ICU won't hand us malformed tags, and therefore
102          // assume the rest of the string is a series of variant tags.
103          outputArray[IDX_VARIANT] = secondString;
104      }
105    } else if (third == -1) {
106      // Language and country and variant ("ja_JP_TRADITIONAL") OR
107      // Language and script and variant ("en_Latn_POSIX") OR
108      // Language and script and region ("en_Latn_US"). OR
109      // Language and variant with multiple subtags ("en_POSIX_XISOP")
110
111      outputArray[IDX_LANGUAGE] = string.substring(0, first);
112      final String secondString = string.substring(first + 1, second);
113      final String thirdString = string.substring(second + 1);
114
115      if (secondString.length() == 4) {
116          // The second subtag is a script.
117          outputArray[IDX_SCRIPT] = secondString;
118
119          // The third subtag can be either a region or a variant, depending
120          // on its length.
121          if (thirdString.length() == 2 || thirdString.length() == 3 ||
122                  thirdString.isEmpty()) {
123              outputArray[IDX_REGION] = thirdString;
124          } else {
125              outputArray[IDX_VARIANT] = thirdString;
126          }
127      } else if (secondString.isEmpty() ||
128              secondString.length() == 2 || secondString.length() == 3) {
129          // The second string is a region, and the third a variant.
130          outputArray[IDX_REGION] = secondString;
131          outputArray[IDX_VARIANT] = thirdString;
132      } else {
133          // Variant with multiple subtags.
134          outputArray[IDX_VARIANT] = string.substring(first + 1);
135      }
136    } else {
137      // Language, script, region and variant with 1 or more subtags
138      // ("en_Latn_US_POSIX") OR
139      // Language, region and variant with 2 or more subtags
140      // (en_US_POSIX_VARIANT).
141      outputArray[IDX_LANGUAGE] = string.substring(0, first);
142      final String secondString = string.substring(first + 1, second);
143      if (secondString.length() == 4) {
144          outputArray[IDX_SCRIPT] = secondString;
145          outputArray[IDX_REGION] = string.substring(second + 1, third);
146          outputArray[IDX_VARIANT] = string.substring(third + 1);
147      } else {
148          outputArray[IDX_REGION] = secondString;
149          outputArray[IDX_VARIANT] = string.substring(second + 1);
150      }
151    }
152  }
153
154  /**
155   * Returns the appropriate {@code Locale} given a {@code String} of the form returned
156   * by {@code toString}. This is very lenient, and doesn't care what's between the underscores:
157   * this method can parse strings that {@code Locale.toString} won't produce.
158   * Used to remove duplication.
159   */
160  public static Locale localeFromIcuLocaleId(String localeId) {
161    // @ == ULOC_KEYWORD_SEPARATOR_UNICODE (uloc.h).
162    final int extensionsIndex = localeId.indexOf('@');
163
164    Map<Character, String> extensionsMap = Collections.EMPTY_MAP;
165    Map<String, String> unicodeKeywordsMap = Collections.EMPTY_MAP;
166    Set<String> unicodeAttributeSet = Collections.EMPTY_SET;
167
168    if (extensionsIndex != -1) {
169      extensionsMap = new HashMap<Character, String>();
170      unicodeKeywordsMap = new HashMap<String, String>();
171      unicodeAttributeSet = new HashSet<String>();
172
173      // ICU sends us a semi-colon (ULOC_KEYWORD_ITEM_SEPARATOR) delimited string
174      // containing all "keywords" it could parse. An ICU keyword is a key-value pair
175      // separated by an "=" (ULOC_KEYWORD_ASSIGN).
176      //
177      // Each keyword item can be one of three things :
178      // - A unicode extension attribute list: In this case the item key is "attribute"
179      //   and the value is a hyphen separated list of unicode attributes.
180      // - A unicode extension keyword: In this case, the item key will be larger than
181      //   1 char in length, and the value will be the unicode extension value.
182      // - A BCP-47 extension subtag: In this case, the item key will be exactly one
183      //   char in length, and the value will be a sequence of unparsed subtags that
184      //   represent the extension.
185      //
186      // Note that this implies that unicode extension keywords are "promoted" to
187      // to the same namespace as the top level extension subtags and their values.
188      // There can't be any collisions in practice because the BCP-47 spec imposes
189      // restrictions on their lengths.
190      final String extensionsString = localeId.substring(extensionsIndex + 1);
191      final String[] extensions = extensionsString.split(";");
192      for (String extension : extensions) {
193        // This is the special key for the unicode attributes
194        if (extension.startsWith("attribute=")) {
195          String unicodeAttributeValues = extension.substring("attribute=".length());
196          for (String unicodeAttribute : unicodeAttributeValues.split("-")) {
197            unicodeAttributeSet.add(unicodeAttribute);
198          }
199        } else {
200          final int separatorIndex = extension.indexOf('=');
201
202          if (separatorIndex == 1) {
203            // This is a BCP-47 extension subtag.
204            final String value = extension.substring(2);
205            final char extensionId = extension.charAt(0);
206
207            extensionsMap.put(extensionId, value);
208          } else {
209            // This is a unicode extension keyword.
210            unicodeKeywordsMap.put(extension.substring(0, separatorIndex),
211            extension.substring(separatorIndex + 1));
212          }
213        }
214      }
215    }
216
217    final String[] outputArray = new String[] { "", "", "", "" };
218    if (extensionsIndex == -1) {
219      parseLangScriptRegionAndVariants(localeId, outputArray);
220    } else {
221      parseLangScriptRegionAndVariants(localeId.substring(0, extensionsIndex),
222          outputArray);
223    }
224    Locale.Builder builder = new Locale.Builder();
225    builder.setLanguage(outputArray[IDX_LANGUAGE]);
226    builder.setRegion(outputArray[IDX_REGION]);
227    builder.setVariant(outputArray[IDX_VARIANT]);
228    builder.setScript(outputArray[IDX_SCRIPT]);
229    for (String attribute : unicodeAttributeSet) {
230      builder.addUnicodeLocaleAttribute(attribute);
231    }
232    for (Entry<String, String> keyword : unicodeKeywordsMap.entrySet()) {
233      builder.setUnicodeLocaleKeyword(keyword.getKey(), keyword.getValue());
234    }
235
236    for (Entry<Character, String> extension : extensionsMap.entrySet()) {
237      builder.setExtension(extension.getKey(), extension.getValue());
238    }
239
240    return builder.build();
241  }
242
243  public static Locale[] localesFromStrings(String[] localeNames) {
244    // We need to remove duplicates caused by the conversion of "he" to "iw", et cetera.
245    // Java needs the obsolete code, ICU needs the modern code, but we let ICU know about
246    // both so that we never need to convert back when talking to it.
247    LinkedHashSet<Locale> set = new LinkedHashSet<Locale>();
248    for (String localeName : localeNames) {
249      set.add(localeFromIcuLocaleId(localeName));
250    }
251    return set.toArray(new Locale[set.size()]);
252  }
253
254  public static Locale[] getAvailableLocales() {
255    if (availableLocalesCache == null) {
256      availableLocalesCache = localesFromStrings(getAvailableLocalesNative());
257    }
258    return availableLocalesCache.clone();
259  }
260
261  public static Locale[] getAvailableBreakIteratorLocales() {
262    return localesFromStrings(getAvailableBreakIteratorLocalesNative());
263  }
264
265  public static Locale[] getAvailableCalendarLocales() {
266    return localesFromStrings(getAvailableCalendarLocalesNative());
267  }
268
269  public static Locale[] getAvailableCollatorLocales() {
270    return localesFromStrings(getAvailableCollatorLocalesNative());
271  }
272
273  public static Locale[] getAvailableDateFormatLocales() {
274    return localesFromStrings(getAvailableDateFormatLocalesNative());
275  }
276
277  public static Locale[] getAvailableDateFormatSymbolsLocales() {
278    return getAvailableDateFormatLocales();
279  }
280
281  public static Locale[] getAvailableDecimalFormatSymbolsLocales() {
282    return getAvailableNumberFormatLocales();
283  }
284
285  public static Locale[] getAvailableNumberFormatLocales() {
286    return localesFromStrings(getAvailableNumberFormatLocalesNative());
287  }
288
289  public static String getBestDateTimePattern(String skeleton, Locale locale) {
290    String languageTag = locale.toLanguageTag();
291    String key = skeleton + "\t" + languageTag;
292    synchronized (CACHED_PATTERNS) {
293      String pattern = CACHED_PATTERNS.get(key);
294      if (pattern == null) {
295        pattern = getBestDateTimePatternNative(skeleton, languageTag);
296        CACHED_PATTERNS.put(key, pattern);
297      }
298      return pattern;
299    }
300  }
301
302  private static native String getBestDateTimePatternNative(String skeleton, String languageTag);
303
304  public static char[] getDateFormatOrder(String pattern) {
305    char[] result = new char[3];
306    int resultIndex = 0;
307    boolean sawDay = false;
308    boolean sawMonth = false;
309    boolean sawYear = false;
310
311    for (int i = 0; i < pattern.length(); ++i) {
312      char ch = pattern.charAt(i);
313      if (ch == 'd' || ch == 'L' || ch == 'M' || ch == 'y') {
314        if (ch == 'd' && !sawDay) {
315          result[resultIndex++] = 'd';
316          sawDay = true;
317        } else if ((ch == 'L' || ch == 'M') && !sawMonth) {
318          result[resultIndex++] = 'M';
319          sawMonth = true;
320        } else if ((ch == 'y') && !sawYear) {
321          result[resultIndex++] = 'y';
322          sawYear = true;
323        }
324      } else if (ch == 'G') {
325        // Ignore the era specifier, if present.
326      } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
327        throw new IllegalArgumentException("Bad pattern character '" + ch + "' in " + pattern);
328      } else if (ch == '\'') {
329        if (i < pattern.length() - 1 && pattern.charAt(i + 1) == '\'') {
330          ++i;
331        } else {
332          i = pattern.indexOf('\'', i + 1);
333          if (i == -1) {
334            throw new IllegalArgumentException("Bad quoting in " + pattern);
335          }
336          ++i;
337        }
338      } else {
339        // Ignore spaces and punctuation.
340      }
341    }
342    return result;
343  }
344
  /**
   * Returns the version of the CLDR data in use, such as "22.1.1".
   * Implemented in native code.
   */
  public static native String getCldrVersion();

  /**
   * Returns the icu4c version in use, such as "50.1.1".
   * Implemented in native code.
   */
  public static native String getIcuVersion();

  /**
   * Returns the Unicode version our ICU supports, such as "6.2".
   * Implemented in native code.
   */
  public static native String getUnicodeVersion();
359
360  // --- Case mapping.
361
362  public static String toLowerCase(String s, Locale locale) {
363    return toLowerCase(s, locale.toLanguageTag());
364  }
365
366  private static native String toLowerCase(String s, String languageTag);
367
368  public static String toUpperCase(String s, Locale locale) {
369    return toUpperCase(s, locale.toLanguageTag());
370  }
371
372  private static native String toUpperCase(String s, String languageTag);
373
374  // --- Errors.
375
376  // Just the subset of error codes needed by CharsetDecoderICU/CharsetEncoderICU.
377  public static final int U_ZERO_ERROR = 0;
378  public static final int U_INVALID_CHAR_FOUND = 10;
379  public static final int U_TRUNCATED_CHAR_FOUND = 11;
380  public static final int U_ILLEGAL_CHAR_FOUND = 12;
381  public static final int U_BUFFER_OVERFLOW_ERROR = 15;
382
383  public static boolean U_FAILURE(int error) {
384    return error > U_ZERO_ERROR;
385  }
386
387  // --- Native methods accessing ICU's database.
388
  // Native sources of locale names. Each array is converted to Locale objects by the
  // public getAvailable*Locales() method of the matching name via localesFromStrings().
  // NOTE(review): the names are presumably ICU locale IDs (e.g. "en_US"), since they are
  // parsed by localeFromIcuLocaleId; confirm against the native implementation.
  private static native String[] getAvailableBreakIteratorLocalesNative();
  private static native String[] getAvailableCalendarLocalesNative();
  private static native String[] getAvailableCollatorLocalesNative();
  private static native String[] getAvailableDateFormatLocalesNative();
  private static native String[] getAvailableLocalesNative();
  private static native String[] getAvailableNumberFormatLocalesNative();
395
  // Implemented in native code.
  // NOTE(review): presumably returns every currency code known to ICU; confirm.
  public static native String[] getAvailableCurrencyCodes();
  // Implemented in native code.
  // NOTE(review): presumably maps a country code to its currency code; the result for an
  // unknown country is not visible from this file.
  public static native String getCurrencyCode(String countryCode);
398
399  public static String getCurrencyDisplayName(Locale locale, String currencyCode) {
400    return getCurrencyDisplayName(locale.toLanguageTag(), currencyCode);
401  }
402
403  private static native String getCurrencyDisplayName(String languageTag, String currencyCode);
404
  // Implemented in native code.
  // NOTE(review): presumably the default number of fraction digits for the currency; confirm.
  public static native int getCurrencyFractionDigits(String currencyCode);
  // Implemented in native code.
  // NOTE(review): presumably the ISO 4217 numeric code for the currency; confirm.
  public static native int getCurrencyNumericCode(String currencyCode);
407
408  public static String getCurrencySymbol(Locale locale, String currencyCode) {
409    return getCurrencySymbol(locale.toLanguageTag(), currencyCode);
410  }
411
412  private static native String getCurrencySymbol(String languageTag, String currencyCode);
413
414  public static String getDisplayCountry(Locale targetLocale, Locale locale) {
415    return getDisplayCountryNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
416  }
417
418  private static native String getDisplayCountryNative(String targetLanguageTag, String languageTag);
419
420  public static String getDisplayLanguage(Locale targetLocale, Locale locale) {
421    return getDisplayLanguageNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
422  }
423
424  private static native String getDisplayLanguageNative(String targetLanguageTag, String languageTag);
425
426  public static String getDisplayVariant(Locale targetLocale, Locale locale) {
427    return getDisplayVariantNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
428  }
429
430  private static native String getDisplayVariantNative(String targetLanguageTag, String languageTag);
431
432  public static String getDisplayScript(Locale targetLocale, Locale locale) {
433    return getDisplayScriptNative(targetLocale.toLanguageTag(), locale.toLanguageTag());
434  }
435
436  private static native String getDisplayScriptNative(String targetLanguageTag, String languageTag);
437
  // Implemented in native code; takes a language tag.
  // NOTE(review): presumably returns the three-letter ISO 3166 country code; confirm.
  public static native String getISO3Country(String languageTag);

  // Implemented in native code; takes a language tag.
  // NOTE(review): presumably returns the three-letter ISO 639 language code; confirm.
  public static native String getISO3Language(String languageTag);
441
442  public static Locale addLikelySubtags(Locale locale) {
443      return Locale.forLanguageTag(addLikelySubtags(locale.toLanguageTag()).replace('_', '-'));
444  }
445
446  /**
447   * @deprecated use {@link #addLikelySubtags(java.util.Locale)} instead.
448   */
449  @Deprecated
450  public static native String addLikelySubtags(String locale);
451
  /**
   * Implemented in native code.
   *
   * @deprecated use {@link java.util.Locale#getScript()} instead. This has been kept
   *     around only for the support library.
   */
  @Deprecated
  public static native String getScript(String locale);

  // Native sources for getISOLanguages() and getISOCountries(), which cache the returned
  // arrays and hand out copies.
  private static native String[] getISOLanguagesNative();
  private static native String[] getISOCountriesNative();

  // Implemented in native code.
  // NOTE(review): presumably fills in 'result' for the given language tag and returns
  // whether that succeeded; confirm against the native implementation and LocaleData.
  static native boolean initLocaleDataNative(String languageTag, LocaleData result);

  /**
   * Takes a BCP-47 language tag (Locale.toLanguageTag()). e.g. en-US, not en_US
   */
  public static native void setDefaultLocale(String languageTag);

  /**
   * Returns a locale name, not a BCP-47 language tag. e.g. en_US not en-US.
   */
  public static native String getDefaultLocale();

  /** Returns the TZData version as reported by ICU4C. */
  public static native String getTZDataVersion();
476}
477