1/*
2 * Copyright (C) 2011 The Libphonenumber Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.i18n.phonenumbers.geocoding;
18
19import java.io.Externalizable;
20import java.io.IOException;
21import java.io.ObjectInput;
22import java.io.ObjectOutput;
23import java.util.ArrayList;
24import java.util.Arrays;
25import java.util.Collections;
26import java.util.HashMap;
27import java.util.HashSet;
28import java.util.List;
29import java.util.Map;
30import java.util.Set;
31import java.util.SortedMap;
32import java.util.SortedSet;
33import java.util.TreeSet;
34
/**
 * A utility which knows the data files that are available for the geocoder to use. The data files
 * contain mappings from phone number prefixes to text descriptions, and are organized by country
 * calling code and language that the text descriptions are in.
 *
 * <p>An instance starts out empty. It is populated by {@link #readFileConfigs(java.util.SortedMap)}
 * or {@link #readExternal(java.io.ObjectInput)}; each of those calls replaces whatever the instance
 * held before.
 *
 * @author Shaopeng Jia
 */
public class MappingFileProvider implements Externalizable {
  private int numOfEntries = 0;
  // Country calling codes in ascending order; getFileName() binary-searches this array.
  private int[] countryCallingCodes;
  // availableLanguages.get(i) holds the languages available for countryCallingCodes[i].
  private List<Set<String>> availableLanguages;
  // Maps a full locale string to the locale name under which its data is looked up first.
  private static final Map<String, String> LOCALE_NORMALIZATION_MAP;

  static {
    Map<String, String> normalizationMap = new HashMap<String, String>();
    normalizationMap.put("zh_TW", "zh_Hant");
    normalizationMap.put("zh_HK", "zh_Hant");
    normalizationMap.put("zh_MO", "zh_Hant");

    LOCALE_NORMALIZATION_MAP = Collections.unmodifiableMap(normalizationMap);
  }

  /**
   * Creates an empty {@link MappingFileProvider}. The default constructor is necessary for
   * implementing {@link Externalizable}. The empty provider could later be populated by
   * {@link #readFileConfigs(java.util.SortedMap)} or {@link #readExternal(java.io.ObjectInput)}.
   */
  public MappingFileProvider() {
  }

  /**
   * Initializes an {@link MappingFileProvider} with {@code availableDataFiles}. Any data the
   * provider held before is replaced. The language sets are copied, so later changes to the map or
   * its sets do not affect this provider.
   *
   * @param availableDataFiles  a map from country calling codes to sets of languages in which data
   *     files are available for the specific country calling code. The map is sorted in ascending
   *     order of the country calling codes as integers.
   */
  public void readFileConfigs(SortedMap<Integer, Set<String>> availableDataFiles) {
    int entryCount = availableDataFiles.size();
    int[] codes = new int[entryCount];
    List<Set<String>> languages = new ArrayList<Set<String>>(entryCount);
    int index = 0;
    for (Map.Entry<Integer, Set<String>> entry : availableDataFiles.entrySet()) {
      codes[index++] = entry.getKey();
      languages.add(new HashSet<String>(entry.getValue()));
    }
    // Publish only once everything has been copied, so a failure above leaves the old state intact.
    numOfEntries = entryCount;
    countryCallingCodes = codes;
    availableLanguages = languages;
  }

  /**
   * Supports Java Serialization. Reads the format produced by
   * {@link #writeExternal(java.io.ObjectOutput)} and replaces any data the provider held before.
   *
   * @param objectInput  the stream to read the entries from
   * @throws IOException  if reading from {@code objectInput} fails or the stored number of entries
   *     is negative; the provider keeps its previous contents in that case
   */
  @Override
  public void readExternal(ObjectInput objectInput) throws IOException {
    int entryCount = objectInput.readInt();
    if (entryCount < 0) {
      throw new IOException("Invalid number of entries: " + entryCount);
    }
    // Always start from fresh containers: reusing the old array or list would leave stale calling
    // codes and language sets behind when this instance was already populated.
    int[] codes = new int[entryCount];
    List<Set<String>> languages = new ArrayList<Set<String>>();
    for (int i = 0; i < entryCount; i++) {
      codes[i] = objectInput.readInt();
      int numOfLangs = objectInput.readInt();
      Set<String> setOfLangs = new HashSet<String>();
      for (int j = 0; j < numOfLangs; j++) {
        setOfLangs.add(objectInput.readUTF());
      }
      languages.add(setOfLangs);
    }
    numOfEntries = entryCount;
    countryCallingCodes = codes;
    availableLanguages = languages;
  }

  /**
   * Supports Java Serialization. Writes the number of entries, then for each entry the country
   * calling code, the number of languages and each language as a UTF string.
   *
   * @param objectOutput  the stream to write the entries to
   * @throws IOException  if writing to {@code objectOutput} fails
   */
  @Override
  public void writeExternal(ObjectOutput objectOutput) throws IOException {
    objectOutput.writeInt(numOfEntries);
    for (int i = 0; i < numOfEntries; i++) {
      objectOutput.writeInt(countryCallingCodes[i]);
      Set<String> setOfLangs = availableLanguages.get(i);
      objectOutput.writeInt(setOfLangs.size());
      for (String lang : setOfLangs) {
        objectOutput.writeUTF(lang);
      }
    }
  }

  /**
   * Returns a string representing the data in this class. The string contains one line for each
   * country calling code. The country calling code is followed by a '|' and then the languages
   * sorted in ascending order, each one (including the last) followed by a ','.
   */
  @Override
  public String toString() {
    StringBuilder output = new StringBuilder();
    for (int i = 0; i < numOfEntries; i++) {
      output.append(countryCallingCodes[i]);
      output.append('|');
      SortedSet<String> sortedSetOfLangs = new TreeSet<String>(availableLanguages.get(i));
      for (String lang : sortedSetOfLangs) {
        output.append(lang);
        output.append(',');
      }
      output.append('\n');
    }
    return output.toString();
  }

  /**
   * Gets the name of the file that contains the mapping data for the {@code countryCallingCode} in
   * the language specified.
   *
   * @param countryCallingCode  the country calling code of phone numbers which the data file
   *     contains
   * @param language  two-letter lowercase ISO language codes as defined by ISO 639-1
   * @param script  four-letter titlecase (the first letter is uppercase and the rest of the letters
   *     are lowercase) ISO script codes as defined in ISO 15924
   * @param region  two-letter uppercase ISO country codes as defined by ISO 3166-1
   * @return  the name of the file, or empty string if no such file can be found (which includes
   *     the case where this provider has not been populated yet)
   */
  String getFileName(int countryCallingCode, String language, String script, String region) {
    // A provider that was never populated has no array to search and knows no files.
    if (language.length() == 0 || countryCallingCodes == null) {
      return "";
    }
    int index = Arrays.binarySearch(countryCallingCodes, countryCallingCode);
    if (index < 0) {
      return "";
    }
    String languageCode =
        findBestMatchingLanguageCode(availableLanguages.get(index), language, script, region);
    if (languageCode.length() == 0) {
      return "";
    }
    return countryCallingCode + "_" + languageCode;
  }

  /**
   * Picks the entry of {@code setOfLangs} that best matches the requested locale. Candidates are
   * tried in this order: the normalized form of the full locale (if one is defined), the full
   * locale itself, and then, only when a script and/or region was given, language_script and
   * language_region (when both were given) followed by the bare language.
   *
   * @return  the matching entry, or empty string if none of the candidates is in the set
   */
  private static String findBestMatchingLanguageCode(
      Set<String> setOfLangs, String language, String script, String region) {
    String fullLocale = constructFullLocale(language, script, region);
    String normalizedLocale = LOCALE_NORMALIZATION_MAP.get(fullLocale);
    if (normalizedLocale != null && setOfLangs.contains(normalizedLocale)) {
      return normalizedLocale;
    }
    if (setOfLangs.contains(fullLocale)) {
      return fullLocale;
    }

    boolean hasScript = script.length() > 0;
    boolean hasRegion = region.length() > 0;
    if (hasScript && hasRegion) {
      String langWithScript = language + '_' + script;
      if (setOfLangs.contains(langWithScript)) {
        return langWithScript;
      }
      String langWithRegion = language + '_' + region;
      if (setOfLangs.contains(langWithRegion)) {
        return langWithRegion;
      }
    }
    // With neither script nor region the full locale is the bare language, already checked above.
    if ((hasScript || hasRegion) && setOfLangs.contains(language)) {
      return language;
    }
    return "";
  }

  /** Joins the non-empty parts of the locale with '_', in the order language, script, region. */
  private static String constructFullLocale(String language, String script, String region) {
    StringBuilder fullLocale = new StringBuilder(language);
    appendSubsequentLocalePart(script, fullLocale);
    appendSubsequentLocalePart(region, fullLocale);
    return fullLocale.toString();
  }

  /** Appends '_' and {@code subsequentLocalePart} to {@code fullLocale} unless the part is empty. */
  private static void appendSubsequentLocalePart(
      String subsequentLocalePart, StringBuilder fullLocale) {
    if (subsequentLocalePart.length() > 0) {
      fullLocale.append('_').append(subsequentLocalePart);
    }
  }
}
230