1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17package com.android.providers.contacts;
18
19import android.provider.ContactsContract.FullNameStyle;
20import android.provider.ContactsContract.PhoneticNameStyle;
21import android.text.TextUtils;
22import android.util.Log;
23
24import com.android.providers.contacts.HanziToPinyin.Token;
25import com.google.common.annotations.VisibleForTesting;
26
27import java.lang.Character.UnicodeBlock;
28import java.util.Arrays;
29import java.util.ArrayList;
30import java.util.Collections;
31import java.util.HashMap;
32import java.util.HashSet;
33import java.util.Iterator;
34import java.util.List;
35import java.util.Locale;
36import java.util.Map;
37import java.util.Set;
38
39import libcore.icu.AlphabeticIndex;
40import libcore.icu.AlphabeticIndex.ImmutableIndex;
41import libcore.icu.Transliterator;
42
43/**
44 * This utility class provides specialized handling for locale specific
45 * information: labels, name lookup keys.
46 */
47public class ContactLocaleUtils {
    // Tag passed to android.util.Log for messages emitted by this class.
    public static final String TAG = "ContactLocale";

    // Locales that have no ready-made java.util.Locale constant. Their index
    // labels are added to the AlphabeticIndex built by ContactLocaleUtilsBase.
    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Serbian and Ukrainian labels are complementary supersets of Russian
    public static final Locale LOCALE_SERBIAN = new Locale("sr");
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");
57
58    /**
59     * This class is the default implementation and should be the base class
60     * for other locales.
61     *
62     * sortKey: same as name
63     * nameLookupKeys: none
64     * labels: uses ICU AlphabeticIndex for labels and extends by labeling
65     *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
66     */
67    private static class ContactLocaleUtilsBase {
68        private static final String EMPTY_STRING = "";
69        private static final String NUMBER_STRING = "#";
70
71        protected final ImmutableIndex mAlphabeticIndex;
72        private final int mAlphabeticIndexBucketCount;
73        private final int mNumberBucketIndex;
74        private final boolean mUsePinyinTransliterator;
75
76        public ContactLocaleUtilsBase(LocaleSet locales) {
77            // AlphabeticIndex.getBucketLabel() uses a binary search across
78            // the entire label set so care should be taken about growing this
79            // set too large. The following set determines for which locales
80            // we will show labels other than your primary locale. General rules
81            // of thumb for adding a locale: should be a supported locale; and
82            // should not be included if from a name it is not deterministic
83            // which way to label it (so eg Chinese cannot be added because
84            // the labeling of a Chinese character varies between Simplified,
85            // Traditional, and Japanese locales). Use English only for all
86            // Latin based alphabets. Ukrainian and Serbian are chosen for
87            // Cyrillic because their alphabets are complementary supersets
88            // of Russian.
89            final Locale secondaryLocale = locales.getSecondaryLocale();
90            mUsePinyinTransliterator = locales.isPrimaryLocaleSimplifiedChinese() ||
91                locales.isSecondaryLocaleSimplifiedChinese();
92            AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
93                .setMaxLabelCount(300);
94            if (secondaryLocale != null) {
95                ai.addLabels(secondaryLocale);
96            }
97            mAlphabeticIndex = ai.addLabels(Locale.ENGLISH)
98                .addLabels(Locale.JAPANESE)
99                .addLabels(Locale.KOREAN)
100                .addLabels(LOCALE_THAI)
101                .addLabels(LOCALE_ARABIC)
102                .addLabels(LOCALE_HEBREW)
103                .addLabels(LOCALE_GREEK)
104                .addLabels(LOCALE_UKRAINIAN)
105                .addLabels(LOCALE_SERBIAN)
106                .getImmutableIndex();
107            mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
108            mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
109        }
110
111        public String getSortKey(String name) {
112            return name;
113        }
114
115        /**
116         * Returns the bucket index for the specified string. AlphabeticIndex
117         * sorts strings into buckets numbered in order from 0 to N, where the
118         * exact value of N depends on how many representative index labels are
119         * used in a particular locale. This routine adds one additional bucket
120         * for phone numbers. It attempts to detect phone numbers and shifts
121         * the bucket indexes returned by AlphabeticIndex in order to make room
122         * for the new # bucket, so the returned range becomes 0 to N+1.
123         */
124        public int getBucketIndex(String name) {
125            boolean prefixIsNumeric = false;
126            final int length = name.length();
127            int offset = 0;
128            while (offset < length) {
129                int codePoint = Character.codePointAt(name, offset);
130                // Ignore standard phone number separators and identify any
131                // string that otherwise starts with a number.
132                if (Character.isDigit(codePoint)) {
133                    prefixIsNumeric = true;
134                    break;
135                } else if (!Character.isSpaceChar(codePoint) &&
136                           codePoint != '+' && codePoint != '(' &&
137                           codePoint != ')' && codePoint != '.' &&
138                           codePoint != '-' && codePoint != '#') {
139                    break;
140                }
141                offset += Character.charCount(codePoint);
142            }
143            if (prefixIsNumeric) {
144                return mNumberBucketIndex;
145            }
146
147            /**
148             * ICU 55 AlphabeticIndex doesn't support Simplified Chinese
149             * as a secondary locale so it is necessary to use the
150             * Pinyin transliterator. We also use this for a Simplified
151             * Chinese primary locale because it gives more accurate letter
152             * buckets. b/19835686
153             */
154            if (mUsePinyinTransliterator) {
155                name = HanziToPinyin.getInstance().transliterate(name);
156            }
157            final int bucket = mAlphabeticIndex.getBucketIndex(name);
158            if (bucket < 0) {
159                return -1;
160            }
161            if (bucket >= mNumberBucketIndex) {
162                return bucket + 1;
163            }
164            return bucket;
165        }
166
167        /**
168         * Returns the number of buckets in use (one more than AlphabeticIndex
169         * uses, because this class adds a bucket for phone numbers).
170         */
171        public int getBucketCount() {
172            return mAlphabeticIndexBucketCount + 1;
173        }
174
175        /**
176         * Returns the label for the specified bucket index if a valid index,
177         * otherwise returns an empty string. '#' is returned for the phone
178         * number bucket; for all others, the AlphabeticIndex label is returned.
179         */
180        public String getBucketLabel(int bucketIndex) {
181            if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
182                return EMPTY_STRING;
183            } else if (bucketIndex == mNumberBucketIndex) {
184                return NUMBER_STRING;
185            } else if (bucketIndex > mNumberBucketIndex) {
186                --bucketIndex;
187            }
188            return mAlphabeticIndex.getBucketLabel(bucketIndex);
189        }
190
191        @SuppressWarnings("unused")
192        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
193            return null;
194        }
195
196        public ArrayList<String> getLabels() {
197            final int bucketCount = getBucketCount();
198            final ArrayList<String> labels = new ArrayList<String>(bucketCount);
199            for(int i = 0; i < bucketCount; ++i) {
200                labels.add(getBucketLabel(i));
201            }
202            return labels;
203        }
204    }
205
206    /**
207     * Japanese specific locale overrides.
208     *
209     * sortKey: unchanged (same as name)
210     * nameLookupKeys: unchanged (none)
211     * labels: extends default labels by labeling unlabeled CJ characters
212     *     with the Japanese character 他 ("misc"). Japanese labels are:
213     *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
214     */
215    private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
216        // \u4ed6 is Japanese character 他 ("misc")
217        private static final String JAPANESE_MISC_LABEL = "\u4ed6";
218        private final int mMiscBucketIndex;
219
220        public JapaneseContactUtils(LocaleSet locales) {
221            super(locales);
222            // Determine which bucket AlphabeticIndex is lumping unclassified
223            // Japanese characters into by looking up the bucket index for
224            // a representative Kanji/CJK unified ideograph (\u65e5 is the
225            // character '日').
226            mMiscBucketIndex = super.getBucketIndex("\u65e5");
227        }
228
229        // Set of UnicodeBlocks for unified CJK (Chinese) characters and
230        // Japanese characters. This includes all code blocks that might
231        // contain a character used in Japanese (which is why unified CJK
232        // blocks are included but Korean Hangul and jamo are not).
233        private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
234        static {
235            Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
236            set.add(UnicodeBlock.HIRAGANA);
237            set.add(UnicodeBlock.KATAKANA);
238            set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
239            set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
240            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
241            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
242            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
243            set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
244            set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
245            set.add(UnicodeBlock.CJK_COMPATIBILITY);
246            set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
247            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
248            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
249            CJ_BLOCKS = Collections.unmodifiableSet(set);
250        }
251
252        /**
253         * Helper routine to identify unlabeled Chinese or Japanese characters
254         * to put in a 'misc' bucket.
255         *
256         * @return true if the specified Unicode code point is Chinese or
257         *              Japanese
258         */
259        private static boolean isChineseOrJapanese(int codePoint) {
260            return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
261        }
262
263        /**
264         * Returns the bucket index for the specified string. Adds an
265         * additional 'misc' bucket for Kanji characters to the base class set.
266         */
267        @Override
268        public int getBucketIndex(String name) {
269            final int bucketIndex = super.getBucketIndex(name);
270            if ((bucketIndex == mMiscBucketIndex &&
271                 !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
272                bucketIndex > mMiscBucketIndex) {
273                return bucketIndex + 1;
274            }
275            return bucketIndex;
276        }
277
278        /**
279         * Returns the number of buckets in use (one more than the base class
280         * uses, because this class adds a bucket for Kanji).
281         */
282        @Override
283        public int getBucketCount() {
284            return super.getBucketCount() + 1;
285        }
286
287        /**
288         * Returns the label for the specified bucket index if a valid index,
289         * otherwise returns an empty string. '他' is returned for unclassified
290         * Kanji; for all others, the label determined by the base class is
291         * returned.
292         */
293        @Override
294        public String getBucketLabel(int bucketIndex) {
295            if (bucketIndex == mMiscBucketIndex) {
296                return JAPANESE_MISC_LABEL;
297            } else if (bucketIndex > mMiscBucketIndex) {
298                --bucketIndex;
299            }
300            return super.getBucketLabel(bucketIndex);
301        }
302
303        @Override
304        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
305            // Hiragana and Katakana will be positively identified as Japanese.
306            if (nameStyle == PhoneticNameStyle.JAPANESE) {
307                return getRomajiNameLookupKeys(name);
308            }
309            return null;
310        }
311
312        private static boolean mInitializedTransliterator;
313        private static Transliterator mJapaneseTransliterator;
314
315        private static Transliterator getJapaneseTransliterator() {
316            synchronized(JapaneseContactUtils.class) {
317                if (!mInitializedTransliterator) {
318                    mInitializedTransliterator = true;
319                    Transliterator t = null;
320                    try {
321                        t = new Transliterator("Hiragana-Latin; Katakana-Latin;"
322                                + " Latin-Ascii");
323                    } catch (RuntimeException e) {
324                        Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
325                                + " is missing");
326                    }
327                    mJapaneseTransliterator = t;
328                }
329                return mJapaneseTransliterator;
330            }
331        }
332
333        public static Iterator<String> getRomajiNameLookupKeys(String name) {
334            final Transliterator t = getJapaneseTransliterator();
335            if (t == null) {
336                return null;
337            }
338            final String romajiName = t.transliterate(name);
339            if (TextUtils.isEmpty(romajiName) ||
340                    TextUtils.equals(name, romajiName)) {
341                return null;
342            }
343            final HashSet<String> keys = new HashSet<String>();
344            keys.add(romajiName);
345            return keys.iterator();
346        }
347    }
348
349    /**
350     * Simplified Chinese specific locale overrides. Uses ICU Transliterator
351     * for generating pinyin transliteration.
352     *
353     * sortKey: unchanged (same as name)
354     * nameLookupKeys: adds additional name lookup keys
355     *     - Chinese character's pinyin and pinyin's initial character.
356     *     - Latin word and initial character.
357     * labels: unchanged
358     *     Simplified Chinese labels are the same as English: [A-Z], #, " "
359     */
360    private static class SimplifiedChineseContactUtils
361        extends ContactLocaleUtilsBase {
362        public SimplifiedChineseContactUtils(LocaleSet locales) {
363            super(locales);
364        }
365
366        @Override
367        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
368            if (nameStyle != FullNameStyle.JAPANESE &&
369                    nameStyle != FullNameStyle.KOREAN) {
370                return getPinyinNameLookupKeys(name);
371            }
372            return null;
373        }
374
375        public static Iterator<String> getPinyinNameLookupKeys(String name) {
376            // TODO : Reduce the object allocation.
377            HashSet<String> keys = new HashSet<String>();
378            ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
379            final int tokenCount = tokens.size();
380            final StringBuilder keyPinyin = new StringBuilder();
381            final StringBuilder keyInitial = new StringBuilder();
382            // There is no space among the Chinese Characters, the variant name
383            // lookup key wouldn't work for Chinese. The keyOriginal is used to
384            // build the lookup keys for itself.
385            final StringBuilder keyOriginal = new StringBuilder();
386            for (int i = tokenCount - 1; i >= 0; i--) {
387                final Token token = tokens.get(i);
388                if (Token.UNKNOWN == token.type) {
389                    continue;
390                }
391                if (Token.PINYIN == token.type) {
392                    keyPinyin.insert(0, token.target);
393                    keyInitial.insert(0, token.target.charAt(0));
394                } else if (Token.LATIN == token.type) {
395                    // Avoid adding space at the end of String.
396                    if (keyPinyin.length() > 0) {
397                        keyPinyin.insert(0, ' ');
398                    }
399                    if (keyOriginal.length() > 0) {
400                        keyOriginal.insert(0, ' ');
401                    }
402                    keyPinyin.insert(0, token.source);
403                    keyInitial.insert(0, token.source.charAt(0));
404                }
405                keyOriginal.insert(0, token.source);
406                keys.add(keyOriginal.toString());
407                keys.add(keyPinyin.toString());
408                keys.add(keyInitial.toString());
409            }
410            return keys.iterator();
411        }
412    }
413
    // Lower-cased language code of Locale.JAPANESE; the constructor compares
    // the primary language against it to select JapaneseContactUtils.
    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();

    // Shared instance, created on demand by getInstance() and replaced by
    // setLocales() when a different locale set is supplied.
    private static ContactLocaleUtils sSingleton;

    // Locale set this instance was created for.
    private final LocaleSet mLocales;
    // Locale specific implementation chosen in the constructor.
    private final ContactLocaleUtilsBase mUtils;
420
421    private ContactLocaleUtils(LocaleSet locales) {
422        if (locales == null) {
423            mLocales = LocaleSet.getDefault();
424        } else {
425            mLocales = locales;
426        }
427        if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
428            mUtils = new JapaneseContactUtils(mLocales);
429        } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
430            mUtils = new SimplifiedChineseContactUtils(mLocales);
431        } else {
432            mUtils = new ContactLocaleUtilsBase(mLocales);
433        }
434        Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
435                + getLabels().toString());
436    }
437
438    public boolean isLocale(LocaleSet locales) {
439        return mLocales.equals(locales);
440    }
441
442    public static synchronized ContactLocaleUtils getInstance() {
443        if (sSingleton == null) {
444            sSingleton = new ContactLocaleUtils(LocaleSet.getDefault());
445        }
446        return sSingleton;
447    }
448
449    @VisibleForTesting
450    public static synchronized void setLocale(Locale locale) {
451        setLocales(new LocaleSet(locale));
452    }
453
454    public static synchronized void setLocales(LocaleSet locales) {
455        if (sSingleton == null || !sSingleton.isLocale(locales)) {
456            sSingleton = new ContactLocaleUtils(locales);
457        }
458    }
459
460    public String getSortKey(String name, int nameStyle) {
461        return mUtils.getSortKey(name);
462    }
463
464    public int getBucketIndex(String name) {
465        return mUtils.getBucketIndex(name);
466    }
467
468    public int getBucketCount() {
469        return mUtils.getBucketCount();
470    }
471
472    public String getBucketLabel(int bucketIndex) {
473        return mUtils.getBucketLabel(bucketIndex);
474    }
475
476    public String getLabel(String name) {
477        return getBucketLabel(getBucketIndex(name));
478    }
479
480    public ArrayList<String> getLabels() {
481        return mUtils.getLabels();
482    }
483
484    /**
485     *  Determine which utility should be used for generating NameLookupKey.
486     *  (ie, whether we generate Romaji or Pinyin lookup keys or not)
487     *
488     *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
489     *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
490     *  lookup keys when not in a Chinese or Korean locale.
491     *
492     *  Otherwise, use the default behavior of that locale:
493     *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
494     *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
495     */
496    public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
497        if (!mLocales.isPrimaryLocaleCJK()) {
498            if (mLocales.isSecondaryLocaleSimplifiedChinese()) {
499                if (nameStyle == FullNameStyle.CHINESE ||
500                        nameStyle == FullNameStyle.CJK) {
501                    return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
502                }
503            } else {
504                if (nameStyle == FullNameStyle.JAPANESE) {
505                    return JapaneseContactUtils.getRomajiNameLookupKeys(name);
506                }
507            }
508        }
509        return mUtils.getNameLookupKeys(name, nameStyle);
510    }
511
512}
513