1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17package com.android.providers.contacts;
18
19import android.provider.ContactsContract.FullNameStyle;
20import android.provider.ContactsContract.PhoneticNameStyle;
21import android.text.TextUtils;
22import android.util.Log;
23
24import com.android.providers.contacts.HanziToPinyin.Token;
25import com.google.common.annotations.VisibleForTesting;
26
27import java.lang.Character.UnicodeBlock;
28import java.util.Arrays;
29import java.util.ArrayList;
30import java.util.Collections;
31import java.util.HashMap;
32import java.util.HashSet;
33import java.util.Iterator;
34import java.util.List;
35import java.util.Locale;
36import java.util.Map;
37import java.util.Set;
38
39import libcore.icu.AlphabeticIndex;
40import libcore.icu.AlphabeticIndex.ImmutableIndex;
41import libcore.icu.Transliterator;
42
43/**
44 * This utility class provides specialized handling for locale specific
45 * information: labels, name lookup keys.
46 */
47public class ContactLocaleUtils {
    /** Tag passed to {@link Log} by this class and its nested helpers. */
    public static final String TAG = "ContactLocale";

    // Language-only locales. Within this file they are used to add extra
    // index labels when ContactLocaleUtilsBase builds its AlphabeticIndex.
    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Serbian and Ukrainian labels are complementary supersets of Russian
    public static final Locale LOCALE_SERBIAN = new Locale("sr");
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");
57
58    /**
59     * This class is the default implementation and should be the base class
60     * for other locales.
61     *
62     * sortKey: same as name
63     * nameLookupKeys: none
64     * labels: uses ICU AlphabeticIndex for labels and extends by labeling
65     *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
66     */
67    private static class ContactLocaleUtilsBase {
68        private static final String EMPTY_STRING = "";
69        private static final String NUMBER_STRING = "#";
70
71        protected final ImmutableIndex mAlphabeticIndex;
72        private final int mAlphabeticIndexBucketCount;
73        private final int mNumberBucketIndex;
74        private final boolean mEnableSecondaryLocalePinyin;
75
76        public ContactLocaleUtilsBase(LocaleSet locales) {
77            // AlphabeticIndex.getBucketLabel() uses a binary search across
78            // the entire label set so care should be taken about growing this
79            // set too large. The following set determines for which locales
80            // we will show labels other than your primary locale. General rules
81            // of thumb for adding a locale: should be a supported locale; and
82            // should not be included if from a name it is not deterministic
83            // which way to label it (so eg Chinese cannot be added because
84            // the labeling of a Chinese character varies between Simplified,
85            // Traditional, and Japanese locales). Use English only for all
86            // Latin based alphabets. Ukrainian and Serbian are chosen for
87            // Cyrillic because their alphabets are complementary supersets
88            // of Russian.
89            final Locale secondaryLocale = locales.getSecondaryLocale();
90            mEnableSecondaryLocalePinyin = locales.isSecondaryLocaleSimplifiedChinese();
91            AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale())
92                .setMaxLabelCount(300);
93            if (secondaryLocale != null) {
94                ai.addLabels(secondaryLocale);
95            }
96            mAlphabeticIndex = ai.addLabels(Locale.ENGLISH)
97                .addLabels(Locale.JAPANESE)
98                .addLabels(Locale.KOREAN)
99                .addLabels(LOCALE_THAI)
100                .addLabels(LOCALE_ARABIC)
101                .addLabels(LOCALE_HEBREW)
102                .addLabels(LOCALE_GREEK)
103                .addLabels(LOCALE_UKRAINIAN)
104                .addLabels(LOCALE_SERBIAN)
105                .getImmutableIndex();
106            mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
107            mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
108        }
109
110        public String getSortKey(String name) {
111            return name;
112        }
113
114        /**
115         * Returns the bucket index for the specified string. AlphabeticIndex
116         * sorts strings into buckets numbered in order from 0 to N, where the
117         * exact value of N depends on how many representative index labels are
118         * used in a particular locale. This routine adds one additional bucket
119         * for phone numbers. It attempts to detect phone numbers and shifts
120         * the bucket indexes returned by AlphabeticIndex in order to make room
121         * for the new # bucket, so the returned range becomes 0 to N+1.
122         */
123        public int getBucketIndex(String name) {
124            boolean prefixIsNumeric = false;
125            final int length = name.length();
126            int offset = 0;
127            while (offset < length) {
128                int codePoint = Character.codePointAt(name, offset);
129                // Ignore standard phone number separators and identify any
130                // string that otherwise starts with a number.
131                if (Character.isDigit(codePoint)) {
132                    prefixIsNumeric = true;
133                    break;
134                } else if (!Character.isSpaceChar(codePoint) &&
135                           codePoint != '+' && codePoint != '(' &&
136                           codePoint != ')' && codePoint != '.' &&
137                           codePoint != '-' && codePoint != '#') {
138                    break;
139                }
140                offset += Character.charCount(codePoint);
141            }
142            if (prefixIsNumeric) {
143                return mNumberBucketIndex;
144            }
145
146            /**
147             * TODO: ICU 52 AlphabeticIndex doesn't support Simplified Chinese
148             * as a secondary locale. Remove the following if that is added.
149             */
150            if (mEnableSecondaryLocalePinyin) {
151                name = HanziToPinyin.getInstance().transliterate(name);
152            }
153            final int bucket = mAlphabeticIndex.getBucketIndex(name);
154            if (bucket < 0) {
155                return -1;
156            }
157            if (bucket >= mNumberBucketIndex) {
158                return bucket + 1;
159            }
160            return bucket;
161        }
162
163        /**
164         * Returns the number of buckets in use (one more than AlphabeticIndex
165         * uses, because this class adds a bucket for phone numbers).
166         */
167        public int getBucketCount() {
168            return mAlphabeticIndexBucketCount + 1;
169        }
170
171        /**
172         * Returns the label for the specified bucket index if a valid index,
173         * otherwise returns an empty string. '#' is returned for the phone
174         * number bucket; for all others, the AlphabeticIndex label is returned.
175         */
176        public String getBucketLabel(int bucketIndex) {
177            if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
178                return EMPTY_STRING;
179            } else if (bucketIndex == mNumberBucketIndex) {
180                return NUMBER_STRING;
181            } else if (bucketIndex > mNumberBucketIndex) {
182                --bucketIndex;
183            }
184            return mAlphabeticIndex.getBucketLabel(bucketIndex);
185        }
186
187        @SuppressWarnings("unused")
188        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
189            return null;
190        }
191
192        public ArrayList<String> getLabels() {
193            final int bucketCount = getBucketCount();
194            final ArrayList<String> labels = new ArrayList<String>(bucketCount);
195            for(int i = 0; i < bucketCount; ++i) {
196                labels.add(getBucketLabel(i));
197            }
198            return labels;
199        }
200    }
201
202    /**
203     * Japanese specific locale overrides.
204     *
205     * sortKey: unchanged (same as name)
206     * nameLookupKeys: unchanged (none)
207     * labels: extends default labels by labeling unlabeled CJ characters
208     *     with the Japanese character 他 ("misc"). Japanese labels are:
209     *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
210     */
211    private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
212        // \u4ed6 is Japanese character 他 ("misc")
213        private static final String JAPANESE_MISC_LABEL = "\u4ed6";
214        private final int mMiscBucketIndex;
215
216        public JapaneseContactUtils(LocaleSet locales) {
217            super(locales);
218            // Determine which bucket AlphabeticIndex is lumping unclassified
219            // Japanese characters into by looking up the bucket index for
220            // a representative Kanji/CJK unified ideograph (\u65e5 is the
221            // character '日').
222            mMiscBucketIndex = super.getBucketIndex("\u65e5");
223        }
224
225        // Set of UnicodeBlocks for unified CJK (Chinese) characters and
226        // Japanese characters. This includes all code blocks that might
227        // contain a character used in Japanese (which is why unified CJK
228        // blocks are included but Korean Hangul and jamo are not).
229        private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
230        static {
231            Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
232            set.add(UnicodeBlock.HIRAGANA);
233            set.add(UnicodeBlock.KATAKANA);
234            set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
235            set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
236            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
237            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
238            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
239            set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
240            set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
241            set.add(UnicodeBlock.CJK_COMPATIBILITY);
242            set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
243            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
244            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
245            CJ_BLOCKS = Collections.unmodifiableSet(set);
246        }
247
248        /**
249         * Helper routine to identify unlabeled Chinese or Japanese characters
250         * to put in a 'misc' bucket.
251         *
252         * @return true if the specified Unicode code point is Chinese or
253         *              Japanese
254         */
255        private static boolean isChineseOrJapanese(int codePoint) {
256            return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
257        }
258
259        /**
260         * Returns the bucket index for the specified string. Adds an
261         * additional 'misc' bucket for Kanji characters to the base class set.
262         */
263        @Override
264        public int getBucketIndex(String name) {
265            final int bucketIndex = super.getBucketIndex(name);
266            if ((bucketIndex == mMiscBucketIndex &&
267                 !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
268                bucketIndex > mMiscBucketIndex) {
269                return bucketIndex + 1;
270            }
271            return bucketIndex;
272        }
273
274        /**
275         * Returns the number of buckets in use (one more than the base class
276         * uses, because this class adds a bucket for Kanji).
277         */
278        @Override
279        public int getBucketCount() {
280            return super.getBucketCount() + 1;
281        }
282
283        /**
284         * Returns the label for the specified bucket index if a valid index,
285         * otherwise returns an empty string. '他' is returned for unclassified
286         * Kanji; for all others, the label determined by the base class is
287         * returned.
288         */
289        @Override
290        public String getBucketLabel(int bucketIndex) {
291            if (bucketIndex == mMiscBucketIndex) {
292                return JAPANESE_MISC_LABEL;
293            } else if (bucketIndex > mMiscBucketIndex) {
294                --bucketIndex;
295            }
296            return super.getBucketLabel(bucketIndex);
297        }
298
299        @Override
300        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
301            // Hiragana and Katakana will be positively identified as Japanese.
302            if (nameStyle == PhoneticNameStyle.JAPANESE) {
303                return getRomajiNameLookupKeys(name);
304            }
305            return null;
306        }
307
308        private static boolean mInitializedTransliterator;
309        private static Transliterator mJapaneseTransliterator;
310
311        private static Transliterator getJapaneseTransliterator() {
312            synchronized(JapaneseContactUtils.class) {
313                if (!mInitializedTransliterator) {
314                    mInitializedTransliterator = true;
315                    Transliterator t = null;
316                    try {
317                        t = new Transliterator("Hiragana-Latin; Katakana-Latin;"
318                                + " Latin-Ascii");
319                    } catch (RuntimeException e) {
320                        Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
321                                + " is missing");
322                    }
323                    mJapaneseTransliterator = t;
324                }
325                return mJapaneseTransliterator;
326            }
327        }
328
329        public static Iterator<String> getRomajiNameLookupKeys(String name) {
330            final Transliterator t = getJapaneseTransliterator();
331            if (t == null) {
332                return null;
333            }
334            final String romajiName = t.transliterate(name);
335            if (TextUtils.isEmpty(romajiName) ||
336                    TextUtils.equals(name, romajiName)) {
337                return null;
338            }
339            final HashSet<String> keys = new HashSet<String>();
340            keys.add(romajiName);
341            return keys.iterator();
342        }
343    }
344
345    /**
346     * Simplified Chinese specific locale overrides. Uses ICU Transliterator
347     * for generating pinyin transliteration.
348     *
349     * sortKey: unchanged (same as name)
350     * nameLookupKeys: adds additional name lookup keys
351     *     - Chinese character's pinyin and pinyin's initial character.
352     *     - Latin word and initial character.
353     * labels: unchanged
354     *     Simplified Chinese labels are the same as English: [A-Z], #, " "
355     */
356    private static class SimplifiedChineseContactUtils
357        extends ContactLocaleUtilsBase {
358        public SimplifiedChineseContactUtils(LocaleSet locales) {
359            super(locales);
360        }
361
362        @Override
363        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
364            if (nameStyle != FullNameStyle.JAPANESE &&
365                    nameStyle != FullNameStyle.KOREAN) {
366                return getPinyinNameLookupKeys(name);
367            }
368            return null;
369        }
370
371        public static Iterator<String> getPinyinNameLookupKeys(String name) {
372            // TODO : Reduce the object allocation.
373            HashSet<String> keys = new HashSet<String>();
374            ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
375            final int tokenCount = tokens.size();
376            final StringBuilder keyPinyin = new StringBuilder();
377            final StringBuilder keyInitial = new StringBuilder();
378            // There is no space among the Chinese Characters, the variant name
379            // lookup key wouldn't work for Chinese. The keyOriginal is used to
380            // build the lookup keys for itself.
381            final StringBuilder keyOriginal = new StringBuilder();
382            for (int i = tokenCount - 1; i >= 0; i--) {
383                final Token token = tokens.get(i);
384                if (Token.UNKNOWN == token.type) {
385                    continue;
386                }
387                if (Token.PINYIN == token.type) {
388                    keyPinyin.insert(0, token.target);
389                    keyInitial.insert(0, token.target.charAt(0));
390                } else if (Token.LATIN == token.type) {
391                    // Avoid adding space at the end of String.
392                    if (keyPinyin.length() > 0) {
393                        keyPinyin.insert(0, ' ');
394                    }
395                    if (keyOriginal.length() > 0) {
396                        keyOriginal.insert(0, ' ');
397                    }
398                    keyPinyin.insert(0, token.source);
399                    keyInitial.insert(0, token.source.charAt(0));
400                }
401                keyOriginal.insert(0, token.source);
402                keys.add(keyOriginal.toString());
403                keys.add(keyPinyin.toString());
404                keys.add(keyInitial.toString());
405            }
406            return keys.iterator();
407        }
408    }
409
    // Lower-cased language code of Locale.JAPANESE; the constructor passes it
    // to LocaleSet.isPrimaryLanguage() to select the Japanese helper.
    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();

    // Shared instance. Created on demand by getInstance() and replaced by
    // setLocales(); both are static synchronized methods.
    private static ContactLocaleUtils sSingleton;

    // Locale set this instance was built for (never null after construction).
    private final LocaleSet mLocales;
    // Locale-specific helper that the public instance methods delegate to.
    private final ContactLocaleUtilsBase mUtils;
416
417    private ContactLocaleUtils(LocaleSet locales) {
418        if (locales == null) {
419            mLocales = LocaleSet.getDefault();
420        } else {
421            mLocales = locales;
422        }
423        if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) {
424            mUtils = new JapaneseContactUtils(mLocales);
425        } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) {
426            mUtils = new SimplifiedChineseContactUtils(mLocales);
427        } else {
428            mUtils = new ContactLocaleUtilsBase(mLocales);
429        }
430        Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
431                + getLabels().toString());
432    }
433
434    public boolean isLocale(LocaleSet locales) {
435        return mLocales.equals(locales);
436    }
437
438    public static synchronized ContactLocaleUtils getInstance() {
439        if (sSingleton == null) {
440            sSingleton = new ContactLocaleUtils(LocaleSet.getDefault());
441        }
442        return sSingleton;
443    }
444
445    @VisibleForTesting
446    public static synchronized void setLocale(Locale locale) {
447        setLocales(new LocaleSet(locale));
448    }
449
450    public static synchronized void setLocales(LocaleSet locales) {
451        if (sSingleton == null || !sSingleton.isLocale(locales)) {
452            sSingleton = new ContactLocaleUtils(locales);
453        }
454    }
455
456    public String getSortKey(String name, int nameStyle) {
457        return mUtils.getSortKey(name);
458    }
459
460    public int getBucketIndex(String name) {
461        return mUtils.getBucketIndex(name);
462    }
463
464    public int getBucketCount() {
465        return mUtils.getBucketCount();
466    }
467
468    public String getBucketLabel(int bucketIndex) {
469        return mUtils.getBucketLabel(bucketIndex);
470    }
471
472    public String getLabel(String name) {
473        return getBucketLabel(getBucketIndex(name));
474    }
475
476    public ArrayList<String> getLabels() {
477        return mUtils.getLabels();
478    }
479
480    /**
481     *  Determine which utility should be used for generating NameLookupKey.
482     *  (ie, whether we generate Romaji or Pinyin lookup keys or not)
483     *
484     *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
485     *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
486     *  lookup keys when not in a Chinese or Korean locale.
487     *
488     *  Otherwise, use the default behavior of that locale:
489     *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
490     *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
491     */
492    public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
493        if (!mLocales.isPrimaryLocaleCJK()) {
494            if (mLocales.isSecondaryLocaleSimplifiedChinese()) {
495                if (nameStyle == FullNameStyle.CHINESE ||
496                        nameStyle == FullNameStyle.CJK) {
497                    return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
498                }
499            } else {
500                if (nameStyle == FullNameStyle.JAPANESE) {
501                    return JapaneseContactUtils.getRomajiNameLookupKeys(name);
502                }
503            }
504        }
505        return mUtils.getNameLookupKeys(name, nameStyle);
506    }
507
508}
509