1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17package com.android.providers.contacts;
18
19import android.icu.text.AlphabeticIndex;
20import android.icu.text.AlphabeticIndex.ImmutableIndex;
21import android.icu.text.Transliterator;
22import android.os.LocaleList;
23import android.provider.ContactsContract.FullNameStyle;
24import android.provider.ContactsContract.PhoneticNameStyle;
25import android.text.TextUtils;
26import android.util.ArraySet;
27import android.util.Log;
28
29import com.android.providers.contacts.HanziToPinyin.Token;
30
31import com.google.common.annotations.VisibleForTesting;
32
33import java.lang.Character.UnicodeBlock;
34import java.util.ArrayList;
35import java.util.Collections;
36import java.util.Iterator;
37import java.util.List;
38import java.util.Locale;
39import java.util.Set;
40
41
42/**
43 * This utility class provides specialized handling for locale specific
44 * information: labels, name lookup keys.
45 */
46public class ContactLocaleUtils {
47    public static final String TAG = "ContactLocale";
48
49    private static final boolean DEBUG = false; // don't submit with true
50
51    public static final Locale LOCALE_ARABIC = new Locale("ar");
52    public static final Locale LOCALE_GREEK = new Locale("el");
53    public static final Locale LOCALE_HEBREW = new Locale("he");
54    // Serbian and Ukrainian labels are complementary supersets of Russian
55    public static final Locale LOCALE_SERBIAN = new Locale("sr");
56    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
57    public static final Locale LOCALE_THAI = new Locale("th");
58
59    // -- Note for adding locales to sDefaultLabelLocales --
60    //
61    // AlphabeticIndex.getBucketLabel() uses a binary search across
62    // the entire label set so care should be taken about growing this
63    // set too large. The following set determines for which locales
64    // we will show labels other than your primary locale. General rules
65    // of thumb for adding a locale: should be a supported locale; and
66    // should not be included if from a name it is not deterministic
67    // which way to label it (so eg Chinese cannot be added because
68    // the labeling of a Chinese character varies between Simplified,
69    // Traditional, and Japanese locales). Use English only for all
70    // Latin based alphabets. Ukrainian and Serbian are chosen for
71    // Cyrillic because their alphabets are complementary supersets
72    // of Russian.
73    private static final Locale[] sDefaultLabelLocales = new Locale[]{
74            Locale.ENGLISH,
75            Locale.JAPANESE,
76            Locale.KOREAN,
77            LOCALE_THAI,
78            LOCALE_ARABIC,
79            LOCALE_HEBREW,
80            LOCALE_GREEK,
81            LOCALE_UKRAINIAN,
82            LOCALE_SERBIAN,
83    };
84
85    @VisibleForTesting
86    static void dumpIndex(ImmutableIndex index) {
87        final StringBuilder labels = new StringBuilder();
88        String sep = "";
89        for (int i = 0; i < index.getBucketCount(); i++) {
90            labels.append(sep);
91            labels.append(index.getBucket(i).getLabel());
92            sep = ",";
93        }
94        Log.d(TAG, "Labels=[" + labels + "]");
95    }
96
97    /**
98     * This class is the default implementation and should be the base class
99     * for other locales.
100     *
101     * sortKey: same as name
102     * nameLookupKeys: none
103     * labels: uses ICU AlphabeticIndex for labels and extends by labeling
104     *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
105     */
106    private static class ContactLocaleUtilsBase {
107        private static final String EMPTY_STRING = "";
108        private static final String NUMBER_STRING = "#";
109
110        protected final ImmutableIndex mAlphabeticIndex;
111        private final int mAlphabeticIndexBucketCount;
112        private final int mNumberBucketIndex;
113        private final boolean mUsePinyinTransliterator;
114
115        public ContactLocaleUtilsBase(LocaleSet systemLocales) {
116            mUsePinyinTransliterator = systemLocales.shouldPreferSimplifiedChinese();
117
118            // Build the index buckets based on the current system locale set and
119            // sDefaultLabelLocales.
120            if (DEBUG) {
121                Log.d(TAG, "Building index buckets...");
122            }
123            final List<Locale> locales = getLocalesForBuckets(systemLocales);
124
125            AlphabeticIndex ai = new AlphabeticIndex(locales.get(0))
126                    .setMaxLabelCount(300);
127            for (int i = 1; i < locales.size(); i++) {
128                ai.addLabels(locales.get(i));
129            }
130
131            mAlphabeticIndex = ai.buildImmutableIndex();
132            mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
133            mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
134            if (DEBUG) {
135                dumpIndex(mAlphabeticIndex);
136            }
137        }
138
139        static List<Locale> getLocalesForBuckets(LocaleSet systemLocales) {
140
141            // Create a list of locales that should be used to generate the index buckets.
142            // - Source: the system locales and sDefaultLabelLocales.
143            // - Rules:
144            //   - Don't add the same locale multiple times.
145            //   - Also special rules for Chinese (b/31115382):
146            //     - Don't add multiple Chinese locales.
147            //     - Don't add any Chinese locales after Japanese.
148
149            // First, collect all the locales (allowing duplicates).
150            final LocaleList localeList = systemLocales.getAllLocales();
151
152            final List<Locale> locales = new ArrayList<>(
153                    localeList.size() + sDefaultLabelLocales.length);
154            for (int i = 0; i < localeList.size(); i++) {
155                locales.add(localeList.get(i));
156            }
157            for (int i = 0; i < sDefaultLabelLocales.length; i++) {
158                locales.add(sDefaultLabelLocales[i]);
159            }
160
161            // Then apply the rules to generate the final list.
162            final List<Locale> ret = new ArrayList<>(locales.size());
163            boolean allowChinese = true;
164
165            for (int i = 0; i < locales.size(); i++) {
166                final Locale locale = locales.get(i);
167
168                if (ret.contains(locale)) {
169                    continue;
170                }
171                if (LocaleSet.isLanguageChinese(locale)) {
172                    if (!allowChinese) {
173                        continue;
174                    }
175                    allowChinese = false;
176                }
177                if (LocaleSet.isLanguageJapanese(locale)) {
178                    allowChinese = false;
179                }
180                if (DEBUG) {
181                    Log.d(TAG, "  Adding locale: " + locale.toLanguageTag());
182                }
183                ret.add(locale);
184            }
185            return ret;
186        }
187
188        public String getSortKey(String name) {
189            return name;
190        }
191
192        public int getNumberBucketIndex() {
193            return mNumberBucketIndex;
194        }
195
196        /**
197         * Returns the bucket index for the specified string. AlphabeticIndex
198         * sorts strings into buckets numbered in order from 0 to N, where the
199         * exact value of N depends on how many representative index labels are
200         * used in a particular locale. This routine adds one additional bucket
201         * for phone numbers. It attempts to detect phone numbers and shifts
202         * the bucket indexes returned by AlphabeticIndex in order to make room
203         * for the new # bucket, so the returned range becomes 0 to N+1.
204         */
205        public int getBucketIndex(String name) {
206            boolean prefixIsNumeric = false;
207            final int length = name.length();
208            int offset = 0;
209            while (offset < length) {
210                int codePoint = Character.codePointAt(name, offset);
211                // Ignore standard phone number separators and identify any
212                // string that otherwise starts with a number.
213                if (Character.isDigit(codePoint)) {
214                    prefixIsNumeric = true;
215                    break;
216                } else if (!Character.isSpaceChar(codePoint) &&
217                           codePoint != '+' && codePoint != '(' &&
218                           codePoint != ')' && codePoint != '.' &&
219                           codePoint != '-' && codePoint != '#') {
220                    break;
221                }
222                offset += Character.charCount(codePoint);
223            }
224            if (prefixIsNumeric) {
225                return mNumberBucketIndex;
226            }
227
228            /**
229             * ICU 55 AlphabeticIndex doesn't support Simplified Chinese
230             * as a secondary locale so it is necessary to use the
231             * Pinyin transliterator. We also use this for a Simplified
232             * Chinese primary locale because it gives more accurate letter
233             * buckets. b/19835686
234             */
235            if (mUsePinyinTransliterator) {
236                name = HanziToPinyin.getInstance().transliterate(name);
237            }
238            final int bucket = mAlphabeticIndex.getBucketIndex(name);
239            if (bucket < 0) {
240                return -1;
241            }
242            if (bucket >= mNumberBucketIndex) {
243                return bucket + 1;
244            }
245            return bucket;
246        }
247
248        /**
249         * Returns the number of buckets in use (one more than AlphabeticIndex
250         * uses, because this class adds a bucket for phone numbers).
251         */
252        public int getBucketCount() {
253            return mAlphabeticIndexBucketCount + 1;
254        }
255
256        /**
257         * Returns the label for the specified bucket index if a valid index,
258         * otherwise returns an empty string. '#' is returned for the phone
259         * number bucket; for all others, the AlphabeticIndex label is returned.
260         */
261        public String getBucketLabel(int bucketIndex) {
262            if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
263                return EMPTY_STRING;
264            } else if (bucketIndex == mNumberBucketIndex) {
265                return NUMBER_STRING;
266            } else if (bucketIndex > mNumberBucketIndex) {
267                --bucketIndex;
268            }
269            return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
270        }
271
272        @SuppressWarnings("unused")
273        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
274            return null;
275        }
276
277        public ArrayList<String> getLabels() {
278            final int bucketCount = getBucketCount();
279            final ArrayList<String> labels = new ArrayList<String>(bucketCount);
280            for(int i = 0; i < bucketCount; ++i) {
281                labels.add(getBucketLabel(i));
282            }
283            return labels;
284        }
285    }
286
287    /**
288     * Japanese specific locale overrides.
289     *
290     * sortKey: unchanged (same as name)
291     * nameLookupKeys: unchanged (none)
292     * labels: extends default labels by labeling unlabeled CJ characters
293     *     with the Japanese character 他 ("misc"). Japanese labels are:
294     *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
295     */
296    private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
297        // \u4ed6 is Japanese character 他 ("misc")
298        private static final String JAPANESE_MISC_LABEL = "\u4ed6";
299        private final int mMiscBucketIndex;
300
301        public JapaneseContactUtils(LocaleSet locales) {
302            super(locales);
303            // Determine which bucket AlphabeticIndex is lumping unclassified
304            // Japanese characters into by looking up the bucket index for
305            // a representative Kanji/CJK unified ideograph (\u65e5 is the
306            // character '日').
307            mMiscBucketIndex = super.getBucketIndex("\u65e5");
308        }
309
310        // Set of UnicodeBlocks for unified CJK (Chinese) characters and
311        // Japanese characters. This includes all code blocks that might
312        // contain a character used in Japanese (which is why unified CJK
313        // blocks are included but Korean Hangul and jamo are not).
314        private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
315        static {
316            Set<UnicodeBlock> set = new ArraySet<>();
317            set.add(UnicodeBlock.HIRAGANA);
318            set.add(UnicodeBlock.KATAKANA);
319            set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
320            set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
321            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
322            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
323            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
324            set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
325            set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
326            set.add(UnicodeBlock.CJK_COMPATIBILITY);
327            set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
328            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
329            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
330            CJ_BLOCKS = Collections.unmodifiableSet(set);
331        }
332
333        /**
334         * Helper routine to identify unlabeled Chinese or Japanese characters
335         * to put in a 'misc' bucket.
336         *
337         * @return true if the specified Unicode code point is Chinese or
338         *              Japanese
339         */
340        private static boolean isChineseOrJapanese(int codePoint) {
341            return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
342        }
343
344        /**
345         * Returns the bucket index for the specified string. Adds an
346         * additional 'misc' bucket for Kanji characters to the base class set.
347         */
348        @Override
349        public int getBucketIndex(String name) {
350            final int bucketIndex = super.getBucketIndex(name);
351            if ((bucketIndex == mMiscBucketIndex &&
352                 !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
353                bucketIndex > mMiscBucketIndex) {
354                return bucketIndex + 1;
355            }
356            return bucketIndex;
357        }
358
359        /**
360         * Returns the number of buckets in use (one more than the base class
361         * uses, because this class adds a bucket for Kanji).
362         */
363        @Override
364        public int getBucketCount() {
365            return super.getBucketCount() + 1;
366        }
367
368        /**
369         * Returns the label for the specified bucket index if a valid index,
370         * otherwise returns an empty string. '他' is returned for unclassified
371         * Kanji; for all others, the label determined by the base class is
372         * returned.
373         */
374        @Override
375        public String getBucketLabel(int bucketIndex) {
376            if (bucketIndex == mMiscBucketIndex) {
377                return JAPANESE_MISC_LABEL;
378            } else if (bucketIndex > mMiscBucketIndex) {
379                --bucketIndex;
380            }
381            return super.getBucketLabel(bucketIndex);
382        }
383
384        @Override
385        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
386            // Hiragana and Katakana will be positively identified as Japanese.
387            if (nameStyle == PhoneticNameStyle.JAPANESE) {
388                return getRomajiNameLookupKeys(name);
389            }
390            return null;
391        }
392
393        private static boolean mInitializedTransliterator;
394        private static Transliterator mJapaneseTransliterator;
395
396        private static Transliterator getJapaneseTransliterator() {
397            synchronized(JapaneseContactUtils.class) {
398                if (!mInitializedTransliterator) {
399                    mInitializedTransliterator = true;
400                    Transliterator t = null;
401                    try {
402                        t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
403                                + " Latin-Ascii");
404                    } catch (IllegalArgumentException e) {
405                        Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
406                                + " is missing");
407                    }
408                    mJapaneseTransliterator = t;
409                }
410                return mJapaneseTransliterator;
411            }
412        }
413
414        public static Iterator<String> getRomajiNameLookupKeys(String name) {
415            final Transliterator t = getJapaneseTransliterator();
416            if (t == null) {
417                return null;
418            }
419            final String romajiName = t.transliterate(name);
420            if (TextUtils.isEmpty(romajiName) ||
421                    TextUtils.equals(name, romajiName)) {
422                return null;
423            }
424            final ArraySet<String> keys = new ArraySet<>();
425            keys.add(romajiName);
426            return keys.iterator();
427        }
428
429        /**
430         * Returns the number for "#" bucket index.
431         * Adds an additional 'misc' bucket for Kanji characters to the base class set.
432         */
433        @Override
434        public int getNumberBucketIndex() {
435            final int numberBucketIndex = super.getNumberBucketIndex();
436            if (numberBucketIndex > mMiscBucketIndex) {
437                return numberBucketIndex + 1;
438            }
439            return numberBucketIndex;
440        }
441    }
442
443    /**
444     * Simplified Chinese specific locale overrides. Uses ICU Transliterator
445     * for generating pinyin transliteration.
446     *
447     * sortKey: unchanged (same as name)
448     * nameLookupKeys: adds additional name lookup keys
449     *     - Chinese character's pinyin and pinyin's initial character.
450     *     - Latin word and initial character.
451     * labels: unchanged
452     *     Simplified Chinese labels are the same as English: [A-Z], #, " "
453     */
454    private static class SimplifiedChineseContactUtils
455        extends ContactLocaleUtilsBase {
456        public SimplifiedChineseContactUtils(LocaleSet locales) {
457            super(locales);
458        }
459
460        @Override
461        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
462            if (nameStyle != FullNameStyle.JAPANESE &&
463                    nameStyle != FullNameStyle.KOREAN) {
464                return getPinyinNameLookupKeys(name);
465            }
466            return null;
467        }
468
469        public static Iterator<String> getPinyinNameLookupKeys(String name) {
470            // TODO : Reduce the object allocation.
471            ArraySet<String> keys = new ArraySet<>();
472            ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);
473            final int tokenCount = tokens.size();
474            final StringBuilder keyPinyin = new StringBuilder();
475            final StringBuilder keyInitial = new StringBuilder();
476            // There is no space among the Chinese Characters, the variant name
477            // lookup key wouldn't work for Chinese. The keyOriginal is used to
478            // build the lookup keys for itself.
479            final StringBuilder keyOriginal = new StringBuilder();
480            for (int i = tokenCount - 1; i >= 0; i--) {
481                final Token token = tokens.get(i);
482                if (Token.UNKNOWN == token.type) {
483                    continue;
484                }
485                if (Token.PINYIN == token.type) {
486                    keyPinyin.insert(0, token.target);
487                    keyInitial.insert(0, token.target.charAt(0));
488                } else if (Token.LATIN == token.type) {
489                    // Avoid adding space at the end of String.
490                    if (keyPinyin.length() > 0) {
491                        keyPinyin.insert(0, ' ');
492                    }
493                    if (keyOriginal.length() > 0) {
494                        keyOriginal.insert(0, ' ');
495                    }
496                    keyPinyin.insert(0, token.source);
497                    keyInitial.insert(0, token.source.charAt(0));
498                }
499                keyOriginal.insert(0, token.source);
500                keys.add(keyOriginal.toString());
501                keys.add(keyPinyin.toString());
502                keys.add(keyInitial.toString());
503            }
504            return keys.iterator();
505        }
506    }
507
508    private static ContactLocaleUtils sSingleton;
509
510    private final LocaleSet mLocales;
511    private final ContactLocaleUtilsBase mUtils;
512
513    private ContactLocaleUtils(LocaleSet locales) {
514        if (locales == null) {
515            mLocales = LocaleSet.newDefault();
516        } else {
517            mLocales = locales;
518        }
519        if (mLocales.shouldPreferJapanese()) {
520            mUtils = new JapaneseContactUtils(mLocales);
521        } else if (mLocales.shouldPreferSimplifiedChinese()) {
522            mUtils = new SimplifiedChineseContactUtils(mLocales);
523        } else {
524            mUtils = new ContactLocaleUtilsBase(mLocales);
525        }
526        Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
527                + getLabels().toString());
528    }
529
530    public boolean isLocale(LocaleSet locales) {
531        return mLocales.equals(locales);
532    }
533
534    public static synchronized ContactLocaleUtils getInstance() {
535        if (sSingleton == null) {
536            sSingleton = new ContactLocaleUtils(LocaleSet.newDefault());
537        }
538        return sSingleton;
539    }
540
541    @VisibleForTesting
542    public static ContactLocaleUtils newInstanceForTest(Locale... locales) {
543        return new ContactLocaleUtils(LocaleSet.newForTest(locales));
544    }
545
546    @VisibleForTesting
547    public static synchronized void setLocaleForTest(Locale... locales) {
548        setLocales(LocaleSet.newForTest(locales));
549    }
550
551    public static synchronized void setLocales(LocaleSet locales) {
552        if (sSingleton == null || !sSingleton.isLocale(locales)) {
553            if (DEBUG) {
554                Log.d(TAG, "Setting locale(s) to " + locales);
555            }
556            sSingleton = new ContactLocaleUtils(locales);
557        }
558    }
559
560    public String getSortKey(String name, int nameStyle) {
561        return mUtils.getSortKey(name);
562    }
563
564    public int getBucketIndex(String name) {
565        return mUtils.getBucketIndex(name);
566    }
567
568    public int getNumberBucketIndex() {
569        return mUtils.getNumberBucketIndex();
570    }
571
572    public int getBucketCount() {
573        return mUtils.getBucketCount();
574    }
575
576    public String getBucketLabel(int bucketIndex) {
577        return mUtils.getBucketLabel(bucketIndex);
578    }
579
580    public String getLabel(String name) {
581        return getBucketLabel(getBucketIndex(name));
582    }
583
584    public ArrayList<String> getLabels() {
585        return mUtils.getLabels();
586    }
587
588    /**
589     *  Determine which utility should be used for generating NameLookupKey.
590     *  (ie, whether we generate Romaji or Pinyin lookup keys or not)
591     *
592     *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
593     *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
594     *  lookup keys when not in a Chinese or Korean locale.
595     *
596     *  Otherwise, use the default behavior of that locale:
597     *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
598     *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
599     */
600    public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
601        if (!mLocales.isPrimaryLocaleCJK()) {
602            if (mLocales.shouldPreferSimplifiedChinese()) {
603                if (nameStyle == FullNameStyle.CHINESE ||
604                        nameStyle == FullNameStyle.CJK) {
605                    return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
606                }
607            } else {
608                if (nameStyle == FullNameStyle.JAPANESE) {
609                    return JapaneseContactUtils.getRomajiNameLookupKeys(name);
610                }
611            }
612        }
613        return mUtils.getNameLookupKeys(name, nameStyle);
614    }
615
616}
617