ContactLocaleUtils.java revision d3b23d40bc15640abaeafa4041737b59f61ea557
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License
15 */
16
17package com.android.providers.contacts;
18
19import android.provider.ContactsContract.FullNameStyle;
20import android.provider.ContactsContract.PhoneticNameStyle;
21import android.text.TextUtils;
22import android.util.Log;
23
24import com.android.providers.contacts.HanziToPinyin.Token;
25
26import java.lang.Character.UnicodeBlock;
27import java.util.Arrays;
28import java.util.ArrayList;
29import java.util.Collections;
30import java.util.HashMap;
31import java.util.HashSet;
32import java.util.Iterator;
33import java.util.List;
34import java.util.Locale;
35import java.util.Map;
36import java.util.Set;
37
38import libcore.icu.AlphabeticIndex;
39import libcore.icu.AlphabeticIndex.ImmutableIndex;
40import libcore.icu.Transliterator;
41
42/**
43 * This utility class provides specialized handling for locale specific
44 * information: labels, name lookup keys.
45 */
46public class ContactLocaleUtils {
    // Tag under which this class writes its log messages.
    public static final String TAG = "ContactLocale";

    // Locales whose index labels are added on top of the primary locale's
    // labels when the AlphabeticIndex is built in ContactLocaleUtilsBase.
    public static final Locale LOCALE_ARABIC = new Locale("ar");
    public static final Locale LOCALE_GREEK = new Locale("el");
    public static final Locale LOCALE_HEBREW = new Locale("he");
    // Serbian and Ukrainian labels are complementary supersets of Russian
    public static final Locale LOCALE_SERBIAN = new Locale("sr");
    public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
    public static final Locale LOCALE_THAI = new Locale("th");
56
57    /**
58     * This class is the default implementation and should be the base class
59     * for other locales.
60     *
61     * sortKey: same as name
62     * nameLookupKeys: none
63     * labels: uses ICU AlphabeticIndex for labels and extends by labeling
64     *     phone numbers "#".  Eg English labels are: [A-Z], #, " "
65     */
66    private static class ContactLocaleUtilsBase {
67        private static final String EMPTY_STRING = "";
68        private static final String NUMBER_STRING = "#";
69
70        protected final ImmutableIndex mAlphabeticIndex;
71        private final int mAlphabeticIndexBucketCount;
72        private final int mNumberBucketIndex;
73
74        public ContactLocaleUtilsBase(Locale locale) {
75            // AlphabeticIndex.getBucketLabel() uses a binary search across
76            // the entire label set so care should be taken about growing this
77            // set too large. The following set determines for which locales
78            // we will show labels other than your primary locale. General rules
79            // of thumb for adding a locale: should be a supported locale; and
80            // should not be included if from a name it is not deterministic
81            // which way to label it (so eg Chinese cannot be added because
82            // the labeling of a Chinese character varies between Simplified,
83            // Traditional, and Japanese locales). Use English only for all
84            // Latin based alphabets. Ukrainian and Serbian are chosen for
85            // Cyrillic because their alphabets are complementary supersets
86            // of Russian.
87            mAlphabeticIndex = new AlphabeticIndex(locale)
88                .setMaxLabelCount(300)
89                .addLabels(Locale.ENGLISH)
90                .addLabels(Locale.JAPANESE)
91                .addLabels(Locale.KOREAN)
92                .addLabels(LOCALE_THAI)
93                .addLabels(LOCALE_ARABIC)
94                .addLabels(LOCALE_HEBREW)
95                .addLabels(LOCALE_GREEK)
96                .addLabels(LOCALE_UKRAINIAN)
97                .addLabels(LOCALE_SERBIAN)
98                .getImmutableIndex();
99            mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
100            mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
101        }
102
103        public String getSortKey(String name) {
104            return name;
105        }
106
107        /**
108         * Returns the bucket index for the specified string. AlphabeticIndex
109         * sorts strings into buckets numbered in order from 0 to N, where the
110         * exact value of N depends on how many representative index labels are
111         * used in a particular locale. This routine adds one additional bucket
112         * for phone numbers. It attempts to detect phone numbers and shifts
113         * the bucket indexes returned by AlphabeticIndex in order to make room
114         * for the new # bucket, so the returned range becomes 0 to N+1.
115         */
116        public int getBucketIndex(String name) {
117            boolean prefixIsNumeric = false;
118            final int length = name.length();
119            int offset = 0;
120            while (offset < length) {
121                int codePoint = Character.codePointAt(name, offset);
122                // Ignore standard phone number separators and identify any
123                // string that otherwise starts with a number.
124                if (Character.isDigit(codePoint)) {
125                    prefixIsNumeric = true;
126                    break;
127                } else if (!Character.isSpaceChar(codePoint) &&
128                           codePoint != '+' && codePoint != '(' &&
129                           codePoint != ')' && codePoint != '.' &&
130                           codePoint != '-' && codePoint != '#') {
131                    break;
132                }
133                offset += Character.charCount(codePoint);
134            }
135            if (prefixIsNumeric) {
136                return mNumberBucketIndex;
137            }
138
139            final int bucket = mAlphabeticIndex.getBucketIndex(name);
140            if (bucket < 0) {
141                return -1;
142            }
143            if (bucket >= mNumberBucketIndex) {
144                return bucket + 1;
145            }
146            return bucket;
147        }
148
149        /**
150         * Returns the number of buckets in use (one more than AlphabeticIndex
151         * uses, because this class adds a bucket for phone numbers).
152         */
153        public int getBucketCount() {
154            return mAlphabeticIndexBucketCount + 1;
155        }
156
157        /**
158         * Returns the label for the specified bucket index if a valid index,
159         * otherwise returns an empty string. '#' is returned for the phone
160         * number bucket; for all others, the AlphabeticIndex label is returned.
161         */
162        public String getBucketLabel(int bucketIndex) {
163            if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
164                return EMPTY_STRING;
165            } else if (bucketIndex == mNumberBucketIndex) {
166                return NUMBER_STRING;
167            } else if (bucketIndex > mNumberBucketIndex) {
168                --bucketIndex;
169            }
170            return mAlphabeticIndex.getBucketLabel(bucketIndex);
171        }
172
173        @SuppressWarnings("unused")
174        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
175            return null;
176        }
177
178        public ArrayList<String> getLabels() {
179            final int bucketCount = getBucketCount();
180            final ArrayList<String> labels = new ArrayList<String>(bucketCount);
181            for(int i = 0; i < bucketCount; ++i) {
182                labels.add(getBucketLabel(i));
183            }
184            return labels;
185        }
186    }
187
188    /**
189     * Japanese specific locale overrides.
190     *
191     * sortKey: unchanged (same as name)
192     * nameLookupKeys: unchanged (none)
193     * labels: extends default labels by labeling unlabeled CJ characters
194     *     with the Japanese character 他 ("misc"). Japanese labels are:
195     *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
196     */
197    private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
198        // \u4ed6 is Japanese character 他 ("misc")
199        private static final String JAPANESE_MISC_LABEL = "\u4ed6";
200        private final int mMiscBucketIndex;
201
202        public JapaneseContactUtils(Locale locale) {
203            super(locale);
204            // Determine which bucket AlphabeticIndex is lumping unclassified
205            // Japanese characters into by looking up the bucket index for
206            // a representative Kanji/CJK unified ideograph (\u65e5 is the
207            // character '日').
208            mMiscBucketIndex = super.getBucketIndex("\u65e5");
209        }
210
211        // Set of UnicodeBlocks for unified CJK (Chinese) characters and
212        // Japanese characters. This includes all code blocks that might
213        // contain a character used in Japanese (which is why unified CJK
214        // blocks are included but Korean Hangul and jamo are not).
215        private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
216        static {
217            Set<UnicodeBlock> set = new HashSet<UnicodeBlock>();
218            set.add(UnicodeBlock.HIRAGANA);
219            set.add(UnicodeBlock.KATAKANA);
220            set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
221            set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
222            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
223            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
224            set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
225            set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
226            set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
227            set.add(UnicodeBlock.CJK_COMPATIBILITY);
228            set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
229            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
230            set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
231            CJ_BLOCKS = Collections.unmodifiableSet(set);
232        }
233
234        /**
235         * Helper routine to identify unlabeled Chinese or Japanese characters
236         * to put in a 'misc' bucket.
237         *
238         * @return true if the specified Unicode code point is Chinese or
239         *              Japanese
240         */
241        private static boolean isChineseOrJapanese(int codePoint) {
242            return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
243        }
244
245        /**
246         * Returns the bucket index for the specified string. Adds an
247         * additional 'misc' bucket for Kanji characters to the base class set.
248         */
249        @Override
250        public int getBucketIndex(String name) {
251            final int bucketIndex = super.getBucketIndex(name);
252            if ((bucketIndex == mMiscBucketIndex &&
253                 !isChineseOrJapanese(Character.codePointAt(name, 0))) ||
254                bucketIndex > mMiscBucketIndex) {
255                return bucketIndex + 1;
256            }
257            return bucketIndex;
258        }
259
260        /**
261         * Returns the number of buckets in use (one more than the base class
262         * uses, because this class adds a bucket for Kanji).
263         */
264        @Override
265        public int getBucketCount() {
266            return super.getBucketCount() + 1;
267        }
268
269        /**
270         * Returns the label for the specified bucket index if a valid index,
271         * otherwise returns an empty string. '他' is returned for unclassified
272         * Kanji; for all others, the label determined by the base class is
273         * returned.
274         */
275        @Override
276        public String getBucketLabel(int bucketIndex) {
277            if (bucketIndex == mMiscBucketIndex) {
278                return JAPANESE_MISC_LABEL;
279            } else if (bucketIndex > mMiscBucketIndex) {
280                --bucketIndex;
281            }
282            return super.getBucketLabel(bucketIndex);
283        }
284
285        @Override
286        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
287            // Hiragana and Katakana will be positively identified as Japanese.
288            if (nameStyle == PhoneticNameStyle.JAPANESE) {
289                return getRomajiNameLookupKeys(name);
290            }
291            return null;
292        }
293
294        private static boolean mInitializedTransliterator;
295        private static Transliterator mJapaneseTransliterator;
296
297        private static Transliterator getJapaneseTransliterator() {
298            synchronized(JapaneseContactUtils.class) {
299                if (!mInitializedTransliterator) {
300                    mInitializedTransliterator = true;
301                    Transliterator t = null;
302                    try {
303                        t = new Transliterator("Hiragana-Latin; Katakana-Latin;"
304                                + " Latin-Ascii");
305                    } catch (RuntimeException e) {
306                        Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
307                                + " is missing");
308                    }
309                    mJapaneseTransliterator = t;
310                }
311                return mJapaneseTransliterator;
312            }
313        }
314
315        public static Iterator<String> getRomajiNameLookupKeys(String name) {
316            final Transliterator t = getJapaneseTransliterator();
317            if (t == null) {
318                return null;
319            }
320            final String romajiName = t.transliterate(name);
321            if (TextUtils.isEmpty(romajiName) ||
322                    TextUtils.equals(name, romajiName)) {
323                return null;
324            }
325            final HashSet<String> keys = new HashSet<String>();
326            keys.add(romajiName);
327            return keys.iterator();
328        }
329    }
330
331    /**
332     * Simplified Chinese specific locale overrides. Uses ICU Transliterator
333     * for generating pinyin transliteration.
334     *
335     * sortKey: unchanged (same as name)
336     * nameLookupKeys: adds additional name lookup keys
337     *     - Chinese character's pinyin and pinyin's initial character.
338     *     - Latin word and initial character.
339     * labels: unchanged
340     *     Simplified Chinese labels are the same as English: [A-Z], #, " "
341     */
342    private static class SimplifiedChineseContactUtils
343        extends ContactLocaleUtilsBase {
344        public SimplifiedChineseContactUtils(Locale locale) {
345            super(locale);
346        }
347
348        @Override
349        public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
350            if (nameStyle != FullNameStyle.JAPANESE &&
351                    nameStyle != FullNameStyle.KOREAN) {
352                return getPinyinNameLookupKeys(name);
353            }
354            return null;
355        }
356
357        public static Iterator<String> getPinyinNameLookupKeys(String name) {
358            // TODO : Reduce the object allocation.
359            HashSet<String> keys = new HashSet<String>();
360            ArrayList<Token> tokens = HanziToPinyin.getInstance().get(name);
361            final int tokenCount = tokens.size();
362            final StringBuilder keyPinyin = new StringBuilder();
363            final StringBuilder keyInitial = new StringBuilder();
364            // There is no space among the Chinese Characters, the variant name
365            // lookup key wouldn't work for Chinese. The keyOriginal is used to
366            // build the lookup keys for itself.
367            final StringBuilder keyOriginal = new StringBuilder();
368            for (int i = tokenCount - 1; i >= 0; i--) {
369                final Token token = tokens.get(i);
370                if (Token.UNKNOWN == token.type) {
371                    continue;
372                }
373                if (Token.PINYIN == token.type) {
374                    keyPinyin.insert(0, token.target);
375                    keyInitial.insert(0, token.target.charAt(0));
376                } else if (Token.LATIN == token.type) {
377                    // Avoid adding space at the end of String.
378                    if (keyPinyin.length() > 0) {
379                        keyPinyin.insert(0, ' ');
380                    }
381                    if (keyOriginal.length() > 0) {
382                        keyOriginal.insert(0, ' ');
383                    }
384                    keyPinyin.insert(0, token.source);
385                    keyInitial.insert(0, token.source.charAt(0));
386                }
387                keyOriginal.insert(0, token.source);
388                keys.add(keyOriginal.toString());
389                keys.add(keyPinyin.toString());
390                keys.add(keyInitial.toString());
391            }
392            return keys.iterator();
393        }
394    }
395
396    private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();
397    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
398    private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();
399
400    private static ContactLocaleUtils sSingleton;
401
402    private final Locale mLocale;
403    private final String mLanguage;
404    private final ContactLocaleUtilsBase mUtils;
405
406    private ContactLocaleUtils(Locale locale) {
407        if (locale == null) {
408            mLocale = Locale.getDefault();
409        } else {
410            mLocale = locale;
411        }
412        mLanguage = mLocale.getLanguage().toLowerCase();
413        if (mLanguage.equals(JAPANESE_LANGUAGE)) {
414            mUtils = new JapaneseContactUtils(mLocale);
415        } else if (mLocale.equals(Locale.CHINA)) {
416            mUtils = new SimplifiedChineseContactUtils(mLocale);
417        } else {
418            mUtils = new ContactLocaleUtilsBase(mLocale);
419        }
420        Log.i(TAG, "AddressBook Labels [" + mLocale.toString() + "]: "
421              + getLabels().toString());
422    }
423
424    public boolean isLocale(Locale locale) {
425        return mLocale.equals(locale);
426    }
427
428    public static synchronized ContactLocaleUtils getInstance() {
429        if (sSingleton == null) {
430            sSingleton = new ContactLocaleUtils(null);
431        }
432        return sSingleton;
433    }
434
435    public static synchronized void setLocale(Locale locale) {
436        if (sSingleton == null || !sSingleton.isLocale(locale)) {
437            sSingleton = new ContactLocaleUtils(locale);
438        }
439    }
440
441    public String getSortKey(String name, int nameStyle) {
442        return mUtils.getSortKey(name);
443    }
444
445    public int getBucketIndex(String name) {
446        return mUtils.getBucketIndex(name);
447    }
448
449    public int getBucketCount() {
450        return mUtils.getBucketCount();
451    }
452
453    public String getBucketLabel(int bucketIndex) {
454        return mUtils.getBucketLabel(bucketIndex);
455    }
456
457    public String getLabel(String name) {
458        return getBucketLabel(getBucketIndex(name));
459    }
460
461    public ArrayList<String> getLabels() {
462        return mUtils.getLabels();
463    }
464
465    /**
466     *  Determine which utility should be used for generating NameLookupKey.
467     *  (ie, whether we generate Pinyin lookup keys or not)
468     *
469     *  Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified
470     *  and tagged as CJK. For Hiragana/Katakana names, generate Romaji
471     *  lookup keys when not in a Chinese or Korean locale.
472     *
473     *  Otherwise, use the default behavior of that locale:
474     *  a. For Japan, generate Romaji lookup keys for Hiragana/Katakana.
475     *  b. For Simplified Chinese locale, generate Pinyin lookup keys.
476     */
477    public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
478        if (nameStyle == FullNameStyle.JAPANESE &&
479                !CHINESE_LANGUAGE.equals(mLanguage) &&
480                !KOREAN_LANGUAGE.equals(mLanguage)) {
481            return JapaneseContactUtils.getRomajiNameLookupKeys(name);
482        }
483        return mUtils.getNameLookupKeys(name, nameStyle);
484    }
485
486}
487