// ContactLocaleUtils.java revision d3b23d40bc15640abaeafa4041737b59f61ea557
1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17package com.android.providers.contacts; 18 19import android.provider.ContactsContract.FullNameStyle; 20import android.provider.ContactsContract.PhoneticNameStyle; 21import android.text.TextUtils; 22import android.util.Log; 23 24import com.android.providers.contacts.HanziToPinyin.Token; 25 26import java.lang.Character.UnicodeBlock; 27import java.util.Arrays; 28import java.util.ArrayList; 29import java.util.Collections; 30import java.util.HashMap; 31import java.util.HashSet; 32import java.util.Iterator; 33import java.util.List; 34import java.util.Locale; 35import java.util.Map; 36import java.util.Set; 37 38import libcore.icu.AlphabeticIndex; 39import libcore.icu.AlphabeticIndex.ImmutableIndex; 40import libcore.icu.Transliterator; 41 42/** 43 * This utility class provides specialized handling for locale specific 44 * information: labels, name lookup keys. 
45 */ 46public class ContactLocaleUtils { 47 public static final String TAG = "ContactLocale"; 48 49 public static final Locale LOCALE_ARABIC = new Locale("ar"); 50 public static final Locale LOCALE_GREEK = new Locale("el"); 51 public static final Locale LOCALE_HEBREW = new Locale("he"); 52 // Serbian and Ukrainian labels are complementary supersets of Russian 53 public static final Locale LOCALE_SERBIAN = new Locale("sr"); 54 public static final Locale LOCALE_UKRAINIAN = new Locale("uk"); 55 public static final Locale LOCALE_THAI = new Locale("th"); 56 57 /** 58 * This class is the default implementation and should be the base class 59 * for other locales. 60 * 61 * sortKey: same as name 62 * nameLookupKeys: none 63 * labels: uses ICU AlphabeticIndex for labels and extends by labeling 64 * phone numbers "#". Eg English labels are: [A-Z], #, " " 65 */ 66 private static class ContactLocaleUtilsBase { 67 private static final String EMPTY_STRING = ""; 68 private static final String NUMBER_STRING = "#"; 69 70 protected final ImmutableIndex mAlphabeticIndex; 71 private final int mAlphabeticIndexBucketCount; 72 private final int mNumberBucketIndex; 73 74 public ContactLocaleUtilsBase(Locale locale) { 75 // AlphabeticIndex.getBucketLabel() uses a binary search across 76 // the entire label set so care should be taken about growing this 77 // set too large. The following set determines for which locales 78 // we will show labels other than your primary locale. General rules 79 // of thumb for adding a locale: should be a supported locale; and 80 // should not be included if from a name it is not deterministic 81 // which way to label it (so eg Chinese cannot be added because 82 // the labeling of a Chinese character varies between Simplified, 83 // Traditional, and Japanese locales). Use English only for all 84 // Latin based alphabets. Ukrainian and Serbian are chosen for 85 // Cyrillic because their alphabets are complementary supersets 86 // of Russian. 
87 mAlphabeticIndex = new AlphabeticIndex(locale) 88 .setMaxLabelCount(300) 89 .addLabels(Locale.ENGLISH) 90 .addLabels(Locale.JAPANESE) 91 .addLabels(Locale.KOREAN) 92 .addLabels(LOCALE_THAI) 93 .addLabels(LOCALE_ARABIC) 94 .addLabels(LOCALE_HEBREW) 95 .addLabels(LOCALE_GREEK) 96 .addLabels(LOCALE_UKRAINIAN) 97 .addLabels(LOCALE_SERBIAN) 98 .getImmutableIndex(); 99 mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount(); 100 mNumberBucketIndex = mAlphabeticIndexBucketCount - 1; 101 } 102 103 public String getSortKey(String name) { 104 return name; 105 } 106 107 /** 108 * Returns the bucket index for the specified string. AlphabeticIndex 109 * sorts strings into buckets numbered in order from 0 to N, where the 110 * exact value of N depends on how many representative index labels are 111 * used in a particular locale. This routine adds one additional bucket 112 * for phone numbers. It attempts to detect phone numbers and shifts 113 * the bucket indexes returned by AlphabeticIndex in order to make room 114 * for the new # bucket, so the returned range becomes 0 to N+1. 115 */ 116 public int getBucketIndex(String name) { 117 boolean prefixIsNumeric = false; 118 final int length = name.length(); 119 int offset = 0; 120 while (offset < length) { 121 int codePoint = Character.codePointAt(name, offset); 122 // Ignore standard phone number separators and identify any 123 // string that otherwise starts with a number. 124 if (Character.isDigit(codePoint)) { 125 prefixIsNumeric = true; 126 break; 127 } else if (!Character.isSpaceChar(codePoint) && 128 codePoint != '+' && codePoint != '(' && 129 codePoint != ')' && codePoint != '.' 
&& 130 codePoint != '-' && codePoint != '#') { 131 break; 132 } 133 offset += Character.charCount(codePoint); 134 } 135 if (prefixIsNumeric) { 136 return mNumberBucketIndex; 137 } 138 139 final int bucket = mAlphabeticIndex.getBucketIndex(name); 140 if (bucket < 0) { 141 return -1; 142 } 143 if (bucket >= mNumberBucketIndex) { 144 return bucket + 1; 145 } 146 return bucket; 147 } 148 149 /** 150 * Returns the number of buckets in use (one more than AlphabeticIndex 151 * uses, because this class adds a bucket for phone numbers). 152 */ 153 public int getBucketCount() { 154 return mAlphabeticIndexBucketCount + 1; 155 } 156 157 /** 158 * Returns the label for the specified bucket index if a valid index, 159 * otherwise returns an empty string. '#' is returned for the phone 160 * number bucket; for all others, the AlphabeticIndex label is returned. 161 */ 162 public String getBucketLabel(int bucketIndex) { 163 if (bucketIndex < 0 || bucketIndex >= getBucketCount()) { 164 return EMPTY_STRING; 165 } else if (bucketIndex == mNumberBucketIndex) { 166 return NUMBER_STRING; 167 } else if (bucketIndex > mNumberBucketIndex) { 168 --bucketIndex; 169 } 170 return mAlphabeticIndex.getBucketLabel(bucketIndex); 171 } 172 173 @SuppressWarnings("unused") 174 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 175 return null; 176 } 177 178 public ArrayList<String> getLabels() { 179 final int bucketCount = getBucketCount(); 180 final ArrayList<String> labels = new ArrayList<String>(bucketCount); 181 for(int i = 0; i < bucketCount; ++i) { 182 labels.add(getBucketLabel(i)); 183 } 184 return labels; 185 } 186 } 187 188 /** 189 * Japanese specific locale overrides. 190 * 191 * sortKey: unchanged (same as name) 192 * nameLookupKeys: unchanged (none) 193 * labels: extends default labels by labeling unlabeled CJ characters 194 * with the Japanese character 他 ("misc"). 
Japanese labels are: 195 * あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " " 196 */ 197 private static class JapaneseContactUtils extends ContactLocaleUtilsBase { 198 // \u4ed6 is Japanese character 他 ("misc") 199 private static final String JAPANESE_MISC_LABEL = "\u4ed6"; 200 private final int mMiscBucketIndex; 201 202 public JapaneseContactUtils(Locale locale) { 203 super(locale); 204 // Determine which bucket AlphabeticIndex is lumping unclassified 205 // Japanese characters into by looking up the bucket index for 206 // a representative Kanji/CJK unified ideograph (\u65e5 is the 207 // character '日'). 208 mMiscBucketIndex = super.getBucketIndex("\u65e5"); 209 } 210 211 // Set of UnicodeBlocks for unified CJK (Chinese) characters and 212 // Japanese characters. This includes all code blocks that might 213 // contain a character used in Japanese (which is why unified CJK 214 // blocks are included but Korean Hangul and jamo are not). 215 private static final Set<Character.UnicodeBlock> CJ_BLOCKS; 216 static { 217 Set<UnicodeBlock> set = new HashSet<UnicodeBlock>(); 218 set.add(UnicodeBlock.HIRAGANA); 219 set.add(UnicodeBlock.KATAKANA); 220 set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS); 221 set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS); 222 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS); 223 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A); 224 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B); 225 set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION); 226 set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT); 227 set.add(UnicodeBlock.CJK_COMPATIBILITY); 228 set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS); 229 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS); 230 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT); 231 CJ_BLOCKS = Collections.unmodifiableSet(set); 232 } 233 234 /** 235 * Helper routine to identify unlabeled Chinese or Japanese characters 236 * to put in a 'misc' bucket. 
237 * 238 * @return true if the specified Unicode code point is Chinese or 239 * Japanese 240 */ 241 private static boolean isChineseOrJapanese(int codePoint) { 242 return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint)); 243 } 244 245 /** 246 * Returns the bucket index for the specified string. Adds an 247 * additional 'misc' bucket for Kanji characters to the base class set. 248 */ 249 @Override 250 public int getBucketIndex(String name) { 251 final int bucketIndex = super.getBucketIndex(name); 252 if ((bucketIndex == mMiscBucketIndex && 253 !isChineseOrJapanese(Character.codePointAt(name, 0))) || 254 bucketIndex > mMiscBucketIndex) { 255 return bucketIndex + 1; 256 } 257 return bucketIndex; 258 } 259 260 /** 261 * Returns the number of buckets in use (one more than the base class 262 * uses, because this class adds a bucket for Kanji). 263 */ 264 @Override 265 public int getBucketCount() { 266 return super.getBucketCount() + 1; 267 } 268 269 /** 270 * Returns the label for the specified bucket index if a valid index, 271 * otherwise returns an empty string. '他' is returned for unclassified 272 * Kanji; for all others, the label determined by the base class is 273 * returned. 274 */ 275 @Override 276 public String getBucketLabel(int bucketIndex) { 277 if (bucketIndex == mMiscBucketIndex) { 278 return JAPANESE_MISC_LABEL; 279 } else if (bucketIndex > mMiscBucketIndex) { 280 --bucketIndex; 281 } 282 return super.getBucketLabel(bucketIndex); 283 } 284 285 @Override 286 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 287 // Hiragana and Katakana will be positively identified as Japanese. 
288 if (nameStyle == PhoneticNameStyle.JAPANESE) { 289 return getRomajiNameLookupKeys(name); 290 } 291 return null; 292 } 293 294 private static boolean mInitializedTransliterator; 295 private static Transliterator mJapaneseTransliterator; 296 297 private static Transliterator getJapaneseTransliterator() { 298 synchronized(JapaneseContactUtils.class) { 299 if (!mInitializedTransliterator) { 300 mInitializedTransliterator = true; 301 Transliterator t = null; 302 try { 303 t = new Transliterator("Hiragana-Latin; Katakana-Latin;" 304 + " Latin-Ascii"); 305 } catch (RuntimeException e) { 306 Log.w(TAG, "Hiragana/Katakana-Latin transliterator data" 307 + " is missing"); 308 } 309 mJapaneseTransliterator = t; 310 } 311 return mJapaneseTransliterator; 312 } 313 } 314 315 public static Iterator<String> getRomajiNameLookupKeys(String name) { 316 final Transliterator t = getJapaneseTransliterator(); 317 if (t == null) { 318 return null; 319 } 320 final String romajiName = t.transliterate(name); 321 if (TextUtils.isEmpty(romajiName) || 322 TextUtils.equals(name, romajiName)) { 323 return null; 324 } 325 final HashSet<String> keys = new HashSet<String>(); 326 keys.add(romajiName); 327 return keys.iterator(); 328 } 329 } 330 331 /** 332 * Simplified Chinese specific locale overrides. Uses ICU Transliterator 333 * for generating pinyin transliteration. 334 * 335 * sortKey: unchanged (same as name) 336 * nameLookupKeys: adds additional name lookup keys 337 * - Chinese character's pinyin and pinyin's initial character. 338 * - Latin word and initial character. 
339 * labels: unchanged 340 * Simplified Chinese labels are the same as English: [A-Z], #, " " 341 */ 342 private static class SimplifiedChineseContactUtils 343 extends ContactLocaleUtilsBase { 344 public SimplifiedChineseContactUtils(Locale locale) { 345 super(locale); 346 } 347 348 @Override 349 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 350 if (nameStyle != FullNameStyle.JAPANESE && 351 nameStyle != FullNameStyle.KOREAN) { 352 return getPinyinNameLookupKeys(name); 353 } 354 return null; 355 } 356 357 public static Iterator<String> getPinyinNameLookupKeys(String name) { 358 // TODO : Reduce the object allocation. 359 HashSet<String> keys = new HashSet<String>(); 360 ArrayList<Token> tokens = HanziToPinyin.getInstance().get(name); 361 final int tokenCount = tokens.size(); 362 final StringBuilder keyPinyin = new StringBuilder(); 363 final StringBuilder keyInitial = new StringBuilder(); 364 // There is no space among the Chinese Characters, the variant name 365 // lookup key wouldn't work for Chinese. The keyOriginal is used to 366 // build the lookup keys for itself. 367 final StringBuilder keyOriginal = new StringBuilder(); 368 for (int i = tokenCount - 1; i >= 0; i--) { 369 final Token token = tokens.get(i); 370 if (Token.UNKNOWN == token.type) { 371 continue; 372 } 373 if (Token.PINYIN == token.type) { 374 keyPinyin.insert(0, token.target); 375 keyInitial.insert(0, token.target.charAt(0)); 376 } else if (Token.LATIN == token.type) { 377 // Avoid adding space at the end of String. 
378 if (keyPinyin.length() > 0) { 379 keyPinyin.insert(0, ' '); 380 } 381 if (keyOriginal.length() > 0) { 382 keyOriginal.insert(0, ' '); 383 } 384 keyPinyin.insert(0, token.source); 385 keyInitial.insert(0, token.source.charAt(0)); 386 } 387 keyOriginal.insert(0, token.source); 388 keys.add(keyOriginal.toString()); 389 keys.add(keyPinyin.toString()); 390 keys.add(keyInitial.toString()); 391 } 392 return keys.iterator(); 393 } 394 } 395 396 private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase(); 397 private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase(); 398 private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase(); 399 400 private static ContactLocaleUtils sSingleton; 401 402 private final Locale mLocale; 403 private final String mLanguage; 404 private final ContactLocaleUtilsBase mUtils; 405 406 private ContactLocaleUtils(Locale locale) { 407 if (locale == null) { 408 mLocale = Locale.getDefault(); 409 } else { 410 mLocale = locale; 411 } 412 mLanguage = mLocale.getLanguage().toLowerCase(); 413 if (mLanguage.equals(JAPANESE_LANGUAGE)) { 414 mUtils = new JapaneseContactUtils(mLocale); 415 } else if (mLocale.equals(Locale.CHINA)) { 416 mUtils = new SimplifiedChineseContactUtils(mLocale); 417 } else { 418 mUtils = new ContactLocaleUtilsBase(mLocale); 419 } 420 Log.i(TAG, "AddressBook Labels [" + mLocale.toString() + "]: " 421 + getLabels().toString()); 422 } 423 424 public boolean isLocale(Locale locale) { 425 return mLocale.equals(locale); 426 } 427 428 public static synchronized ContactLocaleUtils getInstance() { 429 if (sSingleton == null) { 430 sSingleton = new ContactLocaleUtils(null); 431 } 432 return sSingleton; 433 } 434 435 public static synchronized void setLocale(Locale locale) { 436 if (sSingleton == null || !sSingleton.isLocale(locale)) { 437 sSingleton = new ContactLocaleUtils(locale); 438 } 439 } 440 441 public String getSortKey(String name, int 
nameStyle) { 442 return mUtils.getSortKey(name); 443 } 444 445 public int getBucketIndex(String name) { 446 return mUtils.getBucketIndex(name); 447 } 448 449 public int getBucketCount() { 450 return mUtils.getBucketCount(); 451 } 452 453 public String getBucketLabel(int bucketIndex) { 454 return mUtils.getBucketLabel(bucketIndex); 455 } 456 457 public String getLabel(String name) { 458 return getBucketLabel(getBucketIndex(name)); 459 } 460 461 public ArrayList<String> getLabels() { 462 return mUtils.getLabels(); 463 } 464 465 /** 466 * Determine which utility should be used for generating NameLookupKey. 467 * (ie, whether we generate Pinyin lookup keys or not) 468 * 469 * Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified 470 * and tagged as CJK. For Hiragana/Katakana names, generate Romaji 471 * lookup keys when not in a Chinese or Korean locale. 472 * 473 * Otherwise, use the default behavior of that locale: 474 * a. For Japan, generate Romaji lookup keys for Hiragana/Katakana. 475 * b. For Simplified Chinese locale, generate Pinyin lookup keys. 476 */ 477 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 478 if (nameStyle == FullNameStyle.JAPANESE && 479 !CHINESE_LANGUAGE.equals(mLanguage) && 480 !KOREAN_LANGUAGE.equals(mLanguage)) { 481 return JapaneseContactUtils.getRomajiNameLookupKeys(name); 482 } 483 return mUtils.getNameLookupKeys(name, nameStyle); 484 } 485 486} 487