1/* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 17package com.android.providers.contacts; 18 19import android.provider.ContactsContract.FullNameStyle; 20import android.provider.ContactsContract.PhoneticNameStyle; 21import android.text.TextUtils; 22import android.util.Log; 23 24import com.android.providers.contacts.HanziToPinyin.Token; 25import com.google.common.annotations.VisibleForTesting; 26 27import java.lang.Character.UnicodeBlock; 28import java.util.Arrays; 29import java.util.ArrayList; 30import java.util.Collections; 31import java.util.HashMap; 32import java.util.HashSet; 33import java.util.Iterator; 34import java.util.List; 35import java.util.Locale; 36import java.util.Map; 37import java.util.Set; 38 39import libcore.icu.AlphabeticIndex; 40import libcore.icu.AlphabeticIndex.ImmutableIndex; 41import libcore.icu.Transliterator; 42 43/** 44 * This utility class provides specialized handling for locale specific 45 * information: labels, name lookup keys. 46 */ 47public class ContactLocaleUtils { 48 public static final String TAG = "ContactLocale"; 49 50 public static final Locale LOCALE_ARABIC = new Locale("ar"); 51 public static final Locale LOCALE_GREEK = new Locale("el"); 52 public static final Locale LOCALE_HEBREW = new Locale("he"); 53 // Serbian and Ukrainian labels are complementary supersets of Russian 54 public static final Locale LOCALE_SERBIAN = new Locale("sr"); 55 public static final Locale LOCALE_UKRAINIAN = new Locale("uk"); 56 public static final Locale LOCALE_THAI = new Locale("th"); 57 58 /** 59 * This class is the default implementation and should be the base class 60 * for other locales. 61 * 62 * sortKey: same as name 63 * nameLookupKeys: none 64 * labels: uses ICU AlphabeticIndex for labels and extends by labeling 65 * phone numbers "#". Eg English labels are: [A-Z], #, " " 66 */ 67 private static class ContactLocaleUtilsBase { 68 private static final String EMPTY_STRING = ""; 69 private static final String NUMBER_STRING = "#"; 70 71 protected final ImmutableIndex mAlphabeticIndex; 72 private final int mAlphabeticIndexBucketCount; 73 private final int mNumberBucketIndex; 74 private final boolean mUsePinyinTransliterator; 75 76 public ContactLocaleUtilsBase(LocaleSet locales) { 77 // AlphabeticIndex.getBucketLabel() uses a binary search across 78 // the entire label set so care should be taken about growing this 79 // set too large. The following set determines for which locales 80 // we will show labels other than your primary locale. General rules 81 // of thumb for adding a locale: should be a supported locale; and 82 // should not be included if from a name it is not deterministic 83 // which way to label it (so eg Chinese cannot be added because 84 // the labeling of a Chinese character varies between Simplified, 85 // Traditional, and Japanese locales). Use English only for all 86 // Latin based alphabets. Ukrainian and Serbian are chosen for 87 // Cyrillic because their alphabets are complementary supersets 88 // of Russian. 89 final Locale secondaryLocale = locales.getSecondaryLocale(); 90 mUsePinyinTransliterator = locales.isPrimaryLocaleSimplifiedChinese() || 91 locales.isSecondaryLocaleSimplifiedChinese(); 92 AlphabeticIndex ai = new AlphabeticIndex(locales.getPrimaryLocale()) 93 .setMaxLabelCount(300); 94 if (secondaryLocale != null) { 95 ai.addLabels(secondaryLocale); 96 } 97 mAlphabeticIndex = ai.addLabels(Locale.ENGLISH) 98 .addLabels(Locale.JAPANESE) 99 .addLabels(Locale.KOREAN) 100 .addLabels(LOCALE_THAI) 101 .addLabels(LOCALE_ARABIC) 102 .addLabels(LOCALE_HEBREW) 103 .addLabels(LOCALE_GREEK) 104 .addLabels(LOCALE_UKRAINIAN) 105 .addLabels(LOCALE_SERBIAN) 106 .getImmutableIndex(); 107 mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount(); 108 mNumberBucketIndex = mAlphabeticIndexBucketCount - 1; 109 } 110 111 public String getSortKey(String name) { 112 return name; 113 } 114 115 /** 116 * Returns the bucket index for the specified string. AlphabeticIndex 117 * sorts strings into buckets numbered in order from 0 to N, where the 118 * exact value of N depends on how many representative index labels are 119 * used in a particular locale. This routine adds one additional bucket 120 * for phone numbers. It attempts to detect phone numbers and shifts 121 * the bucket indexes returned by AlphabeticIndex in order to make room 122 * for the new # bucket, so the returned range becomes 0 to N+1. 123 */ 124 public int getBucketIndex(String name) { 125 boolean prefixIsNumeric = false; 126 final int length = name.length(); 127 int offset = 0; 128 while (offset < length) { 129 int codePoint = Character.codePointAt(name, offset); 130 // Ignore standard phone number separators and identify any 131 // string that otherwise starts with a number. 132 if (Character.isDigit(codePoint)) { 133 prefixIsNumeric = true; 134 break; 135 } else if (!Character.isSpaceChar(codePoint) && 136 codePoint != '+' && codePoint != '(' && 137 codePoint != ')' && codePoint != '.' && 138 codePoint != '-' && codePoint != '#') { 139 break; 140 } 141 offset += Character.charCount(codePoint); 142 } 143 if (prefixIsNumeric) { 144 return mNumberBucketIndex; 145 } 146 147 /** 148 * ICU 55 AlphabeticIndex doesn't support Simplified Chinese 149 * as a secondary locale so it is necessary to use the 150 * Pinyin transliterator. We also use this for a Simplified 151 * Chinese primary locale because it gives more accurate letter 152 * buckets. b/19835686 153 */ 154 if (mUsePinyinTransliterator) { 155 name = HanziToPinyin.getInstance().transliterate(name); 156 } 157 final int bucket = mAlphabeticIndex.getBucketIndex(name); 158 if (bucket < 0) { 159 return -1; 160 } 161 if (bucket >= mNumberBucketIndex) { 162 return bucket + 1; 163 } 164 return bucket; 165 } 166 167 /** 168 * Returns the number of buckets in use (one more than AlphabeticIndex 169 * uses, because this class adds a bucket for phone numbers). 170 */ 171 public int getBucketCount() { 172 return mAlphabeticIndexBucketCount + 1; 173 } 174 175 /** 176 * Returns the label for the specified bucket index if a valid index, 177 * otherwise returns an empty string. '#' is returned for the phone 178 * number bucket; for all others, the AlphabeticIndex label is returned. 179 */ 180 public String getBucketLabel(int bucketIndex) { 181 if (bucketIndex < 0 || bucketIndex >= getBucketCount()) { 182 return EMPTY_STRING; 183 } else if (bucketIndex == mNumberBucketIndex) { 184 return NUMBER_STRING; 185 } else if (bucketIndex > mNumberBucketIndex) { 186 --bucketIndex; 187 } 188 return mAlphabeticIndex.getBucketLabel(bucketIndex); 189 } 190 191 @SuppressWarnings("unused") 192 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 193 return null; 194 } 195 196 public ArrayList<String> getLabels() { 197 final int bucketCount = getBucketCount(); 198 final ArrayList<String> labels = new ArrayList<String>(bucketCount); 199 for(int i = 0; i < bucketCount; ++i) { 200 labels.add(getBucketLabel(i)); 201 } 202 return labels; 203 } 204 } 205 206 /** 207 * Japanese specific locale overrides. 208 * 209 * sortKey: unchanged (same as name) 210 * nameLookupKeys: unchanged (none) 211 * labels: extends default labels by labeling unlabeled CJ characters 212 * with the Japanese character 他 ("misc"). Japanese labels are: 213 * あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " " 214 */ 215 private static class JapaneseContactUtils extends ContactLocaleUtilsBase { 216 // \u4ed6 is Japanese character 他 ("misc") 217 private static final String JAPANESE_MISC_LABEL = "\u4ed6"; 218 private final int mMiscBucketIndex; 219 220 public JapaneseContactUtils(LocaleSet locales) { 221 super(locales); 222 // Determine which bucket AlphabeticIndex is lumping unclassified 223 // Japanese characters into by looking up the bucket index for 224 // a representative Kanji/CJK unified ideograph (\u65e5 is the 225 // character '日'). 226 mMiscBucketIndex = super.getBucketIndex("\u65e5"); 227 } 228 229 // Set of UnicodeBlocks for unified CJK (Chinese) characters and 230 // Japanese characters. This includes all code blocks that might 231 // contain a character used in Japanese (which is why unified CJK 232 // blocks are included but Korean Hangul and jamo are not). 233 private static final Set<Character.UnicodeBlock> CJ_BLOCKS; 234 static { 235 Set<UnicodeBlock> set = new HashSet<UnicodeBlock>(); 236 set.add(UnicodeBlock.HIRAGANA); 237 set.add(UnicodeBlock.KATAKANA); 238 set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS); 239 set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS); 240 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS); 241 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A); 242 set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B); 243 set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION); 244 set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT); 245 set.add(UnicodeBlock.CJK_COMPATIBILITY); 246 set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS); 247 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS); 248 set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT); 249 CJ_BLOCKS = Collections.unmodifiableSet(set); 250 } 251 252 /** 253 * Helper routine to identify unlabeled Chinese or Japanese characters 254 * to put in a 'misc' bucket. 255 * 256 * @return true if the specified Unicode code point is Chinese or 257 * Japanese 258 */ 259 private static boolean isChineseOrJapanese(int codePoint) { 260 return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint)); 261 } 262 263 /** 264 * Returns the bucket index for the specified string. Adds an 265 * additional 'misc' bucket for Kanji characters to the base class set. 266 */ 267 @Override 268 public int getBucketIndex(String name) { 269 final int bucketIndex = super.getBucketIndex(name); 270 if ((bucketIndex == mMiscBucketIndex && 271 !isChineseOrJapanese(Character.codePointAt(name, 0))) || 272 bucketIndex > mMiscBucketIndex) { 273 return bucketIndex + 1; 274 } 275 return bucketIndex; 276 } 277 278 /** 279 * Returns the number of buckets in use (one more than the base class 280 * uses, because this class adds a bucket for Kanji). 281 */ 282 @Override 283 public int getBucketCount() { 284 return super.getBucketCount() + 1; 285 } 286 287 /** 288 * Returns the label for the specified bucket index if a valid index, 289 * otherwise returns an empty string. '他' is returned for unclassified 290 * Kanji; for all others, the label determined by the base class is 291 * returned. 292 */ 293 @Override 294 public String getBucketLabel(int bucketIndex) { 295 if (bucketIndex == mMiscBucketIndex) { 296 return JAPANESE_MISC_LABEL; 297 } else if (bucketIndex > mMiscBucketIndex) { 298 --bucketIndex; 299 } 300 return super.getBucketLabel(bucketIndex); 301 } 302 303 @Override 304 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 305 // Hiragana and Katakana will be positively identified as Japanese. 306 if (nameStyle == PhoneticNameStyle.JAPANESE) { 307 return getRomajiNameLookupKeys(name); 308 } 309 return null; 310 } 311 312 private static boolean mInitializedTransliterator; 313 private static Transliterator mJapaneseTransliterator; 314 315 private static Transliterator getJapaneseTransliterator() { 316 synchronized(JapaneseContactUtils.class) { 317 if (!mInitializedTransliterator) { 318 mInitializedTransliterator = true; 319 Transliterator t = null; 320 try { 321 t = new Transliterator("Hiragana-Latin; Katakana-Latin;" 322 + " Latin-Ascii"); 323 } catch (RuntimeException e) { 324 Log.w(TAG, "Hiragana/Katakana-Latin transliterator data" 325 + " is missing"); 326 } 327 mJapaneseTransliterator = t; 328 } 329 return mJapaneseTransliterator; 330 } 331 } 332 333 public static Iterator<String> getRomajiNameLookupKeys(String name) { 334 final Transliterator t = getJapaneseTransliterator(); 335 if (t == null) { 336 return null; 337 } 338 final String romajiName = t.transliterate(name); 339 if (TextUtils.isEmpty(romajiName) || 340 TextUtils.equals(name, romajiName)) { 341 return null; 342 } 343 final HashSet<String> keys = new HashSet<String>(); 344 keys.add(romajiName); 345 return keys.iterator(); 346 } 347 } 348 349 /** 350 * Simplified Chinese specific locale overrides. Uses ICU Transliterator 351 * for generating pinyin transliteration. 352 * 353 * sortKey: unchanged (same as name) 354 * nameLookupKeys: adds additional name lookup keys 355 * - Chinese character's pinyin and pinyin's initial character. 356 * - Latin word and initial character. 357 * labels: unchanged 358 * Simplified Chinese labels are the same as English: [A-Z], #, " " 359 */ 360 private static class SimplifiedChineseContactUtils 361 extends ContactLocaleUtilsBase { 362 public SimplifiedChineseContactUtils(LocaleSet locales) { 363 super(locales); 364 } 365 366 @Override 367 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 368 if (nameStyle != FullNameStyle.JAPANESE && 369 nameStyle != FullNameStyle.KOREAN) { 370 return getPinyinNameLookupKeys(name); 371 } 372 return null; 373 } 374 375 public static Iterator<String> getPinyinNameLookupKeys(String name) { 376 // TODO : Reduce the object allocation. 377 HashSet<String> keys = new HashSet<String>(); 378 ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name); 379 final int tokenCount = tokens.size(); 380 final StringBuilder keyPinyin = new StringBuilder(); 381 final StringBuilder keyInitial = new StringBuilder(); 382 // There is no space among the Chinese Characters, the variant name 383 // lookup key wouldn't work for Chinese. The keyOriginal is used to 384 // build the lookup keys for itself. 385 final StringBuilder keyOriginal = new StringBuilder(); 386 for (int i = tokenCount - 1; i >= 0; i--) { 387 final Token token = tokens.get(i); 388 if (Token.UNKNOWN == token.type) { 389 continue; 390 } 391 if (Token.PINYIN == token.type) { 392 keyPinyin.insert(0, token.target); 393 keyInitial.insert(0, token.target.charAt(0)); 394 } else if (Token.LATIN == token.type) { 395 // Avoid adding space at the end of String. 396 if (keyPinyin.length() > 0) { 397 keyPinyin.insert(0, ' '); 398 } 399 if (keyOriginal.length() > 0) { 400 keyOriginal.insert(0, ' '); 401 } 402 keyPinyin.insert(0, token.source); 403 keyInitial.insert(0, token.source.charAt(0)); 404 } 405 keyOriginal.insert(0, token.source); 406 keys.add(keyOriginal.toString()); 407 keys.add(keyPinyin.toString()); 408 keys.add(keyInitial.toString()); 409 } 410 return keys.iterator(); 411 } 412 } 413 414 private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase(); 415 416 private static ContactLocaleUtils sSingleton; 417 418 private final LocaleSet mLocales; 419 private final ContactLocaleUtilsBase mUtils; 420 421 private ContactLocaleUtils(LocaleSet locales) { 422 if (locales == null) { 423 mLocales = LocaleSet.getDefault(); 424 } else { 425 mLocales = locales; 426 } 427 if (mLocales.isPrimaryLanguage(JAPANESE_LANGUAGE)) { 428 mUtils = new JapaneseContactUtils(mLocales); 429 } else if (mLocales.isPrimaryLocaleSimplifiedChinese()) { 430 mUtils = new SimplifiedChineseContactUtils(mLocales); 431 } else { 432 mUtils = new ContactLocaleUtilsBase(mLocales); 433 } 434 Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: " 435 + getLabels().toString()); 436 } 437 438 public boolean isLocale(LocaleSet locales) { 439 return mLocales.equals(locales); 440 } 441 442 public static synchronized ContactLocaleUtils getInstance() { 443 if (sSingleton == null) { 444 sSingleton = new ContactLocaleUtils(LocaleSet.getDefault()); 445 } 446 return sSingleton; 447 } 448 449 @VisibleForTesting 450 public static synchronized void setLocale(Locale locale) { 451 setLocales(new LocaleSet(locale)); 452 } 453 454 public static synchronized void setLocales(LocaleSet locales) { 455 if (sSingleton == null || !sSingleton.isLocale(locales)) { 456 sSingleton = new ContactLocaleUtils(locales); 457 } 458 } 459 460 public String getSortKey(String name, int nameStyle) { 461 return mUtils.getSortKey(name); 462 } 463 464 public int getBucketIndex(String name) { 465 return mUtils.getBucketIndex(name); 466 } 467 468 public int getBucketCount() { 469 return mUtils.getBucketCount(); 470 } 471 472 public String getBucketLabel(int bucketIndex) { 473 return mUtils.getBucketLabel(bucketIndex); 474 } 475 476 public String getLabel(String name) { 477 return getBucketLabel(getBucketIndex(name)); 478 } 479 480 public ArrayList<String> getLabels() { 481 return mUtils.getLabels(); 482 } 483 484 /** 485 * Determine which utility should be used for generating NameLookupKey. 486 * (ie, whether we generate Romaji or Pinyin lookup keys or not) 487 * 488 * Hiragana and Katakana are tagged as JAPANESE; Kanji is unclassified 489 * and tagged as CJK. For Hiragana/Katakana names, generate Romaji 490 * lookup keys when not in a Chinese or Korean locale. 491 * 492 * Otherwise, use the default behavior of that locale: 493 * a. For Japan, generate Romaji lookup keys for Hiragana/Katakana. 494 * b. For Simplified Chinese locale, generate Pinyin lookup keys. 495 */ 496 public Iterator<String> getNameLookupKeys(String name, int nameStyle) { 497 if (!mLocales.isPrimaryLocaleCJK()) { 498 if (mLocales.isSecondaryLocaleSimplifiedChinese()) { 499 if (nameStyle == FullNameStyle.CHINESE || 500 nameStyle == FullNameStyle.CJK) { 501 return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name); 502 } 503 } else { 504 if (nameStyle == FullNameStyle.JAPANESE) { 505 return JapaneseContactUtils.getRomajiNameLookupKeys(name); 506 } 507 } 508 } 509 return mUtils.getNameLookupKeys(name, nameStyle); 510 } 511 512} 513