// ContactsBinaryDictionary.java revision edd1992ed329a84f0e9ef7056fda99f78eeb92b4
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import com.android.inputmethod.latin.personalization.AccountUtils;

import android.content.ContentResolver;
import android.content.Context;
import android.database.ContentObserver;
import android.database.Cursor;
import android.net.Uri;
import android.os.SystemClock;
import android.provider.BaseColumns;
import android.provider.ContactsContract;
import android.provider.ContactsContract.Contacts;
import android.text.TextUtils;
import android.util.Log;

import com.android.inputmethod.latin.utils.StringUtils;

import java.util.List;
import java.util.Locale;

/**
 * An expandable binary dictionary whose words come from the device account email addresses,
 * the profile entry and the display names of the contacts.
 */
public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {

    private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME};
    private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID};

    private static final String TAG = ContactsBinaryDictionary.class.getSimpleName();
    private static final String NAME = "contacts";

    private static final boolean DEBUG = false;

    /**
     * Frequency for contacts information into the dictionary
     */
    private static final int FREQUENCY_FOR_CONTACTS = 40;
    private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;

    /** The maximum number of contacts that this dictionary supports. */
    private static final int MAX_CONTACT_COUNT = 10000;

    /** Column index of {@link Contacts#DISPLAY_NAME} in {@link #PROJECTION}. */
    private static final int INDEX_NAME = 1;

    /** The number of contacts in the most recent dictionary rebuild. */
    static private int sContactCountAtLastRebuild = 0;

    /** The locale for this contacts dictionary. Controls name bigram predictions. */
    public final Locale mLocale;

    private ContentObserver mObserver;

    /**
     * Whether to use "firstname lastname" in bigram predictions.
     */
    private final boolean mUseFirstLastBigrams;

    /**
     * Creates the dictionary for the given locale, registers the contacts observer and
     * triggers the initial load.
     *
     * @param context the context used to reach the content resolver
     * @param locale the locale of this dictionary; also part of the dictionary file name
     */
    public ContactsBinaryDictionary(final Context context, final Locale locale) {
        super(context, getFilenameWithLocale(NAME, locale.toString()), Dictionary.TYPE_CONTACTS);
        mLocale = locale;
        mUseFirstLastBigrams = useFirstLastBigramsForLocale(locale);
        registerObserver(context);

        // Load the current binary dictionary from internal storage. If no binary dictionary exists,
        // loadDictionary will start a new thread to generate one asynchronously.
        loadDictionary();
    }

    /**
     * Registers a content observer on {@link Contacts#CONTENT_URI} (descendants included) that
     * flags this dictionary as requiring a reload whenever a change is reported. Does nothing
     * if an observer is already registered.
     */
    private synchronized void registerObserver(final Context context) {
        if (mObserver != null) return;
        final ContentResolver cres = context.getContentResolver();
        cres.registerContentObserver(Contacts.CONTENT_URI, true, mObserver =
                new ContentObserver(null) {
                    @Override
                    public void onChange(boolean self) {
                        setRequiresReload(true);
                    }
                });
    }

    /**
     * Re-registers the contacts observer if it was previously unregistered by {@link #close()}.
     */
    public void reopen(final Context context) {
        registerObserver(context);
    }

    /**
     * Unregisters the contacts observer, if any, and closes the underlying dictionary.
     */
    @Override
    public synchronized void close() {
        if (mObserver != null) {
            mContext.getContentResolver().unregisterContentObserver(mObserver);
            mObserver = null;
        }
        super.close();
    }

    /**
     * Populates the dictionary from the device account email addresses, the profile entry and
     * the contacts provider, in that order.
     */
    @Override
    public void loadDictionaryAsync() {
        loadDeviceAccountsEmailAddresses();
        loadDictionaryAsyncForUri(ContactsContract.Profile.CONTENT_URI);
        // TODO: Switch this URL to the newer ContactsContract too
        loadDictionaryAsyncForUri(Contacts.CONTENT_URI);
    }

    /**
     * Adds every device account email address returned by {@link AccountUtils} as a word.
     */
    private void loadDeviceAccountsEmailAddresses() {
        final List<String> accountVocabulary =
                AccountUtils.getDeviceAccountsEmailAddresses(mContext);
        if (accountVocabulary == null || accountVocabulary.isEmpty()) {
            return;
        }
        for (final String word : accountVocabulary) {
            if (DEBUG) {
                Log.d(TAG, "loadAccountVocabulary: " + word);
            }
            super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
                    false /* isNotAWord */);
        }
    }

    /**
     * Queries the given URI for display names and adds the words of each valid name to the
     * dictionary. Also refreshes the contact count remembered for change detection.
     *
     * @param uri the content URI to query with {@link #PROJECTION}
     */
    private void loadDictionaryAsyncForUri(final Uri uri) {
        try {
            final Cursor cursor = mContext.getContentResolver()
                    .query(uri, PROJECTION, null, null, null);
            if (cursor != null) {
                try {
                    if (cursor.moveToFirst()) {
                        sContactCountAtLastRebuild = getContactCount();
                        addWords(cursor);
                    }
                } finally {
                    cursor.close();
                }
            }
        } catch (IllegalStateException e) {
            // Keep the cause in the log so the failure can be diagnosed.
            Log.e(TAG, "Contacts DB is having problems", e);
        }
    }

    /**
     * Returns whether "firstname lastname" bigrams should be generated for the given locale.
     * Currently only true when the locale's language is English.
     */
    private boolean useFirstLastBigramsForLocale(final Locale locale) {
        // TODO: Add firstname/lastname bigram rules for other languages.
        return locale != null && locale.getLanguage().equals(Locale.ENGLISH.getLanguage());
    }

    /**
     * Walks the cursor from its current position and adds the words of each valid name, stopping
     * after {@link #MAX_CONTACT_COUNT} valid names.
     */
    private void addWords(final Cursor cursor) {
        int count = 0;
        while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) {
            final String name = cursor.getString(INDEX_NAME);
            if (isValidName(name)) {
                addName(name);
                ++count;
            }
            cursor.moveToNext();
        }
    }

    /**
     * Returns the number of rows under {@link Contacts#CONTENT_URI}, or 0 if the query yields
     * no cursor.
     */
    private int getContactCount() {
        // TODO: consider switching to a rawQuery("select count(*)...") on the database if
        // performance is a bottleneck.
        final Cursor cursor = mContext.getContentResolver().query(
                Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null);
        if (cursor != null) {
            try {
                return cursor.getCount();
            } finally {
                cursor.close();
            }
        }
        return 0;
    }

    /**
     * Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their
     * bigrams depending on locale.
     */
    private void addName(final String name) {
        // Indices below are char indices, so the bound must be the char length, not the
        // code point count; otherwise names with supplementary characters are truncated.
        final int len = name.length();
        String prevWord = null;
        // TODO: Better tokenization for non-Latin writing systems
        int i = 0;
        while (i < len) {
            final int codePoint = name.codePointAt(i);
            if (!Character.isLetter(codePoint)) {
                i += Character.charCount(codePoint);
                continue;
            }
            final int end = getWordEndPosition(name, len, i);
            final String word = name.substring(i, end);
            i = end;
            // Don't add single letter words, possibly confuses
            // capitalization of i.
            final int wordLen = StringUtils.codePointCount(word);
            if (wordLen < MAX_WORD_LENGTH && wordLen > 1) {
                if (DEBUG) {
                    Log.d(TAG, "addName " + name + ", " + word + ", " + prevWord);
                }
                super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
                        false /* isNotAWord */);
                if (!TextUtils.isEmpty(prevWord)) {
                    if (mUseFirstLastBigrams) {
                        super.setBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM);
                    }
                }
                prevWord = word;
            }
        }
    }

    /**
     * Returns the char index just past the end of the word that starts at startIndex. A word
     * continues over letters, dashes and single quotes.
     *
     * @param string the string being tokenized
     * @param len the length of {@code string} in chars
     * @param startIndex the char index of the first code point of the word
     * @return the exclusive end index of the word, suitable for {@code substring}
     */
    private static int getWordEndPosition(final String string, final int len,
            final int startIndex) {
        // Skip the whole first code point so a supplementary letter is not split in two.
        int end = startIndex + Character.charCount(string.codePointAt(startIndex));
        while (end < len) {
            final int cp = string.codePointAt(end);
            if (!(cp == Constants.CODE_DASH || cp == Constants.CODE_SINGLE_QUOTE
                    || Character.isLetter(cp))) {
                break;
            }
            end += Character.charCount(cp);
        }
        return end;
    }

    @Override
    protected boolean needsToReloadBeforeWriting() {
        return true;
    }

    /**
     * Decides whether the contacts differ from what the dictionary currently holds.
     *
     * @return false when there are more than {@link #MAX_CONTACT_COUNT} contacts; true when the
     * contact count differs from the last rebuild or when a valid contact name is not found in
     * the dictionary; false otherwise
     */
    @Override
    protected boolean hasContentChanged() {
        final long startTime = SystemClock.uptimeMillis();
        final int contactCount = getContactCount();
        if (contactCount > MAX_CONTACT_COUNT) {
            // If there are too many contacts then return false. In this rare case it is impossible
            // to include all of them anyways and the cost of rebuilding the dictionary is too high.
            // TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts?
            return false;
        }
        if (contactCount != sContactCountAtLastRebuild) {
            if (DEBUG) {
                Log.d(TAG, "Contact count changed: " + sContactCountAtLastRebuild + " to "
                        + contactCount);
            }
            return true;
        }
        // Check all contacts since it's not possible to find out which names have changed.
        // This is needed because it's possible to receive extraneous onChange events even when no
        // name has changed.
        final Cursor cursor = mContext.getContentResolver().query(
                Contacts.CONTENT_URI, PROJECTION, null, null, null);
        if (cursor != null) {
            try {
                if (cursor.moveToFirst()) {
                    while (!cursor.isAfterLast()) {
                        final String name = cursor.getString(INDEX_NAME);
                        if (isValidName(name) && !isNameInDictionary(name)) {
                            if (DEBUG) {
                                Log.d(TAG, "Contact name missing: " + name + " (runtime = "
                                        + (SystemClock.uptimeMillis() - startTime) + " ms)");
                            }
                            return true;
                        }
                        cursor.moveToNext();
                    }
                }
            } finally {
                cursor.close();
            }
        }
        if (DEBUG) {
            Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime)
                    + " ms)");
        }
        return false;
    }

    /**
     * Returns whether the name should be considered: it must be non-null and must not contain
     * a commercial at sign.
     */
    private static boolean isValidName(final String name) {
        return name != null && -1 == name.indexOf(Constants.CODE_COMMERCIAL_AT);
    }

    /**
     * Checks if the words in a name are in the current binary dictionary. The first word of the
     * name (and every word when first/last bigrams are disabled) is looked up as a unigram;
     * later words are looked up as a bigram with the preceding word.
     */
    private boolean isNameInDictionary(final String name) {
        // Char length, not code point count: the loop indexes the string by char position.
        final int len = name.length();
        String prevWord = null;
        int i = 0;
        while (i < len) {
            final int codePoint = name.codePointAt(i);
            if (!Character.isLetter(codePoint)) {
                i += Character.charCount(codePoint);
                continue;
            }
            final int end = getWordEndPosition(name, len, i);
            final String word = name.substring(i, end);
            i = end;
            final int wordLen = StringUtils.codePointCount(word);
            if (wordLen < MAX_WORD_LENGTH && wordLen > 1) {
                if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) {
                    if (!super.isValidBigramLocked(prevWord, word)) {
                        return false;
                    }
                } else {
                    if (!super.isValidWordLocked(word)) {
                        return false;
                    }
                }
                prevWord = word;
            }
        }
        return true;
    }
}