ContactsBinaryDictionary.java revision edd1992ed329a84f0e9ef7056fda99f78eeb92b4
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import com.android.inputmethod.latin.personalization.AccountUtils;
20
21import android.content.ContentResolver;
22import android.content.Context;
23import android.database.ContentObserver;
24import android.database.Cursor;
25import android.net.Uri;
26import android.os.SystemClock;
27import android.provider.BaseColumns;
28import android.provider.ContactsContract;
29import android.provider.ContactsContract.Contacts;
30import android.text.TextUtils;
31import android.util.Log;
32
33import com.android.inputmethod.latin.utils.StringUtils;
34
35import java.util.List;
36import java.util.Locale;
37
38public class ContactsBinaryDictionary extends ExpandableBinaryDictionary {
39
40    private static final String[] PROJECTION = {BaseColumns._ID, Contacts.DISPLAY_NAME};
41    private static final String[] PROJECTION_ID_ONLY = {BaseColumns._ID};
42
43    private static final String TAG = ContactsBinaryDictionary.class.getSimpleName();
44    private static final String NAME = "contacts";
45
46    private static boolean DEBUG = false;
47
48    /**
49     * Frequency for contacts information into the dictionary
50     */
51    private static final int FREQUENCY_FOR_CONTACTS = 40;
52    private static final int FREQUENCY_FOR_CONTACTS_BIGRAM = 90;
53
54    /** The maximum number of contacts that this dictionary supports. */
55    private static final int MAX_CONTACT_COUNT = 10000;
56
57    private static final int INDEX_NAME = 1;
58
59    /** The number of contacts in the most recent dictionary rebuild. */
60    static private int sContactCountAtLastRebuild = 0;
61
62    /** The locale for this contacts dictionary. Controls name bigram predictions. */
63    public final Locale mLocale;
64
65    private ContentObserver mObserver;
66
67    /**
68     * Whether to use "firstname lastname" in bigram predictions.
69     */
70    private final boolean mUseFirstLastBigrams;
71
72    public ContactsBinaryDictionary(final Context context, final Locale locale) {
73        super(context, getFilenameWithLocale(NAME, locale.toString()), Dictionary.TYPE_CONTACTS);
74        mLocale = locale;
75        mUseFirstLastBigrams = useFirstLastBigramsForLocale(locale);
76        registerObserver(context);
77
78        // Load the current binary dictionary from internal storage. If no binary dictionary exists,
79        // loadDictionary will start a new thread to generate one asynchronously.
80        loadDictionary();
81    }
82
83    private synchronized void registerObserver(final Context context) {
84        // Perform a managed query. The Activity will handle closing and requerying the cursor
85        // when needed.
86        if (mObserver != null) return;
87        ContentResolver cres = context.getContentResolver();
88        cres.registerContentObserver(Contacts.CONTENT_URI, true, mObserver =
89                new ContentObserver(null) {
90                    @Override
91                    public void onChange(boolean self) {
92                        setRequiresReload(true);
93                    }
94                });
95    }
96
97    public void reopen(final Context context) {
98        registerObserver(context);
99    }
100
101    @Override
102    public synchronized void close() {
103        if (mObserver != null) {
104            mContext.getContentResolver().unregisterContentObserver(mObserver);
105            mObserver = null;
106        }
107        super.close();
108    }
109
110    @Override
111    public void loadDictionaryAsync() {
112        loadDeviceAccountsEmailAddresses();
113        loadDictionaryAsyncForUri(ContactsContract.Profile.CONTENT_URI);
114        // TODO: Switch this URL to the newer ContactsContract too
115        loadDictionaryAsyncForUri(Contacts.CONTENT_URI);
116    }
117
118    private void loadDeviceAccountsEmailAddresses() {
119        final List<String> accountVocabulary =
120                AccountUtils.getDeviceAccountsEmailAddresses(mContext);
121        if (accountVocabulary == null || accountVocabulary.isEmpty()) {
122            return;
123        }
124        for (String word : accountVocabulary) {
125            if (DEBUG) {
126                Log.d(TAG, "loadAccountVocabulary: " + word);
127            }
128            super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
129                    false /* isNotAWord */);
130        }
131    }
132
133    private void loadDictionaryAsyncForUri(final Uri uri) {
134        try {
135            Cursor cursor = mContext.getContentResolver()
136                    .query(uri, PROJECTION, null, null, null);
137            if (cursor != null) {
138                try {
139                    if (cursor.moveToFirst()) {
140                        sContactCountAtLastRebuild = getContactCount();
141                        addWords(cursor);
142                    }
143                } finally {
144                    cursor.close();
145                }
146            }
147        } catch (IllegalStateException e) {
148            Log.e(TAG, "Contacts DB is having problems");
149        }
150    }
151
152    private boolean useFirstLastBigramsForLocale(final Locale locale) {
153        // TODO: Add firstname/lastname bigram rules for other languages.
154        if (locale != null && locale.getLanguage().equals(Locale.ENGLISH.getLanguage())) {
155            return true;
156        }
157        return false;
158    }
159
160    private void addWords(final Cursor cursor) {
161        int count = 0;
162        while (!cursor.isAfterLast() && count < MAX_CONTACT_COUNT) {
163            String name = cursor.getString(INDEX_NAME);
164            if (isValidName(name)) {
165                addName(name);
166                ++count;
167            }
168            cursor.moveToNext();
169        }
170    }
171
172    private int getContactCount() {
173        // TODO: consider switching to a rawQuery("select count(*)...") on the database if
174        // performance is a bottleneck.
175        final Cursor cursor = mContext.getContentResolver().query(
176                Contacts.CONTENT_URI, PROJECTION_ID_ONLY, null, null, null);
177        if (cursor != null) {
178            try {
179                return cursor.getCount();
180            } finally {
181                cursor.close();
182            }
183        }
184        return 0;
185    }
186
187    /**
188     * Adds the words in a name (e.g., firstname/lastname) to the binary dictionary along with their
189     * bigrams depending on locale.
190     */
191    private void addName(final String name) {
192        int len = StringUtils.codePointCount(name);
193        String prevWord = null;
194        // TODO: Better tokenization for non-Latin writing systems
195        for (int i = 0; i < len; i++) {
196            if (Character.isLetter(name.codePointAt(i))) {
197                int end = getWordEndPosition(name, len, i);
198                String word = name.substring(i, end);
199                i = end - 1;
200                // Don't add single letter words, possibly confuses
201                // capitalization of i.
202                final int wordLen = StringUtils.codePointCount(word);
203                if (wordLen < MAX_WORD_LENGTH && wordLen > 1) {
204                    if (DEBUG) {
205                        Log.d(TAG, "addName " + name + ", " + word + ", " + prevWord);
206                    }
207                    super.addWord(word, null /* shortcut */, FREQUENCY_FOR_CONTACTS,
208                            false /* isNotAWord */);
209                    if (!TextUtils.isEmpty(prevWord)) {
210                        if (mUseFirstLastBigrams) {
211                            super.setBigram(prevWord, word, FREQUENCY_FOR_CONTACTS_BIGRAM);
212                        }
213                    }
214                    prevWord = word;
215                }
216            }
217        }
218    }
219
220    /**
221     * Returns the index of the last letter in the word, starting from position startIndex.
222     */
223    private static int getWordEndPosition(final String string, final int len,
224            final int startIndex) {
225        int end;
226        int cp = 0;
227        for (end = startIndex + 1; end < len; end += Character.charCount(cp)) {
228            cp = string.codePointAt(end);
229            if (!(cp == Constants.CODE_DASH || cp == Constants.CODE_SINGLE_QUOTE
230                    || Character.isLetter(cp))) {
231                break;
232            }
233        }
234        return end;
235    }
236
237    @Override
238    protected boolean needsToReloadBeforeWriting() {
239        return true;
240    }
241
242    @Override
243    protected boolean hasContentChanged() {
244        final long startTime = SystemClock.uptimeMillis();
245        final int contactCount = getContactCount();
246        if (contactCount > MAX_CONTACT_COUNT) {
247            // If there are too many contacts then return false. In this rare case it is impossible
248            // to include all of them anyways and the cost of rebuilding the dictionary is too high.
249            // TODO: Sort and check only the MAX_CONTACT_COUNT most recent contacts?
250            return false;
251        }
252        if (contactCount != sContactCountAtLastRebuild) {
253            if (DEBUG) {
254                Log.d(TAG, "Contact count changed: " + sContactCountAtLastRebuild + " to "
255                        + contactCount);
256            }
257            return true;
258        }
259        // Check all contacts since it's not possible to find out which names have changed.
260        // This is needed because it's possible to receive extraneous onChange events even when no
261        // name has changed.
262        Cursor cursor = mContext.getContentResolver().query(
263                Contacts.CONTENT_URI, PROJECTION, null, null, null);
264        if (cursor != null) {
265            try {
266                if (cursor.moveToFirst()) {
267                    while (!cursor.isAfterLast()) {
268                        String name = cursor.getString(INDEX_NAME);
269                        if (isValidName(name) && !isNameInDictionary(name)) {
270                            if (DEBUG) {
271                                Log.d(TAG, "Contact name missing: " + name + " (runtime = "
272                                        + (SystemClock.uptimeMillis() - startTime) + " ms)");
273                            }
274                            return true;
275                        }
276                        cursor.moveToNext();
277                    }
278                }
279            } finally {
280                cursor.close();
281            }
282        }
283        if (DEBUG) {
284            Log.d(TAG, "No contacts changed. (runtime = " + (SystemClock.uptimeMillis() - startTime)
285                    + " ms)");
286        }
287        return false;
288    }
289
290    private static boolean isValidName(final String name) {
291        if (name != null && -1 == name.indexOf(Constants.CODE_COMMERCIAL_AT)) {
292            return true;
293        }
294        return false;
295    }
296
297    /**
298     * Checks if the words in a name are in the current binary dictionary.
299     */
300    private boolean isNameInDictionary(final String name) {
301        int len = StringUtils.codePointCount(name);
302        String prevWord = null;
303        for (int i = 0; i < len; i++) {
304            if (Character.isLetter(name.codePointAt(i))) {
305                int end = getWordEndPosition(name, len, i);
306                String word = name.substring(i, end);
307                i = end - 1;
308                final int wordLen = StringUtils.codePointCount(word);
309                if (wordLen < MAX_WORD_LENGTH && wordLen > 1) {
310                    if (!TextUtils.isEmpty(prevWord) && mUseFirstLastBigrams) {
311                        if (!super.isValidBigramLocked(prevWord, word)) {
312                            return false;
313                        }
314                    } else {
315                        if (!super.isValidWordLocked(word)) {
316                            return false;
317                        }
318                    }
319                    prevWord = word;
320                }
321            }
322        }
323        return true;
324    }
325}
326