AndroidSpellCheckerService.java revision d8590857bdff7f30a93af07aef0362d9f7460a5a
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.Intent;
20import android.content.SharedPreferences;
21import android.preference.PreferenceManager;
22import android.service.textservice.SpellCheckerService;
23import android.util.Log;
24import android.view.textservice.SuggestionsInfo;
25
26import com.android.inputmethod.keyboard.ProximityInfo;
27import com.android.inputmethod.latin.BinaryDictionary;
28import com.android.inputmethod.latin.CollectionUtils;
29import com.android.inputmethod.latin.ContactsBinaryDictionary;
30import com.android.inputmethod.latin.Dictionary;
31import com.android.inputmethod.latin.DictionaryCollection;
32import com.android.inputmethod.latin.DictionaryFactory;
33import com.android.inputmethod.latin.LocaleUtils;
34import com.android.inputmethod.latin.R;
35import com.android.inputmethod.latin.StringUtils;
36import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary;
37import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary;
38import com.android.inputmethod.latin.UserBinaryDictionary;
39
40import java.lang.ref.WeakReference;
41import java.util.ArrayList;
42import java.util.Arrays;
43import java.util.Collections;
44import java.util.HashSet;
45import java.util.Iterator;
46import java.util.Locale;
47import java.util.Map;
48import java.util.TreeMap;
49
50/**
51 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
52 */
53public final class AndroidSpellCheckerService extends SpellCheckerService
54        implements SharedPreferences.OnSharedPreferenceChangeListener {
55    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
56    private static final boolean DBG = false;
57    private static final int POOL_SIZE = 2;
58
59    public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts";
60
61    public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
62    public static final int CAPITALIZE_FIRST = 1; // First only
63    public static final int CAPITALIZE_ALL = 2; // All caps
64
65    private final static String[] EMPTY_STRING_ARRAY = new String[0];
66    private Map<String, DictionaryPool> mDictionaryPools = CollectionUtils.newSynchronizedTreeMap();
67    private Map<String, UserBinaryDictionary> mUserDictionaries =
68            CollectionUtils.newSynchronizedTreeMap();
69    private ContactsBinaryDictionary mContactsDictionary;
70
71    // The threshold for a candidate to be offered as a suggestion.
72    private float mSuggestionThreshold;
73    // The threshold for a suggestion to be considered "recommended".
74    private float mRecommendedThreshold;
75    // Whether to use the contacts dictionary
76    private boolean mUseContactsDictionary;
77    private final Object mUseContactsLock = new Object();
78
79    private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList =
80            CollectionUtils.newHashSet();
81
82    public static final int SCRIPT_LATIN = 0;
83    public static final int SCRIPT_CYRILLIC = 1;
84    public static final int SCRIPT_GREEK = 2;
85    public static final String SINGLE_QUOTE = "\u0027";
86    public static final String APOSTROPHE = "\u2019";
87    private static final TreeMap<String, Integer> mLanguageToScript;
88    static {
89        // List of the supported languages and their associated script. We won't check
90        // words written in another script than the selected script, because we know we
91        // don't have those in our dictionary so we will underline everything and we
92        // will never have any suggestions, so it makes no sense checking them, and this
93        // is done in {@link #shouldFilterOut}. Also, the script is used to choose which
94        // proximity to pass to the dictionary descent algorithm.
95        // IMPORTANT: this only contains languages - do not write countries in there.
96        // Only the language is searched from the map.
97        mLanguageToScript = CollectionUtils.newTreeMap();
98        mLanguageToScript.put("cs", SCRIPT_LATIN);
99        mLanguageToScript.put("da", SCRIPT_LATIN);
100        mLanguageToScript.put("de", SCRIPT_LATIN);
101        mLanguageToScript.put("el", SCRIPT_GREEK);
102        mLanguageToScript.put("en", SCRIPT_LATIN);
103        mLanguageToScript.put("es", SCRIPT_LATIN);
104        mLanguageToScript.put("fi", SCRIPT_LATIN);
105        mLanguageToScript.put("fr", SCRIPT_LATIN);
106        mLanguageToScript.put("hr", SCRIPT_LATIN);
107        mLanguageToScript.put("it", SCRIPT_LATIN);
108        mLanguageToScript.put("lt", SCRIPT_LATIN);
109        mLanguageToScript.put("lv", SCRIPT_LATIN);
110        mLanguageToScript.put("nb", SCRIPT_LATIN);
111        mLanguageToScript.put("nl", SCRIPT_LATIN);
112        mLanguageToScript.put("pt", SCRIPT_LATIN);
113        mLanguageToScript.put("sl", SCRIPT_LATIN);
114        mLanguageToScript.put("ru", SCRIPT_CYRILLIC);
115    }
116
117    @Override public void onCreate() {
118        super.onCreate();
119        mSuggestionThreshold =
120                Float.parseFloat(getString(R.string.spellchecker_suggestion_threshold_value));
121        mRecommendedThreshold =
122                Float.parseFloat(getString(R.string.spellchecker_recommended_threshold_value));
123        final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
124        prefs.registerOnSharedPreferenceChangeListener(this);
125        onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY);
126    }
127
128    public static int getScriptFromLocale(final Locale locale) {
129        final Integer script = mLanguageToScript.get(locale.getLanguage());
130        if (null == script) {
131            throw new RuntimeException("We have been called with an unsupported language: \""
132                    + locale.getLanguage() + "\". Framework bug?");
133        }
134        return script;
135    }
136
137    @Override
138    public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) {
139        if (!PREF_USE_CONTACTS_KEY.equals(key)) return;
140        synchronized(mUseContactsLock) {
141            mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true);
142            if (mUseContactsDictionary) {
143                startUsingContactsDictionaryLocked();
144            } else {
145                stopUsingContactsDictionaryLocked();
146            }
147        }
148    }
149
150    private void startUsingContactsDictionaryLocked() {
151        if (null == mContactsDictionary) {
152            // TODO: use the right locale for each session
153            mContactsDictionary =
154                    new SynchronouslyLoadedContactsBinaryDictionary(this, Locale.getDefault());
155        }
156        final Iterator<WeakReference<DictionaryCollection>> iterator =
157                mDictionaryCollectionsList.iterator();
158        while (iterator.hasNext()) {
159            final WeakReference<DictionaryCollection> dictRef = iterator.next();
160            final DictionaryCollection dict = dictRef.get();
161            if (null == dict) {
162                iterator.remove();
163            } else {
164                dict.addDictionary(mContactsDictionary);
165            }
166        }
167    }
168
169    private void stopUsingContactsDictionaryLocked() {
170        if (null == mContactsDictionary) return;
171        final Dictionary contactsDict = mContactsDictionary;
172        // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no longer needed
173        mContactsDictionary = null;
174        final Iterator<WeakReference<DictionaryCollection>> iterator =
175                mDictionaryCollectionsList.iterator();
176        while (iterator.hasNext()) {
177            final WeakReference<DictionaryCollection> dictRef = iterator.next();
178            final DictionaryCollection dict = dictRef.get();
179            if (null == dict) {
180                iterator.remove();
181            } else {
182                dict.removeDictionary(contactsDict);
183            }
184        }
185        contactsDict.close();
186    }
187
188    @Override
189    public Session createSession() {
190        // Should not refer to AndroidSpellCheckerSession directly considering
191        // that AndroidSpellCheckerSession may be overlaid.
192        return AndroidSpellCheckerSessionFactory.newInstance(this);
193    }
194
195    public static SuggestionsInfo getNotInDictEmptySuggestions() {
196        return new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
197    }
198
199    public static SuggestionsInfo getInDictEmptySuggestions() {
200        return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
201                EMPTY_STRING_ARRAY);
202    }
203
204    public SuggestionsGatherer newSuggestionsGatherer(final String text, int maxLength) {
205        return new SuggestionsGatherer(
206                text, mSuggestionThreshold, mRecommendedThreshold, maxLength);
207    }
208
209    // TODO: remove this class and replace it by storage local to the session.
210    public static final class SuggestionsGatherer {
211        public static final class Result {
212            public final String[] mSuggestions;
213            public final boolean mHasRecommendedSuggestions;
214            public Result(final String[] gatheredSuggestions,
215                    final boolean hasRecommendedSuggestions) {
216                mSuggestions = gatheredSuggestions;
217                mHasRecommendedSuggestions = hasRecommendedSuggestions;
218            }
219        }
220
221        private final ArrayList<String> mSuggestions;
222        private final int[] mScores;
223        private final String mOriginalText;
224        private final float mSuggestionThreshold;
225        private final float mRecommendedThreshold;
226        private final int mMaxLength;
227        private int mLength = 0;
228
229        // The two following attributes are only ever filled if the requested max length
230        // is 0 (or less, which is treated the same).
231        private String mBestSuggestion = null;
232        private int mBestScore = Integer.MIN_VALUE; // As small as possible
233
234        SuggestionsGatherer(final String originalText, final float suggestionThreshold,
235                final float recommendedThreshold, final int maxLength) {
236            mOriginalText = originalText;
237            mSuggestionThreshold = suggestionThreshold;
238            mRecommendedThreshold = recommendedThreshold;
239            mMaxLength = maxLength;
240            mSuggestions = CollectionUtils.newArrayList(maxLength + 1);
241            mScores = new int[mMaxLength];
242        }
243
244        synchronized public boolean addWord(char[] word, int[] spaceIndices, int wordOffset,
245                int wordLength, int score) {
246            final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score);
247            // binarySearch returns the index if the element exists, and -<insertion index> - 1
248            // if it doesn't. See documentation for binarySearch.
249            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
250
251            if (insertIndex == 0 && mLength >= mMaxLength) {
252                // In the future, we may want to keep track of the best suggestion score even if
253                // we are asked for 0 suggestions. In this case, we can use the following
254                // (tested) code to keep it:
255                // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
256                // then we need to keep track of the best suggestion in mBestScore and
257                // mBestSuggestion. This is so that we know whether the best suggestion makes
258                // the score cutoff, since we need to know that to return a meaningful
259                // looksLikeTypo.
260                // if (0 >= mMaxLength) {
261                //     if (score > mBestScore) {
262                //         mBestScore = score;
263                //         mBestSuggestion = new String(word, wordOffset, wordLength);
264                //     }
265                // }
266                return true;
267            }
268            if (insertIndex >= mMaxLength) {
269                // We found a suggestion, but its score is too weak to be kept considering
270                // the suggestion limit.
271                return true;
272            }
273
274            // Compute the normalized score and skip this word if it's normalized score does not
275            // make the threshold.
276            final String wordString = new String(word, wordOffset, wordLength);
277            final float normalizedScore =
278                    BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score);
279            if (normalizedScore < mSuggestionThreshold) {
280                if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
281                return true;
282            }
283
284            if (mLength < mMaxLength) {
285                final int copyLen = mLength - insertIndex;
286                ++mLength;
287                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
288                mSuggestions.add(insertIndex, wordString);
289            } else {
290                System.arraycopy(mScores, 1, mScores, 0, insertIndex);
291                mSuggestions.add(insertIndex, wordString);
292                mSuggestions.remove(0);
293            }
294            mScores[insertIndex] = score;
295
296            return true;
297        }
298
299        public Result getResults(final int capitalizeType, final Locale locale) {
300            final String[] gatheredSuggestions;
301            final boolean hasRecommendedSuggestions;
302            if (0 == mLength) {
303                // TODO: the comment below describes what is intended, but in the practice
304                // mBestSuggestion is only ever set to null so it doesn't work. Fix this.
305                // Either we found no suggestions, or we found some BUT the max length was 0.
306                // If we found some mBestSuggestion will not be null. If it is null, then
307                // we found none, regardless of the max length.
308                if (null == mBestSuggestion) {
309                    gatheredSuggestions = null;
310                    hasRecommendedSuggestions = false;
311                } else {
312                    gatheredSuggestions = EMPTY_STRING_ARRAY;
313                    final float normalizedScore = BinaryDictionary.calcNormalizedScore(
314                            mOriginalText, mBestSuggestion, mBestScore);
315                    hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
316                }
317            } else {
318                if (DBG) {
319                    if (mLength != mSuggestions.size()) {
320                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
321                    }
322                    for (int i = mLength - 1; i >= 0; --i) {
323                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
324                    }
325                }
326                Collections.reverse(mSuggestions);
327                StringUtils.removeDupes(mSuggestions);
328                if (CAPITALIZE_ALL == capitalizeType) {
329                    for (int i = 0; i < mSuggestions.size(); ++i) {
330                        // get(i) returns a CharSequence which is actually a String so .toString()
331                        // should return the same object.
332                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
333                    }
334                } else if (CAPITALIZE_FIRST == capitalizeType) {
335                    for (int i = 0; i < mSuggestions.size(); ++i) {
336                        // Likewise
337                        mSuggestions.set(i, StringUtils.toTitleCase(
338                                mSuggestions.get(i).toString(), locale));
339                    }
340                }
341                // This returns a String[], while toArray() returns an Object[] which cannot be cast
342                // into a String[].
343                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
344
345                final int bestScore = mScores[mLength - 1];
346                final String bestSuggestion = mSuggestions.get(0);
347                final float normalizedScore =
348                        BinaryDictionary.calcNormalizedScore(
349                                mOriginalText, bestSuggestion.toString(), bestScore);
350                hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
351                if (DBG) {
352                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
353                    Log.i(TAG, "Normalized score = " + normalizedScore
354                            + " (threshold " + mRecommendedThreshold
355                            + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions);
356                }
357            }
358            return new Result(gatheredSuggestions, hasRecommendedSuggestions);
359        }
360    }
361
362    @Override
363    public boolean onUnbind(final Intent intent) {
364        closeAllDictionaries();
365        return false;
366    }
367
368    private void closeAllDictionaries() {
369        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
370        mDictionaryPools = CollectionUtils.newSynchronizedTreeMap();
371        final Map<String, UserBinaryDictionary> oldUserDictionaries = mUserDictionaries;
372        mUserDictionaries = CollectionUtils.newSynchronizedTreeMap();
373        new Thread("spellchecker_close_dicts") {
374            @Override
375            public void run() {
376                for (DictionaryPool pool : oldPools.values()) {
377                    pool.close();
378                }
379                for (Dictionary dict : oldUserDictionaries.values()) {
380                    dict.close();
381                }
382                synchronized (mUseContactsLock) {
383                    if (null != mContactsDictionary) {
384                        // The synchronously loaded contacts dictionary should have been in one
385                        // or several pools, but it is shielded against multiple closing and it's
386                        // safe to call it several times.
387                        final ContactsBinaryDictionary dictToClose = mContactsDictionary;
388                        // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY
389                        // is no longer needed
390                        mContactsDictionary = null;
391                        dictToClose.close();
392                    }
393                }
394            }
395        }.start();
396    }
397
398    public DictionaryPool getDictionaryPool(final String locale) {
399        DictionaryPool pool = mDictionaryPools.get(locale);
400        if (null == pool) {
401            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
402            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
403            mDictionaryPools.put(locale, pool);
404        }
405        return pool;
406    }
407
408    public DictAndProximity createDictAndProximity(final Locale locale) {
409        final int script = getScriptFromLocale(locale);
410        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(
411                SpellCheckerProximityInfo.getProximityForScript(script),
412                SpellCheckerProximityInfo.ROW_SIZE,
413                SpellCheckerProximityInfo.PROXIMITY_GRID_WIDTH,
414                SpellCheckerProximityInfo.PROXIMITY_GRID_HEIGHT);
415        final DictionaryCollection dictionaryCollection =
416                DictionaryFactory.createMainDictionaryFromManager(this, locale,
417                        true /* useFullEditDistance */);
418        final String localeStr = locale.toString();
419        UserBinaryDictionary userDictionary = mUserDictionaries.get(localeStr);
420        if (null == userDictionary) {
421            userDictionary = new SynchronouslyLoadedUserBinaryDictionary(this, localeStr, true);
422            mUserDictionaries.put(localeStr, userDictionary);
423        }
424        dictionaryCollection.addDictionary(userDictionary);
425        synchronized (mUseContactsLock) {
426            if (mUseContactsDictionary) {
427                if (null == mContactsDictionary) {
428                    // TODO: use the right locale. We can't do it right now because the
429                    // spell checker is reusing the contacts dictionary across sessions
430                    // without regard for their locale, so we need to fix that first.
431                    mContactsDictionary = new SynchronouslyLoadedContactsBinaryDictionary(this,
432                            Locale.getDefault());
433                }
434            }
435            dictionaryCollection.addDictionary(mContactsDictionary);
436            mDictionaryCollectionsList.add(
437                    new WeakReference<DictionaryCollection>(dictionaryCollection));
438        }
439        return new DictAndProximity(dictionaryCollection, proximityInfo);
440    }
441
442    // This method assumes the text is not empty or null.
443    public static int getCapitalizationType(String text) {
444        // If the first char is not uppercase, then the word is either all lower case,
445        // and in either case we return CAPITALIZE_NONE.
446        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
447        final int len = text.length();
448        int capsCount = 1;
449        int letterCount = 1;
450        for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) {
451            if (1 != capsCount && letterCount != capsCount) break;
452            final int codePoint = text.codePointAt(i);
453            if (Character.isUpperCase(codePoint)) {
454                ++capsCount;
455                ++letterCount;
456            } else if (Character.isLetter(codePoint)) {
457                // We need to discount non-letters since they may not be upper-case, but may
458                // still be part of a word (e.g. single quote or dash, as in "IT'S" or "FULL-TIME")
459                ++letterCount;
460            }
461        }
462        // We know the first char is upper case. So we want to test if either every letter other
463        // than the first is lower case, or if they are all upper case. If the string is exactly
464        // one char long, then we will arrive here with letterCount 1, and this is correct, too.
465        if (1 == capsCount) return CAPITALIZE_FIRST;
466        return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
467    }
468}
469