AndroidSpellCheckerService.java revision 46fc768e54e3d52003645494552f9e686f28f20f
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.Intent;
20import android.content.SharedPreferences;
21import android.preference.PreferenceManager;
22import android.service.textservice.SpellCheckerService;
23import android.util.Log;
24import android.view.textservice.SuggestionsInfo;
25
26import com.android.inputmethod.keyboard.ProximityInfo;
27import com.android.inputmethod.latin.BinaryDictionary;
28import com.android.inputmethod.latin.ContactsBinaryDictionary;
29import com.android.inputmethod.latin.Dictionary;
30import com.android.inputmethod.latin.DictionaryCollection;
31import com.android.inputmethod.latin.DictionaryFactory;
32import com.android.inputmethod.latin.LocaleUtils;
33import com.android.inputmethod.latin.R;
34import com.android.inputmethod.latin.StringUtils;
35import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary;
36import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary;
37import com.android.inputmethod.latin.UserBinaryDictionary;
38
39import java.lang.ref.WeakReference;
40import java.util.ArrayList;
41import java.util.Arrays;
42import java.util.Collections;
43import java.util.HashSet;
44import java.util.Iterator;
45import java.util.Locale;
46import java.util.Map;
47import java.util.TreeMap;
48
49/**
50 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
51 */
52public class AndroidSpellCheckerService extends SpellCheckerService
53        implements SharedPreferences.OnSharedPreferenceChangeListener {
    // Tag used for all log output of this service and of its nested classes.
    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
    // Gates the verbose debug logging below.
    private static final boolean DBG = false;
    // Size handed to every DictionaryPool created by getDictionaryPool().
    private static final int POOL_SIZE = 2;

    // Preference key read in onSharedPreferenceChanged(); the preference defaults to true.
    public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts";

    // Capitalization types, as returned by getCapitalizationType() and accepted by
    // SuggestionsGatherer#getResults().
    public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
    public static final int CAPITALIZE_FIRST = 1; // First only
    public static final int CAPITALIZE_ALL = 2; // All caps

    // Shared empty array used for empty suggestion lists and as the toArray() type token.
    private final static String[] EMPTY_STRING_ARRAY = new String[0];
    // Dictionary pools, keyed by the locale string passed to getDictionaryPool().
    // Replaced wholesale by closeAllDictionaries().
    private Map<String, DictionaryPool> mDictionaryPools =
            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
    // User dictionaries, keyed by Locale#toString(). Replaced wholesale by
    // closeAllDictionaries().
    private Map<String, UserBinaryDictionary> mUserDictionaries =
            Collections.synchronizedMap(new TreeMap<String, UserBinaryDictionary>());
    // The single contacts dictionary shared by all dictionary collections, or null when
    // there is none. Every access in this file happens while holding mUseContactsLock.
    private ContactsBinaryDictionary mContactsDictionary;

    // The threshold for a candidate to be offered as a suggestion.
    private float mSuggestionThreshold;
    // The threshold for a suggestion to be considered "recommended".
    private float mRecommendedThreshold;
    // Whether to use the contacts dictionary
    private boolean mUseContactsDictionary;
    // Lock held while the contacts setting, the contacts dictionary and
    // mDictionaryCollectionsList are read or modified.
    private final Object mUseContactsLock = new Object();

    // Weak references to every collection handed out by createDictAndProximity(), so that
    // the contacts dictionary can be added to or removed from them later. Cleared
    // references are pruned while iterating in the start/stop methods.
    private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList =
            new HashSet<WeakReference<DictionaryCollection>>();

    // Script identifiers, used as the values of mLanguageToScript.
    public static final int SCRIPT_LATIN = 0;
    public static final int SCRIPT_CYRILLIC = 1;
    // U+0027 APOSTROPHE (the plain ASCII single quote).
    public static final String SINGLE_QUOTE = "\u0027";
    // U+2019 RIGHT SINGLE QUOTATION MARK (the typographic apostrophe).
    public static final String APOSTROPHE = "\u2019";
86    private static final TreeMap<String, Integer> mLanguageToScript;
87    static {
88        // List of the supported languages and their associated script. We won't check
89        // words written in another script than the selected script, because we know we
90        // don't have those in our dictionary so we will underline everything and we
91        // will never have any suggestions, so it makes no sense checking them, and this
92        // is done in {@link #shouldFilterOut}. Also, the script is used to choose which
93        // proximity to pass to the dictionary descent algorithm.
94        // IMPORTANT: this only contains languages - do not write countries in there.
95        // Only the language is searched from the map.
96        mLanguageToScript = new TreeMap<String, Integer>();
97        mLanguageToScript.put("en", SCRIPT_LATIN);
98        mLanguageToScript.put("fr", SCRIPT_LATIN);
99        mLanguageToScript.put("de", SCRIPT_LATIN);
100        mLanguageToScript.put("nl", SCRIPT_LATIN);
101        mLanguageToScript.put("cs", SCRIPT_LATIN);
102        mLanguageToScript.put("es", SCRIPT_LATIN);
103        mLanguageToScript.put("it", SCRIPT_LATIN);
104        mLanguageToScript.put("hr", SCRIPT_LATIN);
105        mLanguageToScript.put("pt", SCRIPT_LATIN);
106        mLanguageToScript.put("ru", SCRIPT_CYRILLIC);
107        // TODO: Make a persian proximity, and activate the Farsi subtype.
108        // mLanguageToScript.put("fa", SCRIPT_PERSIAN);
109    }
110
111    @Override public void onCreate() {
112        super.onCreate();
113        mSuggestionThreshold =
114                Float.parseFloat(getString(R.string.spellchecker_suggestion_threshold_value));
115        mRecommendedThreshold =
116                Float.parseFloat(getString(R.string.spellchecker_recommended_threshold_value));
117        final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
118        prefs.registerOnSharedPreferenceChangeListener(this);
119        onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY);
120    }
121
122    public static int getScriptFromLocale(final Locale locale) {
123        final Integer script = mLanguageToScript.get(locale.getLanguage());
124        if (null == script) {
125            throw new RuntimeException("We have been called with an unsupported language: \""
126                    + locale.getLanguage() + "\". Framework bug?");
127        }
128        return script;
129    }
130
131    @Override
132    public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) {
133        if (!PREF_USE_CONTACTS_KEY.equals(key)) return;
134        synchronized(mUseContactsLock) {
135            mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true);
136            if (mUseContactsDictionary) {
137                startUsingContactsDictionaryLocked();
138            } else {
139                stopUsingContactsDictionaryLocked();
140            }
141        }
142    }
143
144    private void startUsingContactsDictionaryLocked() {
145        if (null == mContactsDictionary) {
146            // TODO: use the right locale for each session
147            mContactsDictionary =
148                    new SynchronouslyLoadedContactsBinaryDictionary(this, Locale.getDefault());
149        }
150        final Iterator<WeakReference<DictionaryCollection>> iterator =
151                mDictionaryCollectionsList.iterator();
152        while (iterator.hasNext()) {
153            final WeakReference<DictionaryCollection> dictRef = iterator.next();
154            final DictionaryCollection dict = dictRef.get();
155            if (null == dict) {
156                iterator.remove();
157            } else {
158                dict.addDictionary(mContactsDictionary);
159            }
160        }
161    }
162
163    private void stopUsingContactsDictionaryLocked() {
164        if (null == mContactsDictionary) return;
165        final Dictionary contactsDict = mContactsDictionary;
166        // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no longer needed
167        mContactsDictionary = null;
168        final Iterator<WeakReference<DictionaryCollection>> iterator =
169                mDictionaryCollectionsList.iterator();
170        while (iterator.hasNext()) {
171            final WeakReference<DictionaryCollection> dictRef = iterator.next();
172            final DictionaryCollection dict = dictRef.get();
173            if (null == dict) {
174                iterator.remove();
175            } else {
176                dict.removeDictionary(contactsDict);
177            }
178        }
179        contactsDict.close();
180    }
181
182    @Override
183    public Session createSession() {
184        // Should not refer to AndroidSpellCheckerSession directly considering
185        // that AndroidSpellCheckerSession may be overlaid.
186        return AndroidSpellCheckerSessionFactory.newInstance(this);
187    }
188
189    public static SuggestionsInfo getNotInDictEmptySuggestions() {
190        return new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
191    }
192
193    public static SuggestionsInfo getInDictEmptySuggestions() {
194        return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
195                EMPTY_STRING_ARRAY);
196    }
197
198    public SuggestionsGatherer newSuggestionsGatherer(final String text, int maxLength) {
199        return new SuggestionsGatherer(
200                text, mSuggestionThreshold, mRecommendedThreshold, maxLength);
201    }
202
203    // TODO: remove this class and replace it by storage local to the session.
204    public static class SuggestionsGatherer {
205        public static class Result {
206            public final String[] mSuggestions;
207            public final boolean mHasRecommendedSuggestions;
208            public Result(final String[] gatheredSuggestions,
209                    final boolean hasRecommendedSuggestions) {
210                mSuggestions = gatheredSuggestions;
211                mHasRecommendedSuggestions = hasRecommendedSuggestions;
212            }
213        }
214
        // The suggestions kept so far. Entry i goes with score mScores[i]; getResults()
        // reverses this list before returning it.
        private final ArrayList<CharSequence> mSuggestions;
        // The scores of the kept suggestions. Only the first mLength entries are in use;
        // addWord() finds insertion points with a binary search over them, so they are
        // expected to be in ascending order (getResults() reads the last one as the best).
        private final int[] mScores;
        // The word being checked, used to normalize the scores of the candidates.
        private final String mOriginalText;
        // Minimum normalized score for a candidate to be kept by addWord().
        private final float mSuggestionThreshold;
        // Normalized score the best suggestion must exceed to be reported as recommended.
        private final float mRecommendedThreshold;
        // Maximum number of suggestions to keep; also the length of mScores.
        private final int mMaxLength;
        // Number of suggestions currently kept.
        private int mLength = 0;

        // The two following attributes are only ever filled if the requested max length
        // is 0 (or less, which is treated the same).
        // NOTE(review): the only code that would assign them is commented out in addWord(),
        // so as far as this file shows they always keep their initial values.
        private String mBestSuggestion = null;
        private int mBestScore = Integer.MIN_VALUE; // As small as possible
227
228        SuggestionsGatherer(final String originalText, final float suggestionThreshold,
229                final float recommendedThreshold, final int maxLength) {
230            mOriginalText = originalText;
231            mSuggestionThreshold = suggestionThreshold;
232            mRecommendedThreshold = recommendedThreshold;
233            mMaxLength = maxLength;
234            mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
235            mScores = new int[mMaxLength];
236        }
237
238        synchronized public boolean addWord(char[] word, int[] spaceIndices, int wordOffset,
239                int wordLength, int score) {
240            final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score);
241            // binarySearch returns the index if the element exists, and -<insertion index> - 1
242            // if it doesn't. See documentation for binarySearch.
243            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
244
245            if (insertIndex == 0 && mLength >= mMaxLength) {
246                // In the future, we may want to keep track of the best suggestion score even if
247                // we are asked for 0 suggestions. In this case, we can use the following
248                // (tested) code to keep it:
249                // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
250                // then we need to keep track of the best suggestion in mBestScore and
251                // mBestSuggestion. This is so that we know whether the best suggestion makes
252                // the score cutoff, since we need to know that to return a meaningful
253                // looksLikeTypo.
254                // if (0 >= mMaxLength) {
255                //     if (score > mBestScore) {
256                //         mBestScore = score;
257                //         mBestSuggestion = new String(word, wordOffset, wordLength);
258                //     }
259                // }
260                return true;
261            }
262            if (insertIndex >= mMaxLength) {
263                // We found a suggestion, but its score is too weak to be kept considering
264                // the suggestion limit.
265                return true;
266            }
267
268            // Compute the normalized score and skip this word if it's normalized score does not
269            // make the threshold.
270            final String wordString = new String(word, wordOffset, wordLength);
271            final float normalizedScore =
272                    BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score);
273            if (normalizedScore < mSuggestionThreshold) {
274                if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
275                return true;
276            }
277
278            if (mLength < mMaxLength) {
279                final int copyLen = mLength - insertIndex;
280                ++mLength;
281                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
282                mSuggestions.add(insertIndex, wordString);
283            } else {
284                System.arraycopy(mScores, 1, mScores, 0, insertIndex);
285                mSuggestions.add(insertIndex, wordString);
286                mSuggestions.remove(0);
287            }
288            mScores[insertIndex] = score;
289
290            return true;
291        }
292
293        public Result getResults(final int capitalizeType, final Locale locale) {
294            final String[] gatheredSuggestions;
295            final boolean hasRecommendedSuggestions;
296            if (0 == mLength) {
297                // Either we found no suggestions, or we found some BUT the max length was 0.
298                // If we found some mBestSuggestion will not be null. If it is null, then
299                // we found none, regardless of the max length.
300                if (null == mBestSuggestion) {
301                    gatheredSuggestions = null;
302                    hasRecommendedSuggestions = false;
303                } else {
304                    gatheredSuggestions = EMPTY_STRING_ARRAY;
305                    final float normalizedScore = BinaryDictionary.calcNormalizedScore(
306                            mOriginalText, mBestSuggestion, mBestScore);
307                    hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
308                }
309            } else {
310                if (DBG) {
311                    if (mLength != mSuggestions.size()) {
312                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
313                    }
314                    for (int i = mLength - 1; i >= 0; --i) {
315                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
316                    }
317                }
318                Collections.reverse(mSuggestions);
319                StringUtils.removeDupes(mSuggestions);
320                if (CAPITALIZE_ALL == capitalizeType) {
321                    for (int i = 0; i < mSuggestions.size(); ++i) {
322                        // get(i) returns a CharSequence which is actually a String so .toString()
323                        // should return the same object.
324                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
325                    }
326                } else if (CAPITALIZE_FIRST == capitalizeType) {
327                    for (int i = 0; i < mSuggestions.size(); ++i) {
328                        // Likewise
329                        mSuggestions.set(i, StringUtils.toTitleCase(
330                                mSuggestions.get(i).toString(), locale));
331                    }
332                }
333                // This returns a String[], while toArray() returns an Object[] which cannot be cast
334                // into a String[].
335                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
336
337                final int bestScore = mScores[mLength - 1];
338                final CharSequence bestSuggestion = mSuggestions.get(0);
339                final float normalizedScore =
340                        BinaryDictionary.calcNormalizedScore(
341                                mOriginalText, bestSuggestion.toString(), bestScore);
342                hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold);
343                if (DBG) {
344                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
345                    Log.i(TAG, "Normalized score = " + normalizedScore
346                            + " (threshold " + mRecommendedThreshold
347                            + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions);
348                }
349            }
350            return new Result(gatheredSuggestions, hasRecommendedSuggestions);
351        }
352    }
353
354    @Override
355    public boolean onUnbind(final Intent intent) {
356        closeAllDictionaries();
357        return false;
358    }
359
360    private void closeAllDictionaries() {
361        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
362        mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
363        final Map<String, UserBinaryDictionary> oldUserDictionaries = mUserDictionaries;
364        mUserDictionaries =
365                Collections.synchronizedMap(new TreeMap<String, UserBinaryDictionary>());
366        new Thread("spellchecker_close_dicts") {
367            @Override
368            public void run() {
369                for (DictionaryPool pool : oldPools.values()) {
370                    pool.close();
371                }
372                for (Dictionary dict : oldUserDictionaries.values()) {
373                    dict.close();
374                }
375                synchronized (mUseContactsLock) {
376                    if (null != mContactsDictionary) {
377                        // The synchronously loaded contacts dictionary should have been in one
378                        // or several pools, but it is shielded against multiple closing and it's
379                        // safe to call it several times.
380                        final ContactsBinaryDictionary dictToClose = mContactsDictionary;
381                        // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY
382                        // is no longer needed
383                        mContactsDictionary = null;
384                        dictToClose.close();
385                    }
386                }
387            }
388        }.start();
389    }
390
391    public DictionaryPool getDictionaryPool(final String locale) {
392        DictionaryPool pool = mDictionaryPools.get(locale);
393        if (null == pool) {
394            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
395            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
396            mDictionaryPools.put(locale, pool);
397        }
398        return pool;
399    }
400
401    public DictAndProximity createDictAndProximity(final Locale locale) {
402        final int script = getScriptFromLocale(locale);
403        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(
404                SpellCheckerProximityInfo.getProximityForScript(script),
405                SpellCheckerProximityInfo.ROW_SIZE,
406                SpellCheckerProximityInfo.PROXIMITY_GRID_WIDTH,
407                SpellCheckerProximityInfo.PROXIMITY_GRID_HEIGHT);
408        final DictionaryCollection dictionaryCollection =
409                DictionaryFactory.createMainDictionaryFromManager(this, locale,
410                        true /* useFullEditDistance */);
411        final String localeStr = locale.toString();
412        UserBinaryDictionary userDictionary = mUserDictionaries.get(localeStr);
413        if (null == userDictionary) {
414            userDictionary = new SynchronouslyLoadedUserBinaryDictionary(this, localeStr, true);
415            mUserDictionaries.put(localeStr, userDictionary);
416        }
417        dictionaryCollection.addDictionary(userDictionary);
418        synchronized (mUseContactsLock) {
419            if (mUseContactsDictionary) {
420                if (null == mContactsDictionary) {
421                    // TODO: use the right locale. We can't do it right now because the
422                    // spell checker is reusing the contacts dictionary across sessions
423                    // without regard for their locale, so we need to fix that first.
424                    mContactsDictionary = new SynchronouslyLoadedContactsBinaryDictionary(this,
425                            Locale.getDefault());
426                }
427            }
428            dictionaryCollection.addDictionary(mContactsDictionary);
429            mDictionaryCollectionsList.add(
430                    new WeakReference<DictionaryCollection>(dictionaryCollection));
431        }
432        return new DictAndProximity(dictionaryCollection, proximityInfo);
433    }
434
435    // This method assumes the text is not empty or null.
436    public static int getCapitalizationType(String text) {
437        // If the first char is not uppercase, then the word is either all lower case,
438        // and in either case we return CAPITALIZE_NONE.
439        if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
440        final int len = text.length();
441        int capsCount = 1;
442        for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) {
443            if (1 != capsCount && i != capsCount) break;
444            if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
445        }
446        // We know the first char is upper case. So we want to test if either everything
447        // else is lower case, or if everything else is upper case. If the string is
448        // exactly one char long, then we will arrive here with capsCount 1, and this is
449        // correct, too.
450        if (1 == capsCount) return CAPITALIZE_FIRST;
451        return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
452    }
453}
454