AndroidWordLevelSpellCheckerSession.java revision 0dab3171d442a4d0acc87cc0019bfcbd4ea4123f
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.ContentResolver;
20import android.database.ContentObserver;
21import android.os.Binder;
22import android.provider.UserDictionary.Words;
23import android.service.textservice.SpellCheckerService.Session;
24import android.text.TextUtils;
25import android.util.Log;
26import android.util.LruCache;
27import android.view.textservice.SuggestionsInfo;
28import android.view.textservice.TextInfo;
29
30import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
31import com.android.inputmethod.latin.Constants;
32import com.android.inputmethod.latin.Dictionary;
33import com.android.inputmethod.latin.PrevWordsInfo;
34import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
35import com.android.inputmethod.latin.WordComposer;
36import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer;
37import com.android.inputmethod.latin.utils.CoordinateUtils;
38import com.android.inputmethod.latin.utils.LocaleUtils;
39import com.android.inputmethod.latin.utils.ScriptUtils;
40import com.android.inputmethod.latin.utils.StringUtils;
41
42import java.util.ArrayList;
43import java.util.Locale;
44
45public abstract class AndroidWordLevelSpellCheckerSession extends Session {
46    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
47    private static final boolean DBG = false;
48
49    // Immutable, but need the locale which is not available in the constructor yet
50    private DictionaryPool mDictionaryPool;
51    // Likewise
52    private Locale mLocale;
53    // Cache this for performance
54    private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
55    private final AndroidSpellCheckerService mService;
56    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
57    private final ContentObserver mObserver;
58
59    private static final class SuggestionsParams {
60        public final String[] mSuggestions;
61        public final int mFlags;
62        public SuggestionsParams(String[] suggestions, int flags) {
63            mSuggestions = suggestions;
64            mFlags = flags;
65        }
66    }
67
68    protected static final class SuggestionsCache {
69        private static final char CHAR_DELIMITER = '\uFFFC';
70        private static final int MAX_CACHE_SIZE = 50;
71        private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
72                new LruCache<>(MAX_CACHE_SIZE);
73
74        // TODO: Support n-gram input
75        private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) {
76            if (TextUtils.isEmpty(query) || !prevWordsInfo.isValid()) {
77                return query;
78            }
79            return query + CHAR_DELIMITER + prevWordsInfo;
80        }
81
82        public SuggestionsParams getSuggestionsFromCache(String query,
83                final PrevWordsInfo prevWordsInfo) {
84            return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo));
85        }
86
87        public void putSuggestionsToCache(
88                final String query, final PrevWordsInfo prevWordsInfo,
89                final String[] suggestions, final int flags) {
90            if (suggestions == null || TextUtils.isEmpty(query)) {
91                return;
92            }
93            mUnigramSuggestionsInfoCache.put(
94                    generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags));
95        }
96
97        public void clearCache() {
98            mUnigramSuggestionsInfoCache.evictAll();
99        }
100    }
101
102    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
103        mService = service;
104        final ContentResolver cres = service.getContentResolver();
105
106        mObserver = new ContentObserver(null) {
107            @Override
108            public void onChange(boolean self) {
109                mSuggestionsCache.clearCache();
110            }
111        };
112        cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
113    }
114
115    @Override
116    public void onCreate() {
117        final String localeString = getLocale();
118        mDictionaryPool = mService.getDictionaryPool(localeString);
119        mLocale = LocaleUtils.constructLocaleFromString(localeString);
120        mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
121    }
122
123    @Override
124    public void onClose() {
125        final ContentResolver cres = mService.getContentResolver();
126        cres.unregisterContentObserver(mObserver);
127    }
128
129    private static final int CHECKABILITY_CHECKABLE = 0;
130    private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
131    private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
132    private static final int CHECKABILITY_EMAIL_OR_URL = 3;
133    private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
134    private static final int CHECKABILITY_TOO_SHORT = 5;
135    /**
136     * Finds out whether a particular string should be filtered out of spell checking.
137     *
138     * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
139     * we know we will never recognize, this accepts a script identifier that should be one
140     * of the SCRIPT_* constants defined above, to rule out quickly characters from very
141     * different languages.
142     *
143     * @param text the string to evaluate.
144     * @param script the identifier for the script this spell checker recognizes
145     * @return one of the FILTER_OUT_* constants above.
146     */
147    private static int getCheckabilityInScript(final String text, final int script) {
148        if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
149
150        // TODO: check if an equivalent processing can't be done more quickly with a
151        // compiled regexp.
152        // Filter by first letter
153        final int firstCodePoint = text.codePointAt(0);
154        // Filter out words that don't start with a letter or an apostrophe
155        if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
156                && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
157
158        // Filter contents
159        final int length = text.length();
160        int letterCount = 0;
161        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
162            final int codePoint = text.codePointAt(i);
163            // Any word containing a COMMERCIAL_AT is probably an e-mail address
164            // Any word containing a SLASH is probably either an ad-hoc combination of two
165            // words or a URI - in either case we don't want to spell check that
166            if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
167                return CHECKABILITY_EMAIL_OR_URL;
168            }
169            // If the string contains a period, native returns strange suggestions (it seems
170            // to return suggestions for everything up to the period only and to ignore the
171            // rest), so we suppress lookup if there is a period.
172            // TODO: investigate why native returns these suggestions and remove this code.
173            if (Constants.CODE_PERIOD == codePoint) {
174                return CHECKABILITY_CONTAINS_PERIOD;
175            }
176            if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
177        }
178        // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
179        // in this word are letters
180        return (letterCount * 4 < length * 3)
181                ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
182    }
183
184    /**
185     * Helper method to test valid capitalizations of a word.
186     *
187     * If the "text" is lower-case, we test only the exact string.
188     * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
189     *  version of it "text".
190     * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
191     *  version of it "text" and the capitalized version of it "Text".
192     */
193    private boolean isInDictForAnyCapitalization(final Dictionary dict, final String text,
194            final int capitalizeType) {
195        // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
196        // case versions, but only if the word is not already all-lower case or mixed case.
197        if (dict.isValidWord(text)) return true;
198        if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;
199
200        // If we come here, we have a capitalized word (either First- or All-).
201        // Downcase the word and look it up again. If the word is only capitalized, we
202        // tested all possibilities, so if it's still negative we can return false.
203        final String lowerCaseText = text.toLowerCase(mLocale);
204        if (dict.isValidWord(lowerCaseText)) return true;
205        if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;
206
207        // If the lower case version is not in the dictionary, it's still possible
208        // that we have an all-caps version of a word that needs to be capitalized
209        // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
210        return dict.isValidWord(StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
211    }
212
213    // Note : this must be reentrant
214    /**
215     * Gets a list of suggestions for a specific string. This returns a list of possible
216     * corrections for the text passed as an argument. It may split or group words, and
217     * even perform grammatical analysis.
218     */
219    private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
220            final int suggestionsLimit) {
221        return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
222    }
223
224    protected SuggestionsInfo onGetSuggestionsInternal(
225            final TextInfo textInfo, final PrevWordsInfo prevWordsInfo,
226            final int suggestionsLimit) {
227        try {
228            final String inText = textInfo.getText();
229            final SuggestionsParams cachedSuggestionsParams =
230                    mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo);
231            if (cachedSuggestionsParams != null) {
232                if (DBG) {
233                    Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
234                }
235                return new SuggestionsInfo(
236                        cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
237            }
238
239            final int checkability = getCheckabilityInScript(inText, mScript);
240            if (CHECKABILITY_CHECKABLE != checkability) {
241                DictAndKeyboard dictInfo = null;
242                try {
243                    dictInfo = mDictionaryPool.pollWithDefaultTimeout();
244                    if (!DictionaryPool.isAValidDictionary(dictInfo)) {
245                        return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
246                                false /* reportAsTypo */);
247                    }
248                    if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
249                        final String[] splitText = inText.split(Constants.REGEXP_PERIOD);
250                        boolean allWordsAreValid = true;
251                        for (final String word : splitText) {
252                            if (!dictInfo.mDictionary.isValidWord(word)) {
253                                allWordsAreValid = false;
254                                break;
255                            }
256                        }
257                        if (allWordsAreValid) {
258                            return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
259                                    | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
260                                    new String[] {
261                                            TextUtils.join(Constants.STRING_SPACE, splitText),
262                                            TextUtils.join(Constants.STRING_PERIOD_AND_SPACE,
263                                                    splitText) });
264                        }
265                    }
266                    return dictInfo.mDictionary.isValidWord(inText)
267                            ? AndroidSpellCheckerService.getInDictEmptySuggestions()
268                            : AndroidSpellCheckerService.getNotInDictEmptySuggestions(
269                                    CHECKABILITY_CONTAINS_PERIOD == checkability
270                                    /* reportAsTypo */);
271                } finally {
272                    if (null != dictInfo) {
273                        if (!mDictionaryPool.offer(dictInfo)) {
274                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
275                        }
276                    }
277                }
278            }
279            final String text = inText.replaceAll(
280                    AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);
281
282            // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
283            //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
284            //mService.mSuggestionThreshold, mService.mRecommendedThreshold,
285            //suggestionsLimit);
286            final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer(
287                    text, suggestionsLimit);
288
289            final int capitalizeType = StringUtils.getCapitalizationType(text);
290            boolean isInDict = true;
291            DictAndKeyboard dictInfo = null;
292            try {
293                dictInfo = mDictionaryPool.pollWithDefaultTimeout();
294                if (!DictionaryPool.isAValidDictionary(dictInfo)) {
295                    return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
296                            false /* reportAsTypo */);
297                }
298                final WordComposer composer = new WordComposer();
299                final int[] codePoints = StringUtils.toCodePointArray(text);
300                final int[] coordinates;
301                if (null == dictInfo.mKeyboard) {
302                    coordinates = CoordinateUtils.newCoordinateArray(codePoints.length,
303                            Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
304                } else {
305                    coordinates = dictInfo.mKeyboard.getCoordinates(codePoints);
306                }
307                composer.setComposingWord(codePoints, coordinates);
308                // TODO: make a spell checker option to block offensive words or not
309                final ArrayList<SuggestedWordInfo> suggestions =
310                        dictInfo.mDictionary.getSuggestions(composer, prevWordsInfo,
311                                dictInfo.getProximityInfo(), true /* blockOffensiveWords */,
312                                null /* additionalFeaturesOptions */, 0 /* sessionId */,
313                                null /* inOutLanguageWeight */);
314                if (suggestions != null) {
315                    for (final SuggestedWordInfo suggestion : suggestions) {
316                        final String suggestionStr = suggestion.mWord;
317                        suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0,
318                                suggestionStr.length(), suggestion.mScore);
319                    }
320                }
321                isInDict = isInDictForAnyCapitalization(dictInfo.mDictionary, text, capitalizeType);
322            } finally {
323                if (null != dictInfo) {
324                    if (!mDictionaryPool.offer(dictInfo)) {
325                        Log.e(TAG, "Can't re-insert a dictionary into its pool");
326                    }
327                }
328            }
329
330            final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
331                    capitalizeType, mLocale);
332
333            if (DBG) {
334                Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
335                        + suggestionsLimit);
336                Log.i(TAG, "IsInDict = " + isInDict);
337                Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
338                Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
339                if (null != result.mSuggestions) {
340                    for (String suggestion : result.mSuggestions) {
341                        Log.i(TAG, suggestion);
342                    }
343                }
344            }
345
346            final int flags =
347                    (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
348                            : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
349                    | (result.mHasRecommendedSuggestions
350                            ? SuggestionsInfoCompatUtils
351                                    .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
352                            : 0);
353            final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
354            mSuggestionsCache.putSuggestionsToCache(text, prevWordsInfo, result.mSuggestions,
355                    flags);
356            return retval;
357        } catch (RuntimeException e) {
358            // Don't kill the keyboard if there is a bug in the spell checker
359            if (DBG) {
360                throw e;
361            } else {
362                Log.e(TAG, "Exception while spellcheking", e);
363                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
364                        false /* reportAsTypo */);
365            }
366        }
367    }
368
369    /*
370     * The spell checker acts on its own behalf. That is needed, in particular, to be able to
371     * access the dictionary files, which the provider restricts to the identity of Latin IME.
372     * Since it's called externally by the application, the spell checker is using the identity
373     * of the application by default unless we clearCallingIdentity.
374     * That's what the following method does.
375     */
376    @Override
377    public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
378            final int suggestionsLimit) {
379        long ident = Binder.clearCallingIdentity();
380        try {
381            return onGetSuggestionsInternal(textInfo, suggestionsLimit);
382        } finally {
383            Binder.restoreCallingIdentity(ident);
384        }
385    }
386}
387