AndroidWordLevelSpellCheckerSession.java revision 9e76304d6004c43c3149bc2df460af2a00b18a4f
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.ContentResolver;
20import android.database.ContentObserver;
21import android.os.Binder;
22import android.provider.UserDictionary.Words;
23import android.service.textservice.SpellCheckerService.Session;
24import android.text.TextUtils;
25import android.util.Log;
26import android.util.LruCache;
27import android.view.textservice.SuggestionsInfo;
28import android.view.textservice.TextInfo;
29
30import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
31import com.android.inputmethod.keyboard.Keyboard;
32import com.android.inputmethod.keyboard.ProximityInfo;
33import com.android.inputmethod.latin.Constants;
34import com.android.inputmethod.latin.PrevWordsInfo;
35import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
36import com.android.inputmethod.latin.WordComposer;
37import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
38import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer;
39import com.android.inputmethod.latin.utils.CoordinateUtils;
40import com.android.inputmethod.latin.utils.LocaleUtils;
41import com.android.inputmethod.latin.utils.ScriptUtils;
42import com.android.inputmethod.latin.utils.StringUtils;
43import com.android.inputmethod.latin.utils.SuggestionResults;
44
45import java.util.Locale;
46
47public abstract class AndroidWordLevelSpellCheckerSession extends Session {
48    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
49    private static final boolean DBG = false;
50
51    // Immutable, but not available in the constructor.
52    private Locale mLocale;
53    // Cache this for performance
54    private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
55    private final AndroidSpellCheckerService mService;
56    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
57    private final ContentObserver mObserver;
58
59    private static final class SuggestionsParams {
60        public final String[] mSuggestions;
61        public final int mFlags;
62        public SuggestionsParams(String[] suggestions, int flags) {
63            mSuggestions = suggestions;
64            mFlags = flags;
65        }
66    }
67
68    protected static final class SuggestionsCache {
69        private static final char CHAR_DELIMITER = '\uFFFC';
70        private static final int MAX_CACHE_SIZE = 50;
71        private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
72                new LruCache<>(MAX_CACHE_SIZE);
73
74        // TODO: Support n-gram input
75        private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) {
76            if (TextUtils.isEmpty(query) || !prevWordsInfo.isValid()) {
77                return query;
78            }
79            return query + CHAR_DELIMITER + prevWordsInfo;
80        }
81
82        public SuggestionsParams getSuggestionsFromCache(String query,
83                final PrevWordsInfo prevWordsInfo) {
84            return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo));
85        }
86
87        public void putSuggestionsToCache(
88                final String query, final PrevWordsInfo prevWordsInfo,
89                final String[] suggestions, final int flags) {
90            if (suggestions == null || TextUtils.isEmpty(query)) {
91                return;
92            }
93            mUnigramSuggestionsInfoCache.put(
94                    generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags));
95        }
96
97        public void clearCache() {
98            mUnigramSuggestionsInfoCache.evictAll();
99        }
100    }
101
102    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
103        mService = service;
104        final ContentResolver cres = service.getContentResolver();
105
106        mObserver = new ContentObserver(null) {
107            @Override
108            public void onChange(boolean self) {
109                mSuggestionsCache.clearCache();
110            }
111        };
112        cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
113    }
114
115    @Override
116    public void onCreate() {
117        final String localeString = getLocale();
118        mLocale = LocaleUtils.constructLocaleFromString(localeString);
119        mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale);
120    }
121
122    @Override
123    public void onClose() {
124        final ContentResolver cres = mService.getContentResolver();
125        cres.unregisterContentObserver(mObserver);
126    }
127
128    private static final int CHECKABILITY_CHECKABLE = 0;
129    private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
130    private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
131    private static final int CHECKABILITY_EMAIL_OR_URL = 3;
132    private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
133    private static final int CHECKABILITY_TOO_SHORT = 5;
134    /**
135     * Finds out whether a particular string should be filtered out of spell checking.
136     *
137     * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
138     * we know we will never recognize, this accepts a script identifier that should be one
139     * of the SCRIPT_* constants defined above, to rule out quickly characters from very
140     * different languages.
141     *
142     * @param text the string to evaluate.
143     * @param script the identifier for the script this spell checker recognizes
144     * @return one of the FILTER_OUT_* constants above.
145     */
146    private static int getCheckabilityInScript(final String text, final int script) {
147        if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;
148
149        // TODO: check if an equivalent processing can't be done more quickly with a
150        // compiled regexp.
151        // Filter by first letter
152        final int firstCodePoint = text.codePointAt(0);
153        // Filter out words that don't start with a letter or an apostrophe
154        if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
155                && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;
156
157        // Filter contents
158        final int length = text.length();
159        int letterCount = 0;
160        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
161            final int codePoint = text.codePointAt(i);
162            // Any word containing a COMMERCIAL_AT is probably an e-mail address
163            // Any word containing a SLASH is probably either an ad-hoc combination of two
164            // words or a URI - in either case we don't want to spell check that
165            if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
166                return CHECKABILITY_EMAIL_OR_URL;
167            }
168            // If the string contains a period, native returns strange suggestions (it seems
169            // to return suggestions for everything up to the period only and to ignore the
170            // rest), so we suppress lookup if there is a period.
171            // TODO: investigate why native returns these suggestions and remove this code.
172            if (Constants.CODE_PERIOD == codePoint) {
173                return CHECKABILITY_CONTAINS_PERIOD;
174            }
175            if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
176        }
177        // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
178        // in this word are letters
179        return (letterCount * 4 < length * 3)
180                ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
181    }
182
183    /**
184     * Helper method to test valid capitalizations of a word.
185     *
186     * If the "text" is lower-case, we test only the exact string.
187     * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
188     *  version of it "text".
189     * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
190     *  version of it "text" and the capitalized version of it "Text".
191     */
192    private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) {
193        // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
194        // case versions, but only if the word is not already all-lower case or mixed case.
195        if (mService.isValidWord(mLocale, text)) return true;
196        if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;
197
198        // If we come here, we have a capitalized word (either First- or All-).
199        // Downcase the word and look it up again. If the word is only capitalized, we
200        // tested all possibilities, so if it's still negative we can return false.
201        final String lowerCaseText = text.toLowerCase(mLocale);
202        if (mService.isValidWord(mLocale, lowerCaseText)) return true;
203        if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;
204
205        // If the lower case version is not in the dictionary, it's still possible
206        // that we have an all-caps version of a word that needs to be capitalized
207        // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
208        return mService.isValidWord(mLocale,
209                StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
210    }
211
212    // Note : this must be reentrant
213    /**
214     * Gets a list of suggestions for a specific string. This returns a list of possible
215     * corrections for the text passed as an argument. It may split or group words, and
216     * even perform grammatical analysis.
217     */
218    private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
219            final int suggestionsLimit) {
220        return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
221    }
222
223    protected SuggestionsInfo onGetSuggestionsInternal(
224            final TextInfo textInfo, final PrevWordsInfo prevWordsInfo,
225            final int suggestionsLimit) {
226        try {
227            final String inText = textInfo.getText();
228            final SuggestionsParams cachedSuggestionsParams =
229                    mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo);
230            if (cachedSuggestionsParams != null) {
231                if (DBG) {
232                    Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
233                }
234                return new SuggestionsInfo(
235                        cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
236            }
237            final int checkability = getCheckabilityInScript(inText, mScript);
238            if (CHECKABILITY_CHECKABLE != checkability) {
239                if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
240                    final String[] splitText = inText.split(Constants.REGEXP_PERIOD);
241                    boolean allWordsAreValid = true;
242                    for (final String word : splitText) {
243                        if (!mService.isValidWord(mLocale, word)) {
244                            allWordsAreValid = false;
245                            break;
246                        }
247                    }
248                    if (allWordsAreValid) {
249                        return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
250                                | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
251                                new String[] {
252                                        TextUtils.join(Constants.STRING_SPACE, splitText) });
253                    }
254                }
255                return mService.isValidWord(mLocale, inText) ?
256                        AndroidSpellCheckerService.getInDictEmptySuggestions() :
257                        AndroidSpellCheckerService.getNotInDictEmptySuggestions(
258                                CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */);
259            }
260            final String text = inText.replaceAll(
261                    AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);
262
263            // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
264            //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
265            //mService.mSuggestionThreshold, mService.mRecommendedThreshold,
266            //suggestionsLimit);
267            final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer(
268                    text, suggestionsLimit);
269
270            final int capitalizeType = StringUtils.getCapitalizationType(text);
271            boolean isInDict = true;
272            if (!mService.hasMainDictionaryForLocale(mLocale)) {
273                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
274                        false /* reportAsTypo */);
275            }
276            final Keyboard keyboard = mService.getKeyboardForLocale(mLocale);
277            final WordComposer composer = new WordComposer();
278            final int[] codePoints = StringUtils.toCodePointArray(text);
279            final int[] coordinates;
280            final ProximityInfo proximityInfo;
281            if (null == keyboard) {
282                coordinates = CoordinateUtils.newCoordinateArray(codePoints.length,
283                        Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
284                proximityInfo = null;
285            } else {
286                coordinates = keyboard.getCoordinates(codePoints);
287                proximityInfo = keyboard.getProximityInfo();
288            }
289            composer.setComposingWord(codePoints, coordinates);
290            final SuggestionResults suggestionResults = mService.getSuggestionResults(
291                    mLocale, composer, prevWordsInfo, proximityInfo);
292            if (suggestionResults != null) {
293                for (final SuggestedWordInfo suggestion : suggestionResults) {
294                    final String suggestionStr = suggestion.mWord;
295                    suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0,
296                            suggestionStr.length(), suggestion.mScore);
297                }
298            }
299            isInDict = isInDictForAnyCapitalization(text, capitalizeType);
300
301            final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
302                    capitalizeType, mLocale);
303
304            if (DBG) {
305                Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
306                        + suggestionsLimit);
307                Log.i(TAG, "IsInDict = " + isInDict);
308                Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
309                Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
310                if (null != result.mSuggestions) {
311                    for (String suggestion : result.mSuggestions) {
312                        Log.i(TAG, suggestion);
313                    }
314                }
315            }
316
317            final int flags =
318                    (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
319                            : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
320                    | (result.mHasRecommendedSuggestions
321                            ? SuggestionsInfoCompatUtils
322                                    .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
323                            : 0);
324            final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
325            mSuggestionsCache.putSuggestionsToCache(text, prevWordsInfo, result.mSuggestions,
326                    flags);
327            return retval;
328        } catch (RuntimeException e) {
329            // Don't kill the keyboard if there is a bug in the spell checker
330            if (DBG) {
331                throw e;
332            } else {
333                Log.e(TAG, "Exception while spellcheking", e);
334                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
335                        false /* reportAsTypo */);
336            }
337        }
338    }
339
340    /*
341     * The spell checker acts on its own behalf. That is needed, in particular, to be able to
342     * access the dictionary files, which the provider restricts to the identity of Latin IME.
343     * Since it's called externally by the application, the spell checker is using the identity
344     * of the application by default unless we clearCallingIdentity.
345     * That's what the following method does.
346     */
347    @Override
348    public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
349            final int suggestionsLimit) {
350        long ident = Binder.clearCallingIdentity();
351        try {
352            return onGetSuggestionsInternal(textInfo, suggestionsLimit);
353        } finally {
354            Binder.restoreCallingIdentity(ident);
355        }
356    }
357}
358