AndroidWordLevelSpellCheckerSession.java revision a28a05e971cc242b338331a3b78276fa95188d19
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.spellcheck;
18
19import android.content.ContentResolver;
20import android.database.ContentObserver;
21import android.provider.UserDictionary.Words;
22import android.service.textservice.SpellCheckerService.Session;
23import android.text.TextUtils;
24import android.util.Log;
25import android.util.LruCache;
26import android.view.textservice.SuggestionsInfo;
27import android.view.textservice.TextInfo;
28
29import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
30import com.android.inputmethod.latin.Constants;
31import com.android.inputmethod.latin.LocaleUtils;
32import com.android.inputmethod.latin.WordComposer;
33import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
34import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer;
35
36import java.util.ArrayList;
37import java.util.Locale;
38
39public abstract class AndroidWordLevelSpellCheckerSession extends Session {
    // Tag used for every log line emitted by this class.
    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
    // Debug switch: when true, cache hits and spell checking results are logged, and runtime
    // exceptions raised while spell checking are rethrown instead of being swallowed.
    private static final boolean DBG = false;

    // Assigned once in onCreate() and not reassigned elsewhere in this class. It cannot be
    // final because it depends on the locale, which is not available in the constructor yet.
    private DictionaryPool mDictionaryPool;
    // Likewise: assigned once in onCreate().
    private Locale mLocale;
    // Derived from mLocale in onCreate() and cached for performance.
    private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
    // Service this session was created for; supplies the dictionary pool, the content
    // resolver and the suggestions gatherers used below.
    private final AndroidSpellCheckerService mService;
    // Cache of previously computed results; cleared by mObserver.
    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
    // Observer on Words.CONTENT_URI that clears mSuggestionsCache on change. Registered in
    // the constructor and unregistered in onClose().
    private final ContentObserver mObserver;
52
53    private static final class SuggestionsParams {
54        public final String[] mSuggestions;
55        public final int mFlags;
56        public SuggestionsParams(String[] suggestions, int flags) {
57            mSuggestions = suggestions;
58            mFlags = flags;
59        }
60    }
61
62    protected static final class SuggestionsCache {
63        private static final char CHAR_DELIMITER = '\uFFFC';
64        private static final int MAX_CACHE_SIZE = 50;
65        private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
66                new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE);
67
68        // TODO: Support n-gram input
69        private static String generateKey(String query, String prevWord) {
70            if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) {
71                return query;
72            }
73            return query + CHAR_DELIMITER + prevWord;
74        }
75
76        // TODO: Support n-gram input
77        public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) {
78            return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord));
79        }
80
81        // TODO: Support n-gram input
82        public void putSuggestionsToCache(
83                String query, String prevWord, String[] suggestions, int flags) {
84            if (suggestions == null || TextUtils.isEmpty(query)) {
85                return;
86            }
87            mUnigramSuggestionsInfoCache.put(
88                    generateKey(query, prevWord), new SuggestionsParams(suggestions, flags));
89        }
90
91        public void clearCache() {
92            mUnigramSuggestionsInfoCache.evictAll();
93        }
94    }
95
96    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
97        mService = service;
98        final ContentResolver cres = service.getContentResolver();
99
100        mObserver = new ContentObserver(null) {
101            @Override
102            public void onChange(boolean self) {
103                mSuggestionsCache.clearCache();
104            }
105        };
106        cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
107    }
108
109    @Override
110    public void onCreate() {
111        final String localeString = getLocale();
112        mDictionaryPool = mService.getDictionaryPool(localeString);
113        mLocale = LocaleUtils.constructLocaleFromString(localeString);
114        mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale);
115    }
116
117    @Override
118    public void onClose() {
119        final ContentResolver cres = mService.getContentResolver();
120        cres.unregisterContentObserver(mObserver);
121    }
122
123    /*
124     * Returns whether the code point is a letter that makes sense for the specified
125     * locale for this spell checker.
126     * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml
127     * and is limited to EFIGS languages and Russian.
128     * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters
129     * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters.
130     */
131    private static boolean isLetterCheckableByLanguage(final int codePoint,
132            final int script) {
133        switch (script) {
134        case AndroidSpellCheckerService.SCRIPT_LATIN:
135            // Our supported latin script dictionaries (EFIGS) at the moment only include
136            // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode
137            // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF,
138            // so the below is a very efficient way to test for it. As for the 0-0x3F, it's
139            // excluded from isLetter anyway.
140            return codePoint <= 0x2AF && Character.isLetter(codePoint);
141        case AndroidSpellCheckerService.SCRIPT_CYRILLIC:
142            // All Cyrillic characters are in the 400~52F block. There are some in the upper
143            // Unicode range, but they are archaic characters that are not used in modern
144            // russian and are not used by our dictionary.
145            return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint);
146        default:
147            // Should never come here
148            throw new RuntimeException("Impossible value of script: " + script);
149        }
150    }
151
152    /**
153     * Finds out whether a particular string should be filtered out of spell checking.
154     *
155     * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
156     * we know we will never recognize, this accepts a script identifier that should be one
157     * of the SCRIPT_* constants defined above, to rule out quickly characters from very
158     * different languages.
159     *
160     * @param text the string to evaluate.
161     * @param script the identifier for the script this spell checker recognizes
162     * @return true if we should filter this text out, false otherwise
163     */
164    private static boolean shouldFilterOut(final String text, final int script) {
165        if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
166
167        // TODO: check if an equivalent processing can't be done more quickly with a
168        // compiled regexp.
169        // Filter by first letter
170        final int firstCodePoint = text.codePointAt(0);
171        // Filter out words that don't start with a letter or an apostrophe
172        if (!isLetterCheckableByLanguage(firstCodePoint, script)
173                && '\'' != firstCodePoint) return true;
174
175        // Filter contents
176        final int length = text.length();
177        int letterCount = 0;
178        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
179            final int codePoint = text.codePointAt(i);
180            // Any word containing a '@' is probably an e-mail address
181            // Any word containing a '/' is probably either an ad-hoc combination of two
182            // words or a URI - in either case we don't want to spell check that
183            if ('@' == codePoint || '/' == codePoint) return true;
184            if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount;
185        }
186        // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
187        // in this word are letters
188        return (letterCount * 4 < length * 3);
189    }
190
191    // Note : this must be reentrant
192    /**
193     * Gets a list of suggestions for a specific string. This returns a list of possible
194     * corrections for the text passed as an argument. It may split or group words, and
195     * even perform grammatical analysis.
196     */
197    @Override
198    public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
199            final int suggestionsLimit) {
200        return onGetSuggestions(textInfo, null, suggestionsLimit);
201    }
202
203    protected SuggestionsInfo onGetSuggestions(
204            final TextInfo textInfo, final String prevWord, final int suggestionsLimit) {
205        try {
206            final String inText = textInfo.getText();
207            final SuggestionsParams cachedSuggestionsParams =
208                    mSuggestionsCache.getSuggestionsFromCache(inText, prevWord);
209            if (cachedSuggestionsParams != null) {
210                if (DBG) {
211                    Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
212                }
213                return new SuggestionsInfo(
214                        cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
215            }
216
217            if (shouldFilterOut(inText, mScript)) {
218                DictAndProximity dictInfo = null;
219                try {
220                    dictInfo = mDictionaryPool.pollWithDefaultTimeout();
221                    if (!DictionaryPool.isAValidDictionary(dictInfo)) {
222                        return AndroidSpellCheckerService.getNotInDictEmptySuggestions();
223                    }
224                    return dictInfo.mDictionary.isValidWord(inText)
225                            ? AndroidSpellCheckerService.getInDictEmptySuggestions()
226                            : AndroidSpellCheckerService.getNotInDictEmptySuggestions();
227                } finally {
228                    if (null != dictInfo) {
229                        if (!mDictionaryPool.offer(dictInfo)) {
230                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
231                        }
232                    }
233                }
234            }
235            final String text = inText.replaceAll(
236                    AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);
237
238            // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
239            //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
240            //mService.mSuggestionThreshold, mService.mRecommendedThreshold,
241            //suggestionsLimit);
242            final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer(
243                    text, suggestionsLimit);
244            final WordComposer composer = new WordComposer();
245            final int length = text.length();
246            for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
247                final int codePoint = text.codePointAt(i);
248                // The getXYForCodePointAndScript method returns (Y << 16) + X
249                final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript(
250                        codePoint, mScript);
251                if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) {
252                    composer.add(codePoint,
253                            Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
254                } else {
255                    composer.add(codePoint, xy & 0xFFFF, xy >> 16);
256                }
257            }
258
259            final int capitalizeType = AndroidSpellCheckerService.getCapitalizationType(text);
260            boolean isInDict = true;
261            DictAndProximity dictInfo = null;
262            try {
263                dictInfo = mDictionaryPool.pollWithDefaultTimeout();
264                if (!DictionaryPool.isAValidDictionary(dictInfo)) {
265                    return AndroidSpellCheckerService.getNotInDictEmptySuggestions();
266                }
267                final ArrayList<SuggestedWordInfo> suggestions =
268                        dictInfo.mDictionary.getSuggestions(composer, prevWord,
269                                dictInfo.mProximityInfo);
270                for (final SuggestedWordInfo suggestion : suggestions) {
271                    final String suggestionStr = suggestion.mWord.toString();
272                    suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0,
273                            suggestionStr.length(), suggestion.mScore);
274                }
275                isInDict = dictInfo.mDictionary.isValidWord(text);
276                if (!isInDict && AndroidSpellCheckerService.CAPITALIZE_NONE != capitalizeType) {
277                    // We want to test the word again if it's all caps or first caps only.
278                    // If it's fully down, we already tested it, if it's mixed case, we don't
279                    // want to test a lowercase version of it.
280                    isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
281                }
282            } finally {
283                if (null != dictInfo) {
284                    if (!mDictionaryPool.offer(dictInfo)) {
285                        Log.e(TAG, "Can't re-insert a dictionary into its pool");
286                    }
287                }
288            }
289
290            final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
291                    capitalizeType, mLocale);
292
293            if (DBG) {
294                Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
295                        + suggestionsLimit);
296                Log.i(TAG, "IsInDict = " + isInDict);
297                Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
298                Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
299                if (null != result.mSuggestions) {
300                    for (String suggestion : result.mSuggestions) {
301                        Log.i(TAG, suggestion);
302                    }
303                }
304            }
305
306            final int flags =
307                    (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
308                            : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
309                    | (result.mHasRecommendedSuggestions
310                            ? SuggestionsInfoCompatUtils
311                                    .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
312                            : 0);
313            final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
314            mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags);
315            return retval;
316        } catch (RuntimeException e) {
317            // Don't kill the keyboard if there is a bug in the spell checker
318            if (DBG) {
319                throw e;
320            } else {
321                Log.e(TAG, "Exception while spellcheking: " + e);
322                return AndroidSpellCheckerService.getNotInDictEmptySuggestions();
323            }
324        }
325    }
326}
327