AndroidWordLevelSpellCheckerSession.java revision 289299bf66de5fb0c8a378f2366c0760da27077b
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.spellcheck;

import android.content.ContentResolver;
import android.database.ContentObserver;
import android.os.Binder;
import android.provider.UserDictionary.Words;
import android.service.textservice.SpellCheckerService.Session;
import android.text.TextUtils;
import android.util.Log;
import android.util.LruCache;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;

import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer;
import com.android.inputmethod.latin.utils.CoordinateUtils;
import com.android.inputmethod.latin.utils.LocaleUtils;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.ArrayList;
import java.util.Locale;

/**
 * Spell checker session that checks one word at a time.
 *
 * Results of full lookups are kept in a small LRU cache keyed on the checked word (and the
 * previous word when one is supplied). The cache is cleared whenever a change is reported on
 * the user dictionary content URI.
 */
public abstract class AndroidWordLevelSpellCheckerSession extends Session {
    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
    private static final boolean DBG = false;

    // Immutable, but need the locale which is not available in the constructor yet
    private DictionaryPool mDictionaryPool;
    // Likewise
    private Locale mLocale;
    // Cache this for performance
    private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
    private final AndroidSpellCheckerService mService;
    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
    private final ContentObserver mObserver;

    /** Holder for a cached lookup result: the suggestions and the result flags. */
    private static final class SuggestionsParams {
        public final String[] mSuggestions;
        public final int mFlags;

        public SuggestionsParams(String[] suggestions, int flags) {
            mSuggestions = suggestions;
            mFlags = flags;
        }
    }

    /** LRU cache of lookup results, keyed on the query and, when present, the previous word. */
    protected static final class SuggestionsCache {
        // Separates the query from the previous word inside a cache key.
        private static final char CHAR_DELIMITER = '\uFFFC';
        // Maximum number of entries kept in the cache.
        private static final int MAX_CACHE_SIZE = 50;
        private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
                new LruCache<>(MAX_CACHE_SIZE);

        // TODO: Support n-gram input
        /**
         * Builds the cache key for a query.
         *
         * @param query the word being checked.
         * @param prevWordsInfo information about the previous word; may be null, in which case
         *        the key is the query alone.
         * @return the query itself when there is no usable previous word, otherwise the query
         *         and the previous word joined by {@link #CHAR_DELIMITER}.
         */
        private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) {
            // The null check matters: the word-level entry point of this class passes a null
            // PrevWordsInfo, and dereferencing it here would throw on every lookup.
            if (TextUtils.isEmpty(query) || null == prevWordsInfo
                    || TextUtils.isEmpty(prevWordsInfo.mPrevWord)) {
                return query;
            }
            return query + CHAR_DELIMITER + prevWordsInfo.mPrevWord;
        }

        /**
         * Looks up a previously cached result.
         *
         * @param query the word being checked.
         * @param prevWordsInfo information about the previous word; may be null.
         * @return the cached result, or null if there is none.
         */
        public SuggestionsParams getSuggestionsFromCache(String query,
                final PrevWordsInfo prevWordsInfo) {
            return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo));
        }

        /**
         * Stores a result in the cache. Does nothing if the suggestions are null or the query
         * is empty.
         *
         * @param query the word that was checked.
         * @param prevWordsInfo information about the previous word; may be null.
         * @param suggestions the suggestions to cache.
         * @param flags the result flags to cache.
         */
        public void putSuggestionsToCache(
                final String query, final PrevWordsInfo prevWordsInfo,
                final String[] suggestions, final int flags) {
            if (suggestions == null || TextUtils.isEmpty(query)) {
                return;
            }
            mUnigramSuggestionsInfoCache.put(
                    generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags));
        }

        /** Drops every cached result. */
        public void clearCache() {
            mUnigramSuggestionsInfoCache.evictAll();
        }
    }

    /**
     * Creates the session and starts observing the user dictionary so that cached results are
     * discarded when it changes. The observer is unregistered in {@link #onClose()}.
     *
     * @param service the spell checker service this session belongs to.
     */
    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
        mService = service;
        final ContentResolver cres = service.getContentResolver();

        mObserver = new ContentObserver(null) {
            @Override
            public void onChange(boolean self) {
                mSuggestionsCache.clearCache();
            }
        };
        cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
    }

    /** Resolves the dictionary pool, locale and script from the session's locale string. */
    @Override
    public void onCreate() {
        final String localeString = getLocale();
        mDictionaryPool = mService.getDictionaryPool(localeString);
        mLocale = LocaleUtils.constructLocaleFromString(localeString);
        mScript = ScriptUtils.getScriptFromLocale(mLocale);
    }

    /** Unregisters the user dictionary observer registered by the constructor. */
    @Override
    public void onClose() {
        final ContentResolver cres = mService.getContentResolver();
        cres.unregisterContentObserver(mObserver);
    }

    private static final int CHECKABILITY_CHECKABLE = 0;
    private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
    private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
    private static final int CHECKABILITY_EMAIL_OR_URL = 3;
    private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
    private static final int CHECKABILITY_TOO_SHORT = 5;

    /**
     * Finds out whether a particular string should be filtered out of spell checking.
     *
     * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
     * we know we will never recognize, this accepts a script identifier as understood by
     * {@link ScriptUtils}, to rule out quickly characters from very different languages.
     *
     * @param text the string to evaluate.
     * @param script the identifier for the script this spell checker recognizes
     * @return one of the CHECKABILITY_* constants above.
     */
    private static int getCheckabilityInScript(final String text, final int script) {
        if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;

        // TODO: check if an equivalent processing can't be done more quickly with a
        // compiled regexp.
        // Filter by first letter
        final int firstCodePoint = text.codePointAt(0);
        // Filter out words that don't start with a letter or an apostrophe
        if (!ScriptUtils.isLetterCheckableByScript(firstCodePoint, script)
                && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;

        // Filter contents
        final int length = text.length();
        int letterCount = 0;
        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
            final int codePoint = text.codePointAt(i);
            // Any word containing a COMMERCIAL_AT is probably an e-mail address
            // Any word containing a SLASH is probably either an ad-hoc combination of two
            // words or a URI - in either case we don't want to spell check that
            if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
                return CHECKABILITY_EMAIL_OR_URL;
            }
            // If the string contains a period, native returns strange suggestions (it seems
            // to return suggestions for everything up to the period only and to ignore the
            // rest), so we suppress lookup if there is a period.
            // TODO: investigate why native returns these suggestions and remove this code.
            if (Constants.CODE_PERIOD == codePoint) {
                return CHECKABILITY_CONTAINS_PERIOD;
            }
            if (ScriptUtils.isLetterCheckableByScript(codePoint, script)) ++letterCount;
        }
        // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
        // in this word are letters
        return (letterCount * 4 < length * 3)
                ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
    }

    /**
     * Helper method to test valid capitalizations of a word.
     *
     * If the "text" is lower-case, we test only the exact string.
     * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
     *  version of it "text".
     * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
     *  version of it "text" and the capitalized version of it "Text".
     *
     * @param dict the dictionary to look the word up in.
     * @param text the word to test.
     * @param capitalizeType the capitalization type of the word, one of the
     *        StringUtils.CAPITALIZE_* values.
     * @return true if any of the tested forms is a valid word in the dictionary.
     */
    private boolean isInDictForAnyCapitalization(final Dictionary dict, final String text,
            final int capitalizeType) {
        // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
        // case versions, but only if the word is not already all-lower case or mixed case.
        if (dict.isValidWord(text)) return true;
        if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;

        // If we come here, we have a capitalized word (either First- or All-).
        // Downcase the word and look it up again. If the word is only capitalized, we
        // tested all possibilities, so if it's still negative we can return false.
        final String lowerCaseText = text.toLowerCase(mLocale);
        if (dict.isValidWord(lowerCaseText)) return true;
        if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;

        // If the lower case version is not in the dictionary, it's still possible
        // that we have an all-caps version of a word that needs to be capitalized
        // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
        return dict.isValidWord(StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
    }

    // Note : this must be reentrant
    /**
     * Gets a list of suggestions for a specific string.  This returns a list of possible
     * corrections for the text passed as an argument. It may split or group words, and
     * even perform grammatical analysis.
     *
     * This overload performs the lookup without any previous-word information.
     */
    private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
            final int suggestionsLimit) {
        return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
    }

    /**
     * Gets suggestions for a word, optionally taking the previous word into account.
     *
     * A cached result is returned when there is one. Text that is not deemed checkable is
     * only looked up for validity (text containing a period may additionally get a single
     * suggestion made of its period-separated parts). Checkable text goes through a full
     * suggestion lookup, and that result is cached.
     *
     * Runtime exceptions are logged and turned into an empty "not in dictionary" result,
     * unless {@link #DBG} is set, in which case they are rethrown.
     *
     * @param textInfo the text to check.
     * @param prevWordsInfo information about the previous word; null when there is none.
     * @param suggestionsLimit the maximum number of suggestions to gather.
     * @return the spell checking result for the text.
     */
    protected SuggestionsInfo onGetSuggestionsInternal(
            final TextInfo textInfo, final PrevWordsInfo prevWordsInfo,
            final int suggestionsLimit) {
        try {
            final String inText = textInfo.getText();
            final SuggestionsParams cachedSuggestionsParams =
                    mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo);
            if (cachedSuggestionsParams != null) {
                if (DBG) {
                    Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
                }
                return new SuggestionsInfo(
                        cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
            }

            final int checkability = getCheckabilityInScript(inText, mScript);
            if (CHECKABILITY_CHECKABLE != checkability) {
                DictAndKeyboard dictInfo = null;
                try {
                    dictInfo = mDictionaryPool.pollWithDefaultTimeout();
                    if (!DictionaryPool.isAValidDictionary(dictInfo)) {
                        return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                                false /* reportAsTypo */);
                    }
                    if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
                        // If every period-separated part is a valid word, report a typo and
                        // suggest the parts joined with Constants.STRING_SPACE.
                        final String[] splitText = inText.split(Constants.REGEXP_PERIOD);
                        boolean allWordsAreValid = true;
                        for (final String word : splitText) {
                            if (!dictInfo.mDictionary.isValidWord(word)) {
                                allWordsAreValid = false;
                                break;
                            }
                        }
                        if (allWordsAreValid) {
                            return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
                                    | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
                                    new String[] {
                                            TextUtils.join(Constants.STRING_SPACE, splitText) });
                        }
                    }
                    // Unknown, uncheckable text is reported as a typo only if it has a period.
                    return dictInfo.mDictionary.isValidWord(inText)
                            ? AndroidSpellCheckerService.getInDictEmptySuggestions()
                            : AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                                    CHECKABILITY_CONTAINS_PERIOD == checkability
                                    /* reportAsTypo */);
                } finally {
                    // Always hand the dictionary back to the pool, even on early return.
                    if (null != dictInfo) {
                        if (!mDictionaryPool.offer(dictInfo)) {
                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
                        }
                    }
                }
            }
            final String text = inText.replaceAll(
                    AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);

            // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
            final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer(
                    text, suggestionsLimit);

            final int capitalizeType = StringUtils.getCapitalizationType(text);
            boolean isInDict = true;
            DictAndKeyboard dictInfo = null;
            try {
                dictInfo = mDictionaryPool.pollWithDefaultTimeout();
                if (!DictionaryPool.isAValidDictionary(dictInfo)) {
                    return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                            false /* reportAsTypo */);
                }
                final WordComposer composer = new WordComposer();
                final int[] codePoints = StringUtils.toCodePointArray(text);
                final int[] coordinates;
                if (null == dictInfo.mKeyboard) {
                    coordinates = CoordinateUtils.newCoordinateArray(codePoints.length,
                            Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
                } else {
                    coordinates = dictInfo.mKeyboard.getCoordinates(codePoints);
                }
                composer.setComposingWord(codePoints, coordinates);
                // TODO: make a spell checker option to block offensive words or not
                final ArrayList<SuggestedWordInfo> suggestions =
                        dictInfo.mDictionary.getSuggestions(composer, prevWordsInfo,
                                dictInfo.getProximityInfo(), true /* blockOffensiveWords */,
                                null /* additionalFeaturesOptions */, 0 /* sessionId */,
                                null /* inOutLanguageWeight */);
                if (suggestions != null) {
                    for (final SuggestedWordInfo suggestion : suggestions) {
                        final String suggestionStr = suggestion.mWord;
                        suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0,
                                suggestionStr.length(), suggestion.mScore);
                    }
                }
                isInDict = isInDictForAnyCapitalization(dictInfo.mDictionary, text, capitalizeType);
            } finally {
                // Always hand the dictionary back to the pool, even on early return.
                if (null != dictInfo) {
                    if (!mDictionaryPool.offer(dictInfo)) {
                        Log.e(TAG, "Can't re-insert a dictionary into its pool");
                    }
                }
            }

            final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
                    capitalizeType, mLocale);

            if (DBG) {
                Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
                        + suggestionsLimit);
                Log.i(TAG, "IsInDict = " + isInDict);
                Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
                Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
                if (null != result.mSuggestions) {
                    for (String suggestion : result.mSuggestions) {
                        Log.i(TAG, suggestion);
                    }
                }
            }

            final int flags =
                    (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
                            : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
                    | (result.mHasRecommendedSuggestions
                            ? SuggestionsInfoCompatUtils
                                    .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
                            : 0);
            final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
            // Store under the original input, which is also what the lookup at the top of this
            // method uses; storing under the apostrophe-normalized text would make the entry
            // unreachable for input whose apostrophes were rewritten above.
            mSuggestionsCache.putSuggestionsToCache(inText, prevWordsInfo, result.mSuggestions,
                    flags);
            return retval;
        } catch (RuntimeException e) {
            // Don't kill the keyboard if there is a bug in the spell checker
            if (DBG) {
                throw e;
            } else {
                Log.e(TAG, "Exception while spellcheking", e);
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                        false /* reportAsTypo */);
            }
        }
    }

    /*
     * The spell checker acts on its own behalf. That is needed, in particular, to be able to
     * access the dictionary files, which the provider restricts to the identity of Latin IME.
     * Since it's called externally by the application, the spell checker is using the identity
     * of the application by default unless we clearCallingIdentity.
     * That's what the following method does.
     */
    @Override
    public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
            final int suggestionsLimit) {
        long ident = Binder.clearCallingIdentity();
        try {
            return onGetSuggestionsInternal(textInfo, suggestionsLimit);
        } finally {
            Binder.restoreCallingIdentity(ident);
        }
    }
}