AndroidWordLevelSpellCheckerSession.java revision 292deb632cbab232334190e68d29184094d6d51b
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.spellcheck;

import android.content.ContentResolver;
import android.database.ContentObserver;
import android.os.Binder;
import android.provider.UserDictionary.Words;
import android.service.textservice.SpellCheckerService.Session;
import android.text.TextUtils;
import android.util.Log;
import android.util.LruCache;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;

import com.android.inputmethod.compat.SuggestionsInfoCompatUtils;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer;
import com.android.inputmethod.latin.utils.CoordinateUtils;
import com.android.inputmethod.latin.utils.LocaleUtils;
import com.android.inputmethod.latin.utils.ScriptUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.ArrayList;
import java.util.Locale;

/**
 * Spell checker session that checks one word at a time.
 *
 * The session borrows a dictionary from the service's {@link DictionaryPool} for each lookup,
 * caches the results of checkable words in a small LRU cache, and drops that cache whenever the
 * user dictionary content provider reports a change.
 */
public abstract class AndroidWordLevelSpellCheckerSession extends Session {
    private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName();
    private static final boolean DBG = false;

    // Immutable, but need the locale which is not available in the constructor yet
    private DictionaryPool mDictionaryPool;
    // Likewise
    private Locale mLocale;
    // Cache this for performance
    private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now.
    private final AndroidSpellCheckerService mService;
    protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache();
    private final ContentObserver mObserver;

    /** Immutable pair of a suggestion list and the result flags that go with it. */
    private static final class SuggestionsParams {
        public final String[] mSuggestions;
        public final int mFlags;

        public SuggestionsParams(String[] suggestions, int flags) {
            mSuggestions = suggestions;
            mFlags = flags;
        }
    }

    /** LRU cache of spell checking results, keyed by the query and its previous-words context. */
    protected static final class SuggestionsCache {
        private static final char CHAR_DELIMITER = '\uFFFC';
        private static final int MAX_CACHE_SIZE = 50;
        private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache =
                new LruCache<>(MAX_CACHE_SIZE);

        // TODO: Support n-gram input
        /**
         * Builds the cache key for a query.
         *
         * @param query the word being checked.
         * @param prevWordsInfo the previous-words context; may be null, which is treated the
         *        same way as an invalid context.
         * @return the query alone when there is no usable context or the query is empty,
         *         otherwise the query, a delimiter, and the string form of the context.
         */
        private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) {
            // The word-level entry point passes a null context, so it must not be dereferenced
            // unconditionally.
            if (TextUtils.isEmpty(query) || null == prevWordsInfo || !prevWordsInfo.isValid()) {
                return query;
            }
            return query + CHAR_DELIMITER + prevWordsInfo;
        }

        /**
         * Looks up a previously stored result.
         *
         * @param query the word being checked.
         * @param prevWordsInfo the previous-words context; may be null.
         * @return the cached result, or whatever the underlying LruCache returns on a miss.
         */
        public SuggestionsParams getSuggestionsFromCache(String query,
                final PrevWordsInfo prevWordsInfo) {
            return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo));
        }

        /**
         * Stores a result. Nothing is stored when the suggestions are null or the query is empty.
         *
         * @param query the word that was checked.
         * @param prevWordsInfo the previous-words context; may be null.
         * @param suggestions the suggestions to remember.
         * @param flags the result flags to remember.
         */
        public void putSuggestionsToCache(
                final String query, final PrevWordsInfo prevWordsInfo,
                final String[] suggestions, final int flags) {
            if (suggestions == null || TextUtils.isEmpty(query)) {
                return;
            }
            mUnigramSuggestionsInfoCache.put(
                    generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags));
        }

        /** Drops every cached result. */
        public void clearCache() {
            mUnigramSuggestionsInfoCache.evictAll();
        }
    }

    /**
     * Creates the session and starts observing the user dictionary so that cached results are
     * dropped whenever its content changes.
     *
     * @param service the spell checker service this session belongs to.
     */
    AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) {
        mService = service;
        final ContentResolver cres = service.getContentResolver();

        mObserver = new ContentObserver(null) {
            @Override
            public void onChange(boolean self) {
                mSuggestionsCache.clearCache();
            }
        };
        cres.registerContentObserver(Words.CONTENT_URI, true, mObserver);
    }

    /** Resolves the locale-dependent state: dictionary pool, locale and script. */
    @Override
    public void onCreate() {
        final String localeString = getLocale();
        mDictionaryPool = mService.getDictionaryPool(localeString);
        mLocale = LocaleUtils.constructLocaleFromString(localeString);
        mScript = ScriptUtils.getScriptFromLocale(mLocale);
    }

    /** Stops observing the user dictionary. */
    @Override
    public void onClose() {
        final ContentResolver cres = mService.getContentResolver();
        cres.unregisterContentObserver(mObserver);
    }

    private static final int CHECKABILITY_CHECKABLE = 0;
    private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1;
    private static final int CHECKABILITY_CONTAINS_PERIOD = 2;
    private static final int CHECKABILITY_EMAIL_OR_URL = 3;
    private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4;
    private static final int CHECKABILITY_TOO_SHORT = 5;

    /**
     * Finds out whether a particular string should be filtered out of spell checking.
     *
     * This will loosely match URLs, numbers, symbols. To avoid always underlining words that
     * we know we will never recognize, this accepts a script identifier, as returned by
     * {@code ScriptUtils.getScriptFromLocale}, to rule out quickly characters from very
     * different languages.
     *
     * @param text the string to evaluate.
     * @param script the identifier for the script this spell checker recognizes
     * @return one of the CHECKABILITY_* constants above.
     */
    private static int getCheckabilityInScript(final String text, final int script) {
        if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT;

        // TODO: check if an equivalent processing can't be done more quickly with a
        // compiled regexp.
        // Filter by first letter
        final int firstCodePoint = text.codePointAt(0);
        // Filter out words that don't start with a letter or an apostrophe
        if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script)
                && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE;

        // Filter contents
        final int length = text.length();
        int letterCount = 0;
        for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) {
            final int codePoint = text.codePointAt(i);
            // Any word containing a COMMERCIAL_AT is probably an e-mail address
            // Any word containing a SLASH is probably either an ad-hoc combination of two
            // words or a URI - in either case we don't want to spell check that
            if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) {
                return CHECKABILITY_EMAIL_OR_URL;
            }
            // If the string contains a period, native returns strange suggestions (it seems
            // to return suggestions for everything up to the period only and to ignore the
            // rest), so we suppress lookup if there is a period.
            // TODO: investigate why native returns these suggestions and remove this code.
            if (Constants.CODE_PERIOD == codePoint) {
                return CHECKABILITY_CONTAINS_PERIOD;
            }
            if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount;
        }
        // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
        // in this word are letters
        return (letterCount * 4 < length * 3)
                ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE;
    }

    /**
     * Helper method to test valid capitalizations of a word.
     *
     * If the "text" is lower-case, we test only the exact string.
     * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased
     *  version of it "text".
     * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased
     *  version of it "text" and the capitalized version of it "Text".
     *
     * @param dict the dictionary to look the word up in.
     * @param text the word to test.
     * @param capitalizeType the capitalization of {@code text}, one of the
     *        StringUtils.CAPITALIZE_* values.
     * @return true if any of the tested forms is a valid word in {@code dict}.
     */
    private boolean isInDictForAnyCapitalization(final Dictionary dict, final String text,
            final int capitalizeType) {
        // If the word is in there as is, then it's in the dictionary. If not, we'll test lower
        // case versions, but only if the word is not already all-lower case or mixed case.
        if (dict.isValidWord(text)) return true;
        if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false;

        // If we come here, we have a capitalized word (either First- or All-).
        // Downcase the word and look it up again. If the word is only capitalized, we
        // tested all possibilities, so if it's still negative we can return false.
        final String lowerCaseText = text.toLowerCase(mLocale);
        if (dict.isValidWord(lowerCaseText)) return true;
        if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false;

        // If the lower case version is not in the dictionary, it's still possible
        // that we have an all-caps version of a word that needs to be capitalized
        // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans".
        return dict.isValidWord(StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale));
    }

    // Note : this must be reentrant
    /**
     * Gets a list of suggestions for a specific string. This returns a list of possible
     * corrections for the text passed as an argument. It may split or group words, and
     * even perform grammatical analysis.
     *
     * This overload checks the word without any previous-words context.
     *
     * @param textInfo the text to check.
     * @param suggestionsLimit the maximum number of suggestions requested.
     * @return the spell checking result for the text.
     */
    private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo,
            final int suggestionsLimit) {
        return onGetSuggestionsInternal(textInfo, null, suggestionsLimit);
    }

    /**
     * Checks a single word, optionally in the context of the words that precede it.
     *
     * Results for checkable words are served from and stored into {@link #mSuggestionsCache}.
     * Words that are not checkable (too short, e-mail/URL-like, containing a period, ...) are
     * only looked up for validity; a word containing a period whose parts are all valid gets
     * two joined forms as recommended suggestions.
     *
     * Any RuntimeException raised while checking is logged and reported as "not in the
     * dictionary, no suggestions" unless {@link #DBG} is set, in which case it is rethrown.
     *
     * @param textInfo the text to check.
     * @param prevWordsInfo the previous-words context, or null when there is none.
     *        NOTE(review): null is forwarded as-is to Dictionary#getSuggestions; confirm that
     *        implementation accepts it.
     * @param suggestionsLimit the maximum number of suggestions requested.
     * @return the spell checking result for the text.
     */
    protected SuggestionsInfo onGetSuggestionsInternal(
            final TextInfo textInfo, final PrevWordsInfo prevWordsInfo,
            final int suggestionsLimit) {
        try {
            final String inText = textInfo.getText();
            final SuggestionsParams cachedSuggestionsParams =
                    mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo);
            if (cachedSuggestionsParams != null) {
                if (DBG) {
                    Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags);
                }
                return new SuggestionsInfo(
                        cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions);
            }

            final int checkability = getCheckabilityInScript(inText, mScript);
            if (CHECKABILITY_CHECKABLE != checkability) {
                DictAndKeyboard dictInfo = null;
                try {
                    dictInfo = mDictionaryPool.pollWithDefaultTimeout();
                    if (!DictionaryPool.isAValidDictionary(dictInfo)) {
                        return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                                false /* reportAsTypo */);
                    }
                    if (CHECKABILITY_CONTAINS_PERIOD == checkability) {
                        final String[] splitText = inText.split(Constants.REGEXP_PERIOD);
                        boolean allWordsAreValid = true;
                        for (final String word : splitText) {
                            if (!dictInfo.mDictionary.isValidWord(word)) {
                                allWordsAreValid = false;
                                break;
                            }
                        }
                        if (allWordsAreValid) {
                            return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO
                                    | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS,
                                    new String[] {
                                            TextUtils.join(Constants.STRING_SPACE, splitText),
                                            TextUtils.join(Constants.STRING_PERIOD_AND_SPACE,
                                                    splitText) });
                        }
                    }
                    return dictInfo.mDictionary.isValidWord(inText)
                            ? AndroidSpellCheckerService.getInDictEmptySuggestions()
                            : AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                                    CHECKABILITY_CONTAINS_PERIOD == checkability
                                    /* reportAsTypo */);
                } finally {
                    // Always hand the borrowed dictionary back to the pool.
                    if (null != dictInfo) {
                        if (!mDictionaryPool.offer(dictInfo)) {
                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
                        }
                    }
                }
            }
            final String text = inText.replaceAll(
                    AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE);

            // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
            //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
            //mService.mSuggestionThreshold, mService.mRecommendedThreshold,
            //suggestionsLimit);
            final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer(
                    text, suggestionsLimit);

            final int capitalizeType = StringUtils.getCapitalizationType(text);
            boolean isInDict = true;
            DictAndKeyboard dictInfo = null;
            try {
                dictInfo = mDictionaryPool.pollWithDefaultTimeout();
                if (!DictionaryPool.isAValidDictionary(dictInfo)) {
                    return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                            false /* reportAsTypo */);
                }
                final WordComposer composer = new WordComposer();
                final int[] codePoints = StringUtils.toCodePointArray(text);
                final int[] coordinates;
                if (null == dictInfo.mKeyboard) {
                    coordinates = CoordinateUtils.newCoordinateArray(codePoints.length,
                            Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE);
                } else {
                    coordinates = dictInfo.mKeyboard.getCoordinates(codePoints);
                }
                composer.setComposingWord(codePoints, coordinates);
                // TODO: make a spell checker option to block offensive words or not
                final ArrayList<SuggestedWordInfo> suggestions =
                        dictInfo.mDictionary.getSuggestions(composer, prevWordsInfo,
                                dictInfo.getProximityInfo(), true /* blockOffensiveWords */,
                                null /* additionalFeaturesOptions */, 0 /* sessionId */,
                                null /* inOutLanguageWeight */);
                if (suggestions != null) {
                    for (final SuggestedWordInfo suggestion : suggestions) {
                        final String suggestionStr = suggestion.mWord;
                        suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0,
                                suggestionStr.length(), suggestion.mScore);
                    }
                }
                isInDict = isInDictForAnyCapitalization(dictInfo.mDictionary, text, capitalizeType);
            } finally {
                // Always hand the borrowed dictionary back to the pool.
                if (null != dictInfo) {
                    if (!mDictionaryPool.offer(dictInfo)) {
                        Log.e(TAG, "Can't re-insert a dictionary into its pool");
                    }
                }
            }

            final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
                    capitalizeType, mLocale);

            if (DBG) {
                Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
                        + suggestionsLimit);
                Log.i(TAG, "IsInDict = " + isInDict);
                Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
                Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions);
                if (null != result.mSuggestions) {
                    for (String suggestion : result.mSuggestions) {
                        Log.i(TAG, suggestion);
                    }
                }
            }

            final int flags =
                    (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
                            : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO)
                    | (result.mHasRecommendedSuggestions
                            ? SuggestionsInfoCompatUtils
                                    .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS()
                            : 0);
            final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions);
            // Store under the same key the lookup at the top of this method uses (the raw input
            // text); storing under the apostrophe-normalized text would make the entry
            // unreachable whenever normalization changed the string.
            mSuggestionsCache.putSuggestionsToCache(inText, prevWordsInfo, result.mSuggestions,
                    flags);
            return retval;
        } catch (RuntimeException e) {
            // Don't kill the keyboard if there is a bug in the spell checker
            if (DBG) {
                throw e;
            } else {
                Log.e(TAG, "Exception while spellcheking", e);
                return AndroidSpellCheckerService.getNotInDictEmptySuggestions(
                        false /* reportAsTypo */);
            }
        }
    }

    /*
     * The spell checker acts on its own behalf. That is needed, in particular, to be able to
     * access the dictionary files, which the provider restricts to the identity of Latin IME.
     * Since it's called externally by the application, the spell checker is using the identity
     * of the application by default unless we clearCallingIdentity.
     * That's what the following method does.
     */
    @Override
    public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
            final int suggestionsLimit) {
        long ident = Binder.clearCallingIdentity();
        try {
            return onGetSuggestionsInternal(textInfo, suggestionsLimit);
        } finally {
            // Restore the caller's identity no matter how the lookup ended.
            Binder.restoreCallingIdentity(ident);
        }
    }
}