AndroidWordLevelSpellCheckerSession.java revision 9e76304d6004c43c3149bc2df460af2a00b18a4f
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin.spellcheck; 18 19import android.content.ContentResolver; 20import android.database.ContentObserver; 21import android.os.Binder; 22import android.provider.UserDictionary.Words; 23import android.service.textservice.SpellCheckerService.Session; 24import android.text.TextUtils; 25import android.util.Log; 26import android.util.LruCache; 27import android.view.textservice.SuggestionsInfo; 28import android.view.textservice.TextInfo; 29 30import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; 31import com.android.inputmethod.keyboard.Keyboard; 32import com.android.inputmethod.keyboard.ProximityInfo; 33import com.android.inputmethod.latin.Constants; 34import com.android.inputmethod.latin.PrevWordsInfo; 35import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 36import com.android.inputmethod.latin.WordComposer; 37import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion; 38import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer; 39import com.android.inputmethod.latin.utils.CoordinateUtils; 40import com.android.inputmethod.latin.utils.LocaleUtils; 41import com.android.inputmethod.latin.utils.ScriptUtils; 42import com.android.inputmethod.latin.utils.StringUtils; 43import com.android.inputmethod.latin.utils.SuggestionResults; 44 45import java.util.Locale; 46 47public abstract class AndroidWordLevelSpellCheckerSession extends Session { 48 private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName(); 49 private static final boolean DBG = false; 50 51 // Immutable, but not available in the constructor. 52 private Locale mLocale; 53 // Cache this for performance 54 private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. 55 private final AndroidSpellCheckerService mService; 56 protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); 57 private final ContentObserver mObserver; 58 59 private static final class SuggestionsParams { 60 public final String[] mSuggestions; 61 public final int mFlags; 62 public SuggestionsParams(String[] suggestions, int flags) { 63 mSuggestions = suggestions; 64 mFlags = flags; 65 } 66 } 67 68 protected static final class SuggestionsCache { 69 private static final char CHAR_DELIMITER = '\uFFFC'; 70 private static final int MAX_CACHE_SIZE = 50; 71 private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = 72 new LruCache<>(MAX_CACHE_SIZE); 73 74 // TODO: Support n-gram input 75 private static String generateKey(final String query, final PrevWordsInfo prevWordsInfo) { 76 if (TextUtils.isEmpty(query) || !prevWordsInfo.isValid()) { 77 return query; 78 } 79 return query + CHAR_DELIMITER + prevWordsInfo; 80 } 81 82 public SuggestionsParams getSuggestionsFromCache(String query, 83 final PrevWordsInfo prevWordsInfo) { 84 return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWordsInfo)); 85 } 86 87 public void putSuggestionsToCache( 88 final String query, final PrevWordsInfo prevWordsInfo, 89 final String[] suggestions, final int flags) { 90 if (suggestions == null || TextUtils.isEmpty(query)) { 91 return; 92 } 93 mUnigramSuggestionsInfoCache.put( 94 generateKey(query, prevWordsInfo), new SuggestionsParams(suggestions, flags)); 95 } 96 97 public void clearCache() { 98 mUnigramSuggestionsInfoCache.evictAll(); 99 } 100 } 101 102 AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) { 103 mService = service; 104 final ContentResolver cres = service.getContentResolver(); 105 106 mObserver = new ContentObserver(null) { 107 @Override 108 public void onChange(boolean self) { 109 mSuggestionsCache.clearCache(); 110 } 111 }; 112 cres.registerContentObserver(Words.CONTENT_URI, true, mObserver); 113 } 114 115 @Override 116 public void onCreate() { 117 final String localeString = getLocale(); 118 mLocale = LocaleUtils.constructLocaleFromString(localeString); 119 mScript = ScriptUtils.getScriptFromSpellCheckerLocale(mLocale); 120 } 121 122 @Override 123 public void onClose() { 124 final ContentResolver cres = mService.getContentResolver(); 125 cres.unregisterContentObserver(mObserver); 126 } 127 128 private static final int CHECKABILITY_CHECKABLE = 0; 129 private static final int CHECKABILITY_TOO_MANY_NON_LETTERS = 1; 130 private static final int CHECKABILITY_CONTAINS_PERIOD = 2; 131 private static final int CHECKABILITY_EMAIL_OR_URL = 3; 132 private static final int CHECKABILITY_FIRST_LETTER_UNCHECKABLE = 4; 133 private static final int CHECKABILITY_TOO_SHORT = 5; 134 /** 135 * Finds out whether a particular string should be filtered out of spell checking. 136 * 137 * This will loosely match URLs, numbers, symbols. To avoid always underlining words that 138 * we know we will never recognize, this accepts a script identifier that should be one 139 * of the SCRIPT_* constants defined above, to rule out quickly characters from very 140 * different languages. 141 * 142 * @param text the string to evaluate. 143 * @param script the identifier for the script this spell checker recognizes 144 * @return one of the FILTER_OUT_* constants above. 145 */ 146 private static int getCheckabilityInScript(final String text, final int script) { 147 if (TextUtils.isEmpty(text) || text.length() <= 1) return CHECKABILITY_TOO_SHORT; 148 149 // TODO: check if an equivalent processing can't be done more quickly with a 150 // compiled regexp. 151 // Filter by first letter 152 final int firstCodePoint = text.codePointAt(0); 153 // Filter out words that don't start with a letter or an apostrophe 154 if (!ScriptUtils.isLetterPartOfScript(firstCodePoint, script) 155 && '\'' != firstCodePoint) return CHECKABILITY_FIRST_LETTER_UNCHECKABLE; 156 157 // Filter contents 158 final int length = text.length(); 159 int letterCount = 0; 160 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 161 final int codePoint = text.codePointAt(i); 162 // Any word containing a COMMERCIAL_AT is probably an e-mail address 163 // Any word containing a SLASH is probably either an ad-hoc combination of two 164 // words or a URI - in either case we don't want to spell check that 165 if (Constants.CODE_COMMERCIAL_AT == codePoint || Constants.CODE_SLASH == codePoint) { 166 return CHECKABILITY_EMAIL_OR_URL; 167 } 168 // If the string contains a period, native returns strange suggestions (it seems 169 // to return suggestions for everything up to the period only and to ignore the 170 // rest), so we suppress lookup if there is a period. 171 // TODO: investigate why native returns these suggestions and remove this code. 172 if (Constants.CODE_PERIOD == codePoint) { 173 return CHECKABILITY_CONTAINS_PERIOD; 174 } 175 if (ScriptUtils.isLetterPartOfScript(codePoint, script)) ++letterCount; 176 } 177 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 178 // in this word are letters 179 return (letterCount * 4 < length * 3) 180 ? CHECKABILITY_TOO_MANY_NON_LETTERS : CHECKABILITY_CHECKABLE; 181 } 182 183 /** 184 * Helper method to test valid capitalizations of a word. 185 * 186 * If the "text" is lower-case, we test only the exact string. 187 * If the "Text" is capitalized, we test the exact string "Text" and the lower-cased 188 * version of it "text". 189 * If the "TEXT" is fully upper case, we test the exact string "TEXT", the lower-cased 190 * version of it "text" and the capitalized version of it "Text". 191 */ 192 private boolean isInDictForAnyCapitalization(final String text, final int capitalizeType) { 193 // If the word is in there as is, then it's in the dictionary. If not, we'll test lower 194 // case versions, but only if the word is not already all-lower case or mixed case. 195 if (mService.isValidWord(mLocale, text)) return true; 196 if (StringUtils.CAPITALIZE_NONE == capitalizeType) return false; 197 198 // If we come here, we have a capitalized word (either First- or All-). 199 // Downcase the word and look it up again. If the word is only capitalized, we 200 // tested all possibilities, so if it's still negative we can return false. 201 final String lowerCaseText = text.toLowerCase(mLocale); 202 if (mService.isValidWord(mLocale, lowerCaseText)) return true; 203 if (StringUtils.CAPITALIZE_FIRST == capitalizeType) return false; 204 205 // If the lower case version is not in the dictionary, it's still possible 206 // that we have an all-caps version of a word that needs to be capitalized 207 // according to the dictionary. E.g. "GERMANS" only exists in the dictionary as "Germans". 208 return mService.isValidWord(mLocale, 209 StringUtils.capitalizeFirstAndDowncaseRest(lowerCaseText, mLocale)); 210 } 211 212 // Note : this must be reentrant 213 /** 214 * Gets a list of suggestions for a specific string. This returns a list of possible 215 * corrections for the text passed as an argument. It may split or group words, and 216 * even perform grammatical analysis. 217 */ 218 private SuggestionsInfo onGetSuggestionsInternal(final TextInfo textInfo, 219 final int suggestionsLimit) { 220 return onGetSuggestionsInternal(textInfo, null, suggestionsLimit); 221 } 222 223 protected SuggestionsInfo onGetSuggestionsInternal( 224 final TextInfo textInfo, final PrevWordsInfo prevWordsInfo, 225 final int suggestionsLimit) { 226 try { 227 final String inText = textInfo.getText(); 228 final SuggestionsParams cachedSuggestionsParams = 229 mSuggestionsCache.getSuggestionsFromCache(inText, prevWordsInfo); 230 if (cachedSuggestionsParams != null) { 231 if (DBG) { 232 Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); 233 } 234 return new SuggestionsInfo( 235 cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); 236 } 237 final int checkability = getCheckabilityInScript(inText, mScript); 238 if (CHECKABILITY_CHECKABLE != checkability) { 239 if (CHECKABILITY_CONTAINS_PERIOD == checkability) { 240 final String[] splitText = inText.split(Constants.REGEXP_PERIOD); 241 boolean allWordsAreValid = true; 242 for (final String word : splitText) { 243 if (!mService.isValidWord(mLocale, word)) { 244 allWordsAreValid = false; 245 break; 246 } 247 } 248 if (allWordsAreValid) { 249 return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO 250 | SuggestionsInfo.RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS, 251 new String[] { 252 TextUtils.join(Constants.STRING_SPACE, splitText) }); 253 } 254 } 255 return mService.isValidWord(mLocale, inText) ? 256 AndroidSpellCheckerService.getInDictEmptySuggestions() : 257 AndroidSpellCheckerService.getNotInDictEmptySuggestions( 258 CHECKABILITY_CONTAINS_PERIOD == checkability /* reportAsTypo */); 259 } 260 final String text = inText.replaceAll( 261 AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE); 262 263 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 264 //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, 265 //mService.mSuggestionThreshold, mService.mRecommendedThreshold, 266 //suggestionsLimit); 267 final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer( 268 text, suggestionsLimit); 269 270 final int capitalizeType = StringUtils.getCapitalizationType(text); 271 boolean isInDict = true; 272 if (!mService.hasMainDictionaryForLocale(mLocale)) { 273 return AndroidSpellCheckerService.getNotInDictEmptySuggestions( 274 false /* reportAsTypo */); 275 } 276 final Keyboard keyboard = mService.getKeyboardForLocale(mLocale); 277 final WordComposer composer = new WordComposer(); 278 final int[] codePoints = StringUtils.toCodePointArray(text); 279 final int[] coordinates; 280 final ProximityInfo proximityInfo; 281 if (null == keyboard) { 282 coordinates = CoordinateUtils.newCoordinateArray(codePoints.length, 283 Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE); 284 proximityInfo = null; 285 } else { 286 coordinates = keyboard.getCoordinates(codePoints); 287 proximityInfo = keyboard.getProximityInfo(); 288 } 289 composer.setComposingWord(codePoints, coordinates); 290 final SuggestionResults suggestionResults = mService.getSuggestionResults( 291 mLocale, composer, prevWordsInfo, proximityInfo); 292 if (suggestionResults != null) { 293 for (final SuggestedWordInfo suggestion : suggestionResults) { 294 final String suggestionStr = suggestion.mWord; 295 suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0, 296 suggestionStr.length(), suggestion.mScore); 297 } 298 } 299 isInDict = isInDictForAnyCapitalization(text, capitalizeType); 300 301 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( 302 capitalizeType, mLocale); 303 304 if (DBG) { 305 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 306 + suggestionsLimit); 307 Log.i(TAG, "IsInDict = " + isInDict); 308 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 309 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); 310 if (null != result.mSuggestions) { 311 for (String suggestion : result.mSuggestions) { 312 Log.i(TAG, suggestion); 313 } 314 } 315 } 316 317 final int flags = 318 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 319 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) 320 | (result.mHasRecommendedSuggestions 321 ? SuggestionsInfoCompatUtils 322 .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() 323 : 0); 324 final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); 325 mSuggestionsCache.putSuggestionsToCache(text, prevWordsInfo, result.mSuggestions, 326 flags); 327 return retval; 328 } catch (RuntimeException e) { 329 // Don't kill the keyboard if there is a bug in the spell checker 330 if (DBG) { 331 throw e; 332 } else { 333 Log.e(TAG, "Exception while spellcheking", e); 334 return AndroidSpellCheckerService.getNotInDictEmptySuggestions( 335 false /* reportAsTypo */); 336 } 337 } 338 } 339 340 /* 341 * The spell checker acts on its own behalf. That is needed, in particular, to be able to 342 * access the dictionary files, which the provider restricts to the identity of Latin IME. 343 * Since it's called externally by the application, the spell checker is using the identity 344 * of the application by default unless we clearCallingIdentity. 345 * That's what the following method does. 346 */ 347 @Override 348 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 349 final int suggestionsLimit) { 350 long ident = Binder.clearCallingIdentity(); 351 try { 352 return onGetSuggestionsInternal(textInfo, suggestionsLimit); 353 } finally { 354 Binder.restoreCallingIdentity(ident); 355 } 356 } 357} 358