AndroidWordLevelSpellCheckerSession.java revision a28a05e971cc242b338331a3b78276fa95188d19
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin.spellcheck; 18 19import android.content.ContentResolver; 20import android.database.ContentObserver; 21import android.provider.UserDictionary.Words; 22import android.service.textservice.SpellCheckerService.Session; 23import android.text.TextUtils; 24import android.util.Log; 25import android.util.LruCache; 26import android.view.textservice.SuggestionsInfo; 27import android.view.textservice.TextInfo; 28 29import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; 30import com.android.inputmethod.latin.Constants; 31import com.android.inputmethod.latin.LocaleUtils; 32import com.android.inputmethod.latin.WordComposer; 33import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 34import com.android.inputmethod.latin.spellcheck.AndroidSpellCheckerService.SuggestionsGatherer; 35 36import java.util.ArrayList; 37import java.util.Locale; 38 39public abstract class AndroidWordLevelSpellCheckerSession extends Session { 40 private static final String TAG = AndroidWordLevelSpellCheckerSession.class.getSimpleName(); 41 private static final boolean DBG = false; 42 43 // Immutable, but need the locale which is not available in the constructor yet 44 private DictionaryPool mDictionaryPool; 45 // Likewise 46 private Locale mLocale; 47 // Cache this for performance 48 private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. 49 private final AndroidSpellCheckerService mService; 50 protected final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); 51 private final ContentObserver mObserver; 52 53 private static final class SuggestionsParams { 54 public final String[] mSuggestions; 55 public final int mFlags; 56 public SuggestionsParams(String[] suggestions, int flags) { 57 mSuggestions = suggestions; 58 mFlags = flags; 59 } 60 } 61 62 protected static final class SuggestionsCache { 63 private static final char CHAR_DELIMITER = '\uFFFC'; 64 private static final int MAX_CACHE_SIZE = 50; 65 private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = 66 new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE); 67 68 // TODO: Support n-gram input 69 private static String generateKey(String query, String prevWord) { 70 if (TextUtils.isEmpty(query) || TextUtils.isEmpty(prevWord)) { 71 return query; 72 } 73 return query + CHAR_DELIMITER + prevWord; 74 } 75 76 // TODO: Support n-gram input 77 public SuggestionsParams getSuggestionsFromCache(String query, String prevWord) { 78 return mUnigramSuggestionsInfoCache.get(generateKey(query, prevWord)); 79 } 80 81 // TODO: Support n-gram input 82 public void putSuggestionsToCache( 83 String query, String prevWord, String[] suggestions, int flags) { 84 if (suggestions == null || TextUtils.isEmpty(query)) { 85 return; 86 } 87 mUnigramSuggestionsInfoCache.put( 88 generateKey(query, prevWord), new SuggestionsParams(suggestions, flags)); 89 } 90 91 public void clearCache() { 92 mUnigramSuggestionsInfoCache.evictAll(); 93 } 94 } 95 96 AndroidWordLevelSpellCheckerSession(final AndroidSpellCheckerService service) { 97 mService = service; 98 final ContentResolver cres = service.getContentResolver(); 99 100 mObserver = new ContentObserver(null) { 101 @Override 102 public void onChange(boolean self) { 103 mSuggestionsCache.clearCache(); 104 } 105 }; 106 cres.registerContentObserver(Words.CONTENT_URI, true, mObserver); 107 } 108 109 @Override 110 public void onCreate() { 111 final String localeString = getLocale(); 112 mDictionaryPool = mService.getDictionaryPool(localeString); 113 mLocale = LocaleUtils.constructLocaleFromString(localeString); 114 mScript = AndroidSpellCheckerService.getScriptFromLocale(mLocale); 115 } 116 117 @Override 118 public void onClose() { 119 final ContentResolver cres = mService.getContentResolver(); 120 cres.unregisterContentObserver(mObserver); 121 } 122 123 /* 124 * Returns whether the code point is a letter that makes sense for the specified 125 * locale for this spell checker. 126 * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml 127 * and is limited to EFIGS languages and Russian. 128 * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters 129 * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. 130 */ 131 private static boolean isLetterCheckableByLanguage(final int codePoint, 132 final int script) { 133 switch (script) { 134 case AndroidSpellCheckerService.SCRIPT_LATIN: 135 // Our supported latin script dictionaries (EFIGS) at the moment only include 136 // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode 137 // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, 138 // so the below is a very efficient way to test for it. As for the 0-0x3F, it's 139 // excluded from isLetter anyway. 140 return codePoint <= 0x2AF && Character.isLetter(codePoint); 141 case AndroidSpellCheckerService.SCRIPT_CYRILLIC: 142 // All Cyrillic characters are in the 400~52F block. There are some in the upper 143 // Unicode range, but they are archaic characters that are not used in modern 144 // russian and are not used by our dictionary. 145 return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); 146 default: 147 // Should never come here 148 throw new RuntimeException("Impossible value of script: " + script); 149 } 150 } 151 152 /** 153 * Finds out whether a particular string should be filtered out of spell checking. 154 * 155 * This will loosely match URLs, numbers, symbols. To avoid always underlining words that 156 * we know we will never recognize, this accepts a script identifier that should be one 157 * of the SCRIPT_* constants defined above, to rule out quickly characters from very 158 * different languages. 159 * 160 * @param text the string to evaluate. 161 * @param script the identifier for the script this spell checker recognizes 162 * @return true if we should filter this text out, false otherwise 163 */ 164 private static boolean shouldFilterOut(final String text, final int script) { 165 if (TextUtils.isEmpty(text) || text.length() <= 1) return true; 166 167 // TODO: check if an equivalent processing can't be done more quickly with a 168 // compiled regexp. 169 // Filter by first letter 170 final int firstCodePoint = text.codePointAt(0); 171 // Filter out words that don't start with a letter or an apostrophe 172 if (!isLetterCheckableByLanguage(firstCodePoint, script) 173 && '\'' != firstCodePoint) return true; 174 175 // Filter contents 176 final int length = text.length(); 177 int letterCount = 0; 178 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 179 final int codePoint = text.codePointAt(i); 180 // Any word containing a '@' is probably an e-mail address 181 // Any word containing a '/' is probably either an ad-hoc combination of two 182 // words or a URI - in either case we don't want to spell check that 183 if ('@' == codePoint || '/' == codePoint) return true; 184 if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; 185 } 186 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 187 // in this word are letters 188 return (letterCount * 4 < length * 3); 189 } 190 191 // Note : this must be reentrant 192 /** 193 * Gets a list of suggestions for a specific string. This returns a list of possible 194 * corrections for the text passed as an argument. It may split or group words, and 195 * even perform grammatical analysis. 196 */ 197 @Override 198 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 199 final int suggestionsLimit) { 200 return onGetSuggestions(textInfo, null, suggestionsLimit); 201 } 202 203 protected SuggestionsInfo onGetSuggestions( 204 final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { 205 try { 206 final String inText = textInfo.getText(); 207 final SuggestionsParams cachedSuggestionsParams = 208 mSuggestionsCache.getSuggestionsFromCache(inText, prevWord); 209 if (cachedSuggestionsParams != null) { 210 if (DBG) { 211 Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); 212 } 213 return new SuggestionsInfo( 214 cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); 215 } 216 217 if (shouldFilterOut(inText, mScript)) { 218 DictAndProximity dictInfo = null; 219 try { 220 dictInfo = mDictionaryPool.pollWithDefaultTimeout(); 221 if (!DictionaryPool.isAValidDictionary(dictInfo)) { 222 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); 223 } 224 return dictInfo.mDictionary.isValidWord(inText) 225 ? AndroidSpellCheckerService.getInDictEmptySuggestions() 226 : AndroidSpellCheckerService.getNotInDictEmptySuggestions(); 227 } finally { 228 if (null != dictInfo) { 229 if (!mDictionaryPool.offer(dictInfo)) { 230 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 231 } 232 } 233 } 234 } 235 final String text = inText.replaceAll( 236 AndroidSpellCheckerService.APOSTROPHE, AndroidSpellCheckerService.SINGLE_QUOTE); 237 238 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 239 //final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, 240 //mService.mSuggestionThreshold, mService.mRecommendedThreshold, 241 //suggestionsLimit); 242 final SuggestionsGatherer suggestionsGatherer = mService.newSuggestionsGatherer( 243 text, suggestionsLimit); 244 final WordComposer composer = new WordComposer(); 245 final int length = text.length(); 246 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 247 final int codePoint = text.codePointAt(i); 248 // The getXYForCodePointAndScript method returns (Y << 16) + X 249 final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript( 250 codePoint, mScript); 251 if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) { 252 composer.add(codePoint, 253 Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE); 254 } else { 255 composer.add(codePoint, xy & 0xFFFF, xy >> 16); 256 } 257 } 258 259 final int capitalizeType = AndroidSpellCheckerService.getCapitalizationType(text); 260 boolean isInDict = true; 261 DictAndProximity dictInfo = null; 262 try { 263 dictInfo = mDictionaryPool.pollWithDefaultTimeout(); 264 if (!DictionaryPool.isAValidDictionary(dictInfo)) { 265 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); 266 } 267 final ArrayList<SuggestedWordInfo> suggestions = 268 dictInfo.mDictionary.getSuggestions(composer, prevWord, 269 dictInfo.mProximityInfo); 270 for (final SuggestedWordInfo suggestion : suggestions) { 271 final String suggestionStr = suggestion.mWord.toString(); 272 suggestionsGatherer.addWord(suggestionStr.toCharArray(), null, 0, 273 suggestionStr.length(), suggestion.mScore); 274 } 275 isInDict = dictInfo.mDictionary.isValidWord(text); 276 if (!isInDict && AndroidSpellCheckerService.CAPITALIZE_NONE != capitalizeType) { 277 // We want to test the word again if it's all caps or first caps only. 278 // If it's fully down, we already tested it, if it's mixed case, we don't 279 // want to test a lowercase version of it. 280 isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); 281 } 282 } finally { 283 if (null != dictInfo) { 284 if (!mDictionaryPool.offer(dictInfo)) { 285 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 286 } 287 } 288 } 289 290 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( 291 capitalizeType, mLocale); 292 293 if (DBG) { 294 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 295 + suggestionsLimit); 296 Log.i(TAG, "IsInDict = " + isInDict); 297 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 298 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); 299 if (null != result.mSuggestions) { 300 for (String suggestion : result.mSuggestions) { 301 Log.i(TAG, suggestion); 302 } 303 } 304 } 305 306 final int flags = 307 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 308 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) 309 | (result.mHasRecommendedSuggestions 310 ? SuggestionsInfoCompatUtils 311 .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() 312 : 0); 313 final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); 314 mSuggestionsCache.putSuggestionsToCache(text, prevWord, result.mSuggestions, flags); 315 return retval; 316 } catch (RuntimeException e) { 317 // Don't kill the keyboard if there is a bug in the spell checker 318 if (DBG) { 319 throw e; 320 } else { 321 Log.e(TAG, "Exception while spellcheking: " + e); 322 return AndroidSpellCheckerService.getNotInDictEmptySuggestions(); 323 } 324 } 325 } 326} 327