AndroidSpellCheckerService.java revision e897e4d3422c8d9d8b6f051376cc2ba16e4d5945
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin.spellcheck; 18 19import android.content.Intent; 20import android.content.res.Resources; 21import android.service.textservice.SpellCheckerService; 22import android.service.textservice.SpellCheckerService.Session; 23import android.util.Log; 24import android.view.textservice.SuggestionsInfo; 25import android.view.textservice.TextInfo; 26import android.text.TextUtils; 27 28import com.android.inputmethod.compat.ArraysCompatUtils; 29import com.android.inputmethod.keyboard.Key; 30import com.android.inputmethod.keyboard.ProximityInfo; 31import com.android.inputmethod.latin.Dictionary; 32import com.android.inputmethod.latin.Dictionary.DataType; 33import com.android.inputmethod.latin.Dictionary.WordCallback; 34import com.android.inputmethod.latin.DictionaryCollection; 35import com.android.inputmethod.latin.DictionaryFactory; 36import com.android.inputmethod.latin.LocaleUtils; 37import com.android.inputmethod.latin.R; 38import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary; 39import com.android.inputmethod.latin.UserDictionary; 40import com.android.inputmethod.latin.Utils; 41import com.android.inputmethod.latin.WordComposer; 42 43import java.util.ArrayList; 44import java.util.Arrays; 45import java.util.Collections; 46import java.util.Locale; 47import java.util.Map; 48import java.util.TreeMap; 49 50/** 51 * Service for spell checking, using LatinIME's dictionaries and mechanisms. 52 */ 53public class AndroidSpellCheckerService extends SpellCheckerService { 54 private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); 55 private static final boolean DBG = false; 56 private static final int POOL_SIZE = 2; 57 58 private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 59 private static final int CAPITALIZE_FIRST = 1; // First only 60 private static final int CAPITALIZE_ALL = 2; // All caps 61 62 private final static String[] EMPTY_STRING_ARRAY = new String[0]; 63 private final static SuggestionsInfo NOT_IN_DICT_EMPTY_SUGGESTIONS = 64 new SuggestionsInfo(0, EMPTY_STRING_ARRAY); 65 private final static SuggestionsInfo IN_DICT_EMPTY_SUGGESTIONS = 66 new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, 67 EMPTY_STRING_ARRAY); 68 private Map<String, DictionaryPool> mDictionaryPools = 69 Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 70 private Map<String, Dictionary> mUserDictionaries = 71 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 72 73 private double mTypoThreshold; 74 75 @Override public void onCreate() { 76 super.onCreate(); 77 mTypoThreshold = Double.parseDouble(getString(R.string.spellchecker_typo_threshold_value)); 78 } 79 80 @Override 81 public Session createSession() { 82 return new AndroidSpellCheckerSession(this); 83 } 84 85 private static class SuggestionsGatherer implements WordCallback { 86 public static class Result { 87 public final String[] mSuggestions; 88 public final boolean mLooksLikeTypo; 89 public Result(final String[] gatheredSuggestions, final boolean looksLikeTypo) { 90 mSuggestions = gatheredSuggestions; 91 mLooksLikeTypo = looksLikeTypo; 92 } 93 } 94 95 private final int DEFAULT_SUGGESTION_LENGTH = 16; 96 private final ArrayList<CharSequence> mSuggestions; 97 private final int[] mScores; 98 private final int mMaxLength; 99 private int mLength = 0; 100 101 // The two following attributes are only ever filled if the requested max length 102 // is 0 (or less, which is treated the same). 103 private String mBestSuggestion = null; 104 private int mBestScore = Integer.MIN_VALUE; // As small as possible 105 106 SuggestionsGatherer(final int maxLength) { 107 mMaxLength = maxLength; 108 mSuggestions = new ArrayList<CharSequence>(maxLength + 1); 109 mScores = new int[mMaxLength]; 110 } 111 112 @Override 113 synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, 114 int dicTypeId, DataType dataType) { 115 final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score); 116 // binarySearch returns the index if the element exists, and -<insertion index> - 1 117 // if it doesn't. See documentation for binarySearch. 118 final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; 119 120 if (mLength < mMaxLength) { 121 final int copyLen = mLength - insertIndex; 122 ++mLength; 123 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); 124 mSuggestions.add(insertIndex, new String(word, wordOffset, wordLength)); 125 } else { 126 if (insertIndex == 0) { 127 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) 128 // then we need to keep track of the best suggestion in mBestScore and 129 // mBestSuggestion. This is so that we know whether the best suggestion makes 130 // the score cutoff, since we need to know that to return a meaningful 131 // looksLikeTypo. 132 if (0 >= mMaxLength) { 133 if (score > mBestScore) { 134 mBestScore = score; 135 mBestSuggestion = new String(word, wordOffset, wordLength); 136 } 137 } 138 return true; 139 } 140 System.arraycopy(mScores, 1, mScores, 0, insertIndex); 141 mSuggestions.add(insertIndex, new String(word, wordOffset, wordLength)); 142 mSuggestions.remove(0); 143 } 144 mScores[insertIndex] = score; 145 146 return true; 147 } 148 149 public Result getResults(final CharSequence originalText, final double threshold, 150 final int capitalizeType, final Locale locale) { 151 final String[] gatheredSuggestions; 152 final boolean looksLikeTypo; 153 if (0 == mLength) { 154 // Either we found no suggestions, or we found some BUT the max length was 0. 155 // If we found some mBestSuggestion will not be null. If it is null, then 156 // we found none, regardless of the max length. 157 if (null == mBestSuggestion) { 158 gatheredSuggestions = null; 159 looksLikeTypo = false; 160 } else { 161 gatheredSuggestions = EMPTY_STRING_ARRAY; 162 final double normalizedScore = 163 Utils.calcNormalizedScore(originalText, mBestSuggestion, mBestScore); 164 looksLikeTypo = (normalizedScore > threshold); 165 } 166 } else { 167 if (DBG) { 168 if (mLength != mSuggestions.size()) { 169 Log.e(TAG, "Suggestion size is not the same as stored mLength"); 170 } 171 for (int i = mLength - 1; i >= 0; --i) { 172 Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); 173 } 174 } 175 Collections.reverse(mSuggestions); 176 Utils.removeDupes(mSuggestions); 177 if (CAPITALIZE_ALL == capitalizeType) { 178 for (int i = 0; i < mSuggestions.size(); ++i) { 179 // get(i) returns a CharSequence which is actually a String so .toString() 180 // should return the same object. 181 mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); 182 } 183 } else if (CAPITALIZE_FIRST == capitalizeType) { 184 for (int i = 0; i < mSuggestions.size(); ++i) { 185 // Likewise 186 mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(), 187 locale)); 188 } 189 } 190 // This returns a String[], while toArray() returns an Object[] which cannot be cast 191 // into a String[]. 192 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); 193 194 final int bestScore = mScores[mLength - 1]; 195 final CharSequence bestSuggestion = mSuggestions.get(0); 196 final double normalizedScore = 197 Utils.calcNormalizedScore(originalText, bestSuggestion, bestScore); 198 looksLikeTypo = (normalizedScore > threshold); 199 if (DBG) { 200 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 201 Log.i(TAG, "Normalized score = " + normalizedScore + " (threshold " + threshold 202 + ") => looksLikeTypo = " + looksLikeTypo); 203 } 204 } 205 return new Result(gatheredSuggestions, looksLikeTypo); 206 } 207 } 208 209 @Override 210 public boolean onUnbind(final Intent intent) { 211 final Map<String, DictionaryPool> oldPools = mDictionaryPools; 212 mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 213 final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries; 214 mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 215 for (DictionaryPool pool : oldPools.values()) { 216 pool.close(); 217 } 218 for (Dictionary dict : oldUserDictionaries.values()) { 219 dict.close(); 220 } 221 return false; 222 } 223 224 private DictionaryPool getDictionaryPool(final String locale) { 225 DictionaryPool pool = mDictionaryPools.get(locale); 226 if (null == pool) { 227 final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); 228 pool = new DictionaryPool(POOL_SIZE, this, localeObject); 229 mDictionaryPools.put(locale, pool); 230 } 231 return pool; 232 } 233 234 public DictAndProximity createDictAndProximity(final Locale locale) { 235 final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(); 236 final Resources resources = getResources(); 237 final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources); 238 final DictionaryCollection dictionaryCollection = 239 DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId); 240 final String localeStr = locale.toString(); 241 Dictionary userDict = mUserDictionaries.get(localeStr); 242 if (null == userDict) { 243 userDict = new SynchronouslyLoadedUserDictionary(this, localeStr, true); 244 mUserDictionaries.put(localeStr, userDict); 245 } 246 dictionaryCollection.addDictionary(userDict); 247 return new DictAndProximity(dictionaryCollection, proximityInfo); 248 } 249 250 // This method assumes the text is not empty or null. 251 private static int getCapitalizationType(String text) { 252 // If the first char is not uppercase, then the word is either all lower case, 253 // and in either case we return CAPITALIZE_NONE. 254 if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; 255 final int len = text.codePointCount(0, text.length()); 256 int capsCount = 1; 257 for (int i = 1; i < len; ++i) { 258 if (1 != capsCount && i != capsCount) break; 259 if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; 260 } 261 // We know the first char is upper case. So we want to test if either everything 262 // else is lower case, or if everything else is upper case. If the string is 263 // exactly one char long, then we will arrive here with capsCount 1, and this is 264 // correct, too. 265 if (1 == capsCount) return CAPITALIZE_FIRST; 266 return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 267 } 268 269 private static class AndroidSpellCheckerSession extends Session { 270 // Immutable, but need the locale which is not available in the constructor yet 271 private DictionaryPool mDictionaryPool; 272 // Likewise 273 private Locale mLocale; 274 275 private final AndroidSpellCheckerService mService; 276 277 AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { 278 mService = service; 279 } 280 281 @Override 282 public void onCreate() { 283 final String localeString = getLocale(); 284 mDictionaryPool = mService.getDictionaryPool(localeString); 285 mLocale = LocaleUtils.constructLocaleFromString(localeString); 286 } 287 288 /** 289 * Finds out whether a particular string should be filtered out of spell checking. 290 * 291 * This will loosely match URLs, numbers, symbols. 292 * 293 * @param text the string to evaluate. 294 * @return true if we should filter this text out, false otherwise 295 */ 296 private boolean shouldFilterOut(final String text) { 297 if (TextUtils.isEmpty(text) || text.length() <= 1) return true; 298 299 // TODO: check if an equivalent processing can't be done more quickly with a 300 // compiled regexp. 301 // Filter by first letter 302 final int firstCodePoint = text.codePointAt(0); 303 // Filter out words that don't start with a letter or an apostrophe 304 if (!Character.isLetter(firstCodePoint) 305 && '\'' != firstCodePoint) return true; 306 307 // Filter contents 308 final int length = text.length(); 309 int letterCount = 0; 310 for (int i = 0; i < length; ++i) { 311 final int codePoint = text.codePointAt(i); 312 // Any word containing a '@' is probably an e-mail address 313 // Any word containing a '/' is probably either an ad-hoc combination of two 314 // words or a URI - in either case we don't want to spell check that 315 if ('@' == codePoint 316 || '/' == codePoint) return true; 317 if (Character.isLetter(codePoint)) ++letterCount; 318 } 319 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 320 // in this word are letters 321 return (letterCount * 4 < length * 3); 322 } 323 324 // Note : this must be reentrant 325 /** 326 * Gets a list of suggestions for a specific string. This returns a list of possible 327 * corrections for the text passed as an argument. It may split or group words, and 328 * even perform grammatical analysis. 329 */ 330 @Override 331 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 332 final int suggestionsLimit) { 333 try { 334 final String text = textInfo.getText(); 335 336 if (shouldFilterOut(text)) { 337 final DictAndProximity dictInfo = mDictionaryPool.takeOrGetNull(); 338 if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS; 339 return dictInfo.mDictionary.isValidWord(text) ? IN_DICT_EMPTY_SUGGESTIONS 340 : NOT_IN_DICT_EMPTY_SUGGESTIONS; 341 } 342 343 final SuggestionsGatherer suggestionsGatherer = 344 new SuggestionsGatherer(suggestionsLimit); 345 final WordComposer composer = new WordComposer(); 346 final int length = text.length(); 347 for (int i = 0; i < length; ++i) { 348 final int character = text.codePointAt(i); 349 final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character); 350 final int[] proximities; 351 if (-1 == proximityIndex) { 352 proximities = new int[] { character }; 353 } else { 354 proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY, 355 proximityIndex, 356 proximityIndex + SpellCheckerProximityInfo.ROW_SIZE); 357 } 358 composer.add(character, proximities, 359 WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE); 360 } 361 362 final int capitalizeType = getCapitalizationType(text); 363 boolean isInDict = true; 364 final DictAndProximity dictInfo = mDictionaryPool.takeOrGetNull(); 365 if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS; 366 dictInfo.mDictionary.getWords(composer, suggestionsGatherer, 367 dictInfo.mProximityInfo); 368 isInDict = dictInfo.mDictionary.isValidWord(text); 369 if (!isInDict && CAPITALIZE_NONE != capitalizeType) { 370 // We want to test the word again if it's all caps or first caps only. 371 // If it's fully down, we already tested it, if it's mixed case, we don't 372 // want to test a lowercase version of it. 373 isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); 374 } 375 if (!mDictionaryPool.offer(dictInfo)) { 376 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 377 } 378 379 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(text, 380 mService.mTypoThreshold, capitalizeType, mLocale); 381 382 if (DBG) { 383 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 384 + suggestionsLimit); 385 Log.i(TAG, "IsInDict = " + result.mLooksLikeTypo); 386 Log.i(TAG, "LooksLikeTypo = " + result.mLooksLikeTypo); 387 for (String suggestion : result.mSuggestions) { 388 Log.i(TAG, suggestion); 389 } 390 } 391 392 final int flags = 393 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY : 0) 394 | (result.mLooksLikeTypo 395 ? SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO : 0); 396 return new SuggestionsInfo(flags, result.mSuggestions); 397 } catch (RuntimeException e) { 398 // Don't kill the keyboard if there is a bug in the spell checker 399 if (DBG) { 400 throw e; 401 } else { 402 Log.e(TAG, "Exception while spellcheking: " + e); 403 return NOT_IN_DICT_EMPTY_SUGGESTIONS; 404 } 405 } 406 } 407 } 408} 409