AndroidSpellCheckerService.java revision 51075d145a85d1acaff08c02f4d6b10b175eaa36
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin.spellcheck; 18 19import android.content.Intent; 20import android.content.res.Resources; 21import android.service.textservice.SpellCheckerService; 22import android.text.TextUtils; 23import android.util.Log; 24import android.view.textservice.SuggestionsInfo; 25import android.view.textservice.TextInfo; 26 27import com.android.inputmethod.compat.ArraysCompatUtils; 28import com.android.inputmethod.keyboard.ProximityInfo; 29import com.android.inputmethod.latin.BinaryDictionary; 30import com.android.inputmethod.latin.Dictionary; 31import com.android.inputmethod.latin.Dictionary.DataType; 32import com.android.inputmethod.latin.Dictionary.WordCallback; 33import com.android.inputmethod.latin.DictionaryCollection; 34import com.android.inputmethod.latin.DictionaryFactory; 35import com.android.inputmethod.latin.Flag; 36import com.android.inputmethod.latin.LocaleUtils; 37import com.android.inputmethod.latin.R; 38import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary; 39import com.android.inputmethod.latin.Utils; 40import com.android.inputmethod.latin.WordComposer; 41 42import java.util.ArrayList; 43import java.util.Arrays; 44import java.util.Collections; 45import java.util.Locale; 46import java.util.Map; 47import java.util.TreeMap; 48 49/** 50 * Service for spell checking, using LatinIME's dictionaries and mechanisms. 51 */ 52public class AndroidSpellCheckerService extends SpellCheckerService { 53 private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); 54 private static final boolean DBG = false; 55 private static final int POOL_SIZE = 2; 56 57 private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 58 private static final int CAPITALIZE_FIRST = 1; // First only 59 private static final int CAPITALIZE_ALL = 2; // All caps 60 61 private final static String[] EMPTY_STRING_ARRAY = new String[0]; 62 private final static SuggestionsInfo NOT_IN_DICT_EMPTY_SUGGESTIONS = 63 new SuggestionsInfo(0, EMPTY_STRING_ARRAY); 64 private final static SuggestionsInfo IN_DICT_EMPTY_SUGGESTIONS = 65 new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, 66 EMPTY_STRING_ARRAY); 67 private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY; 68 static { 69 // See BinaryDictionary.java for an explanation of these flags 70 // Specifially, ALL_CONFIG_FLAGS means that we want to consider all flags with the 71 // current dictionary configuration - for example, consider the UMLAUT flag 72 // so that it will be turned on for German dictionaries and off for others. 73 USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS, 74 BinaryDictionary.ALL_CONFIG_FLAGS.length + 1); 75 USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] = 76 BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE; 77 } 78 private Map<String, DictionaryPool> mDictionaryPools = 79 Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 80 private Map<String, Dictionary> mUserDictionaries = 81 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 82 83 // The threshold for a candidate to be offered as a suggestion. 84 private double mSuggestionThreshold; 85 // The threshold for a suggestion to be considered "likely". 86 private double mLikelyThreshold; 87 88 @Override public void onCreate() { 89 super.onCreate(); 90 mSuggestionThreshold = 91 Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value)); 92 mLikelyThreshold = 93 Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value)); 94 } 95 96 @Override 97 public Session createSession() { 98 return new AndroidSpellCheckerSession(this); 99 } 100 101 private static class SuggestionsGatherer implements WordCallback { 102 public static class Result { 103 public final String[] mSuggestions; 104 public final boolean mHasLikelySuggestions; 105 public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) { 106 mSuggestions = gatheredSuggestions; 107 mHasLikelySuggestions = hasLikelySuggestions; 108 } 109 } 110 111 private final ArrayList<CharSequence> mSuggestions; 112 private final int[] mScores; 113 private final String mOriginalText; 114 private final double mSuggestionThreshold; 115 private final double mLikelyThreshold; 116 private final int mMaxLength; 117 private int mLength = 0; 118 119 // The two following attributes are only ever filled if the requested max length 120 // is 0 (or less, which is treated the same). 121 private String mBestSuggestion = null; 122 private int mBestScore = Integer.MIN_VALUE; // As small as possible 123 124 SuggestionsGatherer(final String originalText, final double suggestionThreshold, 125 final double likelyThreshold, final int maxLength) { 126 mOriginalText = originalText; 127 mSuggestionThreshold = suggestionThreshold; 128 mLikelyThreshold = likelyThreshold; 129 mMaxLength = maxLength; 130 mSuggestions = new ArrayList<CharSequence>(maxLength + 1); 131 mScores = new int[mMaxLength]; 132 } 133 134 @Override 135 synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, 136 int dicTypeId, DataType dataType) { 137 final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score); 138 // binarySearch returns the index if the element exists, and -<insertion index> - 1 139 // if it doesn't. See documentation for binarySearch. 140 final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; 141 142 if (insertIndex == 0 && mLength >= mMaxLength) { 143 // In the future, we may want to keep track of the best suggestion score even if 144 // we are asked for 0 suggestions. In this case, we can use the following 145 // (tested) code to keep it: 146 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) 147 // then we need to keep track of the best suggestion in mBestScore and 148 // mBestSuggestion. This is so that we know whether the best suggestion makes 149 // the score cutoff, since we need to know that to return a meaningful 150 // looksLikeTypo. 151 // if (0 >= mMaxLength) { 152 // if (score > mBestScore) { 153 // mBestScore = score; 154 // mBestSuggestion = new String(word, wordOffset, wordLength); 155 // } 156 // } 157 return true; 158 } 159 160 // Compute the normalized score and skip this word if it's normalized score does not 161 // make the threshold. 162 final String wordString = new String(word, wordOffset, wordLength); 163 final double normalizedScore = 164 Utils.calcNormalizedScore(mOriginalText, wordString, score); 165 if (normalizedScore < mSuggestionThreshold) { 166 if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); 167 return true; 168 } 169 170 if (mLength < mMaxLength) { 171 final int copyLen = mLength - insertIndex; 172 ++mLength; 173 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); 174 mSuggestions.add(insertIndex, wordString); 175 } else { 176 System.arraycopy(mScores, 1, mScores, 0, insertIndex); 177 mSuggestions.add(insertIndex, wordString); 178 mSuggestions.remove(0); 179 } 180 mScores[insertIndex] = score; 181 182 return true; 183 } 184 185 public Result getResults(final int capitalizeType, final Locale locale) { 186 final String[] gatheredSuggestions; 187 final boolean hasLikelySuggestions; 188 if (0 == mLength) { 189 // Either we found no suggestions, or we found some BUT the max length was 0. 190 // If we found some mBestSuggestion will not be null. If it is null, then 191 // we found none, regardless of the max length. 192 if (null == mBestSuggestion) { 193 gatheredSuggestions = null; 194 hasLikelySuggestions = false; 195 } else { 196 gatheredSuggestions = EMPTY_STRING_ARRAY; 197 final double normalizedScore = 198 Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore); 199 hasLikelySuggestions = (normalizedScore > mLikelyThreshold); 200 } 201 } else { 202 if (DBG) { 203 if (mLength != mSuggestions.size()) { 204 Log.e(TAG, "Suggestion size is not the same as stored mLength"); 205 } 206 for (int i = mLength - 1; i >= 0; --i) { 207 Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); 208 } 209 } 210 Collections.reverse(mSuggestions); 211 Utils.removeDupes(mSuggestions); 212 if (CAPITALIZE_ALL == capitalizeType) { 213 for (int i = 0; i < mSuggestions.size(); ++i) { 214 // get(i) returns a CharSequence which is actually a String so .toString() 215 // should return the same object. 216 mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); 217 } 218 } else if (CAPITALIZE_FIRST == capitalizeType) { 219 for (int i = 0; i < mSuggestions.size(); ++i) { 220 // Likewise 221 mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(), 222 locale)); 223 } 224 } 225 // This returns a String[], while toArray() returns an Object[] which cannot be cast 226 // into a String[]. 227 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); 228 229 final int bestScore = mScores[mLength - 1]; 230 final CharSequence bestSuggestion = mSuggestions.get(0); 231 final double normalizedScore = 232 Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore); 233 hasLikelySuggestions = (normalizedScore > mLikelyThreshold); 234 if (DBG) { 235 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 236 Log.i(TAG, "Normalized score = " + normalizedScore 237 + " (threshold " + mLikelyThreshold 238 + ") => hasLikelySuggestions = " + hasLikelySuggestions); 239 } 240 } 241 return new Result(gatheredSuggestions, hasLikelySuggestions); 242 } 243 } 244 245 @Override 246 public boolean onUnbind(final Intent intent) { 247 final Map<String, DictionaryPool> oldPools = mDictionaryPools; 248 mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 249 final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries; 250 mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 251 for (DictionaryPool pool : oldPools.values()) { 252 pool.close(); 253 } 254 for (Dictionary dict : oldUserDictionaries.values()) { 255 dict.close(); 256 } 257 return false; 258 } 259 260 private DictionaryPool getDictionaryPool(final String locale) { 261 DictionaryPool pool = mDictionaryPools.get(locale); 262 if (null == pool) { 263 final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); 264 pool = new DictionaryPool(POOL_SIZE, this, localeObject); 265 mDictionaryPools.put(locale, pool); 266 } 267 return pool; 268 } 269 270 public DictAndProximity createDictAndProximity(final Locale locale) { 271 final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(); 272 final Resources resources = getResources(); 273 final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources); 274 final DictionaryCollection dictionaryCollection = 275 DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId, 276 USE_FULL_EDIT_DISTANCE_FLAG_ARRAY); 277 final String localeStr = locale.toString(); 278 Dictionary userDict = mUserDictionaries.get(localeStr); 279 if (null == userDict) { 280 userDict = new SynchronouslyLoadedUserDictionary(this, localeStr, true); 281 mUserDictionaries.put(localeStr, userDict); 282 } 283 dictionaryCollection.addDictionary(userDict); 284 return new DictAndProximity(dictionaryCollection, proximityInfo); 285 } 286 287 // This method assumes the text is not empty or null. 288 private static int getCapitalizationType(String text) { 289 // If the first char is not uppercase, then the word is either all lower case, 290 // and in either case we return CAPITALIZE_NONE. 291 if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; 292 final int len = text.codePointCount(0, text.length()); 293 int capsCount = 1; 294 for (int i = 1; i < len; ++i) { 295 if (1 != capsCount && i != capsCount) break; 296 if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; 297 } 298 // We know the first char is upper case. So we want to test if either everything 299 // else is lower case, or if everything else is upper case. If the string is 300 // exactly one char long, then we will arrive here with capsCount 1, and this is 301 // correct, too. 302 if (1 == capsCount) return CAPITALIZE_FIRST; 303 return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 304 } 305 306 private static class AndroidSpellCheckerSession extends Session { 307 // Immutable, but need the locale which is not available in the constructor yet 308 private DictionaryPool mDictionaryPool; 309 // Likewise 310 private Locale mLocale; 311 312 private final AndroidSpellCheckerService mService; 313 314 AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { 315 mService = service; 316 } 317 318 @Override 319 public void onCreate() { 320 final String localeString = getLocale(); 321 mDictionaryPool = mService.getDictionaryPool(localeString); 322 mLocale = LocaleUtils.constructLocaleFromString(localeString); 323 } 324 325 /** 326 * Finds out whether a particular string should be filtered out of spell checking. 327 * 328 * This will loosely match URLs, numbers, symbols. 329 * 330 * @param text the string to evaluate. 331 * @return true if we should filter this text out, false otherwise 332 */ 333 private boolean shouldFilterOut(final String text) { 334 if (TextUtils.isEmpty(text) || text.length() <= 1) return true; 335 336 // TODO: check if an equivalent processing can't be done more quickly with a 337 // compiled regexp. 338 // Filter by first letter 339 final int firstCodePoint = text.codePointAt(0); 340 // Filter out words that don't start with a letter or an apostrophe 341 if (!Character.isLetter(firstCodePoint) 342 && '\'' != firstCodePoint) return true; 343 344 // Filter contents 345 final int length = text.length(); 346 int letterCount = 0; 347 for (int i = 0; i < length; ++i) { 348 final int codePoint = text.codePointAt(i); 349 // Any word containing a '@' is probably an e-mail address 350 // Any word containing a '/' is probably either an ad-hoc combination of two 351 // words or a URI - in either case we don't want to spell check that 352 if ('@' == codePoint 353 || '/' == codePoint) return true; 354 if (Character.isLetter(codePoint)) ++letterCount; 355 } 356 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 357 // in this word are letters 358 return (letterCount * 4 < length * 3); 359 } 360 361 // Note : this must be reentrant 362 /** 363 * Gets a list of suggestions for a specific string. This returns a list of possible 364 * corrections for the text passed as an argument. It may split or group words, and 365 * even perform grammatical analysis. 366 */ 367 @Override 368 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 369 final int suggestionsLimit) { 370 try { 371 final String text = textInfo.getText(); 372 373 if (shouldFilterOut(text)) { 374 DictAndProximity dictInfo = null; 375 try { 376 dictInfo = mDictionaryPool.takeOrGetNull(); 377 if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS; 378 return dictInfo.mDictionary.isValidWord(text) ? IN_DICT_EMPTY_SUGGESTIONS 379 : NOT_IN_DICT_EMPTY_SUGGESTIONS; 380 } finally { 381 if (null != dictInfo) { 382 if (!mDictionaryPool.offer(dictInfo)) { 383 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 384 } 385 } 386 } 387 } 388 389 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 390 final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, 391 mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit); 392 final WordComposer composer = new WordComposer(); 393 final int length = text.length(); 394 for (int i = 0; i < length; ++i) { 395 final int character = text.codePointAt(i); 396 final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character); 397 final int[] proximities; 398 if (-1 == proximityIndex) { 399 proximities = new int[] { character }; 400 } else { 401 proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY, 402 proximityIndex, 403 proximityIndex + SpellCheckerProximityInfo.ROW_SIZE); 404 } 405 composer.add(character, proximities, 406 WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE); 407 } 408 409 final int capitalizeType = getCapitalizationType(text); 410 boolean isInDict = true; 411 DictAndProximity dictInfo = null; 412 try { 413 dictInfo = mDictionaryPool.takeOrGetNull(); 414 if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS; 415 dictInfo.mDictionary.getWords(composer, suggestionsGatherer, 416 dictInfo.mProximityInfo); 417 isInDict = dictInfo.mDictionary.isValidWord(text); 418 if (!isInDict && CAPITALIZE_NONE != capitalizeType) { 419 // We want to test the word again if it's all caps or first caps only. 420 // If it's fully down, we already tested it, if it's mixed case, we don't 421 // want to test a lowercase version of it. 422 isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); 423 } 424 } finally { 425 if (null != dictInfo) { 426 if (!mDictionaryPool.offer(dictInfo)) { 427 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 428 } 429 } 430 } 431 432 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( 433 capitalizeType, mLocale); 434 435 if (DBG) { 436 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 437 + suggestionsLimit); 438 Log.i(TAG, "IsInDict = " + isInDict); 439 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 440 Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions); 441 if (null != result.mSuggestions) { 442 for (String suggestion : result.mSuggestions) { 443 Log.i(TAG, suggestion); 444 } 445 } 446 } 447 448 // TODO: actually use result.mHasLikelySuggestions 449 final int flags = 450 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 451 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO); 452 return new SuggestionsInfo(flags, result.mSuggestions); 453 } catch (RuntimeException e) { 454 // Don't kill the keyboard if there is a bug in the spell checker 455 if (DBG) { 456 throw e; 457 } else { 458 Log.e(TAG, "Exception while spellcheking: " + e); 459 return NOT_IN_DICT_EMPTY_SUGGESTIONS; 460 } 461 } 462 } 463 } 464} 465