AndroidSpellCheckerService.java revision d8590857bdff7f30a93af07aef0362d9f7460a5a
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin.spellcheck; 18 19import android.content.Intent; 20import android.content.SharedPreferences; 21import android.preference.PreferenceManager; 22import android.service.textservice.SpellCheckerService; 23import android.util.Log; 24import android.view.textservice.SuggestionsInfo; 25 26import com.android.inputmethod.keyboard.ProximityInfo; 27import com.android.inputmethod.latin.BinaryDictionary; 28import com.android.inputmethod.latin.CollectionUtils; 29import com.android.inputmethod.latin.ContactsBinaryDictionary; 30import com.android.inputmethod.latin.Dictionary; 31import com.android.inputmethod.latin.DictionaryCollection; 32import com.android.inputmethod.latin.DictionaryFactory; 33import com.android.inputmethod.latin.LocaleUtils; 34import com.android.inputmethod.latin.R; 35import com.android.inputmethod.latin.StringUtils; 36import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary; 37import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary; 38import com.android.inputmethod.latin.UserBinaryDictionary; 39 40import java.lang.ref.WeakReference; 41import java.util.ArrayList; 42import java.util.Arrays; 43import java.util.Collections; 44import java.util.HashSet; 45import java.util.Iterator; 46import java.util.Locale; 47import java.util.Map; 48import java.util.TreeMap; 49 50/** 51 * Service for spell checking, using LatinIME's dictionaries and mechanisms. 52 */ 53public final class AndroidSpellCheckerService extends SpellCheckerService 54 implements SharedPreferences.OnSharedPreferenceChangeListener { 55 private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); 56 private static final boolean DBG = false; 57 private static final int POOL_SIZE = 2; 58 59 public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts"; 60 61 public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 62 public static final int CAPITALIZE_FIRST = 1; // First only 63 public static final int CAPITALIZE_ALL = 2; // All caps 64 65 private final static String[] EMPTY_STRING_ARRAY = new String[0]; 66 private Map<String, DictionaryPool> mDictionaryPools = CollectionUtils.newSynchronizedTreeMap(); 67 private Map<String, UserBinaryDictionary> mUserDictionaries = 68 CollectionUtils.newSynchronizedTreeMap(); 69 private ContactsBinaryDictionary mContactsDictionary; 70 71 // The threshold for a candidate to be offered as a suggestion. 72 private float mSuggestionThreshold; 73 // The threshold for a suggestion to be considered "recommended". 74 private float mRecommendedThreshold; 75 // Whether to use the contacts dictionary 76 private boolean mUseContactsDictionary; 77 private final Object mUseContactsLock = new Object(); 78 79 private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList = 80 CollectionUtils.newHashSet(); 81 82 public static final int SCRIPT_LATIN = 0; 83 public static final int SCRIPT_CYRILLIC = 1; 84 public static final int SCRIPT_GREEK = 2; 85 public static final String SINGLE_QUOTE = "\u0027"; 86 public static final String APOSTROPHE = "\u2019"; 87 private static final TreeMap<String, Integer> mLanguageToScript; 88 static { 89 // List of the supported languages and their associated script. We won't check 90 // words written in another script than the selected script, because we know we 91 // don't have those in our dictionary so we will underline everything and we 92 // will never have any suggestions, so it makes no sense checking them, and this 93 // is done in {@link #shouldFilterOut}. Also, the script is used to choose which 94 // proximity to pass to the dictionary descent algorithm. 95 // IMPORTANT: this only contains languages - do not write countries in there. 96 // Only the language is searched from the map. 97 mLanguageToScript = CollectionUtils.newTreeMap(); 98 mLanguageToScript.put("cs", SCRIPT_LATIN); 99 mLanguageToScript.put("da", SCRIPT_LATIN); 100 mLanguageToScript.put("de", SCRIPT_LATIN); 101 mLanguageToScript.put("el", SCRIPT_GREEK); 102 mLanguageToScript.put("en", SCRIPT_LATIN); 103 mLanguageToScript.put("es", SCRIPT_LATIN); 104 mLanguageToScript.put("fi", SCRIPT_LATIN); 105 mLanguageToScript.put("fr", SCRIPT_LATIN); 106 mLanguageToScript.put("hr", SCRIPT_LATIN); 107 mLanguageToScript.put("it", SCRIPT_LATIN); 108 mLanguageToScript.put("lt", SCRIPT_LATIN); 109 mLanguageToScript.put("lv", SCRIPT_LATIN); 110 mLanguageToScript.put("nb", SCRIPT_LATIN); 111 mLanguageToScript.put("nl", SCRIPT_LATIN); 112 mLanguageToScript.put("pt", SCRIPT_LATIN); 113 mLanguageToScript.put("sl", SCRIPT_LATIN); 114 mLanguageToScript.put("ru", SCRIPT_CYRILLIC); 115 } 116 117 @Override public void onCreate() { 118 super.onCreate(); 119 mSuggestionThreshold = 120 Float.parseFloat(getString(R.string.spellchecker_suggestion_threshold_value)); 121 mRecommendedThreshold = 122 Float.parseFloat(getString(R.string.spellchecker_recommended_threshold_value)); 123 final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); 124 prefs.registerOnSharedPreferenceChangeListener(this); 125 onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY); 126 } 127 128 public static int getScriptFromLocale(final Locale locale) { 129 final Integer script = mLanguageToScript.get(locale.getLanguage()); 130 if (null == script) { 131 throw new RuntimeException("We have been called with an unsupported language: \"" 132 + locale.getLanguage() + "\". Framework bug?"); 133 } 134 return script; 135 } 136 137 @Override 138 public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) { 139 if (!PREF_USE_CONTACTS_KEY.equals(key)) return; 140 synchronized(mUseContactsLock) { 141 mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true); 142 if (mUseContactsDictionary) { 143 startUsingContactsDictionaryLocked(); 144 } else { 145 stopUsingContactsDictionaryLocked(); 146 } 147 } 148 } 149 150 private void startUsingContactsDictionaryLocked() { 151 if (null == mContactsDictionary) { 152 // TODO: use the right locale for each session 153 mContactsDictionary = 154 new SynchronouslyLoadedContactsBinaryDictionary(this, Locale.getDefault()); 155 } 156 final Iterator<WeakReference<DictionaryCollection>> iterator = 157 mDictionaryCollectionsList.iterator(); 158 while (iterator.hasNext()) { 159 final WeakReference<DictionaryCollection> dictRef = iterator.next(); 160 final DictionaryCollection dict = dictRef.get(); 161 if (null == dict) { 162 iterator.remove(); 163 } else { 164 dict.addDictionary(mContactsDictionary); 165 } 166 } 167 } 168 169 private void stopUsingContactsDictionaryLocked() { 170 if (null == mContactsDictionary) return; 171 final Dictionary contactsDict = mContactsDictionary; 172 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no longer needed 173 mContactsDictionary = null; 174 final Iterator<WeakReference<DictionaryCollection>> iterator = 175 mDictionaryCollectionsList.iterator(); 176 while (iterator.hasNext()) { 177 final WeakReference<DictionaryCollection> dictRef = iterator.next(); 178 final DictionaryCollection dict = dictRef.get(); 179 if (null == dict) { 180 iterator.remove(); 181 } else { 182 dict.removeDictionary(contactsDict); 183 } 184 } 185 contactsDict.close(); 186 } 187 188 @Override 189 public Session createSession() { 190 // Should not refer to AndroidSpellCheckerSession directly considering 191 // that AndroidSpellCheckerSession may be overlaid. 192 return AndroidSpellCheckerSessionFactory.newInstance(this); 193 } 194 195 public static SuggestionsInfo getNotInDictEmptySuggestions() { 196 return new SuggestionsInfo(0, EMPTY_STRING_ARRAY); 197 } 198 199 public static SuggestionsInfo getInDictEmptySuggestions() { 200 return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, 201 EMPTY_STRING_ARRAY); 202 } 203 204 public SuggestionsGatherer newSuggestionsGatherer(final String text, int maxLength) { 205 return new SuggestionsGatherer( 206 text, mSuggestionThreshold, mRecommendedThreshold, maxLength); 207 } 208 209 // TODO: remove this class and replace it by storage local to the session. 210 public static final class SuggestionsGatherer { 211 public static final class Result { 212 public final String[] mSuggestions; 213 public final boolean mHasRecommendedSuggestions; 214 public Result(final String[] gatheredSuggestions, 215 final boolean hasRecommendedSuggestions) { 216 mSuggestions = gatheredSuggestions; 217 mHasRecommendedSuggestions = hasRecommendedSuggestions; 218 } 219 } 220 221 private final ArrayList<String> mSuggestions; 222 private final int[] mScores; 223 private final String mOriginalText; 224 private final float mSuggestionThreshold; 225 private final float mRecommendedThreshold; 226 private final int mMaxLength; 227 private int mLength = 0; 228 229 // The two following attributes are only ever filled if the requested max length 230 // is 0 (or less, which is treated the same). 231 private String mBestSuggestion = null; 232 private int mBestScore = Integer.MIN_VALUE; // As small as possible 233 234 SuggestionsGatherer(final String originalText, final float suggestionThreshold, 235 final float recommendedThreshold, final int maxLength) { 236 mOriginalText = originalText; 237 mSuggestionThreshold = suggestionThreshold; 238 mRecommendedThreshold = recommendedThreshold; 239 mMaxLength = maxLength; 240 mSuggestions = CollectionUtils.newArrayList(maxLength + 1); 241 mScores = new int[mMaxLength]; 242 } 243 244 synchronized public boolean addWord(char[] word, int[] spaceIndices, int wordOffset, 245 int wordLength, int score) { 246 final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score); 247 // binarySearch returns the index if the element exists, and -<insertion index> - 1 248 // if it doesn't. See documentation for binarySearch. 249 final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; 250 251 if (insertIndex == 0 && mLength >= mMaxLength) { 252 // In the future, we may want to keep track of the best suggestion score even if 253 // we are asked for 0 suggestions. In this case, we can use the following 254 // (tested) code to keep it: 255 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) 256 // then we need to keep track of the best suggestion in mBestScore and 257 // mBestSuggestion. This is so that we know whether the best suggestion makes 258 // the score cutoff, since we need to know that to return a meaningful 259 // looksLikeTypo. 260 // if (0 >= mMaxLength) { 261 // if (score > mBestScore) { 262 // mBestScore = score; 263 // mBestSuggestion = new String(word, wordOffset, wordLength); 264 // } 265 // } 266 return true; 267 } 268 if (insertIndex >= mMaxLength) { 269 // We found a suggestion, but its score is too weak to be kept considering 270 // the suggestion limit. 271 return true; 272 } 273 274 // Compute the normalized score and skip this word if it's normalized score does not 275 // make the threshold. 276 final String wordString = new String(word, wordOffset, wordLength); 277 final float normalizedScore = 278 BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score); 279 if (normalizedScore < mSuggestionThreshold) { 280 if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); 281 return true; 282 } 283 284 if (mLength < mMaxLength) { 285 final int copyLen = mLength - insertIndex; 286 ++mLength; 287 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); 288 mSuggestions.add(insertIndex, wordString); 289 } else { 290 System.arraycopy(mScores, 1, mScores, 0, insertIndex); 291 mSuggestions.add(insertIndex, wordString); 292 mSuggestions.remove(0); 293 } 294 mScores[insertIndex] = score; 295 296 return true; 297 } 298 299 public Result getResults(final int capitalizeType, final Locale locale) { 300 final String[] gatheredSuggestions; 301 final boolean hasRecommendedSuggestions; 302 if (0 == mLength) { 303 // TODO: the comment below describes what is intended, but in the practice 304 // mBestSuggestion is only ever set to null so it doesn't work. Fix this. 305 // Either we found no suggestions, or we found some BUT the max length was 0. 306 // If we found some mBestSuggestion will not be null. If it is null, then 307 // we found none, regardless of the max length. 308 if (null == mBestSuggestion) { 309 gatheredSuggestions = null; 310 hasRecommendedSuggestions = false; 311 } else { 312 gatheredSuggestions = EMPTY_STRING_ARRAY; 313 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 314 mOriginalText, mBestSuggestion, mBestScore); 315 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); 316 } 317 } else { 318 if (DBG) { 319 if (mLength != mSuggestions.size()) { 320 Log.e(TAG, "Suggestion size is not the same as stored mLength"); 321 } 322 for (int i = mLength - 1; i >= 0; --i) { 323 Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); 324 } 325 } 326 Collections.reverse(mSuggestions); 327 StringUtils.removeDupes(mSuggestions); 328 if (CAPITALIZE_ALL == capitalizeType) { 329 for (int i = 0; i < mSuggestions.size(); ++i) { 330 // get(i) returns a CharSequence which is actually a String so .toString() 331 // should return the same object. 332 mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); 333 } 334 } else if (CAPITALIZE_FIRST == capitalizeType) { 335 for (int i = 0; i < mSuggestions.size(); ++i) { 336 // Likewise 337 mSuggestions.set(i, StringUtils.toTitleCase( 338 mSuggestions.get(i).toString(), locale)); 339 } 340 } 341 // This returns a String[], while toArray() returns an Object[] which cannot be cast 342 // into a String[]. 343 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); 344 345 final int bestScore = mScores[mLength - 1]; 346 final String bestSuggestion = mSuggestions.get(0); 347 final float normalizedScore = 348 BinaryDictionary.calcNormalizedScore( 349 mOriginalText, bestSuggestion.toString(), bestScore); 350 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); 351 if (DBG) { 352 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 353 Log.i(TAG, "Normalized score = " + normalizedScore 354 + " (threshold " + mRecommendedThreshold 355 + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions); 356 } 357 } 358 return new Result(gatheredSuggestions, hasRecommendedSuggestions); 359 } 360 } 361 362 @Override 363 public boolean onUnbind(final Intent intent) { 364 closeAllDictionaries(); 365 return false; 366 } 367 368 private void closeAllDictionaries() { 369 final Map<String, DictionaryPool> oldPools = mDictionaryPools; 370 mDictionaryPools = CollectionUtils.newSynchronizedTreeMap(); 371 final Map<String, UserBinaryDictionary> oldUserDictionaries = mUserDictionaries; 372 mUserDictionaries = CollectionUtils.newSynchronizedTreeMap(); 373 new Thread("spellchecker_close_dicts") { 374 @Override 375 public void run() { 376 for (DictionaryPool pool : oldPools.values()) { 377 pool.close(); 378 } 379 for (Dictionary dict : oldUserDictionaries.values()) { 380 dict.close(); 381 } 382 synchronized (mUseContactsLock) { 383 if (null != mContactsDictionary) { 384 // The synchronously loaded contacts dictionary should have been in one 385 // or several pools, but it is shielded against multiple closing and it's 386 // safe to call it several times. 387 final ContactsBinaryDictionary dictToClose = mContactsDictionary; 388 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY 389 // is no longer needed 390 mContactsDictionary = null; 391 dictToClose.close(); 392 } 393 } 394 } 395 }.start(); 396 } 397 398 public DictionaryPool getDictionaryPool(final String locale) { 399 DictionaryPool pool = mDictionaryPools.get(locale); 400 if (null == pool) { 401 final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); 402 pool = new DictionaryPool(POOL_SIZE, this, localeObject); 403 mDictionaryPools.put(locale, pool); 404 } 405 return pool; 406 } 407 408 public DictAndProximity createDictAndProximity(final Locale locale) { 409 final int script = getScriptFromLocale(locale); 410 final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo( 411 SpellCheckerProximityInfo.getProximityForScript(script), 412 SpellCheckerProximityInfo.ROW_SIZE, 413 SpellCheckerProximityInfo.PROXIMITY_GRID_WIDTH, 414 SpellCheckerProximityInfo.PROXIMITY_GRID_HEIGHT); 415 final DictionaryCollection dictionaryCollection = 416 DictionaryFactory.createMainDictionaryFromManager(this, locale, 417 true /* useFullEditDistance */); 418 final String localeStr = locale.toString(); 419 UserBinaryDictionary userDictionary = mUserDictionaries.get(localeStr); 420 if (null == userDictionary) { 421 userDictionary = new SynchronouslyLoadedUserBinaryDictionary(this, localeStr, true); 422 mUserDictionaries.put(localeStr, userDictionary); 423 } 424 dictionaryCollection.addDictionary(userDictionary); 425 synchronized (mUseContactsLock) { 426 if (mUseContactsDictionary) { 427 if (null == mContactsDictionary) { 428 // TODO: use the right locale. We can't do it right now because the 429 // spell checker is reusing the contacts dictionary across sessions 430 // without regard for their locale, so we need to fix that first. 431 mContactsDictionary = new SynchronouslyLoadedContactsBinaryDictionary(this, 432 Locale.getDefault()); 433 } 434 } 435 dictionaryCollection.addDictionary(mContactsDictionary); 436 mDictionaryCollectionsList.add( 437 new WeakReference<DictionaryCollection>(dictionaryCollection)); 438 } 439 return new DictAndProximity(dictionaryCollection, proximityInfo); 440 } 441 442 // This method assumes the text is not empty or null. 443 public static int getCapitalizationType(String text) { 444 // If the first char is not uppercase, then the word is either all lower case, 445 // and in either case we return CAPITALIZE_NONE. 446 if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; 447 final int len = text.length(); 448 int capsCount = 1; 449 int letterCount = 1; 450 for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) { 451 if (1 != capsCount && letterCount != capsCount) break; 452 final int codePoint = text.codePointAt(i); 453 if (Character.isUpperCase(codePoint)) { 454 ++capsCount; 455 ++letterCount; 456 } else if (Character.isLetter(codePoint)) { 457 // We need to discount non-letters since they may not be upper-case, but may 458 // still be part of a word (e.g. single quote or dash, as in "IT'S" or "FULL-TIME") 459 ++letterCount; 460 } 461 } 462 // We know the first char is upper case. So we want to test if either every letter other 463 // than the first is lower case, or if they are all upper case. If the string is exactly 464 // one char long, then we will arrive here with letterCount 1, and this is correct, too. 465 if (1 == capsCount) return CAPITALIZE_FIRST; 466 return (letterCount == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 467 } 468} 469