AndroidSpellCheckerService.java revision bc464e2952e102219f0b977fc1e9140ad5bd03e4
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin.spellcheck; 18 19import android.content.Intent; 20import android.content.SharedPreferences; 21import android.preference.PreferenceManager; 22import android.service.textservice.SpellCheckerService; 23import android.util.Log; 24import android.view.textservice.SuggestionsInfo; 25 26import com.android.inputmethod.keyboard.ProximityInfo; 27import com.android.inputmethod.latin.BinaryDictionary; 28import com.android.inputmethod.latin.CollectionUtils; 29import com.android.inputmethod.latin.ContactsBinaryDictionary; 30import com.android.inputmethod.latin.Dictionary; 31import com.android.inputmethod.latin.DictionaryCollection; 32import com.android.inputmethod.latin.DictionaryFactory; 33import com.android.inputmethod.latin.LocaleUtils; 34import com.android.inputmethod.latin.R; 35import com.android.inputmethod.latin.StringUtils; 36import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary; 37import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary; 38import com.android.inputmethod.latin.UserBinaryDictionary; 39 40import java.lang.ref.WeakReference; 41import java.util.ArrayList; 42import java.util.Arrays; 43import java.util.Collections; 44import java.util.HashSet; 45import java.util.Iterator; 46import java.util.Locale; 47import java.util.Map; 48import java.util.TreeMap; 49 50/** 51 * Service for spell checking, using LatinIME's dictionaries and mechanisms. 52 */ 53public final class AndroidSpellCheckerService extends SpellCheckerService 54 implements SharedPreferences.OnSharedPreferenceChangeListener { 55 private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); 56 private static final boolean DBG = false; 57 private static final int POOL_SIZE = 2; 58 59 public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts"; 60 61 public static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 62 public static final int CAPITALIZE_FIRST = 1; // First only 63 public static final int CAPITALIZE_ALL = 2; // All caps 64 65 private final static String[] EMPTY_STRING_ARRAY = new String[0]; 66 private Map<String, DictionaryPool> mDictionaryPools = CollectionUtils.newSynchronizedTreeMap(); 67 private Map<String, UserBinaryDictionary> mUserDictionaries = 68 CollectionUtils.newSynchronizedTreeMap(); 69 private ContactsBinaryDictionary mContactsDictionary; 70 71 // The threshold for a candidate to be offered as a suggestion. 72 private float mSuggestionThreshold; 73 // The threshold for a suggestion to be considered "recommended". 74 private float mRecommendedThreshold; 75 // Whether to use the contacts dictionary 76 private boolean mUseContactsDictionary; 77 private final Object mUseContactsLock = new Object(); 78 79 private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList = 80 CollectionUtils.newHashSet(); 81 82 public static final int SCRIPT_LATIN = 0; 83 public static final int SCRIPT_CYRILLIC = 1; 84 public static final String SINGLE_QUOTE = "\u0027"; 85 public static final String APOSTROPHE = "\u2019"; 86 private static final TreeMap<String, Integer> mLanguageToScript; 87 static { 88 // List of the supported languages and their associated script. We won't check 89 // words written in another script than the selected script, because we know we 90 // don't have those in our dictionary so we will underline everything and we 91 // will never have any suggestions, so it makes no sense checking them, and this 92 // is done in {@link #shouldFilterOut}. Also, the script is used to choose which 93 // proximity to pass to the dictionary descent algorithm. 94 // IMPORTANT: this only contains languages - do not write countries in there. 95 // Only the language is searched from the map. 96 mLanguageToScript = CollectionUtils.newTreeMap(); 97 mLanguageToScript.put("en", SCRIPT_LATIN); 98 mLanguageToScript.put("fr", SCRIPT_LATIN); 99 mLanguageToScript.put("de", SCRIPT_LATIN); 100 mLanguageToScript.put("nl", SCRIPT_LATIN); 101 mLanguageToScript.put("cs", SCRIPT_LATIN); 102 mLanguageToScript.put("es", SCRIPT_LATIN); 103 mLanguageToScript.put("it", SCRIPT_LATIN); 104 mLanguageToScript.put("hr", SCRIPT_LATIN); 105 mLanguageToScript.put("pt", SCRIPT_LATIN); 106 mLanguageToScript.put("ru", SCRIPT_CYRILLIC); 107 // TODO: Make a persian proximity, and activate the Farsi subtype. 108 // mLanguageToScript.put("fa", SCRIPT_PERSIAN); 109 } 110 111 @Override public void onCreate() { 112 super.onCreate(); 113 mSuggestionThreshold = 114 Float.parseFloat(getString(R.string.spellchecker_suggestion_threshold_value)); 115 mRecommendedThreshold = 116 Float.parseFloat(getString(R.string.spellchecker_recommended_threshold_value)); 117 final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); 118 prefs.registerOnSharedPreferenceChangeListener(this); 119 onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY); 120 } 121 122 public static int getScriptFromLocale(final Locale locale) { 123 final Integer script = mLanguageToScript.get(locale.getLanguage()); 124 if (null == script) { 125 throw new RuntimeException("We have been called with an unsupported language: \"" 126 + locale.getLanguage() + "\". Framework bug?"); 127 } 128 return script; 129 } 130 131 @Override 132 public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) { 133 if (!PREF_USE_CONTACTS_KEY.equals(key)) return; 134 synchronized(mUseContactsLock) { 135 mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true); 136 if (mUseContactsDictionary) { 137 startUsingContactsDictionaryLocked(); 138 } else { 139 stopUsingContactsDictionaryLocked(); 140 } 141 } 142 } 143 144 private void startUsingContactsDictionaryLocked() { 145 if (null == mContactsDictionary) { 146 // TODO: use the right locale for each session 147 mContactsDictionary = 148 new SynchronouslyLoadedContactsBinaryDictionary(this, Locale.getDefault()); 149 } 150 final Iterator<WeakReference<DictionaryCollection>> iterator = 151 mDictionaryCollectionsList.iterator(); 152 while (iterator.hasNext()) { 153 final WeakReference<DictionaryCollection> dictRef = iterator.next(); 154 final DictionaryCollection dict = dictRef.get(); 155 if (null == dict) { 156 iterator.remove(); 157 } else { 158 dict.addDictionary(mContactsDictionary); 159 } 160 } 161 } 162 163 private void stopUsingContactsDictionaryLocked() { 164 if (null == mContactsDictionary) return; 165 final Dictionary contactsDict = mContactsDictionary; 166 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no longer needed 167 mContactsDictionary = null; 168 final Iterator<WeakReference<DictionaryCollection>> iterator = 169 mDictionaryCollectionsList.iterator(); 170 while (iterator.hasNext()) { 171 final WeakReference<DictionaryCollection> dictRef = iterator.next(); 172 final DictionaryCollection dict = dictRef.get(); 173 if (null == dict) { 174 iterator.remove(); 175 } else { 176 dict.removeDictionary(contactsDict); 177 } 178 } 179 contactsDict.close(); 180 } 181 182 @Override 183 public Session createSession() { 184 // Should not refer to AndroidSpellCheckerSession directly considering 185 // that AndroidSpellCheckerSession may be overlaid. 186 return AndroidSpellCheckerSessionFactory.newInstance(this); 187 } 188 189 public static SuggestionsInfo getNotInDictEmptySuggestions() { 190 return new SuggestionsInfo(0, EMPTY_STRING_ARRAY); 191 } 192 193 public static SuggestionsInfo getInDictEmptySuggestions() { 194 return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, 195 EMPTY_STRING_ARRAY); 196 } 197 198 public SuggestionsGatherer newSuggestionsGatherer(final String text, int maxLength) { 199 return new SuggestionsGatherer( 200 text, mSuggestionThreshold, mRecommendedThreshold, maxLength); 201 } 202 203 // TODO: remove this class and replace it by storage local to the session. 204 public static final class SuggestionsGatherer { 205 public static final class Result { 206 public final String[] mSuggestions; 207 public final boolean mHasRecommendedSuggestions; 208 public Result(final String[] gatheredSuggestions, 209 final boolean hasRecommendedSuggestions) { 210 mSuggestions = gatheredSuggestions; 211 mHasRecommendedSuggestions = hasRecommendedSuggestions; 212 } 213 } 214 215 private final ArrayList<String> mSuggestions; 216 private final int[] mScores; 217 private final String mOriginalText; 218 private final float mSuggestionThreshold; 219 private final float mRecommendedThreshold; 220 private final int mMaxLength; 221 private int mLength = 0; 222 223 // The two following attributes are only ever filled if the requested max length 224 // is 0 (or less, which is treated the same). 225 private String mBestSuggestion = null; 226 private int mBestScore = Integer.MIN_VALUE; // As small as possible 227 228 SuggestionsGatherer(final String originalText, final float suggestionThreshold, 229 final float recommendedThreshold, final int maxLength) { 230 mOriginalText = originalText; 231 mSuggestionThreshold = suggestionThreshold; 232 mRecommendedThreshold = recommendedThreshold; 233 mMaxLength = maxLength; 234 mSuggestions = CollectionUtils.newArrayList(maxLength + 1); 235 mScores = new int[mMaxLength]; 236 } 237 238 synchronized public boolean addWord(char[] word, int[] spaceIndices, int wordOffset, 239 int wordLength, int score) { 240 final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score); 241 // binarySearch returns the index if the element exists, and -<insertion index> - 1 242 // if it doesn't. See documentation for binarySearch. 243 final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; 244 245 if (insertIndex == 0 && mLength >= mMaxLength) { 246 // In the future, we may want to keep track of the best suggestion score even if 247 // we are asked for 0 suggestions. In this case, we can use the following 248 // (tested) code to keep it: 249 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) 250 // then we need to keep track of the best suggestion in mBestScore and 251 // mBestSuggestion. This is so that we know whether the best suggestion makes 252 // the score cutoff, since we need to know that to return a meaningful 253 // looksLikeTypo. 254 // if (0 >= mMaxLength) { 255 // if (score > mBestScore) { 256 // mBestScore = score; 257 // mBestSuggestion = new String(word, wordOffset, wordLength); 258 // } 259 // } 260 return true; 261 } 262 if (insertIndex >= mMaxLength) { 263 // We found a suggestion, but its score is too weak to be kept considering 264 // the suggestion limit. 265 return true; 266 } 267 268 // Compute the normalized score and skip this word if it's normalized score does not 269 // make the threshold. 270 final String wordString = new String(word, wordOffset, wordLength); 271 final float normalizedScore = 272 BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score); 273 if (normalizedScore < mSuggestionThreshold) { 274 if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); 275 return true; 276 } 277 278 if (mLength < mMaxLength) { 279 final int copyLen = mLength - insertIndex; 280 ++mLength; 281 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); 282 mSuggestions.add(insertIndex, wordString); 283 } else { 284 System.arraycopy(mScores, 1, mScores, 0, insertIndex); 285 mSuggestions.add(insertIndex, wordString); 286 mSuggestions.remove(0); 287 } 288 mScores[insertIndex] = score; 289 290 return true; 291 } 292 293 public Result getResults(final int capitalizeType, final Locale locale) { 294 final String[] gatheredSuggestions; 295 final boolean hasRecommendedSuggestions; 296 if (0 == mLength) { 297 // Either we found no suggestions, or we found some BUT the max length was 0. 298 // If we found some mBestSuggestion will not be null. If it is null, then 299 // we found none, regardless of the max length. 300 if (null == mBestSuggestion) { 301 gatheredSuggestions = null; 302 hasRecommendedSuggestions = false; 303 } else { 304 gatheredSuggestions = EMPTY_STRING_ARRAY; 305 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 306 mOriginalText, mBestSuggestion, mBestScore); 307 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); 308 } 309 } else { 310 if (DBG) { 311 if (mLength != mSuggestions.size()) { 312 Log.e(TAG, "Suggestion size is not the same as stored mLength"); 313 } 314 for (int i = mLength - 1; i >= 0; --i) { 315 Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); 316 } 317 } 318 Collections.reverse(mSuggestions); 319 StringUtils.removeDupes(mSuggestions); 320 if (CAPITALIZE_ALL == capitalizeType) { 321 for (int i = 0; i < mSuggestions.size(); ++i) { 322 // get(i) returns a CharSequence which is actually a String so .toString() 323 // should return the same object. 324 mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); 325 } 326 } else if (CAPITALIZE_FIRST == capitalizeType) { 327 for (int i = 0; i < mSuggestions.size(); ++i) { 328 // Likewise 329 mSuggestions.set(i, StringUtils.toTitleCase( 330 mSuggestions.get(i).toString(), locale)); 331 } 332 } 333 // This returns a String[], while toArray() returns an Object[] which cannot be cast 334 // into a String[]. 335 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); 336 337 final int bestScore = mScores[mLength - 1]; 338 final String bestSuggestion = mSuggestions.get(0); 339 final float normalizedScore = 340 BinaryDictionary.calcNormalizedScore( 341 mOriginalText, bestSuggestion.toString(), bestScore); 342 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); 343 if (DBG) { 344 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 345 Log.i(TAG, "Normalized score = " + normalizedScore 346 + " (threshold " + mRecommendedThreshold 347 + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions); 348 } 349 } 350 return new Result(gatheredSuggestions, hasRecommendedSuggestions); 351 } 352 } 353 354 @Override 355 public boolean onUnbind(final Intent intent) { 356 closeAllDictionaries(); 357 return false; 358 } 359 360 private void closeAllDictionaries() { 361 final Map<String, DictionaryPool> oldPools = mDictionaryPools; 362 mDictionaryPools = CollectionUtils.newSynchronizedTreeMap(); 363 final Map<String, UserBinaryDictionary> oldUserDictionaries = mUserDictionaries; 364 mUserDictionaries = CollectionUtils.newSynchronizedTreeMap(); 365 new Thread("spellchecker_close_dicts") { 366 @Override 367 public void run() { 368 for (DictionaryPool pool : oldPools.values()) { 369 pool.close(); 370 } 371 for (Dictionary dict : oldUserDictionaries.values()) { 372 dict.close(); 373 } 374 synchronized (mUseContactsLock) { 375 if (null != mContactsDictionary) { 376 // The synchronously loaded contacts dictionary should have been in one 377 // or several pools, but it is shielded against multiple closing and it's 378 // safe to call it several times. 379 final ContactsBinaryDictionary dictToClose = mContactsDictionary; 380 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY 381 // is no longer needed 382 mContactsDictionary = null; 383 dictToClose.close(); 384 } 385 } 386 } 387 }.start(); 388 } 389 390 public DictionaryPool getDictionaryPool(final String locale) { 391 DictionaryPool pool = mDictionaryPools.get(locale); 392 if (null == pool) { 393 final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); 394 pool = new DictionaryPool(POOL_SIZE, this, localeObject); 395 mDictionaryPools.put(locale, pool); 396 } 397 return pool; 398 } 399 400 public DictAndProximity createDictAndProximity(final Locale locale) { 401 final int script = getScriptFromLocale(locale); 402 final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo( 403 SpellCheckerProximityInfo.getProximityForScript(script), 404 SpellCheckerProximityInfo.ROW_SIZE, 405 SpellCheckerProximityInfo.PROXIMITY_GRID_WIDTH, 406 SpellCheckerProximityInfo.PROXIMITY_GRID_HEIGHT); 407 final DictionaryCollection dictionaryCollection = 408 DictionaryFactory.createMainDictionaryFromManager(this, locale, 409 true /* useFullEditDistance */); 410 final String localeStr = locale.toString(); 411 UserBinaryDictionary userDictionary = mUserDictionaries.get(localeStr); 412 if (null == userDictionary) { 413 userDictionary = new SynchronouslyLoadedUserBinaryDictionary(this, localeStr, true); 414 mUserDictionaries.put(localeStr, userDictionary); 415 } 416 dictionaryCollection.addDictionary(userDictionary); 417 synchronized (mUseContactsLock) { 418 if (mUseContactsDictionary) { 419 if (null == mContactsDictionary) { 420 // TODO: use the right locale. We can't do it right now because the 421 // spell checker is reusing the contacts dictionary across sessions 422 // without regard for their locale, so we need to fix that first. 423 mContactsDictionary = new SynchronouslyLoadedContactsBinaryDictionary(this, 424 Locale.getDefault()); 425 } 426 } 427 dictionaryCollection.addDictionary(mContactsDictionary); 428 mDictionaryCollectionsList.add( 429 new WeakReference<DictionaryCollection>(dictionaryCollection)); 430 } 431 return new DictAndProximity(dictionaryCollection, proximityInfo); 432 } 433 434 // This method assumes the text is not empty or null. 435 public static int getCapitalizationType(String text) { 436 // If the first char is not uppercase, then the word is either all lower case, 437 // and in either case we return CAPITALIZE_NONE. 438 if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; 439 final int len = text.length(); 440 int capsCount = 1; 441 for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) { 442 if (1 != capsCount && i != capsCount) break; 443 if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; 444 } 445 // We know the first char is upper case. So we want to test if either everything 446 // else is lower case, or if everything else is upper case. If the string is 447 // exactly one char long, then we will arrive here with capsCount 1, and this is 448 // correct, too. 449 if (1 == capsCount) return CAPITALIZE_FIRST; 450 return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 451 } 452} 453