AndroidSpellCheckerService.java revision 88fa53b840686bb428b932eed7dd38162ae902c2
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin.spellcheck; 18 19import android.content.Intent; 20import android.content.res.Resources; 21import android.service.textservice.SpellCheckerService; 22import android.service.textservice.SpellCheckerService.Session; 23import android.util.Log; 24import android.view.textservice.SuggestionsInfo; 25import android.view.textservice.TextInfo; 26import android.text.TextUtils; 27 28import com.android.inputmethod.compat.ArraysCompatUtils; 29import com.android.inputmethod.keyboard.Key; 30import com.android.inputmethod.keyboard.ProximityInfo; 31import com.android.inputmethod.latin.Dictionary; 32import com.android.inputmethod.latin.Dictionary.DataType; 33import com.android.inputmethod.latin.Dictionary.WordCallback; 34import com.android.inputmethod.latin.DictionaryCollection; 35import com.android.inputmethod.latin.DictionaryFactory; 36import com.android.inputmethod.latin.LocaleUtils; 37import com.android.inputmethod.latin.R; 38import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary; 39import com.android.inputmethod.latin.UserDictionary; 40import com.android.inputmethod.latin.Utils; 41import com.android.inputmethod.latin.WordComposer; 42 43import java.util.ArrayList; 44import java.util.Arrays; 45import java.util.Collections; 46import java.util.Locale; 47import java.util.Map; 48import java.util.TreeMap; 49 50/** 51 * Service for spell checking, using LatinIME's dictionaries and mechanisms. 52 */ 53public class AndroidSpellCheckerService extends SpellCheckerService { 54 private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); 55 private static final boolean DBG = false; 56 private static final int POOL_SIZE = 2; 57 58 private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 59 private static final int CAPITALIZE_FIRST = 1; // First only 60 private static final int CAPITALIZE_ALL = 2; // All caps 61 62 private final static String[] EMPTY_STRING_ARRAY = new String[0]; 63 private final static SuggestionsInfo EMPTY_SUGGESTIONS_INFO = 64 new SuggestionsInfo(0, EMPTY_STRING_ARRAY); 65 private Map<String, DictionaryPool> mDictionaryPools = 66 Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 67 private Map<String, Dictionary> mUserDictionaries = 68 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 69 70 private double mTypoThreshold; 71 72 @Override public void onCreate() { 73 super.onCreate(); 74 mTypoThreshold = Double.parseDouble(getString(R.string.spellchecker_typo_threshold_value)); 75 } 76 77 @Override 78 public Session createSession() { 79 return new AndroidSpellCheckerSession(this); 80 } 81 82 private static class SuggestionsGatherer implements WordCallback { 83 public static class Result { 84 public final String[] mSuggestions; 85 public final boolean mLooksLikeTypo; 86 public Result(final String[] gatheredSuggestions, final boolean looksLikeTypo) { 87 mSuggestions = gatheredSuggestions; 88 mLooksLikeTypo = looksLikeTypo; 89 } 90 } 91 92 private final int DEFAULT_SUGGESTION_LENGTH = 16; 93 private final ArrayList<CharSequence> mSuggestions; 94 private final int[] mScores; 95 private final int mMaxLength; 96 private int mLength = 0; 97 98 // The two following attributes are only ever filled if the requested max length 99 // is 0 (or less, which is treated the same). 100 private String mBestSuggestion = null; 101 private int mBestScore = Integer.MIN_VALUE; // As small as possible 102 103 SuggestionsGatherer(final int maxLength) { 104 mMaxLength = maxLength; 105 mSuggestions = new ArrayList<CharSequence>(maxLength + 1); 106 mScores = new int[mMaxLength]; 107 } 108 109 @Override 110 synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, 111 int dicTypeId, DataType dataType) { 112 final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score); 113 // binarySearch returns the index if the element exists, and -<insertion index> - 1 114 // if it doesn't. See documentation for binarySearch. 115 final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; 116 117 if (mLength < mMaxLength) { 118 final int copyLen = mLength - insertIndex; 119 ++mLength; 120 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); 121 mSuggestions.add(insertIndex, new String(word, wordOffset, wordLength)); 122 } else { 123 if (insertIndex == 0) { 124 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) 125 // then we need to keep track of the best suggestion in mBestScore and 126 // mBestSuggestion. This is so that we know whether the best suggestion makes 127 // the score cutoff, since we need to know that to return a meaningful 128 // looksLikeTypo. 129 if (0 >= mMaxLength) { 130 if (score > mBestScore) { 131 mBestScore = score; 132 mBestSuggestion = new String(word, wordOffset, wordLength); 133 } 134 } 135 return true; 136 } 137 System.arraycopy(mScores, 1, mScores, 0, insertIndex); 138 mSuggestions.add(insertIndex, new String(word, wordOffset, wordLength)); 139 mSuggestions.remove(0); 140 } 141 mScores[insertIndex] = score; 142 143 return true; 144 } 145 146 public Result getResults(final CharSequence originalText, final double threshold, 147 final int capitalizeType, final Locale locale) { 148 final String[] gatheredSuggestions; 149 final boolean looksLikeTypo; 150 if (0 == mLength) { 151 // Either we found no suggestions, or we found some BUT the max length was 0. 152 // If we found some mBestSuggestion will not be null. If it is null, then 153 // we found none, regardless of the max length. 154 if (null == mBestSuggestion) { 155 gatheredSuggestions = null; 156 looksLikeTypo = false; 157 } else { 158 gatheredSuggestions = EMPTY_STRING_ARRAY; 159 final double normalizedScore = 160 Utils.calcNormalizedScore(originalText, mBestSuggestion, mBestScore); 161 looksLikeTypo = (normalizedScore > threshold); 162 } 163 } else { 164 if (DBG) { 165 if (mLength != mSuggestions.size()) { 166 Log.e(TAG, "Suggestion size is not the same as stored mLength"); 167 } 168 for (int i = mLength - 1; i >= 0; --i) { 169 Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); 170 } 171 } 172 Collections.reverse(mSuggestions); 173 Utils.removeDupes(mSuggestions); 174 if (CAPITALIZE_ALL == capitalizeType) { 175 for (int i = 0; i < mSuggestions.size(); ++i) { 176 // get(i) returns a CharSequence which is actually a String so .toString() 177 // should return the same object. 178 mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); 179 } 180 } else if (CAPITALIZE_FIRST == capitalizeType) { 181 for (int i = 0; i < mSuggestions.size(); ++i) { 182 // Likewise 183 mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(), 184 locale)); 185 } 186 } 187 // This returns a String[], while toArray() returns an Object[] which cannot be cast 188 // into a String[]. 189 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); 190 191 final int bestScore = mScores[mLength - 1]; 192 final CharSequence bestSuggestion = mSuggestions.get(0); 193 final double normalizedScore = 194 Utils.calcNormalizedScore(originalText, bestSuggestion, bestScore); 195 looksLikeTypo = (normalizedScore > threshold); 196 if (DBG) { 197 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 198 Log.i(TAG, "Normalized score = " + normalizedScore + " (threshold " + threshold 199 + ") => looksLikeTypo = " + looksLikeTypo); 200 } 201 } 202 return new Result(gatheredSuggestions, looksLikeTypo); 203 } 204 } 205 206 @Override 207 public boolean onUnbind(final Intent intent) { 208 final Map<String, DictionaryPool> oldPools = mDictionaryPools; 209 mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 210 final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries; 211 mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 212 for (DictionaryPool pool : oldPools.values()) { 213 pool.close(); 214 } 215 for (Dictionary dict : oldUserDictionaries.values()) { 216 dict.close(); 217 } 218 return false; 219 } 220 221 private DictionaryPool getDictionaryPool(final String locale) { 222 DictionaryPool pool = mDictionaryPools.get(locale); 223 if (null == pool) { 224 final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); 225 pool = new DictionaryPool(POOL_SIZE, this, localeObject); 226 mDictionaryPools.put(locale, pool); 227 } 228 return pool; 229 } 230 231 public DictAndProximity createDictAndProximity(final Locale locale) { 232 final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo(); 233 final Resources resources = getResources(); 234 final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources); 235 final DictionaryCollection dictionaryCollection = 236 DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId); 237 final String localeStr = locale.toString(); 238 Dictionary userDict = mUserDictionaries.get(localeStr); 239 if (null == userDict) { 240 userDict = new SynchronouslyLoadedUserDictionary(this, localeStr); 241 mUserDictionaries.put(localeStr, userDict); 242 } 243 dictionaryCollection.addDictionary(userDict); 244 return new DictAndProximity(dictionaryCollection, proximityInfo); 245 } 246 247 // This method assumes the text is not empty or null. 248 private static int getCapitalizationType(String text) { 249 // If the first char is not uppercase, then the word is either all lower case, 250 // and in either case we return CAPITALIZE_NONE. 251 if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; 252 final int len = text.codePointCount(0, text.length()); 253 int capsCount = 1; 254 for (int i = 1; i < len; ++i) { 255 if (1 != capsCount && i != capsCount) break; 256 if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; 257 } 258 // We know the first char is upper case. So we want to test if either everything 259 // else is lower case, or if everything else is upper case. If the string is 260 // exactly one char long, then we will arrive here with capsCount 1, and this is 261 // correct, too. 262 if (1 == capsCount) return CAPITALIZE_FIRST; 263 return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 264 } 265 266 private static class AndroidSpellCheckerSession extends Session { 267 // Immutable, but need the locale which is not available in the constructor yet 268 private DictionaryPool mDictionaryPool; 269 // Likewise 270 private Locale mLocale; 271 272 private final AndroidSpellCheckerService mService; 273 274 AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { 275 mService = service; 276 } 277 278 @Override 279 public void onCreate() { 280 final String localeString = getLocale(); 281 mDictionaryPool = mService.getDictionaryPool(localeString); 282 mLocale = LocaleUtils.constructLocaleFromString(localeString); 283 } 284 285 /** 286 * Finds out whether a particular string should be filtered out of spell checking. 287 * 288 * This will loosely match URLs, numbers, symbols. 289 * 290 * @param text the string to evaluate. 291 * @return true if we should filter this text out, false otherwise 292 */ 293 private boolean shouldFilterOut(final String text) { 294 if (TextUtils.isEmpty(text) || text.length() <= 1) return true; 295 296 // TODO: check if an equivalent processing can't be done more quickly with a 297 // compiled regexp. 298 // Filter by first letter 299 final int firstCodePoint = text.codePointAt(0); 300 // Filter out words that don't start with a letter or an apostrophe 301 if (!Character.isLetter(firstCodePoint) 302 && '\'' != firstCodePoint) return true; 303 304 // Filter contents 305 final int length = text.length(); 306 int letterCount = 0; 307 for (int i = 0; i < length; ++i) { 308 final int codePoint = text.codePointAt(i); 309 // Any word containing a '@' is probably an e-mail address 310 // Any word containing a '/' is probably either an ad-hoc combination of two 311 // words or a URI - in either case we don't want to spell check that 312 if ('@' == codePoint 313 || '/' == codePoint) return true; 314 if (Character.isLetter(codePoint)) ++letterCount; 315 } 316 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 317 // in this word are letters 318 return (letterCount * 4 < length * 3); 319 } 320 321 // Note : this must be reentrant 322 /** 323 * Gets a list of suggestions for a specific string. This returns a list of possible 324 * corrections for the text passed as an argument. It may split or group words, and 325 * even perform grammatical analysis. 326 */ 327 @Override 328 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 329 final int suggestionsLimit) { 330 final String text = textInfo.getText(); 331 332 if (shouldFilterOut(text)) return EMPTY_SUGGESTIONS_INFO; 333 334 final SuggestionsGatherer suggestionsGatherer = 335 new SuggestionsGatherer(suggestionsLimit); 336 final WordComposer composer = new WordComposer(); 337 final int length = text.length(); 338 for (int i = 0; i < length; ++i) { 339 final int character = text.codePointAt(i); 340 final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character); 341 final int[] proximities; 342 if (-1 == proximityIndex) { 343 proximities = new int[] { character }; 344 } else { 345 proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY, 346 proximityIndex, proximityIndex + SpellCheckerProximityInfo.ROW_SIZE); 347 } 348 composer.add(character, proximities, 349 WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE); 350 } 351 352 final int capitalizeType = getCapitalizationType(text); 353 boolean isInDict = true; 354 try { 355 final DictAndProximity dictInfo = mDictionaryPool.take(); 356 dictInfo.mDictionary.getWords(composer, suggestionsGatherer, 357 dictInfo.mProximityInfo); 358 isInDict = dictInfo.mDictionary.isValidWord(text); 359 if (!isInDict && CAPITALIZE_NONE != capitalizeType) { 360 // We want to test the word again if it's all caps or first caps only. 361 // If it's fully down, we already tested it, if it's mixed case, we don't 362 // want to test a lowercase version of it. 363 isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); 364 } 365 if (!mDictionaryPool.offer(dictInfo)) { 366 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 367 } 368 } catch (InterruptedException e) { 369 // I don't think this can happen. 370 return EMPTY_SUGGESTIONS_INFO; 371 } 372 373 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(text, 374 mService.mTypoThreshold, capitalizeType, mLocale); 375 376 if (DBG) { 377 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 378 + suggestionsLimit); 379 Log.i(TAG, "IsInDict = " + result.mLooksLikeTypo); 380 Log.i(TAG, "LooksLikeTypo = " + result.mLooksLikeTypo); 381 for (String suggestion : result.mSuggestions) { 382 Log.i(TAG, suggestion); 383 } 384 } 385 386 final int flags = 387 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY : 0) 388 | (result.mLooksLikeTypo 389 ? SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO : 0); 390 return new SuggestionsInfo(flags, result.mSuggestions); 391 } 392 } 393} 394