Suggest.java revision 251bb70f087c396afc504a8d0f0221c890e88571
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;
import android.text.TextUtils;
import android.util.Log;

import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * This class loads a dictionary and provides a list of suggestions for a given sequence of
 * characters. This includes corrections and completions.
 *
 * <p>The main dictionary is loaded on a background thread (see {@link #resetMainDict}), so
 * callers must not cache the result of {@link #hasMainDictionary()} or
 * {@link #getMainDictionary()}.
 */
public class Suggest {
    public static final String TAG = Suggest.class.getSimpleName();

    public static final int APPROX_MAX_WORD_LENGTH = 32;

    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;

    private static final boolean DBG = LatinImeLogger.sDBG;

    // Written by the "InitializeBinaryDictionary" thread and by close(), read by the callers of
    // hasMainDictionary()/getMainDictionary(): volatile so that writes are visible across threads.
    private volatile Dictionary mMainDictionary;
    private ContactsBinaryDictionary mContactsDict;
    private WhitelistDictionary mWhiteListDictionary;
    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
            new ConcurrentHashMap<String, Dictionary>();

    public static final int MAX_SUGGESTIONS = 18;

    private float mAutoCorrectionThreshold;

    // Locale used for upper-, lower- and title-casing words
    final private Locale mLocale;

    private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;

    /**
     * Creates a suggestion engine for the given locale. The main dictionary is loaded
     * asynchronously; the whitelist dictionary is loaded before this constructor returns.
     *
     * @param context the context handed to the dictionary factories
     * @param locale the locale of the dictionaries, also used for case conversions
     */
    public Suggest(final Context context, final Locale locale) {
        // Assign the final field before the loader thread is started.
        mLocale = locale;
        initAsynchronously(context, locale);
    }

    /* package for test */ Suggest(final Context context, final File dictionary,
            final long startOffset, final long length, final Locale locale) {
        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary,
                startOffset, length, false /* useFullEditDistance */, locale);
        mLocale = locale;
        mMainDictionary = mainDict;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
        initWhitelistAndAutocorrectAndPool(context, locale);
    }

    /** Creates the whitelist dictionary and registers it under {@code TYPE_WHITELIST}. */
    private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) {
        mWhiteListDictionary = new WhitelistDictionary(context, locale);
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_WHITELIST, mWhiteListDictionary);
    }

    /** Kicks off the background load of the main dictionary, then loads the whitelist. */
    private void initAsynchronously(final Context context, final Locale locale) {
        resetMainDict(context, locale);

        // TODO: read the whitelist and init the pool asynchronously too.
        // initPool should be done asynchronously now that the pool is thread-safe.
        initWhitelistAndAutocorrectAndPool(context, locale);
    }

    /**
     * Registers {@code dict} under {@code key}, or removes the entry when {@code dict} is null.
     * The dictionary previously stored under the key, if any and if different, is closed.
     */
    private static void addOrReplaceDictionary(
            final ConcurrentHashMap<String, Dictionary> dictionaries,
            final String key, final Dictionary dict) {
        final Dictionary oldDict = (dict == null)
                ? dictionaries.remove(key)
                : dictionaries.put(key, dict);
        if (oldDict != null && dict != oldDict) {
            oldDict.close();
        }
    }

    /**
     * Drops the current main dictionary and starts a background thread that creates a new one
     * and registers it under {@code TYPE_MAIN}. Until that thread has finished,
     * {@link #hasMainDictionary()} returns false.
     */
    public void resetMainDict(final Context context, final Locale locale) {
        mMainDictionary = null;
        new Thread("InitializeBinaryDictionary") {
            @Override
            public void run() {
                final DictionaryCollection newMainDict =
                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
                addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
                mMainDictionary = newMainDict;
            }
        }.start();
    }

    // The main dictionary could have been loaded asynchronously. Don't cache the return value
    // of this method.
    public boolean hasMainDictionary() {
        // Read the field exactly once: it can be reset to null by another thread (loader thread,
        // close()) between a null check and a second read.
        final Dictionary mainDictionary = mMainDictionary;
        return null != mainDictionary && mainDictionary.isInitialized();
    }

    /** Returns the main dictionary, or null if it is not loaded (yet) or has been closed. */
    public Dictionary getMainDictionary() {
        return mMainDictionary;
    }

    /** Returns the contacts dictionary last passed to {@link #setContactsDictionary}, if any. */
    public ContactsBinaryDictionary getContactsDictionary() {
        return mContactsDict;
    }

    /** Returns the live map of registered dictionaries, keyed by dictionary type. */
    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
        return mDictionaries;
    }

    public static int getApproxMaxWordLength() {
        return APPROX_MAX_WORD_LENGTH;
    }

    /**
     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
     * before the main dictionary, if set. This refers to the system-managed user dictionary.
     */
    public void setUserDictionary(UserBinaryDictionary userDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
    }

    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
     * will be used.
     */
    public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) {
        mContactsDict = contactsDictionary;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
    }

    /**
     * Sets the user history dictionary, or removes it when null is passed.
     */
    public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    }

    /**
     * Sets the threshold handed to the auto-correction computation. A value that is not
     * strictly positive also disables the edit-distance safety net.
     */
    public void setAutoCorrectionThreshold(float threshold) {
        mAutoCorrectionThreshold = threshold;
    }

    /**
     * Builds the list of suggestions for the word being composed.
     *
     * <p>With at most one character typed, only bigrams following {@code prevWordForBigram} are
     * looked up, and only when correction is enabled. With more characters, every registered
     * dictionary except the user history and whitelist ones is queried for words.
     *
     * @param wordComposer the word currently being composed
     * @param prevWordForBigram the previous word, may be null or empty
     * @param proximityInfo the keyboard proximity information passed to the dictionaries
     * @param isCorrectionEnabled whether an auto-correction should be computed
     * @param isPrediction whether this is a prediction request; in that case the typed word is
     *        not added to the list and all typed-word related flags of the result are false
     * @return the suggestions together with the auto-correction flags
     */
    // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
    public SuggestedWords getSuggestedWords(
            final WordComposer wordComposer, CharSequence prevWordForBigram,
            final ProximityInfo proximityInfo, final boolean isCorrectionEnabled,
            final boolean isPrediction) {
        LatinImeLogger.onStartSuggestion(prevWordForBigram);
        final boolean isFirstCharCapitalized =
                !isPrediction && wordComposer.isFirstCharCapitalized();
        final boolean isAllUpperCase = !isPrediction && wordComposer.isAllUpperCase();
        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        final String typedWord = wordComposer.getTypedWord();
        final String consideredWord = trailingSingleQuotesCount > 0
                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
                : typedWord;
        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);

        if (wordComposer.size() <= 1 && isCorrectionEnabled) {
            // At first character typed, search only the bigrams
            if (!TextUtils.isEmpty(prevWordForBigram)) {
                final CharSequence lowerPrevWord;
                if (StringUtils.hasUpperCase(prevWordForBigram)) {
                    // Lower-case with the dictionary locale rather than the default locale, so
                    // that e.g. a Turkish system locale does not turn "I" into a dotless i.
                    lowerPrevWord = prevWordForBigram.toString().toLowerCase(mLocale);
                } else {
                    lowerPrevWord = null;
                }
                // Iterate over the values directly: a keySet()/get() pair could observe a null
                // value if a dictionary is removed from the map in the meantime.
                for (final Dictionary dictionary : mDictionaries.values()) {
                    suggestionsSet.addAll(dictionary.getBigrams(wordComposer, prevWordForBigram));
                    if (null != lowerPrevWord) {
                        suggestionsSet.addAll(dictionary.getBigrams(wordComposer, lowerPrevWord));
                    }
                }
            }
        } else if (wordComposer.size() > 1) {
            final WordComposer wordComposerForLookup;
            if (trailingSingleQuotesCount > 0) {
                // Look up the word without its trailing single quotes; they are appended again
                // to each suggestion afterwards.
                wordComposerForLookup = new WordComposer(wordComposer);
                for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
                    wordComposerForLookup.deleteLast();
                }
            } else {
                wordComposerForLookup = wordComposer;
            }
            // At second character typed, search the unigrams (scores being affected by bigrams)
            for (final Map.Entry<String, Dictionary> entry : mDictionaries.entrySet()) {
                final String key = entry.getKey();
                // Skip the user history dictionary and the whitelist dictionary for lookup
                if (key.equals(Dictionary.TYPE_USER_HISTORY)
                        || key.equals(Dictionary.TYPE_WHITELIST)) {
                    continue;
                }
                suggestionsSet.addAll(entry.getValue().getWords(
                        wordComposerForLookup, prevWordForBigram, proximityInfo));
            }
        }

        // Apply the capitalization and trailing quotes of the typed word to every suggestion.
        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                new ArrayList<SuggestedWordInfo>(suggestionsSet);
        for (int i = 0; i < suggestionsContainer.size(); ++i) {
            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
            final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(wordInfo,
                    mLocale, isAllUpperCase, isFirstCharCapitalized, trailingSingleQuotesCount);
            suggestionsContainer.set(i, transformedWordInfo);
            LatinImeLogger.onAddSuggestedWord(transformedWordInfo.mWord.toString(),
                    transformedWordInfo.mSourceDict);
        }

        final CharSequence whitelistedWord =
                mWhiteListDictionary.getWhitelistedWord(consideredWord);

        final boolean hasAutoCorrection;
        if (isCorrectionEnabled) {
            final SuggestedWordInfo bestSuggestion = suggestionsSet.isEmpty()
                    ? null : suggestionsSet.first();
            final CharSequence autoCorrection =
                    AutoCorrection.computeAutoCorrectionWord(mDictionaries, wordComposer,
                            bestSuggestion, consideredWord, mAutoCorrectionThreshold,
                            whitelistedWord);
            hasAutoCorrection = (null != autoCorrection);
        } else {
            hasAutoCorrection = false;
        }

        if (whitelistedWord != null) {
            final SuggestedWordInfo whitelistSuggestion;
            whitelistSuggestion = new SuggestedWordInfo(whitelistedWord,
                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_WHITELIST,
                    Dictionary.TYPE_WHITELIST);
            suggestionsContainer.add(0, getTransformedSuggestedWordInfo(whitelistSuggestion,
                    mLocale, isAllUpperCase, isFirstCharCapitalized, trailingSingleQuotesCount));
        }

        if (!isPrediction) {
            // The typed word always comes first, ahead of a possible whitelisted word.
            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
                    Dictionary.TYPE_USER_TYPED));
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        final ArrayList<SuggestedWordInfo> suggestionsList;
        if (DBG && !suggestionsContainer.isEmpty()) {
            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
        } else {
            suggestionsList = suggestionsContainer;
        }

        // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid"
        // but still autocorrected from - in the case the whitelist only capitalizes the word.
        // The whitelist should be case-insensitive, so it's not possible to be consistent with
        // a boolean flag. Right now this is handled with a slight hack in
        // WhitelistDictionary#shouldForciblyAutoCorrectFrom.
        final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected(
                getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized())
        // If we don't have a main dictionary, we never want to auto-correct. The reason for this
        // is, the user may have a contact whose name happens to match a valid word in their
        // language, and it will unexpectedly auto-correct. For example, if the user types in
        // English with no dictionary and has a "Will" in their contact list, "will" would
        // always auto-correct to "Will" which is unwanted. Hence, no main dict => no auto-correct.
                && hasMainDictionary();

        boolean autoCorrectionAvailable = hasAutoCorrection;
        if (isCorrectionEnabled) {
            autoCorrectionAvailable |= !allowsToBeAutoCorrected;
        }
        // Don't auto-correct words with multiple capital letter
        autoCorrectionAvailable &= !wordComposer.isMostlyCaps();
        autoCorrectionAvailable &= !wordComposer.isResumed();
        if (allowsToBeAutoCorrected && suggestionsList.size() > 1 && mAutoCorrectionThreshold > 0
                && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord,
                        suggestionsList.get(1).mWord)) {
            autoCorrectionAvailable = false;
        }
        return new SuggestedWords(suggestionsList,
                !isPrediction && !allowsToBeAutoCorrected /* typedWordValid */,
                !isPrediction && autoCorrectionAvailable /* hasAutoCorrectionCandidate */,
                !isPrediction && allowsToBeAutoCorrected /* allowsToBeAutoCorrected */,
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                isPrediction);
    }

    /**
     * Returns a copy of {@code suggestions} in which every entry carries a debug string: "+"
     * for the first entry, and the score (plus the normalized score when it is positive) for
     * the others.
     *
     * @param typedWord the typed word the normalized scores are computed against
     * @param suggestions the non-empty list of suggestions; its first element is marked "+"
     */
    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
        typedWordInfo.setDebugString("+");
        final int suggestionsSize = suggestions.size();
        final ArrayList<SuggestedWordInfo> suggestionsList =
                new ArrayList<SuggestedWordInfo>(suggestionsSize);
        suggestionsList.add(typedWordInfo);
        // Index 0 was handled above, so the scored suggestions start at index 1.
        for (int i = 1; i < suggestionsSize; ++i) {
            final SuggestedWordInfo cur = suggestions.get(i);
            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
                    typedWord, cur.toString(), cur.mScore);
            final String scoreInfoString;
            if (normalizedScore > 0) {
                scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
            } else {
                scoreInfoString = Integer.toString(cur.mScore);
            }
            cur.setDebugString(scoreInfoString);
            suggestionsList.add(cur);
        }
        return suggestionsList;
    }

    private static class SuggestedWordInfoComparator implements Comparator<SuggestedWordInfo> {
        // This comparator ranks the word info with the higher frequency first. That's because
        // that's the order we want our elements in. Ties are broken by the shorter word first,
        // then by the natural ordering of the words.
        @Override
        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
            if (o1.mScore > o2.mScore) return -1;
            if (o1.mScore < o2.mScore) return 1;
            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
            return o1.mWord.toString().compareTo(o2.mWord.toString());
        }
    }
    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
            new SuggestedWordInfoComparator();

    /**
     * Applies the casing of the typed word and its trailing single quotes to a suggestion.
     *
     * @return {@code wordInfo} itself when there is nothing to change, otherwise a new
     *         {@link SuggestedWordInfo} with the same score, kind and source dictionary
     */
    private static SuggestedWordInfo getTransformedSuggestedWordInfo(
            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
        if (!isFirstCharCapitalized && !isAllUpperCase && 0 == trailingSingleQuotesCount) {
            return wordInfo;
        }
        final StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
        if (isAllUpperCase) {
            sb.append(wordInfo.mWord.toString().toUpperCase(locale));
        } else if (isFirstCharCapitalized) {
            sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale));
        } else {
            sb.append(wordInfo.mWord);
        }
        for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
            sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
        }
        return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict);
    }

    /**
     * Closes every registered dictionary and forgets the main dictionary.
     */
    public void close() {
        // Collect into a set first so that a dictionary registered under several keys is closed
        // only once.
        final HashSet<Dictionary> dictionaries = new HashSet<Dictionary>();
        dictionaries.addAll(mDictionaries.values());
        for (final Dictionary dictionary : dictionaries) {
            dictionary.close();
        }
        mMainDictionary = null;
    }

    /**
     * Tells whether an auto-correction from {@code typedWord} to {@code suggestion} should be
     * blocked because the edit distance between the two exceeds the allowed maximum.
     *
     * @param typedWord the word as typed by the user
     * @param suggestion the auto-correction candidate
     * @return true if the auto-correction should be turned off for this word
     */
    // TODO: Resolve the inconsistencies between the native auto correction algorithms and
    // this safety net
    public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord,
            final CharSequence suggestion) {
        // Safety net for auto correction.
        // Actually if we hit this safety net, it's a bug.
        // If user selected aggressive auto correction mode, there is no need to use the safety
        // net.
        // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH,
        // we should not use net because relatively edit distance can be big.
        final int typedWordLength = typedWord.length();
        if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) {
            return false;
        }
        final int maxEditDistanceOfNativeDictionary =
                (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
        final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString());
        if (DBG) {
            Log.d(TAG, "Autocorrected edit distance = " + distance
                    + ", " + maxEditDistanceOfNativeDictionary);
        }
        if (distance > maxEditDistanceOfNativeDictionary) {
            if (DBG) {
                Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion);
                Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
                        + "Turning off auto-correction.");
            }
            return true;
        } else {
            return false;
        }
    }
}