Suggest.java revision e398c6c024ecf507232948a2b3e862b4a27a1a84
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.text.TextUtils; 21import android.util.Log; 22 23import com.android.inputmethod.keyboard.Keyboard; 24import com.android.inputmethod.keyboard.ProximityInfo; 25import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 26 27import java.io.File; 28import java.util.ArrayList; 29import java.util.Comparator; 30import java.util.HashSet; 31import java.util.Locale; 32import java.util.concurrent.ConcurrentHashMap; 33 34/** 35 * This class loads a dictionary and provides a list of suggestions for a given sequence of 36 * characters. This includes corrections and completions. 37 */ 38public class Suggest { 39 public static final String TAG = Suggest.class.getSimpleName(); 40 41 public static final int APPROX_MAX_WORD_LENGTH = 32; 42 43 // TODO: rename this to CORRECTION_OFF 44 public static final int CORRECTION_NONE = 0; 45 // TODO: rename this to CORRECTION_ON 46 public static final int CORRECTION_FULL = 1; 47 48 private static final boolean DBG = LatinImeLogger.sDBG; 49 50 private Dictionary mMainDictionary; 51 private ContactsBinaryDictionary mContactsDict; 52 private WhitelistDictionary mWhiteListDictionary; 53 private final ConcurrentHashMap<String, Dictionary> mDictionaries = 54 new ConcurrentHashMap<String, Dictionary>(); 55 56 public static final int MAX_SUGGESTIONS = 18; 57 58 private float mAutoCorrectionThreshold; 59 60 // Locale used for upper- and title-casing words 61 final private Locale mLocale; 62 63 private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; 64 65 public Suggest(final Context context, final Locale locale) { 66 initAsynchronously(context, locale); 67 mLocale = locale; 68 } 69 70 /* package for test */ Suggest(final Context context, final File dictionary, 71 final long startOffset, final long length, final Locale locale) { 72 final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary, 73 startOffset, length /* useFullEditDistance */, false, locale); 74 mLocale = locale; 75 mMainDictionary = mainDict; 76 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict); 77 initWhitelistAndAutocorrectAndPool(context, locale); 78 } 79 80 private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) { 81 mWhiteListDictionary = new WhitelistDictionary(context, locale); 82 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_WHITELIST, mWhiteListDictionary); 83 } 84 85 private void initAsynchronously(final Context context, final Locale locale) { 86 resetMainDict(context, locale); 87 88 // TODO: read the whitelist and init the pool asynchronously too. 89 // initPool should be done asynchronously now that the pool is thread-safe. 90 initWhitelistAndAutocorrectAndPool(context, locale); 91 } 92 93 private static void addOrReplaceDictionary( 94 final ConcurrentHashMap<String, Dictionary> dictionaries, 95 final String key, final Dictionary dict) { 96 final Dictionary oldDict = (dict == null) 97 ? dictionaries.remove(key) 98 : dictionaries.put(key, dict); 99 if (oldDict != null && dict != oldDict) { 100 oldDict.close(); 101 } 102 } 103 104 public void resetMainDict(final Context context, final Locale locale) { 105 mMainDictionary = null; 106 new Thread("InitializeBinaryDictionary") { 107 @Override 108 public void run() { 109 final DictionaryCollection newMainDict = 110 DictionaryFactory.createMainDictionaryFromManager(context, locale); 111 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict); 112 mMainDictionary = newMainDict; 113 } 114 }.start(); 115 } 116 117 // The main dictionary could have been loaded asynchronously. Don't cache the return value 118 // of this method. 119 public boolean hasMainDictionary() { 120 return null != mMainDictionary && mMainDictionary.isInitialized(); 121 } 122 123 public Dictionary getMainDictionary() { 124 return mMainDictionary; 125 } 126 127 public ContactsBinaryDictionary getContactsDictionary() { 128 return mContactsDict; 129 } 130 131 public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() { 132 return mDictionaries; 133 } 134 135 public static int getApproxMaxWordLength() { 136 return APPROX_MAX_WORD_LENGTH; 137 } 138 139 /** 140 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 141 * before the main dictionary, if set. This refers to the system-managed user dictionary. 142 */ 143 public void setUserDictionary(UserBinaryDictionary userDictionary) { 144 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary); 145 } 146 147 /** 148 * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove 149 * the contacts dictionary by passing null to this method. In this case no contacts dictionary 150 * won't be used. 151 */ 152 public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) { 153 mContactsDict = contactsDictionary; 154 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary); 155 } 156 157 public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) { 158 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); 159 } 160 161 public void setAutoCorrectionThreshold(float threshold) { 162 mAutoCorrectionThreshold = threshold; 163 } 164 165 // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder 166 public SuggestedWords getSuggestedWords( 167 final WordComposer wordComposer, CharSequence prevWordForBigram, 168 final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, 169 // TODO: remove isPrediction parameter. It effectively means the same thing 170 // as wordComposer.size() <= 1 171 final boolean isPrediction) { 172 LatinImeLogger.onStartSuggestion(prevWordForBigram); 173 final boolean isFirstCharCapitalized = 174 !isPrediction && wordComposer.isFirstCharCapitalized(); 175 final boolean isAllUpperCase = !isPrediction && wordComposer.isAllUpperCase(); 176 final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); 177 final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, 178 MAX_SUGGESTIONS); 179 180 final String typedWord = wordComposer.getTypedWord(); 181 final String consideredWord = trailingSingleQuotesCount > 0 182 ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount) 183 : typedWord; 184 LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED); 185 186 if (wordComposer.size() <= 1 && isCorrectionEnabled) { 187 // At first character typed, search only the bigrams 188 if (!TextUtils.isEmpty(prevWordForBigram)) { 189 final CharSequence lowerPrevWord; 190 if (StringUtils.hasUpperCase(prevWordForBigram)) { 191 // TODO: Must pay attention to locale when changing case. 192 lowerPrevWord = prevWordForBigram.toString().toLowerCase(); 193 } else { 194 lowerPrevWord = null; 195 } 196 for (final String key : mDictionaries.keySet()) { 197 final Dictionary dictionary = mDictionaries.get(key); 198 suggestionsSet.addAll(dictionary.getBigrams(wordComposer, prevWordForBigram)); 199 if (null != lowerPrevWord) { 200 suggestionsSet.addAll(dictionary.getBigrams(wordComposer, lowerPrevWord)); 201 } 202 } 203 } 204 } else if (wordComposer.size() > 1) { 205 final WordComposer wordComposerForLookup; 206 if (trailingSingleQuotesCount > 0) { 207 wordComposerForLookup = new WordComposer(wordComposer); 208 for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { 209 wordComposerForLookup.deleteLast(); 210 } 211 } else { 212 wordComposerForLookup = wordComposer; 213 } 214 // At second character typed, search the unigrams (scores being affected by bigrams) 215 for (final String key : mDictionaries.keySet()) { 216 // Skip UserUnigramDictionary and WhitelistDictionary to lookup 217 if (key.equals(Dictionary.TYPE_USER_HISTORY) 218 || key.equals(Dictionary.TYPE_WHITELIST)) 219 continue; 220 final Dictionary dictionary = mDictionaries.get(key); 221 suggestionsSet.addAll(dictionary.getWords( 222 wordComposerForLookup, prevWordForBigram, proximityInfo)); 223 } 224 } 225 226 final CharSequence whitelistedWord = 227 mWhiteListDictionary.getWhitelistedWord(consideredWord); 228 229 final boolean hasAutoCorrection; 230 if (!isCorrectionEnabled) { 231 hasAutoCorrection = false; 232 } else if (null != whitelistedWord) { 233 hasAutoCorrection = true; 234 } else if (!AutoCorrection.isWhitelistedOrNotAWord( 235 mDictionaries, consideredWord, false)) { 236 hasAutoCorrection = true; 237 } else if (suggestionsSet.isEmpty()) { 238 hasAutoCorrection = false; 239 } else if (AutoCorrection.suggestionExceedsAutoCorrectionThreshold(suggestionsSet.first(), 240 consideredWord, mAutoCorrectionThreshold)) { 241 hasAutoCorrection = true; 242 } else { 243 hasAutoCorrection = false; 244 } 245 246 if (whitelistedWord != null) { 247 suggestionsSet.add(new SuggestedWordInfo(whitelistedWord, 248 SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_WHITELIST, 249 Dictionary.TYPE_WHITELIST)); 250 } 251 252 final ArrayList<SuggestedWordInfo> suggestionsContainer = 253 new ArrayList<SuggestedWordInfo>(suggestionsSet); 254 final int suggestionsCount = suggestionsContainer.size(); 255 if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) { 256 for (int i = 0; i < suggestionsCount; ++i) { 257 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 258 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 259 wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, 260 trailingSingleQuotesCount); 261 suggestionsContainer.set(i, transformedWordInfo); 262 } 263 } 264 265 for (int i = 0; i < suggestionsCount; ++i) { 266 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 267 LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict); 268 } 269 270 if (!TextUtils.isEmpty(typedWord)) { 271 suggestionsContainer.add(0, new SuggestedWordInfo(typedWord, 272 SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, 273 Dictionary.TYPE_USER_TYPED)); 274 } 275 SuggestedWordInfo.removeDups(suggestionsContainer); 276 277 final ArrayList<SuggestedWordInfo> suggestionsList; 278 if (DBG && !suggestionsContainer.isEmpty()) { 279 suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer); 280 } else { 281 suggestionsList = suggestionsContainer; 282 } 283 284 // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid" 285 // but still autocorrected from - in the case the whitelist only capitalizes the word. 286 // The whitelist should be case-insensitive, so it's not possible to be consistent with 287 // a boolean flag. Right now this is handled with a slight hack in 288 // WhitelistDictionary#shouldForciblyAutoCorrectFrom. 289 final boolean allowsToBeAutoCorrected = AutoCorrection.isWhitelistedOrNotAWord( 290 getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized()) 291 // If we don't have a main dictionary, we never want to auto-correct. The reason for this 292 // is, the user may have a contact whose name happens to match a valid word in their 293 // language, and it will unexpectedly auto-correct. For example, if the user types in 294 // English with no dictionary and has a "Will" in their contact list, "will" would 295 // always auto-correct to "Will" which is unwanted. Hence, no main dict => no auto-correct. 296 && hasMainDictionary(); 297 298 boolean autoCorrectionAvailable = hasAutoCorrection; 299 if (isCorrectionEnabled) { 300 autoCorrectionAvailable |= !allowsToBeAutoCorrected; 301 } 302 // Don't auto-correct words with multiple capital letter 303 autoCorrectionAvailable &= !wordComposer.isMostlyCaps(); 304 autoCorrectionAvailable &= !wordComposer.isResumed(); 305 if (allowsToBeAutoCorrected && suggestionsList.size() > 1 && mAutoCorrectionThreshold > 0 306 && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord, 307 suggestionsList.get(1).mWord)) { 308 autoCorrectionAvailable = false; 309 } 310 return new SuggestedWords(suggestionsList, 311 !isPrediction && !allowsToBeAutoCorrected /* typedWordValid */, 312 !isPrediction && autoCorrectionAvailable /* hasAutoCorrectionCandidate */, 313 !isPrediction && allowsToBeAutoCorrected /* allowsToBeAutoCorrected */, 314 false /* isPunctuationSuggestions */, 315 false /* isObsoleteSuggestions */, 316 isPrediction); 317 } 318 319 private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( 320 final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) { 321 final SuggestedWordInfo typedWordInfo = suggestions.get(0); 322 typedWordInfo.setDebugString("+"); 323 final int suggestionsSize = suggestions.size(); 324 final ArrayList<SuggestedWordInfo> suggestionsList = 325 new ArrayList<SuggestedWordInfo>(suggestionsSize); 326 suggestionsList.add(typedWordInfo); 327 // Note: i here is the index in mScores[], but the index in mSuggestions is one more 328 // than i because we added the typed word to mSuggestions without touching mScores. 329 for (int i = 0; i < suggestionsSize - 1; ++i) { 330 final SuggestedWordInfo cur = suggestions.get(i + 1); 331 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 332 typedWord, cur.toString(), cur.mScore); 333 final String scoreInfoString; 334 if (normalizedScore > 0) { 335 scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore); 336 } else { 337 scoreInfoString = Integer.toString(cur.mScore); 338 } 339 cur.setDebugString(scoreInfoString); 340 suggestionsList.add(cur); 341 } 342 return suggestionsList; 343 } 344 345 private static class SuggestedWordInfoComparator implements Comparator<SuggestedWordInfo> { 346 // This comparator ranks the word info with the higher frequency first. That's because 347 // that's the order we want our elements in. 348 @Override 349 public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) { 350 if (o1.mScore > o2.mScore) return -1; 351 if (o1.mScore < o2.mScore) return 1; 352 if (o1.mCodePointCount < o2.mCodePointCount) return -1; 353 if (o1.mCodePointCount > o2.mCodePointCount) return 1; 354 return o1.mWord.toString().compareTo(o2.mWord.toString()); 355 } 356 } 357 private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator = 358 new SuggestedWordInfoComparator(); 359 360 private static SuggestedWordInfo getTransformedSuggestedWordInfo( 361 final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, 362 final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) { 363 final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); 364 if (isAllUpperCase) { 365 sb.append(wordInfo.mWord.toString().toUpperCase(locale)); 366 } else if (isFirstCharCapitalized) { 367 sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale)); 368 } else { 369 sb.append(wordInfo.mWord); 370 } 371 for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { 372 sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); 373 } 374 return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict); 375 } 376 377 public void close() { 378 final HashSet<Dictionary> dictionaries = new HashSet<Dictionary>(); 379 dictionaries.addAll(mDictionaries.values()); 380 for (final Dictionary dictionary : dictionaries) { 381 dictionary.close(); 382 } 383 mMainDictionary = null; 384 } 385 386 // TODO: Resolve the inconsistencies between the native auto correction algorithms and 387 // this safety net 388 public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord, 389 final CharSequence suggestion) { 390 // Safety net for auto correction. 391 // Actually if we hit this safety net, it's a bug. 392 // If user selected aggressive auto correction mode, there is no need to use the safety 393 // net. 394 // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH, 395 // we should not use net because relatively edit distance can be big. 396 final int typedWordLength = typedWord.length(); 397 if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) { 398 return false; 399 } 400 final int maxEditDistanceOfNativeDictionary = 401 (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; 402 final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString()); 403 if (DBG) { 404 Log.d(TAG, "Autocorrected edit distance = " + distance 405 + ", " + maxEditDistanceOfNativeDictionary); 406 } 407 if (distance > maxEditDistanceOfNativeDictionary) { 408 if (DBG) { 409 Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion); 410 Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. " 411 + "Turning off auto-correction."); 412 } 413 return true; 414 } else { 415 return false; 416 } 417 } 418} 419