Suggest.java revision 499371a5efdd5e3b76d12370fcd63fb7963a2488
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.preference.PreferenceManager; 21import android.text.TextUtils; 22import android.util.Log; 23 24import com.android.inputmethod.annotations.UsedForTesting; 25import com.android.inputmethod.keyboard.ProximityInfo; 26import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 27import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary; 28import com.android.inputmethod.latin.personalization.UserHistoryDictionary; 29import com.android.inputmethod.latin.settings.Settings; 30import com.android.inputmethod.latin.utils.AutoCorrectionUtils; 31import com.android.inputmethod.latin.utils.BoundedTreeSet; 32import com.android.inputmethod.latin.utils.CollectionUtils; 33import com.android.inputmethod.latin.utils.StringUtils; 34 35import java.util.ArrayList; 36import java.util.Comparator; 37import java.util.HashSet; 38import java.util.Locale; 39import java.util.concurrent.ConcurrentHashMap; 40 41/** 42 * This class loads a dictionary and provides a list of suggestions for a given sequence of 43 * characters. This includes corrections and completions. 44 */ 45public final class Suggest { 46 public static final String TAG = Suggest.class.getSimpleName(); 47 48 // Session id for 49 // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}. 50 // We are sharing the same ID between typing and gesture to save RAM footprint. 51 public static final int SESSION_TYPING = 0; 52 public static final int SESSION_GESTURE = 0; 53 54 // TODO: rename this to CORRECTION_OFF 55 public static final int CORRECTION_NONE = 0; 56 // TODO: rename this to CORRECTION_ON 57 public static final int CORRECTION_FULL = 1; 58 59 // Close to -2**31 60 private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000; 61 62 public static final int MAX_SUGGESTIONS = 18; 63 64 public interface SuggestInitializationListener { 65 public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable); 66 } 67 68 private static final boolean DBG = LatinImeLogger.sDBG; 69 70 private final ConcurrentHashMap<String, Dictionary> mDictionaries = 71 CollectionUtils.newConcurrentHashMap(); 72 private HashSet<String> mOnlyDictionarySetForDebug = null; 73 private Dictionary mMainDictionary; 74 private ContactsBinaryDictionary mContactsDict; 75 @UsedForTesting 76 private boolean mIsCurrentlyWaitingForMainDictionary = false; 77 78 private float mAutoCorrectionThreshold; 79 80 // Locale used for upper- and title-casing words 81 public final Locale mLocale; 82 83 public Suggest(final Context context, final Locale locale, 84 final SuggestInitializationListener listener) { 85 initAsynchronously(context, locale, listener); 86 mLocale = locale; 87 // initialize a debug flag for the personalization 88 if (Settings.readUseOnlyPersonalizationDictionaryForDebug( 89 PreferenceManager.getDefaultSharedPreferences(context))) { 90 mOnlyDictionarySetForDebug = new HashSet<String>(); 91 mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA); 92 } 93 } 94 95 @UsedForTesting 96 Suggest(final AssetFileAddress[] dictionaryList, final Locale locale) { 97 final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(dictionaryList, 98 false /* useFullEditDistance */, locale); 99 mLocale = locale; 100 mMainDictionary = mainDict; 101 addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, mainDict); 102 } 103 104 private void initAsynchronously(final Context context, final Locale locale, 105 final SuggestInitializationListener listener) { 106 resetMainDict(context, locale, listener); 107 } 108 109 private void addOrReplaceDictionaryInternal(final String key, final Dictionary dict) { 110 if (mOnlyDictionarySetForDebug != null && !mOnlyDictionarySetForDebug.contains(key)) { 111 Log.w(TAG, "Ignore add " + key + " dictionary for debug."); 112 return; 113 } 114 addOrReplaceDictionary(mDictionaries, key, dict); 115 } 116 117 private static void addOrReplaceDictionary( 118 final ConcurrentHashMap<String, Dictionary> dictionaries, 119 final String key, final Dictionary dict) { 120 final Dictionary oldDict = (dict == null) 121 ? dictionaries.remove(key) 122 : dictionaries.put(key, dict); 123 if (oldDict != null && dict != oldDict) { 124 oldDict.close(); 125 } 126 } 127 128 public void resetMainDict(final Context context, final Locale locale, 129 final SuggestInitializationListener listener) { 130 mIsCurrentlyWaitingForMainDictionary = true; 131 mMainDictionary = null; 132 if (listener != null) { 133 listener.onUpdateMainDictionaryAvailability(hasMainDictionary()); 134 } 135 new Thread("InitializeBinaryDictionary") { 136 @Override 137 public void run() { 138 final DictionaryCollection newMainDict = 139 DictionaryFactory.createMainDictionaryFromManager(context, locale); 140 addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, newMainDict); 141 mMainDictionary = newMainDict; 142 if (listener != null) { 143 listener.onUpdateMainDictionaryAvailability(hasMainDictionary()); 144 } 145 mIsCurrentlyWaitingForMainDictionary = false; 146 } 147 }.start(); 148 } 149 150 // The main dictionary could have been loaded asynchronously. Don't cache the return value 151 // of this method. 152 public boolean hasMainDictionary() { 153 return null != mMainDictionary && mMainDictionary.isInitialized(); 154 } 155 156 @UsedForTesting 157 public boolean isCurrentlyWaitingForMainDictionary() { 158 return mIsCurrentlyWaitingForMainDictionary; 159 } 160 161 public Dictionary getMainDictionary() { 162 return mMainDictionary; 163 } 164 165 public ContactsBinaryDictionary getContactsDictionary() { 166 return mContactsDict; 167 } 168 169 public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() { 170 return mDictionaries; 171 } 172 173 /** 174 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 175 * before the main dictionary, if set. This refers to the system-managed user dictionary. 176 */ 177 public void setUserDictionary(final UserBinaryDictionary userDictionary) { 178 addOrReplaceDictionaryInternal(Dictionary.TYPE_USER, userDictionary); 179 } 180 181 /** 182 * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove 183 * the contacts dictionary by passing null to this method. In this case no contacts dictionary 184 * won't be used. 185 */ 186 public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) { 187 mContactsDict = contactsDictionary; 188 addOrReplaceDictionaryInternal(Dictionary.TYPE_CONTACTS, contactsDictionary); 189 } 190 191 public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) { 192 addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); 193 } 194 195 public void setPersonalizationPredictionDictionary( 196 final PersonalizationPredictionDictionary personalizationPredictionDictionary) { 197 addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA, 198 personalizationPredictionDictionary); 199 } 200 201 public void setAutoCorrectionThreshold(float threshold) { 202 mAutoCorrectionThreshold = threshold; 203 } 204 205 public interface OnGetSuggestedWordsCallback { 206 public void onGetSuggestedWords(final SuggestedWords suggestedWords); 207 } 208 209 public void getSuggestedWords(final WordComposer wordComposer, 210 final String prevWordForBigram, final ProximityInfo proximityInfo, 211 final boolean blockOffensiveWords, final boolean isCorrectionEnabled, 212 final int[] additionalFeaturesOptions, final int sessionId, final int sequenceNumber, 213 final OnGetSuggestedWordsCallback callback) { 214 LatinImeLogger.onStartSuggestion(prevWordForBigram); 215 if (wordComposer.isBatchMode()) { 216 getSuggestedWordsForBatchInput(wordComposer, prevWordForBigram, proximityInfo, 217 blockOffensiveWords, additionalFeaturesOptions, sessionId, sequenceNumber, 218 callback); 219 } else { 220 getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo, 221 blockOffensiveWords, isCorrectionEnabled, additionalFeaturesOptions, 222 sequenceNumber, callback); 223 } 224 } 225 226 // Retrieves suggestions for the typing input 227 // and calls the callback function with the suggestions. 228 private void getSuggestedWordsForTypingInput(final WordComposer wordComposer, 229 final String prevWordForBigram, final ProximityInfo proximityInfo, 230 final boolean blockOffensiveWords, final boolean isCorrectionEnabled, 231 final int[] additionalFeaturesOptions, final int sequenceNumber, 232 final OnGetSuggestedWordsCallback callback) { 233 final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); 234 final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, 235 MAX_SUGGESTIONS); 236 237 final String typedWord = wordComposer.getTypedWord(); 238 final String consideredWord = trailingSingleQuotesCount > 0 239 ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount) 240 : typedWord; 241 LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED); 242 243 final WordComposer wordComposerForLookup; 244 if (trailingSingleQuotesCount > 0) { 245 wordComposerForLookup = new WordComposer(wordComposer); 246 for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { 247 wordComposerForLookup.deleteLast(); 248 } 249 } else { 250 wordComposerForLookup = wordComposer; 251 } 252 253 for (final String key : mDictionaries.keySet()) { 254 final Dictionary dictionary = mDictionaries.get(key); 255 suggestionsSet.addAll(dictionary.getSuggestions(wordComposerForLookup, 256 prevWordForBigram, proximityInfo, blockOffensiveWords, 257 additionalFeaturesOptions)); 258 } 259 260 final String whitelistedWord; 261 if (suggestionsSet.isEmpty()) { 262 whitelistedWord = null; 263 } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) { 264 whitelistedWord = null; 265 } else { 266 whitelistedWord = suggestionsSet.first().mWord; 267 } 268 269 // The word can be auto-corrected if it has a whitelist entry that is not itself, 270 // or if it's a 2+ characters non-word (i.e. it's not in the dictionary). 271 final boolean allowsToBeAutoCorrected = (null != whitelistedWord 272 && !whitelistedWord.equals(consideredWord)) 273 || (consideredWord.length() > 1 && !AutoCorrectionUtils.isValidWord(this, 274 consideredWord, wordComposer.isFirstCharCapitalized())); 275 276 final boolean hasAutoCorrection; 277 // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because 278 // any attempt to do auto-correction is already shielded with a test for this flag; at the 279 // same time, it feels wrong that the SuggestedWord object includes information about 280 // the current settings. It may also be useful to know, when the setting is off, whether 281 // the word *would* have been auto-corrected. 282 if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord() 283 || suggestionsSet.isEmpty() || wordComposer.hasDigits() 284 || wordComposer.isMostlyCaps() || wordComposer.isResumed() || !hasMainDictionary() 285 || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) { 286 // If we don't have a main dictionary, we never want to auto-correct. The reason for 287 // this is, the user may have a contact whose name happens to match a valid word in 288 // their language, and it will unexpectedly auto-correct. For example, if the user 289 // types in English with no dictionary and has a "Will" in their contact list, "will" 290 // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no 291 // auto-correct. 292 // Also, shortcuts should never auto-correct unless they are whitelist entries. 293 // TODO: we may want to have shortcut-only entries auto-correct in the future. 294 hasAutoCorrection = false; 295 } else { 296 hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold( 297 suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold); 298 } 299 300 final ArrayList<SuggestedWordInfo> suggestionsContainer = 301 CollectionUtils.newArrayList(suggestionsSet); 302 final int suggestionsCount = suggestionsContainer.size(); 303 final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); 304 final boolean isAllUpperCase = wordComposer.isAllUpperCase(); 305 if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) { 306 for (int i = 0; i < suggestionsCount; ++i) { 307 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 308 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 309 wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, 310 trailingSingleQuotesCount); 311 suggestionsContainer.set(i, transformedWordInfo); 312 } 313 } 314 315 for (int i = 0; i < suggestionsCount; ++i) { 316 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 317 LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), 318 wordInfo.mSourceDict.mDictType); 319 } 320 321 if (!TextUtils.isEmpty(typedWord)) { 322 suggestionsContainer.add(0, new SuggestedWordInfo(typedWord, 323 SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, 324 Dictionary.DICTIONARY_USER_TYPED, 325 SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, 326 SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */)); 327 } 328 SuggestedWordInfo.removeDups(suggestionsContainer); 329 330 final ArrayList<SuggestedWordInfo> suggestionsList; 331 if (DBG && !suggestionsContainer.isEmpty()) { 332 suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer); 333 } else { 334 suggestionsList = suggestionsContainer; 335 } 336 337 callback.onGetSuggestedWords(new SuggestedWords(suggestionsList, 338 // TODO: this first argument is lying. If this is a whitelisted word which is an 339 // actual word, it says typedWordValid = false, which looks wrong. We should either 340 // rename the attribute or change the value. 341 !allowsToBeAutoCorrected /* typedWordValid */, 342 hasAutoCorrection, /* willAutoCorrect */ 343 false /* isPunctuationSuggestions */, 344 false /* isObsoleteSuggestions */, 345 !wordComposer.isComposingWord() /* isPrediction */, sequenceNumber)); 346 } 347 348 // Retrieves suggestions for the batch input 349 // and calls the callback function with the suggestions. 350 private void getSuggestedWordsForBatchInput(final WordComposer wordComposer, 351 final String prevWordForBigram, final ProximityInfo proximityInfo, 352 final boolean blockOffensiveWords, final int[] additionalFeaturesOptions, 353 final int sessionId, final int sequenceNumber, 354 final OnGetSuggestedWordsCallback callback) { 355 final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, 356 MAX_SUGGESTIONS); 357 358 // At second character typed, search the unigrams (scores being affected by bigrams) 359 for (final String key : mDictionaries.keySet()) { 360 final Dictionary dictionary = mDictionaries.get(key); 361 suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(wordComposer, 362 prevWordForBigram, proximityInfo, blockOffensiveWords, 363 additionalFeaturesOptions, sessionId)); 364 } 365 366 for (SuggestedWordInfo wordInfo : suggestionsSet) { 367 LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType); 368 } 369 370 final ArrayList<SuggestedWordInfo> suggestionsContainer = 371 CollectionUtils.newArrayList(suggestionsSet); 372 final int suggestionsCount = suggestionsContainer.size(); 373 final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock(); 374 final boolean isAllUpperCase = wordComposer.isAllUpperCase(); 375 if (isFirstCharCapitalized || isAllUpperCase) { 376 for (int i = 0; i < suggestionsCount; ++i) { 377 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 378 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 379 wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, 380 0 /* trailingSingleQuotesCount */); 381 suggestionsContainer.set(i, transformedWordInfo); 382 } 383 } 384 385 if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord, 386 wordComposer.getRejectedBatchModeSuggestion())) { 387 final SuggestedWordInfo rejected = suggestionsContainer.remove(0); 388 suggestionsContainer.add(1, rejected); 389 } 390 SuggestedWordInfo.removeDups(suggestionsContainer); 391 392 // For some reason some suggestions with MIN_VALUE are making their way here. 393 // TODO: Find a more robust way to detect distractors. 394 for (int i = suggestionsContainer.size() - 1; i >= 0; --i) { 395 if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) { 396 suggestionsContainer.remove(i); 397 } 398 } 399 400 // In the batch input mode, the most relevant suggested word should act as a "typed word" 401 // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false). 402 callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer, 403 true /* typedWordValid */, 404 false /* willAutoCorrect */, 405 false /* isPunctuationSuggestions */, 406 false /* isObsoleteSuggestions */, 407 false /* isPrediction */, sequenceNumber)); 408 } 409 410 private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( 411 final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) { 412 final SuggestedWordInfo typedWordInfo = suggestions.get(0); 413 typedWordInfo.setDebugString("+"); 414 final int suggestionsSize = suggestions.size(); 415 final ArrayList<SuggestedWordInfo> suggestionsList = 416 CollectionUtils.newArrayList(suggestionsSize); 417 suggestionsList.add(typedWordInfo); 418 // Note: i here is the index in mScores[], but the index in mSuggestions is one more 419 // than i because we added the typed word to mSuggestions without touching mScores. 420 for (int i = 0; i < suggestionsSize - 1; ++i) { 421 final SuggestedWordInfo cur = suggestions.get(i + 1); 422 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 423 typedWord, cur.toString(), cur.mScore); 424 final String scoreInfoString; 425 if (normalizedScore > 0) { 426 scoreInfoString = String.format( 427 Locale.ROOT, "%d (%4.2f), %s", cur.mScore, normalizedScore, 428 cur.mSourceDict.mDictType); 429 } else { 430 scoreInfoString = Integer.toString(cur.mScore); 431 } 432 cur.setDebugString(scoreInfoString); 433 suggestionsList.add(cur); 434 } 435 return suggestionsList; 436 } 437 438 private static final class SuggestedWordInfoComparator 439 implements Comparator<SuggestedWordInfo> { 440 // This comparator ranks the word info with the higher frequency first. That's because 441 // that's the order we want our elements in. 442 @Override 443 public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) { 444 if (o1.mScore > o2.mScore) return -1; 445 if (o1.mScore < o2.mScore) return 1; 446 if (o1.mCodePointCount < o2.mCodePointCount) return -1; 447 if (o1.mCodePointCount > o2.mCodePointCount) return 1; 448 return o1.mWord.compareTo(o2.mWord); 449 } 450 } 451 private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator = 452 new SuggestedWordInfoComparator(); 453 454 /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo( 455 final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, 456 final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) { 457 final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); 458 if (isAllUpperCase) { 459 sb.append(wordInfo.mWord.toUpperCase(locale)); 460 } else if (isFirstCharCapitalized) { 461 sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale)); 462 } else { 463 sb.append(wordInfo.mWord); 464 } 465 // Appending quotes is here to help people quote words. However, it's not helpful 466 // when they type words with quotes toward the end like "it's" or "didn't", where 467 // it's more likely the user missed the last character (or didn't type it yet). 468 final int quotesToAppend = trailingSingleQuotesCount 469 - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1); 470 for (int i = quotesToAppend - 1; i >= 0; --i) { 471 sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE); 472 } 473 return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind, 474 wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord, 475 wordInfo.mAutoCommitFirstWordConfidence); 476 } 477 478 public void close() { 479 final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet(); 480 dictionaries.addAll(mDictionaries.values()); 481 for (final Dictionary dictionary : dictionaries) { 482 dictionary.close(); 483 } 484 mMainDictionary = null; 485 } 486} 487