// Suggest.java revision 1079665c3c017ee024a2ffdaf3488cc8c37f087a
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin;

import android.text.TextUtils;

import com.android.inputmethod.event.Event;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.define.ProductionFlag;
import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.BoundedTreeSet;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.StringUtils;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Locale;

/**
 * This class loads a dictionary and provides a list of suggestions for a given sequence of
 * characters. This includes corrections and completions.
 *
 * <p>Suggestions are fetched from the {@link DictionaryFacilitatorForSuggest} supplied at
 * construction time and are delivered through an {@link OnGetSuggestedWordsCallback}.
 */
public final class Suggest {
    public static final String TAG = Suggest.class.getSimpleName();

    // Session ids for {@link #getSuggestedWords}.
    // We are sharing the same ID between typing and gesture to save RAM footprint.
    public static final int SESSION_TYPING = 0;
    public static final int SESSION_GESTURE = 0;

    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;

    // Close to -2**31. Batch-input suggestions scoring below this value are dropped.
    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;

    private static final boolean DBG = LatinImeLogger.sDBG;
    public final DictionaryFacilitatorForSuggest mDictionaryFacilitator;

    // Threshold handed to AutoCorrectionUtils when deciding whether the top suggestion
    // auto-corrects. Stays at the field default (0) until setAutoCorrectionThreshold is called
    // or it is copied from another instance.
    private float mAutoCorrectionThreshold;

    // Locale used for upper- and title-casing words
    public final Locale mLocale;

    /**
     * Creates an instance for the given locale, backed by the given dictionary facilitator.
     *
     * @param locale the locale used for upper- and title-casing suggestions
     * @param dictionaryFacilitator the source of suggestions
     */
    // TODO: Move dictionaryFacilitator constructing logics from LatinIME to Suggest.
    public Suggest(final Locale locale,
            final DictionaryFacilitatorForSuggest dictionaryFacilitator) {
        mLocale = locale;
        mDictionaryFacilitator = dictionaryFacilitator;
    }

    /**
     * Creates an instance with a new dictionary facilitator, carrying over the locale and the
     * auto-correction threshold of an existing instance.
     *
     * @param oldSuggst the instance whose locale and threshold are copied
     * @param dictionaryFacilitator the new source of suggestions
     */
    public Suggest(final Suggest oldSuggst,
            final DictionaryFacilitatorForSuggest dictionaryFacilitator) {
        mLocale = oldSuggst.mLocale;
        mAutoCorrectionThreshold = oldSuggst.mAutoCorrectionThreshold;
        mDictionaryFacilitator = dictionaryFacilitator;
    }

    /**
     * Sets the threshold used when deciding whether the first suggestion auto-corrects.
     *
     * @param threshold the new auto-correction threshold
     */
    public void setAutoCorrectionThreshold(float threshold) {
        mAutoCorrectionThreshold = threshold;
    }

    /** Receives the result of a {@link #getSuggestedWords} request. */
    public interface OnGetSuggestedWordsCallback {
        public void onGetSuggestedWords(final SuggestedWords suggestedWords);
    }

    /**
     * Computes suggestions for the current composition and hands them to {@code callback}.
     *
     * <p>Dispatches on {@link WordComposer#isBatchMode()}: batch input goes through the batch
     * path, everything else through the typing path. Note that {@code sessionId} is only
     * forwarded for batch input (the typing path always uses {@link #SESSION_TYPING}), and
     * {@code isCorrectionEnabled} is only forwarded for typing input.
     *
     * @param wordComposer the composer holding the word being composed
     * @param prevWordForBigram the previous word, passed through to the dictionary lookup
     * @param proximityInfo keyboard proximity information, passed through to the lookup
     * @param blockOffensiveWords passed through to the dictionary lookup
     * @param isCorrectionEnabled whether auto-correction may be reported (typing input only)
     * @param additionalFeaturesOptions passed through to the dictionary lookup
     * @param sessionId the session id to use for batch input
     * @param sequenceNumber stored in the resulting {@link SuggestedWords}
     * @param callback invoked with the resulting {@link SuggestedWords}
     */
    public void getSuggestedWords(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
            final int[] additionalFeaturesOptions, final int sessionId, final int sequenceNumber,
            final OnGetSuggestedWordsCallback callback) {
        LatinImeLogger.onStartSuggestion(prevWordForBigram);
        if (wordComposer.isBatchMode()) {
            getSuggestedWordsForBatchInput(wordComposer, prevWordForBigram, proximityInfo,
                    blockOffensiveWords, additionalFeaturesOptions, sessionId, sequenceNumber,
                    callback);
        } else {
            getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
                    blockOffensiveWords, isCorrectionEnabled, additionalFeaturesOptions,
                    sequenceNumber, callback);
        }
    }

    /**
     * Retrieves suggestions for the typing input and calls the callback function with the
     * suggestions.
     *
     * <p>Trailing single quotes are stripped before the lookup and re-applied to the results by
     * {@link #getTransformedSuggestedWordInfo}. When the typed word is non-empty it is inserted
     * at index 0 of the result, ahead of the dictionary suggestions, and duplicates are removed
     * afterwards.
     */
    private void getSuggestedWordsForTypingInput(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
            final int[] additionalFeaturesOptions, final int sequenceNumber,
            final OnGetSuggestedWordsCallback callback) {
        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                SuggestedWords.MAX_SUGGESTIONS);

        final String typedWord = wordComposer.getTypedWord();
        // The typed word without its trailing single quotes; this is what gets validated and
        // compared against the auto-correction threshold.
        final String consideredWord = trailingSingleQuotesCount > 0
                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
                : typedWord;
        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);

        final WordComposer wordComposerForLookup;
        if (trailingSingleQuotesCount > 0) {
            // Work on a copy so that the caller's composer keeps its trailing quotes.
            wordComposerForLookup = new WordComposer(wordComposer);
            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
                // TODO: do not create a fake event for this. Ideally the word composer should know
                // how to give out the word without trailing quotes and we can remove this entirely
                wordComposerForLookup.deleteLast(Event.createSoftwareKeypressEvent(
                        Event.NOT_A_CODE_POINT, Constants.CODE_DELETE,
                        Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE));
            }
        } else {
            wordComposerForLookup = wordComposer;
        }
        final ArrayList<SuggestedWordInfo> rawSuggestions;
        if (ProductionFlag.INCLUDE_RAW_SUGGESTIONS) {
            rawSuggestions = CollectionUtils.newArrayList();
        } else {
            rawSuggestions = null;
        }
        mDictionaryFacilitator.getSuggestions(wordComposerForLookup, prevWordForBigram,
                proximityInfo, blockOffensiveWords, additionalFeaturesOptions, SESSION_TYPING,
                suggestionsSet, rawSuggestions);

        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        final String firstSuggestion;
        final String whitelistedWord;
        if (suggestionsSet.isEmpty()) {
            whitelistedWord = firstSuggestion = null;
        } else {
            final SuggestedWordInfo firstSuggestedWordInfo = getTransformedSuggestedWordInfo(
                    suggestionsSet.first(), mLocale, isAllUpperCase, isFirstCharCapitalized,
                    trailingSingleQuotesCount);
            firstSuggestion = firstSuggestedWordInfo.mWord;
            if (SuggestedWordInfo.KIND_WHITELIST != firstSuggestedWordInfo.mKind) {
                whitelistedWord = null;
            } else {
                whitelistedWord = firstSuggestion;
            }
        }

        final boolean isPrediction = !wordComposer.isComposingWord();

        // We allow auto-correction if we have a whitelisted word, or if the word is not a valid
        // word of more than 1 char, except if the first suggestion is the same as the typed string
        // because in this case if it's strong enough to auto-correct that will mistakenly designate
        // the second candidate for auto-correction.
        // TODO: stop relying on indices to find where is the auto-correction in the suggested
        // words, and correct this test.
        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
                && !whitelistedWord.equals(typedWord))
                || (consideredWord.length() > 1 && !mDictionaryFacilitator.isValidWord(
                        consideredWord, wordComposer.isFirstCharCapitalized())
                        && !typedWord.equals(firstSuggestion));

        final boolean hasAutoCorrection;
        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
        // any attempt to do auto-correction is already shielded with a test for this flag; at the
        // same time, it feels wrong that the SuggestedWord object includes information about
        // the current settings. It may also be useful to know, when the setting is off, whether
        // the word *would* have been auto-corrected.
        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || isPrediction
                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
                || wordComposer.isMostlyCaps() || wordComposer.isResumed()
                || !mDictionaryFacilitator.hasMainDictionary()
                || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) {
            // If we don't have a main dictionary, we never want to auto-correct. The reason for
            // this is, the user may have a contact whose name happens to match a valid word in
            // their language, and it will unexpectedly auto-correct. For example, if the user
            // types in English with no dictionary and has a "Will" in their contact list, "will"
            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
            // auto-correct.
            // Also, shortcuts should never auto-correct unless they are whitelist entries.
            // TODO: we may want to have shortcut-only entries auto-correct in the future.
            hasAutoCorrection = false;
        } else {
            hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        // Only rebuild the entries when there is a case change or quotes to re-append.
        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        trailingSingleQuotesCount);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        // Log the dictionary suggestions before the typed word is inserted into the list.
        for (final SuggestedWordInfo wordInfo : suggestionsContainer) {
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType);
        }

        if (!TextUtils.isEmpty(typedWord)) {
            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
                    Dictionary.DICTIONARY_USER_TYPED,
                    SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
                    SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */));
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        final ArrayList<SuggestedWordInfo> suggestionsList;
        if (DBG && !suggestionsContainer.isEmpty()) {
            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
        } else {
            suggestionsList = suggestionsContainer;
        }

        callback.onGetSuggestedWords(new SuggestedWords(suggestionsList, rawSuggestions,
                // TODO: this first argument is lying. If this is a whitelisted word which is an
                // actual word, it says typedWordValid = false, which looks wrong. We should either
                // rename the attribute or change the value.
                !isPrediction && !allowsToBeAutoCorrected /* typedWordValid */,
                hasAutoCorrection, /* willAutoCorrect */
                false /* isObsoleteSuggestions */, isPrediction, sequenceNumber));
    }

    /**
     * Retrieves suggestions for the batch input and calls the callback function with the
     * suggestions.
     *
     * <p>Unlike the typing path, no typed word is inserted and auto-correction is never
     * reported. Suggestions whose score is below {@link #SUPPRESS_SUGGEST_THRESHOLD} are
     * removed from the result.
     */
    private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
            final String prevWordForBigram, final ProximityInfo proximityInfo,
            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
            final int sessionId, final int sequenceNumber,
            final OnGetSuggestedWordsCallback callback) {
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                SuggestedWords.MAX_SUGGESTIONS);
        final ArrayList<SuggestedWordInfo> rawSuggestions;
        if (ProductionFlag.INCLUDE_RAW_SUGGESTIONS) {
            rawSuggestions = CollectionUtils.newArrayList();
        } else {
            rawSuggestions = null;
        }
        mDictionaryFacilitator.getSuggestions(wordComposer, prevWordForBigram, proximityInfo,
                blockOffensiveWords, additionalFeaturesOptions, sessionId, suggestionsSet,
                rawSuggestions);
        for (SuggestedWordInfo wordInfo : suggestionsSet) {
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        if (isFirstCharCapitalized || isAllUpperCase) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        0 /* trailingSingleQuotesCount */);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        // If the top suggestion is the one the composer reports as rejected, demote it to the
        // second position so that a different word comes first.
        if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord,
                wordComposer.getRejectedBatchModeSuggestion())) {
            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
            suggestionsContainer.add(1, rejected);
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        // For some reason some suggestions with MIN_VALUE are making their way here.
        // TODO: Find a more robust way to detect distractors.
        // Iterate backwards so that removals do not shift the indices still to be visited.
        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
                suggestionsContainer.remove(i);
            }
        }

        // In the batch input mode, the most relevant suggested word should act as a "typed word"
        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
        callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer, rawSuggestions,
                true /* typedWordValid */,
                false /* willAutoCorrect */,
                false /* isObsoleteSuggestions */,
                false /* isPrediction */, sequenceNumber));
    }

    /**
     * Returns a copy of {@code suggestions} in which every entry carries a debug string.
     *
     * <p>The entry at index 0 is tagged with "+"; every following entry is tagged with its
     * score and, when the normalized score is positive, the normalized score and the type of
     * its source dictionary. The entries themselves are mutated via {@code setDebugString}.
     *
     * @param typedWord the typed word, used as the reference for the normalized score
     * @param suggestions the non-empty list of suggestions to annotate
     * @return a new list holding the same, now annotated, entries in the same order
     */
    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
        typedWordInfo.setDebugString("+");
        final int suggestionsSize = suggestions.size();
        final ArrayList<SuggestedWordInfo> suggestionsList =
                CollectionUtils.newArrayList(suggestionsSize);
        suggestionsList.add(typedWordInfo);
        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
        // than i because we added the typed word to mSuggestions without touching mScores.
        for (int i = 0; i < suggestionsSize - 1; ++i) {
            final SuggestedWordInfo cur = suggestions.get(i + 1);
            final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
                    typedWord, cur.toString(), cur.mScore);
            final String scoreInfoString;
            if (normalizedScore > 0) {
                scoreInfoString = String.format(
                        Locale.ROOT, "%d (%4.2f), %s", cur.mScore, normalizedScore,
                        cur.mSourceDict.mDictType);
            } else {
                scoreInfoString = Integer.toString(cur.mScore);
            }
            cur.setDebugString(scoreInfoString);
            suggestionsList.add(cur);
        }
        return suggestionsList;
    }

    /**
     * Orders suggestions by descending score, then by ascending code point count, then by the
     * natural ordering of the word.
     */
    private static final class SuggestedWordInfoComparator
            implements Comparator<SuggestedWordInfo> {
        // This comparator ranks the word info with the higher frequency first. That's because
        // that's the order we want our elements in.
        @Override
        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
            if (o1.mScore > o2.mScore) return -1;
            if (o1.mScore < o2.mScore) return 1;
            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
            return o1.mWord.compareTo(o2.mWord);
        }
    }
    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
            new SuggestedWordInfoComparator();

    /**
     * Builds a new {@link SuggestedWordInfo} whose word has the requested casing and trailing
     * single quotes applied; all other attributes are copied from {@code wordInfo}.
     *
     * @param wordInfo the suggestion to transform; it is not modified
     * @param locale the locale used for upper- and title-casing
     * @param isAllUpperCase whether to upper-case the whole word; takes precedence over
     *        {@code isFirstCharCapitalized}
     * @param isFirstCharCapitalized whether to capitalize the first code point
     * @param trailingSingleQuotesCount the number of single quotes to append; one fewer is
     *        appended when the word already contains a single quote
     * @return the transformed suggestion
     */
    /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
        if (isAllUpperCase) {
            sb.append(wordInfo.mWord.toUpperCase(locale));
        } else if (isFirstCharCapitalized) {
            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
        } else {
            sb.append(wordInfo.mWord);
        }
        // Appending quotes is here to help people quote words. However, it's not helpful
        // when they type words with quotes toward the end like "it's" or "didn't", where
        // it's more likely the user missed the last character (or didn't type it yet).
        final int quotesToAppend = trailingSingleQuotesCount
                - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
        // quotesToAppend may be negative, in which case nothing is appended.
        for (int i = quotesToAppend - 1; i >= 0; --i) {
            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
        }
        return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
                wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
                wordInfo.mAutoCommitFirstWordConfidence);
    }

    /** Closes the underlying dictionary facilitator. */
    public void close() {
        mDictionaryFacilitator.close();
    }
}