Suggest.java revision 240871ecafde7834ebb4270cd7758fc904a5f3a7
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.text.TextUtils; 21 22import com.android.inputmethod.annotations.UsedForTesting; 23import com.android.inputmethod.keyboard.ProximityInfo; 24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 25 26import java.io.File; 27import java.util.ArrayList; 28import java.util.Comparator; 29import java.util.HashSet; 30import java.util.Locale; 31import java.util.concurrent.ConcurrentHashMap; 32 33/** 34 * This class loads a dictionary and provides a list of suggestions for a given sequence of 35 * characters. This includes corrections and completions. 36 */ 37public final class Suggest { 38 public static final String TAG = Suggest.class.getSimpleName(); 39 40 // Session id for 41 // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}. 42 public static final int SESSION_TYPING = 0; 43 public static final int SESSION_GESTURE = 1; 44 45 // TODO: rename this to CORRECTION_OFF 46 public static final int CORRECTION_NONE = 0; 47 // TODO: rename this to CORRECTION_ON 48 public static final int CORRECTION_FULL = 1; 49 50 public interface SuggestInitializationListener { 51 public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable); 52 } 53 54 private static final boolean DBG = LatinImeLogger.sDBG; 55 56 private Dictionary mMainDictionary; 57 private ContactsBinaryDictionary mContactsDict; 58 private final ConcurrentHashMap<String, Dictionary> mDictionaries = 59 CollectionUtils.newConcurrentHashMap(); 60 61 public static final int MAX_SUGGESTIONS = 18; 62 63 private float mAutoCorrectionThreshold; 64 65 // Locale used for upper- and title-casing words 66 private final Locale mLocale; 67 68 public Suggest(final Context context, final Locale locale, 69 final SuggestInitializationListener listener) { 70 initAsynchronously(context, locale, listener); 71 mLocale = locale; 72 } 73 74 @UsedForTesting 75 Suggest(final Context context, final File dictionary, 76 final long startOffset, final long length, final Locale locale) { 77 final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary, 78 startOffset, length /* useFullEditDistance */, false, locale); 79 mLocale = locale; 80 mMainDictionary = mainDict; 81 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict); 82 } 83 84 private void initAsynchronously(final Context context, final Locale locale, 85 final SuggestInitializationListener listener) { 86 resetMainDict(context, locale, listener); 87 } 88 89 private static void addOrReplaceDictionary( 90 final ConcurrentHashMap<String, Dictionary> dictionaries, 91 final String key, final Dictionary dict) { 92 final Dictionary oldDict = (dict == null) 93 ? dictionaries.remove(key) 94 : dictionaries.put(key, dict); 95 if (oldDict != null && dict != oldDict) { 96 oldDict.close(); 97 } 98 } 99 100 public void resetMainDict(final Context context, final Locale locale, 101 final SuggestInitializationListener listener) { 102 mMainDictionary = null; 103 if (listener != null) { 104 listener.onUpdateMainDictionaryAvailability(hasMainDictionary()); 105 } 106 new Thread("InitializeBinaryDictionary") { 107 @Override 108 public void run() { 109 final DictionaryCollection newMainDict = 110 DictionaryFactory.createMainDictionaryFromManager(context, locale); 111 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict); 112 mMainDictionary = newMainDict; 113 if (listener != null) { 114 listener.onUpdateMainDictionaryAvailability(hasMainDictionary()); 115 } 116 } 117 }.start(); 118 } 119 120 // The main dictionary could have been loaded asynchronously. Don't cache the return value 121 // of this method. 122 public boolean hasMainDictionary() { 123 return null != mMainDictionary && mMainDictionary.isInitialized(); 124 } 125 126 public Dictionary getMainDictionary() { 127 return mMainDictionary; 128 } 129 130 public ContactsBinaryDictionary getContactsDictionary() { 131 return mContactsDict; 132 } 133 134 public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() { 135 return mDictionaries; 136 } 137 138 /** 139 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 140 * before the main dictionary, if set. This refers to the system-managed user dictionary. 141 */ 142 public void setUserDictionary(final UserBinaryDictionary userDictionary) { 143 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary); 144 } 145 146 /** 147 * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove 148 * the contacts dictionary by passing null to this method. In this case no contacts dictionary 149 * won't be used. 150 */ 151 public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) { 152 mContactsDict = contactsDictionary; 153 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary); 154 } 155 156 public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) { 157 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); 158 } 159 160 public void setAutoCorrectionThreshold(float threshold) { 161 mAutoCorrectionThreshold = threshold; 162 } 163 164 public SuggestedWords getSuggestedWords(final WordComposer wordComposer, 165 final String prevWordForBigram, final ProximityInfo proximityInfo, 166 final boolean isCorrectionEnabled, final int sessionId) { 167 LatinImeLogger.onStartSuggestion(prevWordForBigram); 168 if (wordComposer.isBatchMode()) { 169 return getSuggestedWordsForBatchInput( 170 wordComposer, prevWordForBigram, proximityInfo, sessionId); 171 } else { 172 return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo, 173 isCorrectionEnabled); 174 } 175 } 176 177 // Retrieves suggestions for the typing input. 178 private SuggestedWords getSuggestedWordsForTypingInput(final WordComposer wordComposer, 179 final String prevWordForBigram, final ProximityInfo proximityInfo, 180 final boolean isCorrectionEnabled) { 181 final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); 182 final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, 183 MAX_SUGGESTIONS); 184 185 final String typedWord = wordComposer.getTypedWord(); 186 final String consideredWord = trailingSingleQuotesCount > 0 187 ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount) 188 : typedWord; 189 LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED); 190 191 final WordComposer wordComposerForLookup; 192 if (trailingSingleQuotesCount > 0) { 193 wordComposerForLookup = new WordComposer(wordComposer); 194 for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { 195 wordComposerForLookup.deleteLast(); 196 } 197 } else { 198 wordComposerForLookup = wordComposer; 199 } 200 201 for (final String key : mDictionaries.keySet()) { 202 final Dictionary dictionary = mDictionaries.get(key); 203 suggestionsSet.addAll(dictionary.getSuggestions( 204 wordComposerForLookup, prevWordForBigram, proximityInfo)); 205 } 206 207 final String whitelistedWord; 208 if (suggestionsSet.isEmpty()) { 209 whitelistedWord = null; 210 } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) { 211 whitelistedWord = null; 212 } else { 213 whitelistedWord = suggestionsSet.first().mWord; 214 } 215 216 // The word can be auto-corrected if it has a whitelist entry that is not itself, 217 // or if it's a 2+ characters non-word (i.e. it's not in the dictionary). 218 final boolean allowsToBeAutoCorrected = (null != whitelistedWord 219 && !whitelistedWord.equals(consideredWord)) 220 || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries, 221 consideredWord, wordComposer.isFirstCharCapitalized())); 222 223 final boolean hasAutoCorrection; 224 // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because 225 // any attempt to do auto-correction is already shielded with a test for this flag; at the 226 // same time, it feels wrong that the SuggestedWord object includes information about 227 // the current settings. It may also be useful to know, when the setting is off, whether 228 // the word *would* have been auto-corrected. 229 if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord() 230 || suggestionsSet.isEmpty() || wordComposer.hasDigits() 231 || wordComposer.isMostlyCaps() || wordComposer.isResumed() 232 || !hasMainDictionary()) { 233 // If we don't have a main dictionary, we never want to auto-correct. The reason for 234 // this is, the user may have a contact whose name happens to match a valid word in 235 // their language, and it will unexpectedly auto-correct. For example, if the user 236 // types in English with no dictionary and has a "Will" in their contact list, "will" 237 // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no 238 // auto-correct. 239 hasAutoCorrection = false; 240 } else { 241 hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold( 242 suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold); 243 } 244 245 final ArrayList<SuggestedWordInfo> suggestionsContainer = 246 CollectionUtils.newArrayList(suggestionsSet); 247 final int suggestionsCount = suggestionsContainer.size(); 248 final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); 249 final boolean isAllUpperCase = wordComposer.isAllUpperCase(); 250 if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) { 251 for (int i = 0; i < suggestionsCount; ++i) { 252 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 253 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 254 wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, 255 trailingSingleQuotesCount); 256 suggestionsContainer.set(i, transformedWordInfo); 257 } 258 } 259 260 for (int i = 0; i < suggestionsCount; ++i) { 261 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 262 LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict); 263 } 264 265 if (!TextUtils.isEmpty(typedWord)) { 266 suggestionsContainer.add(0, new SuggestedWordInfo(typedWord, 267 SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, 268 Dictionary.TYPE_USER_TYPED)); 269 } 270 SuggestedWordInfo.removeDups(suggestionsContainer); 271 272 final ArrayList<SuggestedWordInfo> suggestionsList; 273 if (DBG && !suggestionsContainer.isEmpty()) { 274 suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer); 275 } else { 276 suggestionsList = suggestionsContainer; 277 } 278 279 return new SuggestedWords(suggestionsList, 280 // TODO: this first argument is lying. If this is a whitelisted word which is an 281 // actual word, it says typedWordValid = false, which looks wrong. We should either 282 // rename the attribute or change the value. 283 !allowsToBeAutoCorrected /* typedWordValid */, 284 hasAutoCorrection, /* willAutoCorrect */ 285 false /* isPunctuationSuggestions */, 286 false /* isObsoleteSuggestions */, 287 !wordComposer.isComposingWord() /* isPrediction */); 288 } 289 290 // Retrieves suggestions for the batch input. 291 private SuggestedWords getSuggestedWordsForBatchInput(final WordComposer wordComposer, 292 final String prevWordForBigram, final ProximityInfo proximityInfo, 293 final int sessionId) { 294 final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, 295 MAX_SUGGESTIONS); 296 297 // At second character typed, search the unigrams (scores being affected by bigrams) 298 for (final String key : mDictionaries.keySet()) { 299 // Skip User history dictionary for lookup 300 // TODO: The user history dictionary should just override getSuggestionsWithSessionId 301 // to make sure it doesn't return anything and we should remove this test 302 if (key.equals(Dictionary.TYPE_USER_HISTORY)) { 303 continue; 304 } 305 final Dictionary dictionary = mDictionaries.get(key); 306 suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId( 307 wordComposer, prevWordForBigram, proximityInfo, sessionId)); 308 } 309 310 for (SuggestedWordInfo wordInfo : suggestionsSet) { 311 LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict); 312 } 313 314 final ArrayList<SuggestedWordInfo> suggestionsContainer = 315 CollectionUtils.newArrayList(suggestionsSet); 316 final int suggestionsCount = suggestionsContainer.size(); 317 final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock(); 318 final boolean isAllUpperCase = wordComposer.isAllUpperCase(); 319 if (isFirstCharCapitalized || isAllUpperCase) { 320 for (int i = 0; i < suggestionsCount; ++i) { 321 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 322 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 323 wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, 324 0 /* trailingSingleQuotesCount */); 325 suggestionsContainer.set(i, transformedWordInfo); 326 } 327 } 328 329 SuggestedWordInfo.removeDups(suggestionsContainer); 330 // In the batch input mode, the most relevant suggested word should act as a "typed word" 331 // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false). 332 return new SuggestedWords(suggestionsContainer, 333 true /* typedWordValid */, 334 false /* willAutoCorrect */, 335 false /* isPunctuationSuggestions */, 336 false /* isObsoleteSuggestions */, 337 false /* isPrediction */); 338 } 339 340 private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( 341 final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) { 342 final SuggestedWordInfo typedWordInfo = suggestions.get(0); 343 typedWordInfo.setDebugString("+"); 344 final int suggestionsSize = suggestions.size(); 345 final ArrayList<SuggestedWordInfo> suggestionsList = 346 CollectionUtils.newArrayList(suggestionsSize); 347 suggestionsList.add(typedWordInfo); 348 // Note: i here is the index in mScores[], but the index in mSuggestions is one more 349 // than i because we added the typed word to mSuggestions without touching mScores. 350 for (int i = 0; i < suggestionsSize - 1; ++i) { 351 final SuggestedWordInfo cur = suggestions.get(i + 1); 352 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 353 typedWord, cur.toString(), cur.mScore); 354 final String scoreInfoString; 355 if (normalizedScore > 0) { 356 scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore); 357 } else { 358 scoreInfoString = Integer.toString(cur.mScore); 359 } 360 cur.setDebugString(scoreInfoString); 361 suggestionsList.add(cur); 362 } 363 return suggestionsList; 364 } 365 366 private static final class SuggestedWordInfoComparator 367 implements Comparator<SuggestedWordInfo> { 368 // This comparator ranks the word info with the higher frequency first. That's because 369 // that's the order we want our elements in. 370 @Override 371 public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) { 372 if (o1.mScore > o2.mScore) return -1; 373 if (o1.mScore < o2.mScore) return 1; 374 if (o1.mCodePointCount < o2.mCodePointCount) return -1; 375 if (o1.mCodePointCount > o2.mCodePointCount) return 1; 376 return o1.mWord.compareTo(o2.mWord); 377 } 378 } 379 private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator = 380 new SuggestedWordInfoComparator(); 381 382 private static SuggestedWordInfo getTransformedSuggestedWordInfo( 383 final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, 384 final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) { 385 final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); 386 if (isAllUpperCase) { 387 sb.append(wordInfo.mWord.toUpperCase(locale)); 388 } else if (isFirstCharCapitalized) { 389 sb.append(StringUtils.toTitleCase(wordInfo.mWord, locale)); 390 } else { 391 sb.append(wordInfo.mWord); 392 } 393 for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { 394 sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE); 395 } 396 return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind, 397 wordInfo.mSourceDict); 398 } 399 400 public void close() { 401 final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet(); 402 dictionaries.addAll(mDictionaries.values()); 403 for (final Dictionary dictionary : dictionaries) { 404 dictionary.close(); 405 } 406 mMainDictionary = null; 407 } 408} 409