Suggest.java revision e9a86e2cdb58dd8d5601138294521e966d164520
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.text.TextUtils; 21 22import com.android.inputmethod.keyboard.Keyboard; 23import com.android.inputmethod.keyboard.ProximityInfo; 24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 25 26import java.io.File; 27import java.util.ArrayList; 28import java.util.Comparator; 29import java.util.HashSet; 30import java.util.Locale; 31import java.util.concurrent.ConcurrentHashMap; 32 33/** 34 * This class loads a dictionary and provides a list of suggestions for a given sequence of 35 * characters. This includes corrections and completions. 36 */ 37public class Suggest { 38 public static final String TAG = Suggest.class.getSimpleName(); 39 40 // TODO: rename this to CORRECTION_OFF 41 public static final int CORRECTION_NONE = 0; 42 // TODO: rename this to CORRECTION_ON 43 public static final int CORRECTION_FULL = 1; 44 45 private static final boolean DBG = LatinImeLogger.sDBG; 46 47 private Dictionary mMainDictionary; 48 private ContactsBinaryDictionary mContactsDict; 49 private WhitelistDictionary mWhiteListDictionary; 50 private final ConcurrentHashMap<String, Dictionary> mDictionaries = 51 new ConcurrentHashMap<String, Dictionary>(); 52 53 public static final int MAX_SUGGESTIONS = 18; 54 55 private float mAutoCorrectionThreshold; 56 57 // Locale used for upper- and title-casing words 58 final private Locale mLocale; 59 60 public Suggest(final Context context, final Locale locale) { 61 initAsynchronously(context, locale); 62 mLocale = locale; 63 } 64 65 /* package for test */ Suggest(final Context context, final File dictionary, 66 final long startOffset, final long length, final Locale locale) { 67 final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary, 68 startOffset, length /* useFullEditDistance */, false, locale); 69 mLocale = locale; 70 mMainDictionary = mainDict; 71 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict); 72 initWhitelistAndAutocorrectAndPool(context, locale); 73 } 74 75 private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) { 76 mWhiteListDictionary = new WhitelistDictionary(context, locale); 77 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_WHITELIST, mWhiteListDictionary); 78 } 79 80 private void initAsynchronously(final Context context, final Locale locale) { 81 resetMainDict(context, locale); 82 83 // TODO: read the whitelist and init the pool asynchronously too. 84 // initPool should be done asynchronously now that the pool is thread-safe. 85 initWhitelistAndAutocorrectAndPool(context, locale); 86 } 87 88 private static void addOrReplaceDictionary( 89 final ConcurrentHashMap<String, Dictionary> dictionaries, 90 final String key, final Dictionary dict) { 91 final Dictionary oldDict = (dict == null) 92 ? dictionaries.remove(key) 93 : dictionaries.put(key, dict); 94 if (oldDict != null && dict != oldDict) { 95 oldDict.close(); 96 } 97 } 98 99 public void resetMainDict(final Context context, final Locale locale) { 100 mMainDictionary = null; 101 new Thread("InitializeBinaryDictionary") { 102 @Override 103 public void run() { 104 final DictionaryCollection newMainDict = 105 DictionaryFactory.createMainDictionaryFromManager(context, locale); 106 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict); 107 mMainDictionary = newMainDict; 108 } 109 }.start(); 110 } 111 112 // The main dictionary could have been loaded asynchronously. Don't cache the return value 113 // of this method. 114 public boolean hasMainDictionary() { 115 return null != mMainDictionary && mMainDictionary.isInitialized(); 116 } 117 118 public Dictionary getMainDictionary() { 119 return mMainDictionary; 120 } 121 122 public ContactsBinaryDictionary getContactsDictionary() { 123 return mContactsDict; 124 } 125 126 public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() { 127 return mDictionaries; 128 } 129 130 /** 131 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 132 * before the main dictionary, if set. This refers to the system-managed user dictionary. 133 */ 134 public void setUserDictionary(UserBinaryDictionary userDictionary) { 135 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary); 136 } 137 138 /** 139 * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove 140 * the contacts dictionary by passing null to this method. In this case no contacts dictionary 141 * won't be used. 142 */ 143 public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) { 144 mContactsDict = contactsDictionary; 145 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary); 146 } 147 148 public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) { 149 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary); 150 } 151 152 public void setAutoCorrectionThreshold(float threshold) { 153 mAutoCorrectionThreshold = threshold; 154 } 155 156 // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder 157 public SuggestedWords getSuggestedWords( 158 final WordComposer wordComposer, CharSequence prevWordForBigram, 159 final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, 160 // TODO: remove isPrediction parameter. It effectively means the same thing 161 // as wordComposer.size() <= 1 162 final boolean isPrediction) { 163 LatinImeLogger.onStartSuggestion(prevWordForBigram); 164 final boolean isFirstCharCapitalized = 165 !isPrediction && wordComposer.isFirstCharCapitalized(); 166 final boolean isAllUpperCase = !isPrediction && wordComposer.isAllUpperCase(); 167 final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); 168 final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator, 169 MAX_SUGGESTIONS); 170 171 final String typedWord = wordComposer.getTypedWord(); 172 final String consideredWord = trailingSingleQuotesCount > 0 173 ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount) 174 : typedWord; 175 LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED); 176 177 if (wordComposer.size() <= 1 && isCorrectionEnabled) { 178 // At first character typed, search only the bigrams 179 if (!TextUtils.isEmpty(prevWordForBigram)) { 180 for (final String key : mDictionaries.keySet()) { 181 final Dictionary dictionary = mDictionaries.get(key); 182 suggestionsSet.addAll(dictionary.getBigrams(wordComposer, prevWordForBigram)); 183 } 184 } 185 } else if (wordComposer.size() > 1) { 186 final WordComposer wordComposerForLookup; 187 if (trailingSingleQuotesCount > 0) { 188 wordComposerForLookup = new WordComposer(wordComposer); 189 for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { 190 wordComposerForLookup.deleteLast(); 191 } 192 } else { 193 wordComposerForLookup = wordComposer; 194 } 195 // At second character typed, search the unigrams (scores being affected by bigrams) 196 for (final String key : mDictionaries.keySet()) { 197 // Skip UserUnigramDictionary and WhitelistDictionary to lookup 198 if (key.equals(Dictionary.TYPE_USER_HISTORY) 199 || key.equals(Dictionary.TYPE_WHITELIST)) 200 continue; 201 final Dictionary dictionary = mDictionaries.get(key); 202 suggestionsSet.addAll(dictionary.getWords( 203 wordComposerForLookup, prevWordForBigram, proximityInfo)); 204 } 205 } 206 207 // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid" 208 // but still autocorrected from - in the case the whitelist only capitalizes the word. 209 // The whitelist should be case-insensitive, so it's not possible to be consistent with 210 // a boolean flag. Right now this is handled with a slight hack in 211 // WhitelistDictionary#shouldForciblyAutoCorrectFrom. 212 final boolean allowsToBeAutoCorrected = AutoCorrection.isWhitelistedOrNotAWord( 213 mDictionaries, consideredWord, wordComposer.isFirstCharCapitalized()); 214 215 final CharSequence whitelistedWord = 216 mWhiteListDictionary.getWhitelistedWord(consideredWord); 217 218 final boolean hasAutoCorrection; 219 if (!isCorrectionEnabled || !allowsToBeAutoCorrected || wordComposer.isMostlyCaps() 220 || wordComposer.isResumed() || !hasMainDictionary()) { 221 // If we don't have a main dictionary, we never want to auto-correct. The reason for 222 // this is, the user may have a contact whose name happens to match a valid word in 223 // their language, and it will unexpectedly auto-correct. For example, if the user 224 // types in English with no dictionary and has a "Will" in their contact list, "will" 225 // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no 226 // auto-correct. 227 hasAutoCorrection = false; 228 } else if (null != whitelistedWord) { 229 hasAutoCorrection = true; 230 } else if (suggestionsSet.isEmpty()) { 231 hasAutoCorrection = false; 232 } else if (AutoCorrection.suggestionExceedsAutoCorrectionThreshold(suggestionsSet.first(), 233 consideredWord, mAutoCorrectionThreshold)) { 234 hasAutoCorrection = true; 235 } else { 236 hasAutoCorrection = false; 237 } 238 239 if (whitelistedWord != null) { 240 suggestionsSet.add(new SuggestedWordInfo(whitelistedWord, 241 SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_WHITELIST, 242 Dictionary.TYPE_WHITELIST)); 243 } 244 245 final ArrayList<SuggestedWordInfo> suggestionsContainer = 246 new ArrayList<SuggestedWordInfo>(suggestionsSet); 247 final int suggestionsCount = suggestionsContainer.size(); 248 if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) { 249 for (int i = 0; i < suggestionsCount; ++i) { 250 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 251 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( 252 wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized, 253 trailingSingleQuotesCount); 254 suggestionsContainer.set(i, transformedWordInfo); 255 } 256 } 257 258 for (int i = 0; i < suggestionsCount; ++i) { 259 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); 260 LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict); 261 } 262 263 if (!TextUtils.isEmpty(typedWord)) { 264 suggestionsContainer.add(0, new SuggestedWordInfo(typedWord, 265 SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED, 266 Dictionary.TYPE_USER_TYPED)); 267 } 268 SuggestedWordInfo.removeDups(suggestionsContainer); 269 270 final ArrayList<SuggestedWordInfo> suggestionsList; 271 if (DBG && !suggestionsContainer.isEmpty()) { 272 suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer); 273 } else { 274 suggestionsList = suggestionsContainer; 275 } 276 277 return new SuggestedWords(suggestionsList, 278 // TODO: this first argument is lying. If this is a whitelisted word which is an 279 // actual word, it says typedWordValid = false, which looks wrong. We should either 280 // rename the attribute or change the value. 281 !isPrediction && !allowsToBeAutoCorrected /* typedWordValid */, 282 !isPrediction && hasAutoCorrection, /* willAutoCorrect */ 283 false /* isPunctuationSuggestions */, 284 false /* isObsoleteSuggestions */, 285 isPrediction); 286 } 287 288 private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( 289 final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) { 290 final SuggestedWordInfo typedWordInfo = suggestions.get(0); 291 typedWordInfo.setDebugString("+"); 292 final int suggestionsSize = suggestions.size(); 293 final ArrayList<SuggestedWordInfo> suggestionsList = 294 new ArrayList<SuggestedWordInfo>(suggestionsSize); 295 suggestionsList.add(typedWordInfo); 296 // Note: i here is the index in mScores[], but the index in mSuggestions is one more 297 // than i because we added the typed word to mSuggestions without touching mScores. 298 for (int i = 0; i < suggestionsSize - 1; ++i) { 299 final SuggestedWordInfo cur = suggestions.get(i + 1); 300 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 301 typedWord, cur.toString(), cur.mScore); 302 final String scoreInfoString; 303 if (normalizedScore > 0) { 304 scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore); 305 } else { 306 scoreInfoString = Integer.toString(cur.mScore); 307 } 308 cur.setDebugString(scoreInfoString); 309 suggestionsList.add(cur); 310 } 311 return suggestionsList; 312 } 313 314 private static class SuggestedWordInfoComparator implements Comparator<SuggestedWordInfo> { 315 // This comparator ranks the word info with the higher frequency first. That's because 316 // that's the order we want our elements in. 317 @Override 318 public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) { 319 if (o1.mScore > o2.mScore) return -1; 320 if (o1.mScore < o2.mScore) return 1; 321 if (o1.mCodePointCount < o2.mCodePointCount) return -1; 322 if (o1.mCodePointCount > o2.mCodePointCount) return 1; 323 return o1.mWord.toString().compareTo(o2.mWord.toString()); 324 } 325 } 326 private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator = 327 new SuggestedWordInfoComparator(); 328 329 private static SuggestedWordInfo getTransformedSuggestedWordInfo( 330 final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, 331 final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) { 332 final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); 333 if (isAllUpperCase) { 334 sb.append(wordInfo.mWord.toString().toUpperCase(locale)); 335 } else if (isFirstCharCapitalized) { 336 sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale)); 337 } else { 338 sb.append(wordInfo.mWord); 339 } 340 for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) { 341 sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); 342 } 343 return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict); 344 } 345 346 public void close() { 347 final HashSet<Dictionary> dictionaries = new HashSet<Dictionary>(); 348 dictionaries.addAll(mDictionaries.values()); 349 for (final Dictionary dictionary : dictionaries) { 350 dictionary.close(); 351 } 352 mMainDictionary = null; 353 } 354} 355