DistracterFilterCheckingExactMatchesAndSuggestions.java revision e05eb2182602dd62e2bfa5b78ab6df7f331cff24
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.utils;

import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import android.content.Context;
import android.content.res.Resources;
import android.text.InputType;
import android.util.Log;
import android.util.LruCache;
import android.view.inputmethod.EditorInfo;
import android.view.inputmethod.InputMethodSubtype;

import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.KeyboardId;
import com.android.inputmethod.keyboard.KeyboardLayoutSet;
import com.android.inputmethod.latin.DictionaryFacilitator;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;

/**
 * This class is used to prevent distracters being added to personalization
 * or user history dictionaries.
 *
 * A word is reported as a distracter when either an exact match with a higher frequency exists
 * in the dictionaries, or the top suggestion produced for the word has a normalized score above
 * {@link #DISTRACTER_WORD_SCORE_THRESHOLD}. Positive results are remembered in an LRU cache that
 * is emptied whenever the dictionaries are reloaded for another locale.
 */
public class DistracterFilterCheckingExactMatchesAndSuggestions implements DistracterFilter {
    private static final String TAG =
            DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName();
    private static final boolean DEBUG = false;

    private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120;
    private static final int MAX_DISTRACTERS_CACHE_SIZE = 512;

    private final Context mContext;
    // Enabled subtypes keyed by locale; replaced as a whole by updateEnabledSubtypes().
    private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
    // Keyboards already built for a locale; cleared whenever the enabled subtypes change.
    private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
    private final DictionaryFacilitator mDictionaryFacilitator;
    // Words already found to be distracters for the currently loaded locale.
    private final LruCache<String, Boolean> mDistractersCache;
    // Keyboard of the most recently loaded locale, or null if none has been loaded yet.
    private Keyboard mKeyboard;
    // Guards the two locale maps and mKeyboard.
    private final Object mLock = new Object();

    // If the score of the top suggestion exceeds this value, the tested word (e.g.,
    // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distractor to
    // words in dictionary. The greater the threshold is, the less likely the tested word would
    // become a distractor, which means the tested word will be more likely to be added to
    // the dictionary.
    private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 0.4f;

    /**
     * Create a DistracterFilter instance.
     *
     * @param context the context.
     */
    public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) {
        mContext = context;
        mLocaleToSubtypeMap = new HashMap<>();
        mLocaleToKeyboardMap = new HashMap<>();
        mDictionaryFacilitator = new DictionaryFacilitator();
        mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
        mKeyboard = null;
    }

    /**
     * Closes the dictionaries held by this filter.
     */
    @Override
    public void close() {
        mDictionaryFacilitator.closeDictionaries();
    }

    /**
     * Replaces the set of enabled subtypes. When several subtypes share one locale, only the
     * first one in the list is kept. If the resulting locale-to-subtype mapping differs from the
     * current one, the cached keyboards are dropped as well.
     *
     * @param enabledSubtypes the enabled subtypes; null is treated as an empty list.
     */
    @Override
    public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
        final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>();
        if (enabledSubtypes != null) {
            for (final InputMethodSubtype subtype : enabledSubtypes) {
                final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
                if (newLocaleToSubtypeMap.containsKey(locale)) {
                    // Multiple subtypes are enabled for one locale.
                    // TODO: Investigate what we should do for this case.
                    continue;
                }
                newLocaleToSubtypeMap.put(locale, subtype);
            }
        }
        synchronized (mLock) {
            // Compare under the lock: every other access to mLocaleToSubtypeMap is guarded by
            // mLock, so reading it unguarded here could observe a half-updated map.
            if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) {
                // Enabled subtypes have not been changed.
                return;
            }
            mLocaleToSubtypeMap.clear();
            mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap);
            mLocaleToKeyboardMap.clear();
        }
    }

    /**
     * Makes {@link #mKeyboard} the alphabet keyboard for the given locale, reusing a previously
     * built keyboard when one is cached. If no subtype is registered for the locale, the current
     * keyboard is left unchanged.
     *
     * @param newLocale the locale whose keyboard should become current.
     */
    private void loadKeyboardForLocale(final Locale newLocale) {
        synchronized (mLock) {
            final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
            if (cachedKeyboard != null) {
                mKeyboard = cachedKeyboard;
                return;
            }
            final InputMethodSubtype subtype = mLocaleToSubtypeMap.get(newLocale);
            if (subtype == null) {
                return;
            }
            final EditorInfo editorInfo = new EditorInfo();
            editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
            final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
                    mContext, editorInfo);
            final Resources res = mContext.getResources();
            final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
            final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
            builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
            builder.setSubtype(subtype);
            builder.setIsSpellChecker(false /* isSpellChecker */);
            final KeyboardLayoutSet layoutSet = builder.build();
            final Keyboard newKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
            mKeyboard = newKeyboard;
            // Remember the keyboard so that switching back to this locale does not rebuild the
            // layout set. Without this the cache lookup above could never hit.
            mLocaleToKeyboardMap.put(newLocale, newKeyboard);
        }
    }

    /**
     * Resets the dictionaries to the given locale (without contacts or personalized
     * dictionaries) and waits for the main dictionary to be loaded, for at most
     * {@link #TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS} seconds.
     *
     * @param newlocale the locale to load dictionaries for.
     * @throws InterruptedException if the thread is interrupted while waiting.
     */
    private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
        mDictionaryFacilitator.resetDictionaries(mContext, newlocale,
                false /* useContactsDict */, false /* usePersonalizedDicts */,
                false /* forceReloadMainDictionary */, null /* listener */);
        mDictionaryFacilitator.waitForLoadingMainDictionary(
                TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS, TimeUnit.SECONDS);
    }

    /**
     * Determine whether a word is a distracter to words in dictionaries.
     *
     * If the locale differs from the one currently loaded, the keyboard and dictionaries are
     * switched to it first; this empties the distracter cache and may block while the main
     * dictionary loads.
     *
     * @param prevWordsInfo the information of previous words. Not used for now.
     * @param testedWord the word that will be tested to see whether it is a distracter to words
     *     in dictionaries.
     * @param locale the locale of word.
     * @return true if testedWord is a distracter, otherwise false. Also false when the locale is
     *     null or not enabled, or when loading the dictionaries was interrupted.
     */
    @Override
    public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
            final String testedWord, final Locale locale) {
        if (locale == null) {
            return false;
        }
        if (!locale.equals(mDictionaryFacilitator.getLocale())) {
            synchronized (mLock) {
                if (!mLocaleToSubtypeMap.containsKey(locale)) {
                    Log.e(TAG, "Locale " + locale + " is not enabled.");
                    // TODO: Investigate what we should do for disabled locales.
                    return false;
                }
                loadKeyboardForLocale(locale);
                // Reset dictionaries for the locale.
                try {
                    mDistractersCache.evictAll();
                    loadDictionariesForLocale(locale);
                } catch (final InterruptedException e) {
                    Log.e(TAG, "Interrupted while waiting for loading dicts in DistracterFilter",
                            e);
                    // Restore the interrupt status so that the caller can still observe it.
                    Thread.currentThread().interrupt();
                    return false;
                }
            }
        }

        if (DEBUG) {
            Log.d(TAG, "testedWord: " + testedWord);
        }
        // Only positive results are cached, so a miss means "not known yet", not "not a
        // distracter".
        final Boolean isCachedDistracter = mDistractersCache.get(testedWord);
        if (isCachedDistracter != null && isCachedDistracter) {
            if (DEBUG) {
                Log.d(TAG, "isDistracter: true (cache hit)");
            }
            return true;
        }

        final boolean isDistracterCheckedByGetMaxFreqencyOfExactMatches =
                checkDistracterUsingMaxFreqencyOfExactMatches(testedWord);
        if (isDistracterCheckedByGetMaxFreqencyOfExactMatches) {
            // Add the word to the cache.
            mDistractersCache.put(testedWord, Boolean.TRUE);
            return true;
        }
        final boolean isValidWord = mDictionaryFacilitator.isValidWord(testedWord,
                false /* ignoreCase */);
        if (isValidWord) {
            // Valid word is not a distractor.
            if (DEBUG) {
                Log.d(TAG, "isDistracter: false (valid word)");
            }
            return false;
        }

        final boolean isDistracterCheckedByGetSuggestion =
                checkDistracterUsingGetSuggestions(testedWord);
        if (isDistracterCheckedByGetSuggestion) {
            // Add the word to the cache.
            mDistractersCache.put(testedWord, Boolean.TRUE);
            return true;
        }
        return false;
    }

    /**
     * Checks the tested word against the exact matches found in the dictionaries.
     *
     * @param testedWord the word to test.
     * @return true if the maximum frequency of the exact matches is higher than the frequency of
     *     the tested word itself.
     */
    private boolean checkDistracterUsingMaxFreqencyOfExactMatches(final String testedWord) {
        // The tested word is a distracter when there is a word that is exact matched to the tested
        // word and its probability is higher than the tested word's probability.
        final int perfectMatchFreq = mDictionaryFacilitator.getFrequency(testedWord);
        final int exactMatchFreq = mDictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord);
        final boolean isDistracter = perfectMatchFreq < exactMatchFreq;
        if (DEBUG) {
            Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq);
            Log.d(TAG, "exactMatchFreq: " + exactMatchFreq);
            Log.d(TAG, "isDistracter: " + isDistracter);
        }
        return isDistracter;
    }

    /**
     * Checks the tested word against the top suggestion produced for it.
     *
     * The word is composed with its full code points, while the normalized score is computed
     * against the word with its trailing single quotes removed.
     *
     * @param testedWord the word to test.
     * @return true if the first suggestion exceeds {@link #DISTRACTER_WORD_SCORE_THRESHOLD};
     *     false if no keyboard has been loaded or there are no suggestions.
     */
    private boolean checkDistracterUsingGetSuggestions(final String testedWord) {
        final SettingsValuesForSuggestion settingsValuesForSuggestion =
                new SettingsValuesForSuggestion(false /* blockPotentiallyOffensive */,
                        false /* spaceAwareGestureEnabled */,
                        null /* additionalFeaturesSettingValues */);
        final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
        final String consideredWord = trailingSingleQuotesCount > 0 ?
                testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
                testedWord;
        final WordComposer composer = new WordComposer();
        final int[] codePoints = StringUtils.toCodePointArray(testedWord);

        synchronized (mLock) {
            // Take a single snapshot of the keyboard under the lock so the null check and the
            // later uses refer to the same object.
            final Keyboard keyboard = mKeyboard;
            if (keyboard == null) {
                return false;
            }
            final int[] coordinates = keyboard.getCoordinates(codePoints);
            composer.setComposingWord(codePoints, coordinates);
            final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
                    composer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO, keyboard.getProximityInfo(),
                    settingsValuesForSuggestion, 0 /* sessionId */);
            if (suggestionResults.isEmpty()) {
                return false;
            }
            final SuggestedWordInfo firstSuggestion = suggestionResults.first();
            final boolean isDistractor = suggestionExceedsDistracterThreshold(
                    firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
            if (DEBUG) {
                Log.d(TAG, "isDistracter: " + isDistractor);
            }
            return isDistractor;
        }
    }

    /**
     * Tells whether the normalized score of a suggestion, computed against the considered word,
     * is strictly greater than the given threshold.
     *
     * @param suggestion the suggestion to evaluate; may be null.
     * @param consideredWord the word the suggestion is scored against.
     * @param distracterThreshold the threshold the normalized score must exceed.
     * @return true if the normalized score exceeds the threshold; false otherwise or if the
     *     suggestion is null.
     */
    private static boolean suggestionExceedsDistracterThreshold(final SuggestedWordInfo suggestion,
            final String consideredWord, final float distracterThreshold) {
        if (suggestion == null) {
            return false;
        }
        final int suggestionScore = suggestion.mScore;
        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
                consideredWord, suggestion.mWord, suggestionScore);
        if (DEBUG) {
            Log.d(TAG, "normalizedScore: " + normalizedScore);
            Log.d(TAG, "distracterThreshold: " + distracterThreshold);
        }
        return normalizedScore > distracterThreshold;
    }
}