DistracterFilterCheckingExactMatchesAndSuggestions.java revision e05eb2182602dd62e2bfa5b78ab6df7f331cff24
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.utils;
18
19import java.util.HashMap;
20import java.util.List;
21import java.util.Locale;
22import java.util.Map;
23import java.util.concurrent.TimeUnit;
24
25import android.content.Context;
26import android.content.res.Resources;
27import android.text.InputType;
28import android.util.Log;
29import android.util.LruCache;
30import android.view.inputmethod.EditorInfo;
31import android.view.inputmethod.InputMethodSubtype;
32
33import com.android.inputmethod.keyboard.Keyboard;
34import com.android.inputmethod.keyboard.KeyboardId;
35import com.android.inputmethod.keyboard.KeyboardLayoutSet;
36import com.android.inputmethod.latin.DictionaryFacilitator;
37import com.android.inputmethod.latin.PrevWordsInfo;
38import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
39import com.android.inputmethod.latin.WordComposer;
40import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
41
42/**
43 * This class is used to prevent distracters being added to personalization
44 * or user history dictionaries
45 */
46public class DistracterFilterCheckingExactMatchesAndSuggestions implements DistracterFilter {
47    private static final String TAG =
48            DistracterFilterCheckingExactMatchesAndSuggestions.class.getSimpleName();
49    private static final boolean DEBUG = false;
50
51    private static final long TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS = 120;
52    private static final int MAX_DISTRACTERS_CACHE_SIZE = 512;
53
54    private final Context mContext;
55    private final Map<Locale, InputMethodSubtype> mLocaleToSubtypeMap;
56    private final Map<Locale, Keyboard> mLocaleToKeyboardMap;
57    private final DictionaryFacilitator mDictionaryFacilitator;
58    private final LruCache<String, Boolean> mDistractersCache;
59    private Keyboard mKeyboard;
60    private final Object mLock = new Object();
61
62    // If the score of the top suggestion exceeds this value, the tested word (e.g.,
63    // an OOV, a misspelling, or an in-vocabulary word) would be considered as a distractor to
64    // words in dictionary. The greater the threshold is, the less likely the tested word would
65    // become a distractor, which means the tested word will be more likely to be added to
66    // the dictionary.
67    private static final float DISTRACTER_WORD_SCORE_THRESHOLD = 0.4f;
68
69    /**
70     * Create a DistracterFilter instance.
71     *
72     * @param context the context.
73     */
74    public DistracterFilterCheckingExactMatchesAndSuggestions(final Context context) {
75        mContext = context;
76        mLocaleToSubtypeMap = new HashMap<>();
77        mLocaleToKeyboardMap = new HashMap<>();
78        mDictionaryFacilitator = new DictionaryFacilitator();
79        mDistractersCache = new LruCache<>(MAX_DISTRACTERS_CACHE_SIZE);
80        mKeyboard = null;
81    }
82
83    @Override
84    public void close() {
85        mDictionaryFacilitator.closeDictionaries();
86    }
87
88    @Override
89    public void updateEnabledSubtypes(final List<InputMethodSubtype> enabledSubtypes) {
90        final Map<Locale, InputMethodSubtype> newLocaleToSubtypeMap = new HashMap<>();
91        if (enabledSubtypes != null) {
92            for (final InputMethodSubtype subtype : enabledSubtypes) {
93                final Locale locale = SubtypeLocaleUtils.getSubtypeLocale(subtype);
94                if (newLocaleToSubtypeMap.containsKey(locale)) {
95                    // Multiple subtypes are enabled for one locale.
96                    // TODO: Investigate what we should do for this case.
97                    continue;
98                }
99                newLocaleToSubtypeMap.put(locale, subtype);
100            }
101        }
102        if (mLocaleToSubtypeMap.equals(newLocaleToSubtypeMap)) {
103            // Enabled subtypes have not been changed.
104            return;
105        }
106        synchronized (mLock) {
107            mLocaleToSubtypeMap.clear();
108            mLocaleToSubtypeMap.putAll(newLocaleToSubtypeMap);
109            mLocaleToKeyboardMap.clear();
110        }
111    }
112
113    private void loadKeyboardForLocale(final Locale newLocale) {
114        final Keyboard cachedKeyboard = mLocaleToKeyboardMap.get(newLocale);
115        if (cachedKeyboard != null) {
116            mKeyboard = cachedKeyboard;
117            return;
118        }
119        final InputMethodSubtype subtype;
120        synchronized (mLock) {
121            subtype = mLocaleToSubtypeMap.get(newLocale);
122        }
123        if (subtype == null) {
124            return;
125        }
126        final EditorInfo editorInfo = new EditorInfo();
127        editorInfo.inputType = InputType.TYPE_CLASS_TEXT;
128        final KeyboardLayoutSet.Builder builder = new KeyboardLayoutSet.Builder(
129                mContext, editorInfo);
130        final Resources res = mContext.getResources();
131        final int keyboardWidth = ResourceUtils.getDefaultKeyboardWidth(res);
132        final int keyboardHeight = ResourceUtils.getDefaultKeyboardHeight(res);
133        builder.setKeyboardGeometry(keyboardWidth, keyboardHeight);
134        builder.setSubtype(subtype);
135        builder.setIsSpellChecker(false /* isSpellChecker */);
136        final KeyboardLayoutSet layoutSet = builder.build();
137        mKeyboard = layoutSet.getKeyboard(KeyboardId.ELEMENT_ALPHABET);
138    }
139
140    private void loadDictionariesForLocale(final Locale newlocale) throws InterruptedException {
141        mDictionaryFacilitator.resetDictionaries(mContext, newlocale,
142                false /* useContactsDict */, false /* usePersonalizedDicts */,
143                false /* forceReloadMainDictionary */, null /* listener */);
144        mDictionaryFacilitator.waitForLoadingMainDictionary(
145                TIMEOUT_TO_WAIT_LOADING_DICTIONARIES_IN_SECONDS, TimeUnit.SECONDS);
146    }
147
148    /**
149     * Determine whether a word is a distracter to words in dictionaries.
150     *
151     * @param prevWordsInfo the information of previous words. Not used for now.
152     * @param testedWord the word that will be tested to see whether it is a distracter to words
153     *                   in dictionaries.
154     * @param locale the locale of word.
155     * @return true if testedWord is a distracter, otherwise false.
156     */
157    @Override
158    public boolean isDistracterToWordsInDictionaries(final PrevWordsInfo prevWordsInfo,
159            final String testedWord, final Locale locale) {
160        if (locale == null) {
161            return false;
162        }
163        if (!locale.equals(mDictionaryFacilitator.getLocale())) {
164            synchronized (mLock) {
165                if (!mLocaleToSubtypeMap.containsKey(locale)) {
166                    Log.e(TAG, "Locale " + locale + " is not enabled.");
167                    // TODO: Investigate what we should do for disabled locales.
168                    return false;
169                }
170                loadKeyboardForLocale(locale);
171                // Reset dictionaries for the locale.
172                try {
173                    mDistractersCache.evictAll();
174                    loadDictionariesForLocale(locale);
175                } catch (final InterruptedException e) {
176                    Log.e(TAG, "Interrupted while waiting for loading dicts in DistracterFilter",
177                            e);
178                    return false;
179                }
180            }
181        }
182
183        if (DEBUG) {
184            Log.d(TAG, "testedWord: " + testedWord);
185        }
186        final Boolean isCachedDistracter = mDistractersCache.get(testedWord);
187        if (isCachedDistracter != null && isCachedDistracter) {
188            if (DEBUG) {
189                Log.d(TAG, "isDistracter: true (cache hit)");
190            }
191            return true;
192        }
193
194        final boolean isDistracterCheckedByGetMaxFreqencyOfExactMatches =
195                checkDistracterUsingMaxFreqencyOfExactMatches(testedWord);
196        if (isDistracterCheckedByGetMaxFreqencyOfExactMatches) {
197            // Add the word to the cache.
198            mDistractersCache.put(testedWord, Boolean.TRUE);
199            return true;
200        }
201        final boolean isValidWord = mDictionaryFacilitator.isValidWord(testedWord,
202                false /* ignoreCase */);
203        if (isValidWord) {
204            // Valid word is not a distractor.
205            if (DEBUG) {
206                Log.d(TAG, "isDistracter: false (valid word)");
207            }
208            return false;
209        }
210
211        final boolean isDistracterCheckedByGetSuggestion =
212                checkDistracterUsingGetSuggestions(testedWord);
213        if (isDistracterCheckedByGetSuggestion) {
214            // Add the word to the cache.
215            mDistractersCache.put(testedWord, Boolean.TRUE);
216            return true;
217        }
218        return false;
219    }
220
221    private boolean checkDistracterUsingMaxFreqencyOfExactMatches(final String testedWord) {
222        // The tested word is a distracter when there is a word that is exact matched to the tested
223        // word and its probability is higher than the tested word's probability.
224        final int perfectMatchFreq = mDictionaryFacilitator.getFrequency(testedWord);
225        final int exactMatchFreq = mDictionaryFacilitator.getMaxFrequencyOfExactMatches(testedWord);
226        final boolean isDistracter = perfectMatchFreq < exactMatchFreq;
227        if (DEBUG) {
228            Log.d(TAG, "perfectMatchFreq: " + perfectMatchFreq);
229            Log.d(TAG, "exactMatchFreq: " + exactMatchFreq);
230            Log.d(TAG, "isDistracter: " + isDistracter);
231        }
232        return isDistracter;
233    }
234
235    private boolean checkDistracterUsingGetSuggestions(final String testedWord) {
236        if (mKeyboard == null) {
237            return false;
238        }
239        final SettingsValuesForSuggestion settingsValuesForSuggestion =
240                new SettingsValuesForSuggestion(false /* blockPotentiallyOffensive */,
241                        false /* spaceAwareGestureEnabled */,
242                        null /* additionalFeaturesSettingValues */);
243        final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(testedWord);
244        final String consideredWord = trailingSingleQuotesCount > 0 ?
245                testedWord.substring(0, testedWord.length() - trailingSingleQuotesCount) :
246                testedWord;
247        final WordComposer composer = new WordComposer();
248        final int[] codePoints = StringUtils.toCodePointArray(testedWord);
249
250        synchronized (mLock) {
251            final int[] coordinates = mKeyboard.getCoordinates(codePoints);
252            composer.setComposingWord(codePoints, coordinates);
253            final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
254                    composer, PrevWordsInfo.EMPTY_PREV_WORDS_INFO, mKeyboard.getProximityInfo(),
255                    settingsValuesForSuggestion, 0 /* sessionId */);
256            if (suggestionResults.isEmpty()) {
257                return false;
258            }
259            final SuggestedWordInfo firstSuggestion = suggestionResults.first();
260            final boolean isDistractor = suggestionExceedsDistracterThreshold(
261                    firstSuggestion, consideredWord, DISTRACTER_WORD_SCORE_THRESHOLD);
262            if (DEBUG) {
263                Log.d(TAG, "isDistracter: " + isDistractor);
264            }
265            return isDistractor;
266        }
267    }
268
269    private static boolean suggestionExceedsDistracterThreshold(final SuggestedWordInfo suggestion,
270            final String consideredWord, final float distracterThreshold) {
271        if (suggestion == null) {
272            return false;
273        }
274        final int suggestionScore = suggestion.mScore;
275        final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
276                consideredWord, suggestion.mWord, suggestionScore);
277        if (DEBUG) {
278            Log.d(TAG, "normalizedScore: " + normalizedScore);
279            Log.d(TAG, "distracterThreshold: " + distracterThreshold);
280        }
281        if (normalizedScore > distracterThreshold) {
282            return true;
283        }
284        return false;
285    }
286}
287