Suggest.java revision 499371a5efdd5e3b76d12370fcd63fb7963a2488
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.content.Context;
20import android.preference.PreferenceManager;
21import android.text.TextUtils;
22import android.util.Log;
23
24import com.android.inputmethod.annotations.UsedForTesting;
25import com.android.inputmethod.keyboard.ProximityInfo;
26import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
27import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary;
28import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
29import com.android.inputmethod.latin.settings.Settings;
30import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
31import com.android.inputmethod.latin.utils.BoundedTreeSet;
32import com.android.inputmethod.latin.utils.CollectionUtils;
33import com.android.inputmethod.latin.utils.StringUtils;
34
35import java.util.ArrayList;
36import java.util.Comparator;
37import java.util.HashSet;
38import java.util.Locale;
39import java.util.concurrent.ConcurrentHashMap;
40
41/**
42 * This class loads a dictionary and provides a list of suggestions for a given sequence of
43 * characters. This includes corrections and completions.
44 */
45public final class Suggest {
46    public static final String TAG = Suggest.class.getSimpleName();
47
48    // Session id for
49    // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}.
50    // We are sharing the same ID between typing and gesture to save RAM footprint.
51    public static final int SESSION_TYPING = 0;
52    public static final int SESSION_GESTURE = 0;
53
54    // TODO: rename this to CORRECTION_OFF
55    public static final int CORRECTION_NONE = 0;
56    // TODO: rename this to CORRECTION_ON
57    public static final int CORRECTION_FULL = 1;
58
59    // Close to -2**31
60    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;
61
62    public static final int MAX_SUGGESTIONS = 18;
63
64    public interface SuggestInitializationListener {
65        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
66    }
67
68    private static final boolean DBG = LatinImeLogger.sDBG;
69
70    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
71            CollectionUtils.newConcurrentHashMap();
72    private HashSet<String> mOnlyDictionarySetForDebug = null;
73    private Dictionary mMainDictionary;
74    private ContactsBinaryDictionary mContactsDict;
75    @UsedForTesting
76    private boolean mIsCurrentlyWaitingForMainDictionary = false;
77
78    private float mAutoCorrectionThreshold;
79
80    // Locale used for upper- and title-casing words
81    public final Locale mLocale;
82
83    public Suggest(final Context context, final Locale locale,
84            final SuggestInitializationListener listener) {
85        initAsynchronously(context, locale, listener);
86        mLocale = locale;
87        // initialize a debug flag for the personalization
88        if (Settings.readUseOnlyPersonalizationDictionaryForDebug(
89                PreferenceManager.getDefaultSharedPreferences(context))) {
90            mOnlyDictionarySetForDebug = new HashSet<String>();
91            mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA);
92        }
93    }
94
95    @UsedForTesting
96    Suggest(final AssetFileAddress[] dictionaryList, final Locale locale) {
97        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(dictionaryList,
98                false /* useFullEditDistance */, locale);
99        mLocale = locale;
100        mMainDictionary = mainDict;
101        addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, mainDict);
102    }
103
104    private void initAsynchronously(final Context context, final Locale locale,
105            final SuggestInitializationListener listener) {
106        resetMainDict(context, locale, listener);
107    }
108
109    private void addOrReplaceDictionaryInternal(final String key, final Dictionary dict) {
110        if (mOnlyDictionarySetForDebug != null && !mOnlyDictionarySetForDebug.contains(key)) {
111            Log.w(TAG, "Ignore add " + key + " dictionary for debug.");
112            return;
113        }
114        addOrReplaceDictionary(mDictionaries, key, dict);
115    }
116
117    private static void addOrReplaceDictionary(
118            final ConcurrentHashMap<String, Dictionary> dictionaries,
119            final String key, final Dictionary dict) {
120        final Dictionary oldDict = (dict == null)
121                ? dictionaries.remove(key)
122                : dictionaries.put(key, dict);
123        if (oldDict != null && dict != oldDict) {
124            oldDict.close();
125        }
126    }
127
128    public void resetMainDict(final Context context, final Locale locale,
129            final SuggestInitializationListener listener) {
130        mIsCurrentlyWaitingForMainDictionary = true;
131        mMainDictionary = null;
132        if (listener != null) {
133            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
134        }
135        new Thread("InitializeBinaryDictionary") {
136            @Override
137            public void run() {
138                final DictionaryCollection newMainDict =
139                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
140                addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, newMainDict);
141                mMainDictionary = newMainDict;
142                if (listener != null) {
143                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
144                }
145                mIsCurrentlyWaitingForMainDictionary = false;
146            }
147        }.start();
148    }
149
150    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
151    // of this method.
152    public boolean hasMainDictionary() {
153        return null != mMainDictionary && mMainDictionary.isInitialized();
154    }
155
156    @UsedForTesting
157    public boolean isCurrentlyWaitingForMainDictionary() {
158        return mIsCurrentlyWaitingForMainDictionary;
159    }
160
161    public Dictionary getMainDictionary() {
162        return mMainDictionary;
163    }
164
165    public ContactsBinaryDictionary getContactsDictionary() {
166        return mContactsDict;
167    }
168
169    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
170        return mDictionaries;
171    }
172
173    /**
174     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
175     * before the main dictionary, if set. This refers to the system-managed user dictionary.
176     */
177    public void setUserDictionary(final UserBinaryDictionary userDictionary) {
178        addOrReplaceDictionaryInternal(Dictionary.TYPE_USER, userDictionary);
179    }
180
181    /**
182     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
183     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
184     * won't be used.
185     */
186    public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) {
187        mContactsDict = contactsDictionary;
188        addOrReplaceDictionaryInternal(Dictionary.TYPE_CONTACTS, contactsDictionary);
189    }
190
191    public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
192        addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
193    }
194
195    public void setPersonalizationPredictionDictionary(
196            final PersonalizationPredictionDictionary personalizationPredictionDictionary) {
197        addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA,
198                personalizationPredictionDictionary);
199    }
200
201    public void setAutoCorrectionThreshold(float threshold) {
202        mAutoCorrectionThreshold = threshold;
203    }
204
205    public interface OnGetSuggestedWordsCallback {
206        public void onGetSuggestedWords(final SuggestedWords suggestedWords);
207    }
208
209    public void getSuggestedWords(final WordComposer wordComposer,
210            final String prevWordForBigram, final ProximityInfo proximityInfo,
211            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
212            final int[] additionalFeaturesOptions, final int sessionId, final int sequenceNumber,
213            final OnGetSuggestedWordsCallback callback) {
214        LatinImeLogger.onStartSuggestion(prevWordForBigram);
215        if (wordComposer.isBatchMode()) {
216            getSuggestedWordsForBatchInput(wordComposer, prevWordForBigram, proximityInfo,
217                    blockOffensiveWords, additionalFeaturesOptions, sessionId, sequenceNumber,
218                    callback);
219        } else {
220            getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
221                    blockOffensiveWords, isCorrectionEnabled, additionalFeaturesOptions,
222                    sequenceNumber, callback);
223        }
224    }
225
226    // Retrieves suggestions for the typing input
227    // and calls the callback function with the suggestions.
228    private void getSuggestedWordsForTypingInput(final WordComposer wordComposer,
229            final String prevWordForBigram, final ProximityInfo proximityInfo,
230            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
231            final int[] additionalFeaturesOptions, final int sequenceNumber,
232            final OnGetSuggestedWordsCallback callback) {
233        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
234        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
235                MAX_SUGGESTIONS);
236
237        final String typedWord = wordComposer.getTypedWord();
238        final String consideredWord = trailingSingleQuotesCount > 0
239                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
240                : typedWord;
241        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
242
243        final WordComposer wordComposerForLookup;
244        if (trailingSingleQuotesCount > 0) {
245            wordComposerForLookup = new WordComposer(wordComposer);
246            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
247                wordComposerForLookup.deleteLast();
248            }
249        } else {
250            wordComposerForLookup = wordComposer;
251        }
252
253        for (final String key : mDictionaries.keySet()) {
254            final Dictionary dictionary = mDictionaries.get(key);
255            suggestionsSet.addAll(dictionary.getSuggestions(wordComposerForLookup,
256                    prevWordForBigram, proximityInfo, blockOffensiveWords,
257                    additionalFeaturesOptions));
258        }
259
260        final String whitelistedWord;
261        if (suggestionsSet.isEmpty()) {
262            whitelistedWord = null;
263        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
264            whitelistedWord = null;
265        } else {
266            whitelistedWord = suggestionsSet.first().mWord;
267        }
268
269        // The word can be auto-corrected if it has a whitelist entry that is not itself,
270        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
271        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
272                && !whitelistedWord.equals(consideredWord))
273                || (consideredWord.length() > 1 && !AutoCorrectionUtils.isValidWord(this,
274                        consideredWord, wordComposer.isFirstCharCapitalized()));
275
276        final boolean hasAutoCorrection;
277        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
278        // any attempt to do auto-correction is already shielded with a test for this flag; at the
279        // same time, it feels wrong that the SuggestedWord object includes information about
280        // the current settings. It may also be useful to know, when the setting is off, whether
281        // the word *would* have been auto-corrected.
282        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
283                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
284                || wordComposer.isMostlyCaps() || wordComposer.isResumed() || !hasMainDictionary()
285                || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) {
286            // If we don't have a main dictionary, we never want to auto-correct. The reason for
287            // this is, the user may have a contact whose name happens to match a valid word in
288            // their language, and it will unexpectedly auto-correct. For example, if the user
289            // types in English with no dictionary and has a "Will" in their contact list, "will"
290            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
291            // auto-correct.
292            // Also, shortcuts should never auto-correct unless they are whitelist entries.
293            // TODO: we may want to have shortcut-only entries auto-correct in the future.
294            hasAutoCorrection = false;
295        } else {
296            hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
297                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
298        }
299
300        final ArrayList<SuggestedWordInfo> suggestionsContainer =
301                CollectionUtils.newArrayList(suggestionsSet);
302        final int suggestionsCount = suggestionsContainer.size();
303        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
304        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
305        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
306            for (int i = 0; i < suggestionsCount; ++i) {
307                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
308                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
309                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
310                        trailingSingleQuotesCount);
311                suggestionsContainer.set(i, transformedWordInfo);
312            }
313        }
314
315        for (int i = 0; i < suggestionsCount; ++i) {
316            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
317            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(),
318                    wordInfo.mSourceDict.mDictType);
319        }
320
321        if (!TextUtils.isEmpty(typedWord)) {
322            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
323                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
324                    Dictionary.DICTIONARY_USER_TYPED,
325                    SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
326                    SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */));
327        }
328        SuggestedWordInfo.removeDups(suggestionsContainer);
329
330        final ArrayList<SuggestedWordInfo> suggestionsList;
331        if (DBG && !suggestionsContainer.isEmpty()) {
332            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
333        } else {
334            suggestionsList = suggestionsContainer;
335        }
336
337        callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
338                // TODO: this first argument is lying. If this is a whitelisted word which is an
339                // actual word, it says typedWordValid = false, which looks wrong. We should either
340                // rename the attribute or change the value.
341                !allowsToBeAutoCorrected /* typedWordValid */,
342                hasAutoCorrection, /* willAutoCorrect */
343                false /* isPunctuationSuggestions */,
344                false /* isObsoleteSuggestions */,
345                !wordComposer.isComposingWord() /* isPrediction */, sequenceNumber));
346    }
347
348    // Retrieves suggestions for the batch input
349    // and calls the callback function with the suggestions.
350    private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
351            final String prevWordForBigram, final ProximityInfo proximityInfo,
352            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
353            final int sessionId, final int sequenceNumber,
354            final OnGetSuggestedWordsCallback callback) {
355        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
356                MAX_SUGGESTIONS);
357
358        // At second character typed, search the unigrams (scores being affected by bigrams)
359        for (final String key : mDictionaries.keySet()) {
360            final Dictionary dictionary = mDictionaries.get(key);
361            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(wordComposer,
362                    prevWordForBigram, proximityInfo, blockOffensiveWords,
363                    additionalFeaturesOptions, sessionId));
364        }
365
366        for (SuggestedWordInfo wordInfo : suggestionsSet) {
367            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType);
368        }
369
370        final ArrayList<SuggestedWordInfo> suggestionsContainer =
371                CollectionUtils.newArrayList(suggestionsSet);
372        final int suggestionsCount = suggestionsContainer.size();
373        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
374        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
375        if (isFirstCharCapitalized || isAllUpperCase) {
376            for (int i = 0; i < suggestionsCount; ++i) {
377                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
378                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
379                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
380                        0 /* trailingSingleQuotesCount */);
381                suggestionsContainer.set(i, transformedWordInfo);
382            }
383        }
384
385        if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord,
386                wordComposer.getRejectedBatchModeSuggestion())) {
387            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
388            suggestionsContainer.add(1, rejected);
389        }
390        SuggestedWordInfo.removeDups(suggestionsContainer);
391
392        // For some reason some suggestions with MIN_VALUE are making their way here.
393        // TODO: Find a more robust way to detect distractors.
394        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
395            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
396                suggestionsContainer.remove(i);
397            }
398        }
399
400        // In the batch input mode, the most relevant suggested word should act as a "typed word"
401        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
402        callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
403                true /* typedWordValid */,
404                false /* willAutoCorrect */,
405                false /* isPunctuationSuggestions */,
406                false /* isObsoleteSuggestions */,
407                false /* isPrediction */, sequenceNumber));
408    }
409
410    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
411            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
412        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
413        typedWordInfo.setDebugString("+");
414        final int suggestionsSize = suggestions.size();
415        final ArrayList<SuggestedWordInfo> suggestionsList =
416                CollectionUtils.newArrayList(suggestionsSize);
417        suggestionsList.add(typedWordInfo);
418        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
419        // than i because we added the typed word to mSuggestions without touching mScores.
420        for (int i = 0; i < suggestionsSize - 1; ++i) {
421            final SuggestedWordInfo cur = suggestions.get(i + 1);
422            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
423                    typedWord, cur.toString(), cur.mScore);
424            final String scoreInfoString;
425            if (normalizedScore > 0) {
426                scoreInfoString = String.format(
427                        Locale.ROOT, "%d (%4.2f), %s", cur.mScore, normalizedScore,
428                        cur.mSourceDict.mDictType);
429            } else {
430                scoreInfoString = Integer.toString(cur.mScore);
431            }
432            cur.setDebugString(scoreInfoString);
433            suggestionsList.add(cur);
434        }
435        return suggestionsList;
436    }
437
438    private static final class SuggestedWordInfoComparator
439            implements Comparator<SuggestedWordInfo> {
440        // This comparator ranks the word info with the higher frequency first. That's because
441        // that's the order we want our elements in.
442        @Override
443        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
444            if (o1.mScore > o2.mScore) return -1;
445            if (o1.mScore < o2.mScore) return 1;
446            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
447            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
448            return o1.mWord.compareTo(o2.mWord);
449        }
450    }
451    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
452            new SuggestedWordInfoComparator();
453
454    /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
455            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
456            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
457        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
458        if (isAllUpperCase) {
459            sb.append(wordInfo.mWord.toUpperCase(locale));
460        } else if (isFirstCharCapitalized) {
461            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
462        } else {
463            sb.append(wordInfo.mWord);
464        }
465        // Appending quotes is here to help people quote words. However, it's not helpful
466        // when they type words with quotes toward the end like "it's" or "didn't", where
467        // it's more likely the user missed the last character (or didn't type it yet).
468        final int quotesToAppend = trailingSingleQuotesCount
469                - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
470        for (int i = quotesToAppend - 1; i >= 0; --i) {
471            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
472        }
473        return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
474                wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
475                wordInfo.mAutoCommitFirstWordConfidence);
476    }
477
478    public void close() {
479        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
480        dictionaries.addAll(mDictionaries.values());
481        for (final Dictionary dictionary : dictionaries) {
482            dictionary.close();
483        }
484        mMainDictionary = null;
485    }
486}
487