Suggest.java revision 1079665c3c017ee024a2ffdaf3488cc8c37f087a
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20
21import com.android.inputmethod.event.Event;
22import com.android.inputmethod.keyboard.ProximityInfo;
23import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
24import com.android.inputmethod.latin.define.ProductionFlag;
25import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
26import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
27import com.android.inputmethod.latin.utils.BoundedTreeSet;
28import com.android.inputmethod.latin.utils.CollectionUtils;
29import com.android.inputmethod.latin.utils.StringUtils;
30
31import java.util.ArrayList;
32import java.util.Comparator;
33import java.util.Locale;
34
35/**
36 * This class loads a dictionary and provides a list of suggestions for a given sequence of
37 * characters. This includes corrections and completions.
38 */
39public final class Suggest {
40    public static final String TAG = Suggest.class.getSimpleName();
41
42    // Session id for
43    // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}.
44    // We are sharing the same ID between typing and gesture to save RAM footprint.
45    public static final int SESSION_TYPING = 0;
46    public static final int SESSION_GESTURE = 0;
47
48    // TODO: rename this to CORRECTION_OFF
49    public static final int CORRECTION_NONE = 0;
50    // TODO: rename this to CORRECTION_ON
51    public static final int CORRECTION_FULL = 1;
52
53    // Close to -2**31
54    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;
55
56    private static final boolean DBG = LatinImeLogger.sDBG;
57    public final DictionaryFacilitatorForSuggest mDictionaryFacilitator;
58
59    private float mAutoCorrectionThreshold;
60
61    // Locale used for upper- and title-casing words
62    public final Locale mLocale;
63
64    // TODO: Move dictionaryFacilitator constructing logics from LatinIME to Suggest.
65    public Suggest(final Locale locale,
66            final DictionaryFacilitatorForSuggest dictionaryFacilitator) {
67        mLocale = locale;
68        mDictionaryFacilitator = dictionaryFacilitator;
69    }
70
71    // Creates instance with new dictionary facilitator.
72    public Suggest(final Suggest oldSuggst,
73            final DictionaryFacilitatorForSuggest dictionaryFacilitator) {
74        mLocale = oldSuggst.mLocale;
75        mAutoCorrectionThreshold = oldSuggst.mAutoCorrectionThreshold;
76        mDictionaryFacilitator = dictionaryFacilitator;
77    }
78
79    public void setAutoCorrectionThreshold(float threshold) {
80        mAutoCorrectionThreshold = threshold;
81    }
82
83    public interface OnGetSuggestedWordsCallback {
84        public void onGetSuggestedWords(final SuggestedWords suggestedWords);
85    }
86
87    public void getSuggestedWords(final WordComposer wordComposer,
88            final String prevWordForBigram, final ProximityInfo proximityInfo,
89            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
90            final int[] additionalFeaturesOptions, final int sessionId, final int sequenceNumber,
91            final OnGetSuggestedWordsCallback callback) {
92        LatinImeLogger.onStartSuggestion(prevWordForBigram);
93        if (wordComposer.isBatchMode()) {
94            getSuggestedWordsForBatchInput(wordComposer, prevWordForBigram, proximityInfo,
95                    blockOffensiveWords, additionalFeaturesOptions, sessionId, sequenceNumber,
96                    callback);
97        } else {
98            getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
99                    blockOffensiveWords, isCorrectionEnabled, additionalFeaturesOptions,
100                    sequenceNumber, callback);
101        }
102    }
103
104    // Retrieves suggestions for the typing input
105    // and calls the callback function with the suggestions.
106    private void getSuggestedWordsForTypingInput(final WordComposer wordComposer,
107            final String prevWordForBigram, final ProximityInfo proximityInfo,
108            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
109            final int[] additionalFeaturesOptions, final int sequenceNumber,
110            final OnGetSuggestedWordsCallback callback) {
111        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
112        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
113                SuggestedWords.MAX_SUGGESTIONS);
114
115        final String typedWord = wordComposer.getTypedWord();
116        final String consideredWord = trailingSingleQuotesCount > 0
117                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
118                : typedWord;
119        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
120
121        final WordComposer wordComposerForLookup;
122        if (trailingSingleQuotesCount > 0) {
123            wordComposerForLookup = new WordComposer(wordComposer);
124            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
125                // TODO: do not create a fake event for this. Ideally the word composer should know
126                // how to give out the word without trailing quotes and we can remove this entirely
127                wordComposerForLookup.deleteLast(Event.createSoftwareKeypressEvent(
128                        Event.NOT_A_CODE_POINT, Constants.CODE_DELETE,
129                        Constants.NOT_A_COORDINATE, Constants.NOT_A_COORDINATE));
130            }
131        } else {
132            wordComposerForLookup = wordComposer;
133        }
134        final ArrayList<SuggestedWordInfo> rawSuggestions;
135        if (ProductionFlag.INCLUDE_RAW_SUGGESTIONS) {
136            rawSuggestions = CollectionUtils.newArrayList();
137        } else {
138            rawSuggestions = null;
139        }
140        mDictionaryFacilitator.getSuggestions(wordComposerForLookup, prevWordForBigram,
141                proximityInfo, blockOffensiveWords, additionalFeaturesOptions, SESSION_TYPING,
142                suggestionsSet, rawSuggestions);
143
144        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
145        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
146        final String firstSuggestion;
147        final String whitelistedWord;
148        if (suggestionsSet.isEmpty()) {
149            whitelistedWord = firstSuggestion = null;
150        } else {
151            final SuggestedWordInfo firstSuggestedWordInfo = getTransformedSuggestedWordInfo(
152                    suggestionsSet.first(), mLocale, isAllUpperCase, isFirstCharCapitalized,
153                    trailingSingleQuotesCount);
154            firstSuggestion = firstSuggestedWordInfo.mWord;
155            if (SuggestedWordInfo.KIND_WHITELIST != firstSuggestedWordInfo.mKind) {
156                whitelistedWord = null;
157            } else {
158                whitelistedWord = firstSuggestion;
159            }
160        }
161
162        final boolean isPrediction = !wordComposer.isComposingWord();
163
164        // We allow auto-correction if we have a whitelisted word, or if the word is not a valid
165        // word of more than 1 char, except if the first suggestion is the same as the typed string
166        // because in this case if it's strong enough to auto-correct that will mistakenly designate
167        // the second candidate for auto-correction.
168        // TODO: stop relying on indices to find where is the auto-correction in the suggested
169        // words, and correct this test.
170        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
171                && !whitelistedWord.equals(typedWord))
172                || (consideredWord.length() > 1 && !mDictionaryFacilitator.isValidWord(
173                        consideredWord, wordComposer.isFirstCharCapitalized())
174                        && !typedWord.equals(firstSuggestion));
175
176        final boolean hasAutoCorrection;
177        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
178        // any attempt to do auto-correction is already shielded with a test for this flag; at the
179        // same time, it feels wrong that the SuggestedWord object includes information about
180        // the current settings. It may also be useful to know, when the setting is off, whether
181        // the word *would* have been auto-corrected.
182        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || isPrediction
183                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
184                || wordComposer.isMostlyCaps() || wordComposer.isResumed()
185                || !mDictionaryFacilitator.hasMainDictionary()
186                || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) {
187            // If we don't have a main dictionary, we never want to auto-correct. The reason for
188            // this is, the user may have a contact whose name happens to match a valid word in
189            // their language, and it will unexpectedly auto-correct. For example, if the user
190            // types in English with no dictionary and has a "Will" in their contact list, "will"
191            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
192            // auto-correct.
193            // Also, shortcuts should never auto-correct unless they are whitelist entries.
194            // TODO: we may want to have shortcut-only entries auto-correct in the future.
195            hasAutoCorrection = false;
196        } else {
197            hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
198                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
199        }
200
201        final ArrayList<SuggestedWordInfo> suggestionsContainer =
202                CollectionUtils.newArrayList(suggestionsSet);
203        final int suggestionsCount = suggestionsContainer.size();
204        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
205            for (int i = 0; i < suggestionsCount; ++i) {
206                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
207                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
208                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
209                        trailingSingleQuotesCount);
210                suggestionsContainer.set(i, transformedWordInfo);
211            }
212        }
213
214        for (int i = 0; i < suggestionsCount; ++i) {
215            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
216            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(),
217                    wordInfo.mSourceDict.mDictType);
218        }
219
220        if (!TextUtils.isEmpty(typedWord)) {
221            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
222                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
223                    Dictionary.DICTIONARY_USER_TYPED,
224                    SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
225                    SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */));
226        }
227        SuggestedWordInfo.removeDups(suggestionsContainer);
228
229        final ArrayList<SuggestedWordInfo> suggestionsList;
230        if (DBG && !suggestionsContainer.isEmpty()) {
231            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
232        } else {
233            suggestionsList = suggestionsContainer;
234        }
235
236        callback.onGetSuggestedWords(new SuggestedWords(suggestionsList, rawSuggestions,
237                // TODO: this first argument is lying. If this is a whitelisted word which is an
238                // actual word, it says typedWordValid = false, which looks wrong. We should either
239                // rename the attribute or change the value.
240                !isPrediction && !allowsToBeAutoCorrected /* typedWordValid */,
241                hasAutoCorrection, /* willAutoCorrect */
242                false /* isObsoleteSuggestions */, isPrediction, sequenceNumber));
243    }
244
245    // Retrieves suggestions for the batch input
246    // and calls the callback function with the suggestions.
247    private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
248            final String prevWordForBigram, final ProximityInfo proximityInfo,
249            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
250            final int sessionId, final int sequenceNumber,
251            final OnGetSuggestedWordsCallback callback) {
252        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
253                SuggestedWords.MAX_SUGGESTIONS);
254        final ArrayList<SuggestedWordInfo> rawSuggestions;
255        if (ProductionFlag.INCLUDE_RAW_SUGGESTIONS) {
256            rawSuggestions = CollectionUtils.newArrayList();
257        } else {
258            rawSuggestions = null;
259        }
260        mDictionaryFacilitator.getSuggestions(wordComposer, prevWordForBigram, proximityInfo,
261                blockOffensiveWords, additionalFeaturesOptions, sessionId, suggestionsSet,
262                rawSuggestions);
263        for (SuggestedWordInfo wordInfo : suggestionsSet) {
264            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType);
265        }
266
267        final ArrayList<SuggestedWordInfo> suggestionsContainer =
268                CollectionUtils.newArrayList(suggestionsSet);
269        final int suggestionsCount = suggestionsContainer.size();
270        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
271        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
272        if (isFirstCharCapitalized || isAllUpperCase) {
273            for (int i = 0; i < suggestionsCount; ++i) {
274                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
275                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
276                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
277                        0 /* trailingSingleQuotesCount */);
278                suggestionsContainer.set(i, transformedWordInfo);
279            }
280        }
281
282        if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord,
283                wordComposer.getRejectedBatchModeSuggestion())) {
284            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
285            suggestionsContainer.add(1, rejected);
286        }
287        SuggestedWordInfo.removeDups(suggestionsContainer);
288
289        // For some reason some suggestions with MIN_VALUE are making their way here.
290        // TODO: Find a more robust way to detect distractors.
291        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
292            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
293                suggestionsContainer.remove(i);
294            }
295        }
296
297        // In the batch input mode, the most relevant suggested word should act as a "typed word"
298        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
299        callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer, rawSuggestions,
300                true /* typedWordValid */,
301                false /* willAutoCorrect */,
302                false /* isObsoleteSuggestions */,
303                false /* isPrediction */, sequenceNumber));
304    }
305
306    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
307            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
308        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
309        typedWordInfo.setDebugString("+");
310        final int suggestionsSize = suggestions.size();
311        final ArrayList<SuggestedWordInfo> suggestionsList =
312                CollectionUtils.newArrayList(suggestionsSize);
313        suggestionsList.add(typedWordInfo);
314        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
315        // than i because we added the typed word to mSuggestions without touching mScores.
316        for (int i = 0; i < suggestionsSize - 1; ++i) {
317            final SuggestedWordInfo cur = suggestions.get(i + 1);
318            final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
319                    typedWord, cur.toString(), cur.mScore);
320            final String scoreInfoString;
321            if (normalizedScore > 0) {
322                scoreInfoString = String.format(
323                        Locale.ROOT, "%d (%4.2f), %s", cur.mScore, normalizedScore,
324                        cur.mSourceDict.mDictType);
325            } else {
326                scoreInfoString = Integer.toString(cur.mScore);
327            }
328            cur.setDebugString(scoreInfoString);
329            suggestionsList.add(cur);
330        }
331        return suggestionsList;
332    }
333
334    private static final class SuggestedWordInfoComparator
335            implements Comparator<SuggestedWordInfo> {
336        // This comparator ranks the word info with the higher frequency first. That's because
337        // that's the order we want our elements in.
338        @Override
339        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
340            if (o1.mScore > o2.mScore) return -1;
341            if (o1.mScore < o2.mScore) return 1;
342            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
343            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
344            return o1.mWord.compareTo(o2.mWord);
345        }
346    }
347    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
348            new SuggestedWordInfoComparator();
349
350    /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
351            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
352            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
353        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
354        if (isAllUpperCase) {
355            sb.append(wordInfo.mWord.toUpperCase(locale));
356        } else if (isFirstCharCapitalized) {
357            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
358        } else {
359            sb.append(wordInfo.mWord);
360        }
361        // Appending quotes is here to help people quote words. However, it's not helpful
362        // when they type words with quotes toward the end like "it's" or "didn't", where
363        // it's more likely the user missed the last character (or didn't type it yet).
364        final int quotesToAppend = trailingSingleQuotesCount
365                - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
366        for (int i = quotesToAppend - 1; i >= 0; --i) {
367            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
368        }
369        return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
370                wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
371                wordInfo.mAutoCommitFirstWordConfidence);
372    }
373
374    public void close() {
375        mDictionaryFacilitator.close();
376    }
377}
378