1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.content.Context;
20import android.preference.PreferenceManager;
21import android.text.TextUtils;
22import android.util.Log;
23
24import com.android.inputmethod.annotations.UsedForTesting;
25import com.android.inputmethod.keyboard.ProximityInfo;
26import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
27import com.android.inputmethod.latin.personalization.PersonalizationDictionary;
28import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary;
29import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
30import com.android.inputmethod.latin.settings.Settings;
31import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
32import com.android.inputmethod.latin.utils.BoundedTreeSet;
33import com.android.inputmethod.latin.utils.CollectionUtils;
34import com.android.inputmethod.latin.utils.StringUtils;
35
36import java.util.ArrayList;
37import java.util.Comparator;
38import java.util.HashSet;
39import java.util.Locale;
40import java.util.concurrent.ConcurrentHashMap;
41
42/**
43 * This class loads a dictionary and provides a list of suggestions for a given sequence of
44 * characters. This includes corrections and completions.
45 */
46public final class Suggest {
    // Tag used for log messages emitted by this class.
    public static final String TAG = Suggest.class.getSimpleName();

    // Session ids passed to {@link #getSuggestedWords} as its sessionId argument.
    // We are sharing the same ID between typing and gesture to save RAM footprint.
    public static final int SESSION_TYPING = 0;
    public static final int SESSION_GESTURE = 0;

    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;

    // Close to -2**31. Batch input suggestions scoring below this value are discarded.
    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;

    // Capacity of the bounded suggestion set built for each lookup.
    public static final int MAX_SUGGESTIONS = 18;
64
    /**
     * Listener notified about the availability of the main dictionary while it is (re)loaded.
     */
    public interface SuggestInitializationListener {
        /**
         * Called when the availability of the main dictionary may have changed.
         *
         * @param isMainDictionaryAvailable the value of {@link #hasMainDictionary()} at the
         *         time of the call
         */
        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
    }
68
69    private static final boolean DBG = LatinImeLogger.sDBG;
70
71    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
72            CollectionUtils.newConcurrentHashMap();
73    private HashSet<String> mOnlyDictionarySetForDebug = null;
74    private Dictionary mMainDictionary;
75    private ContactsBinaryDictionary mContactsDict;
76    @UsedForTesting
77    private boolean mIsCurrentlyWaitingForMainDictionary = false;
78
79    private float mAutoCorrectionThreshold;
80
81    // Locale used for upper- and title-casing words
82    public final Locale mLocale;
83
84    public Suggest(final Context context, final Locale locale,
85            final SuggestInitializationListener listener) {
86        initAsynchronously(context, locale, listener);
87        mLocale = locale;
88        // initialize a debug flag for the personalization
89        if (Settings.readUseOnlyPersonalizationDictionaryForDebug(
90                PreferenceManager.getDefaultSharedPreferences(context))) {
91            mOnlyDictionarySetForDebug = new HashSet<String>();
92            mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION);
93            mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA);
94        }
95    }
96
97    @UsedForTesting
98    Suggest(final AssetFileAddress[] dictionaryList, final Locale locale) {
99        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(dictionaryList,
100                false /* useFullEditDistance */, locale);
101        mLocale = locale;
102        mMainDictionary = mainDict;
103        addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, mainDict);
104    }
105
    /**
     * Starts loading the main dictionary by delegating to {@link #resetMainDict}.
     *
     * @param context forwarded to {@link #resetMainDict}
     * @param locale forwarded to {@link #resetMainDict}
     * @param listener forwarded to {@link #resetMainDict}; may be null
     */
    private void initAsynchronously(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        resetMainDict(context, locale, listener);
    }
110
111    private void addOrReplaceDictionaryInternal(final String key, final Dictionary dict) {
112        if (mOnlyDictionarySetForDebug != null && !mOnlyDictionarySetForDebug.contains(key)) {
113            Log.w(TAG, "Ignore add " + key + " dictionary for debug.");
114            return;
115        }
116        addOrReplaceDictionary(mDictionaries, key, dict);
117    }
118
119    private static void addOrReplaceDictionary(
120            final ConcurrentHashMap<String, Dictionary> dictionaries,
121            final String key, final Dictionary dict) {
122        final Dictionary oldDict = (dict == null)
123                ? dictionaries.remove(key)
124                : dictionaries.put(key, dict);
125        if (oldDict != null && dict != oldDict) {
126            oldDict.close();
127        }
128    }
129
    /**
     * Clears the main dictionary reference and reloads the main dictionary for the given
     * locale on a newly started background thread.
     *
     * The listener, when non-null, is called once on the calling thread right after the
     * reference has been cleared, and once more on the background thread after the newly
     * created dictionary has been registered.
     *
     * @param context passed to DictionaryFactory.createMainDictionaryFromManager
     * @param locale the locale whose main dictionary is loaded
     * @param listener receives main dictionary availability updates; may be null
     */
    public void resetMainDict(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        mIsCurrentlyWaitingForMainDictionary = true;
        mMainDictionary = null;
        if (listener != null) {
            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
        }
        new Thread("InitializeBinaryDictionary") {
            @Override
            public void run() {
                final DictionaryCollection newMainDict =
                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
                // Register the new dictionary (which closes a replaced one) before publishing
                // it through mMainDictionary.
                addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, newMainDict);
                mMainDictionary = newMainDict;
                if (listener != null) {
                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
                }
                // The waiting flag is cleared only after the listener has been notified.
                mIsCurrentlyWaitingForMainDictionary = false;
            }
        }.start();
    }
151
152    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
153    // of this method.
154    public boolean hasMainDictionary() {
155        return null != mMainDictionary && mMainDictionary.isInitialized();
156    }
157
    /**
     * Tells whether a main dictionary load started by {@link #resetMainDict} has not finished
     * yet.
     *
     * @return true from the start of {@link #resetMainDict} until its background thread is done
     */
    @UsedForTesting
    public boolean isCurrentlyWaitingForMainDictionary() {
        return mIsCurrentlyWaitingForMainDictionary;
    }
162
    /**
     * Returns the current main dictionary reference.
     *
     * @return the main dictionary; null while a reload is in progress and after
     *         {@link #close()}
     */
    public Dictionary getMainDictionary() {
        return mMainDictionary;
    }
166
    /**
     * Returns the contacts dictionary last passed to {@link #setContactsDictionary}.
     *
     * @return the contacts dictionary, or null if none has been set or it has been removed
     */
    public ContactsBinaryDictionary getContactsDictionary() {
        return mContactsDict;
    }
170
    /**
     * Returns the map of registered dictionaries, keyed by dictionary type.
     *
     * @return the internal map itself, not a copy
     */
    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
        return mDictionaries;
    }
174
    /**
     * Sets an optional user dictionary resource to be loaded. This refers to the
     * system-managed user dictionary. The dictionary is stored under
     * {@link Dictionary#TYPE_USER}.
     *
     * NOTE(review): this comment used to state that the user dictionary is consulted before
     * the main dictionary; this method does not establish any lookup order - confirm.
     *
     * @param userDictionary the user dictionary to use
     */
    public void setUserDictionary(final UserBinaryDictionary userDictionary) {
        addOrReplaceDictionaryInternal(Dictionary.TYPE_USER, userDictionary);
    }
182
    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts
     * dictionary will be used.
     *
     * @param contactsDictionary the contacts dictionary to use, or null to remove it
     */
    public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) {
        mContactsDict = contactsDictionary;
        addOrReplaceDictionaryInternal(Dictionary.TYPE_CONTACTS, contactsDictionary);
    }
192
    /**
     * Sets the user history dictionary, stored under {@link Dictionary#TYPE_USER_HISTORY}.
     *
     * @param userHistoryDictionary the user history dictionary to use
     */
    public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
        addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    }
196
    /**
     * Sets the personalization prediction dictionary, stored under
     * {@link Dictionary#TYPE_PERSONALIZATION_PREDICTION_IN_JAVA}.
     *
     * @param personalizationPredictionDictionary the dictionary to use
     */
    public void setPersonalizationPredictionDictionary(
            final PersonalizationPredictionDictionary personalizationPredictionDictionary) {
        addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA,
                personalizationPredictionDictionary);
    }
202
    /**
     * Sets the personalization dictionary, stored under
     * {@link Dictionary#TYPE_PERSONALIZATION}.
     *
     * @param personalizationDictionary the dictionary to use
     */
    public void setPersonalizationDictionary(
            final PersonalizationDictionary personalizationDictionary) {
        addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION,
                personalizationDictionary);
    }
208
    /**
     * Sets the threshold used to decide whether the best suggestion for typing input
     * qualifies as an auto-correction.
     *
     * @param threshold the new auto-correction threshold
     */
    public void setAutoCorrectionThreshold(float threshold) {
        mAutoCorrectionThreshold = threshold;
    }
212
    /**
     * Callback through which {@link #getSuggestedWords} delivers its result.
     */
    public interface OnGetSuggestedWordsCallback {
        /**
         * Called with the suggestions computed for a request.
         *
         * @param suggestedWords the computed suggestions
         */
        public void onGetSuggestedWords(final SuggestedWords suggestedWords);
    }
216
217    public void getSuggestedWords(final WordComposer wordComposer,
218            final String prevWordForBigram, final ProximityInfo proximityInfo,
219            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
220            final int[] additionalFeaturesOptions, final int sessionId, final int sequenceNumber,
221            final OnGetSuggestedWordsCallback callback) {
222        LatinImeLogger.onStartSuggestion(prevWordForBigram);
223        if (wordComposer.isBatchMode()) {
224            getSuggestedWordsForBatchInput(wordComposer, prevWordForBigram, proximityInfo,
225                    blockOffensiveWords, additionalFeaturesOptions, sessionId, sequenceNumber,
226                    callback);
227        } else {
228            getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
229                    blockOffensiveWords, isCorrectionEnabled, additionalFeaturesOptions,
230                    sequenceNumber, callback);
231        }
232    }
233
234    // Retrieves suggestions for the typing input
235    // and calls the callback function with the suggestions.
236    private void getSuggestedWordsForTypingInput(final WordComposer wordComposer,
237            final String prevWordForBigram, final ProximityInfo proximityInfo,
238            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
239            final int[] additionalFeaturesOptions, final int sequenceNumber,
240            final OnGetSuggestedWordsCallback callback) {
241        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
242        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
243                MAX_SUGGESTIONS);
244
245        final String typedWord = wordComposer.getTypedWord();
246        final String consideredWord = trailingSingleQuotesCount > 0
247                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
248                : typedWord;
249        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
250
251        final WordComposer wordComposerForLookup;
252        if (trailingSingleQuotesCount > 0) {
253            wordComposerForLookup = new WordComposer(wordComposer);
254            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
255                wordComposerForLookup.deleteLast();
256            }
257        } else {
258            wordComposerForLookup = wordComposer;
259        }
260
261        for (final String key : mDictionaries.keySet()) {
262            final Dictionary dictionary = mDictionaries.get(key);
263            suggestionsSet.addAll(dictionary.getSuggestions(wordComposerForLookup,
264                    prevWordForBigram, proximityInfo, blockOffensiveWords,
265                    additionalFeaturesOptions));
266        }
267
268        final String whitelistedWord;
269        if (suggestionsSet.isEmpty()) {
270            whitelistedWord = null;
271        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
272            whitelistedWord = null;
273        } else {
274            whitelistedWord = suggestionsSet.first().mWord;
275        }
276
277        // The word can be auto-corrected if it has a whitelist entry that is not itself,
278        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
279        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
280                && !whitelistedWord.equals(consideredWord))
281                || (consideredWord.length() > 1 && !AutoCorrectionUtils.isValidWord(this,
282                        consideredWord, wordComposer.isFirstCharCapitalized()));
283
284        final boolean hasAutoCorrection;
285        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
286        // any attempt to do auto-correction is already shielded with a test for this flag; at the
287        // same time, it feels wrong that the SuggestedWord object includes information about
288        // the current settings. It may also be useful to know, when the setting is off, whether
289        // the word *would* have been auto-corrected.
290        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
291                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
292                || wordComposer.isMostlyCaps() || wordComposer.isResumed() || !hasMainDictionary()
293                || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) {
294            // If we don't have a main dictionary, we never want to auto-correct. The reason for
295            // this is, the user may have a contact whose name happens to match a valid word in
296            // their language, and it will unexpectedly auto-correct. For example, if the user
297            // types in English with no dictionary and has a "Will" in their contact list, "will"
298            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
299            // auto-correct.
300            // Also, shortcuts should never auto-correct unless they are whitelist entries.
301            // TODO: we may want to have shortcut-only entries auto-correct in the future.
302            hasAutoCorrection = false;
303        } else {
304            hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
305                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
306        }
307
308        final ArrayList<SuggestedWordInfo> suggestionsContainer =
309                CollectionUtils.newArrayList(suggestionsSet);
310        final int suggestionsCount = suggestionsContainer.size();
311        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
312        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
313        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
314            for (int i = 0; i < suggestionsCount; ++i) {
315                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
316                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
317                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
318                        trailingSingleQuotesCount);
319                suggestionsContainer.set(i, transformedWordInfo);
320            }
321        }
322
323        for (int i = 0; i < suggestionsCount; ++i) {
324            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
325            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(),
326                    wordInfo.mSourceDict.mDictType);
327        }
328
329        if (!TextUtils.isEmpty(typedWord)) {
330            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
331                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
332                    Dictionary.DICTIONARY_USER_TYPED,
333                    SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
334                    SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */));
335        }
336        SuggestedWordInfo.removeDups(suggestionsContainer);
337
338        final ArrayList<SuggestedWordInfo> suggestionsList;
339        if (DBG && !suggestionsContainer.isEmpty()) {
340            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
341        } else {
342            suggestionsList = suggestionsContainer;
343        }
344
345        callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
346                // TODO: this first argument is lying. If this is a whitelisted word which is an
347                // actual word, it says typedWordValid = false, which looks wrong. We should either
348                // rename the attribute or change the value.
349                !allowsToBeAutoCorrected /* typedWordValid */,
350                hasAutoCorrection, /* willAutoCorrect */
351                false /* isPunctuationSuggestions */,
352                false /* isObsoleteSuggestions */,
353                !wordComposer.isComposingWord() /* isPrediction */, sequenceNumber));
354    }
355
356    // Retrieves suggestions for the batch input
357    // and calls the callback function with the suggestions.
358    private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
359            final String prevWordForBigram, final ProximityInfo proximityInfo,
360            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
361            final int sessionId, final int sequenceNumber,
362            final OnGetSuggestedWordsCallback callback) {
363        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
364                MAX_SUGGESTIONS);
365
366        // At second character typed, search the unigrams (scores being affected by bigrams)
367        for (final String key : mDictionaries.keySet()) {
368            final Dictionary dictionary = mDictionaries.get(key);
369            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(wordComposer,
370                    prevWordForBigram, proximityInfo, blockOffensiveWords,
371                    additionalFeaturesOptions, sessionId));
372        }
373
374        for (SuggestedWordInfo wordInfo : suggestionsSet) {
375            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType);
376        }
377
378        final ArrayList<SuggestedWordInfo> suggestionsContainer =
379                CollectionUtils.newArrayList(suggestionsSet);
380        final int suggestionsCount = suggestionsContainer.size();
381        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
382        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
383        if (isFirstCharCapitalized || isAllUpperCase) {
384            for (int i = 0; i < suggestionsCount; ++i) {
385                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
386                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
387                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
388                        0 /* trailingSingleQuotesCount */);
389                suggestionsContainer.set(i, transformedWordInfo);
390            }
391        }
392
393        if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord,
394                wordComposer.getRejectedBatchModeSuggestion())) {
395            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
396            suggestionsContainer.add(1, rejected);
397        }
398        SuggestedWordInfo.removeDups(suggestionsContainer);
399
400        // For some reason some suggestions with MIN_VALUE are making their way here.
401        // TODO: Find a more robust way to detect distractors.
402        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
403            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
404                suggestionsContainer.remove(i);
405            }
406        }
407
408        // In the batch input mode, the most relevant suggested word should act as a "typed word"
409        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
410        callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
411                true /* typedWordValid */,
412                false /* willAutoCorrect */,
413                false /* isPunctuationSuggestions */,
414                false /* isObsoleteSuggestions */,
415                false /* isPrediction */, sequenceNumber));
416    }
417
418    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
419            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
420        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
421        typedWordInfo.setDebugString("+");
422        final int suggestionsSize = suggestions.size();
423        final ArrayList<SuggestedWordInfo> suggestionsList =
424                CollectionUtils.newArrayList(suggestionsSize);
425        suggestionsList.add(typedWordInfo);
426        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
427        // than i because we added the typed word to mSuggestions without touching mScores.
428        for (int i = 0; i < suggestionsSize - 1; ++i) {
429            final SuggestedWordInfo cur = suggestions.get(i + 1);
430            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
431                    typedWord, cur.toString(), cur.mScore);
432            final String scoreInfoString;
433            if (normalizedScore > 0) {
434                scoreInfoString = String.format(
435                        Locale.ROOT, "%d (%4.2f)", cur.mScore, normalizedScore);
436            } else {
437                scoreInfoString = Integer.toString(cur.mScore);
438            }
439            cur.setDebugString(scoreInfoString);
440            suggestionsList.add(cur);
441        }
442        return suggestionsList;
443    }
444
445    private static final class SuggestedWordInfoComparator
446            implements Comparator<SuggestedWordInfo> {
447        // This comparator ranks the word info with the higher frequency first. That's because
448        // that's the order we want our elements in.
449        @Override
450        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
451            if (o1.mScore > o2.mScore) return -1;
452            if (o1.mScore < o2.mScore) return 1;
453            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
454            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
455            return o1.mWord.compareTo(o2.mWord);
456        }
457    }
458    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
459            new SuggestedWordInfoComparator();
460
461    /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
462            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
463            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
464        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
465        if (isAllUpperCase) {
466            sb.append(wordInfo.mWord.toUpperCase(locale));
467        } else if (isFirstCharCapitalized) {
468            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
469        } else {
470            sb.append(wordInfo.mWord);
471        }
472        // Appending quotes is here to help people quote words. However, it's not helpful
473        // when they type words with quotes toward the end like "it's" or "didn't", where
474        // it's more likely the user missed the last character (or didn't type it yet).
475        final int quotesToAppend = trailingSingleQuotesCount
476                - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
477        for (int i = quotesToAppend - 1; i >= 0; --i) {
478            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
479        }
480        return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
481                wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
482                wordInfo.mAutoCommitFirstWordConfidence);
483    }
484
485    public void close() {
486        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
487        dictionaries.addAll(mDictionaries.values());
488        for (final Dictionary dictionary : dictionaries) {
489            dictionary.close();
490        }
491        mMainDictionary = null;
492    }
493}
494