1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.content.Context;
20import android.preference.PreferenceManager;
21import android.text.TextUtils;
22import android.util.Log;
23
24import com.android.inputmethod.annotations.UsedForTesting;
25import com.android.inputmethod.keyboard.ProximityInfo;
26import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
27import com.android.inputmethod.latin.personalization.PersonalizationDictionary;
28import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary;
29import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
30import com.android.inputmethod.latin.settings.Settings;
31import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
32import com.android.inputmethod.latin.utils.BoundedTreeSet;
33import com.android.inputmethod.latin.utils.CollectionUtils;
34import com.android.inputmethod.latin.utils.StringUtils;
35
36import java.util.ArrayList;
37import java.util.Comparator;
38import java.util.HashSet;
39import java.util.Locale;
40import java.util.concurrent.ConcurrentHashMap;
41
42/**
43 * This class loads a dictionary and provides a list of suggestions for a given sequence of
44 * characters. This includes corrections and completions.
45 */
46public final class Suggest {
47    public static final String TAG = Suggest.class.getSimpleName();
48
49    // Session id for
50    // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}.
51    // We are sharing the same ID between typing and gesture to save RAM footprint.
52    public static final int SESSION_TYPING = 0;
53    public static final int SESSION_GESTURE = 0;
54
55    // TODO: rename this to CORRECTION_OFF
56    public static final int CORRECTION_NONE = 0;
57    // TODO: rename this to CORRECTION_ON
58    public static final int CORRECTION_FULL = 1;
59
60    // Close to -2**31
61    private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;
62
63    public static final int MAX_SUGGESTIONS = 18;
64
65    public interface SuggestInitializationListener {
66        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
67    }
68
69    private static final boolean DBG = LatinImeLogger.sDBG;
70
71    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
72            CollectionUtils.newConcurrentHashMap();
73    private HashSet<String> mOnlyDictionarySetForDebug = null;
74    private Dictionary mMainDictionary;
75    private ContactsBinaryDictionary mContactsDict;
76    @UsedForTesting
77    private boolean mIsCurrentlyWaitingForMainDictionary = false;
78
79    private float mAutoCorrectionThreshold;
80
81    // Locale used for upper- and title-casing words
82    public final Locale mLocale;
83
84    public Suggest(final Context context, final Locale locale,
85            final SuggestInitializationListener listener) {
86        initAsynchronously(context, locale, listener);
87        mLocale = locale;
88        // initialize a debug flag for the personalization
89        if (Settings.readUseOnlyPersonalizationDictionaryForDebug(
90                PreferenceManager.getDefaultSharedPreferences(context))) {
91            mOnlyDictionarySetForDebug = new HashSet<String>();
92            mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION);
93            mOnlyDictionarySetForDebug.add(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA);
94        }
95    }
96
97    @UsedForTesting
98    Suggest(final AssetFileAddress[] dictionaryList, final Locale locale) {
99        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(dictionaryList,
100                false /* useFullEditDistance */, locale);
101        mLocale = locale;
102        mMainDictionary = mainDict;
103        addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, mainDict);
104    }
105
106    private void initAsynchronously(final Context context, final Locale locale,
107            final SuggestInitializationListener listener) {
108        resetMainDict(context, locale, listener);
109    }
110
111    private void addOrReplaceDictionaryInternal(final String key, final Dictionary dict) {
112        if (mOnlyDictionarySetForDebug != null && !mOnlyDictionarySetForDebug.contains(key)) {
113            Log.w(TAG, "Ignore add " + key + " dictionary for debug.");
114            return;
115        }
116        addOrReplaceDictionary(mDictionaries, key, dict);
117    }
118
119    private static void addOrReplaceDictionary(
120            final ConcurrentHashMap<String, Dictionary> dictionaries,
121            final String key, final Dictionary dict) {
122        final Dictionary oldDict = (dict == null)
123                ? dictionaries.remove(key)
124                : dictionaries.put(key, dict);
125        if (oldDict != null && dict != oldDict) {
126            oldDict.close();
127        }
128    }
129
130    public void resetMainDict(final Context context, final Locale locale,
131            final SuggestInitializationListener listener) {
132        mIsCurrentlyWaitingForMainDictionary = true;
133        mMainDictionary = null;
134        if (listener != null) {
135            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
136        }
137        new Thread("InitializeBinaryDictionary") {
138            @Override
139            public void run() {
140                final DictionaryCollection newMainDict =
141                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
142                addOrReplaceDictionaryInternal(Dictionary.TYPE_MAIN, newMainDict);
143                mMainDictionary = newMainDict;
144                if (listener != null) {
145                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
146                }
147                mIsCurrentlyWaitingForMainDictionary = false;
148            }
149        }.start();
150    }
151
152    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
153    // of this method.
154    public boolean hasMainDictionary() {
155        return null != mMainDictionary && mMainDictionary.isInitialized();
156    }
157
158    @UsedForTesting
159    public boolean isCurrentlyWaitingForMainDictionary() {
160        return mIsCurrentlyWaitingForMainDictionary;
161    }
162
163    public Dictionary getMainDictionary() {
164        return mMainDictionary;
165    }
166
167    public ContactsBinaryDictionary getContactsDictionary() {
168        return mContactsDict;
169    }
170
171    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
172        return mDictionaries;
173    }
174
175    /**
176     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
177     * before the main dictionary, if set. This refers to the system-managed user dictionary.
178     */
179    public void setUserDictionary(final UserBinaryDictionary userDictionary) {
180        addOrReplaceDictionaryInternal(Dictionary.TYPE_USER, userDictionary);
181    }
182
183    /**
184     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
185     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
186     * won't be used.
187     */
188    public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) {
189        mContactsDict = contactsDictionary;
190        addOrReplaceDictionaryInternal(Dictionary.TYPE_CONTACTS, contactsDictionary);
191    }
192
193    public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
194        addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
195    }
196
197    public void setPersonalizationPredictionDictionary(
198            final PersonalizationPredictionDictionary personalizationPredictionDictionary) {
199        addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION_PREDICTION_IN_JAVA,
200                personalizationPredictionDictionary);
201    }
202
203    public void setPersonalizationDictionary(
204            final PersonalizationDictionary personalizationDictionary) {
205        addOrReplaceDictionaryInternal(Dictionary.TYPE_PERSONALIZATION,
206                personalizationDictionary);
207    }
208
209    public void setAutoCorrectionThreshold(float threshold) {
210        mAutoCorrectionThreshold = threshold;
211    }
212
213    public interface OnGetSuggestedWordsCallback {
214        public void onGetSuggestedWords(final SuggestedWords suggestedWords);
215    }
216
217    public void getSuggestedWords(final WordComposer wordComposer,
218            final String prevWordForBigram, final ProximityInfo proximityInfo,
219            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
220            final int[] additionalFeaturesOptions, final int sessionId,
221            final OnGetSuggestedWordsCallback callback) {
222        LatinImeLogger.onStartSuggestion(prevWordForBigram);
223        if (wordComposer.isBatchMode()) {
224            getSuggestedWordsForBatchInput(wordComposer, prevWordForBigram, proximityInfo,
225                    blockOffensiveWords, additionalFeaturesOptions, sessionId, callback);
226        } else {
227            getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
228                    blockOffensiveWords, isCorrectionEnabled, additionalFeaturesOptions, callback);
229        }
230    }
231
232    // Retrieves suggestions for the typing input
233    // and calls the callback function with the suggestions.
234    private void getSuggestedWordsForTypingInput(final WordComposer wordComposer,
235            final String prevWordForBigram, final ProximityInfo proximityInfo,
236            final boolean blockOffensiveWords, final boolean isCorrectionEnabled,
237            final int[] additionalFeaturesOptions, final OnGetSuggestedWordsCallback callback) {
238        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
239        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
240                MAX_SUGGESTIONS);
241
242        final String typedWord = wordComposer.getTypedWord();
243        final String consideredWord = trailingSingleQuotesCount > 0
244                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
245                : typedWord;
246        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
247
248        final WordComposer wordComposerForLookup;
249        if (trailingSingleQuotesCount > 0) {
250            wordComposerForLookup = new WordComposer(wordComposer);
251            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
252                wordComposerForLookup.deleteLast();
253            }
254        } else {
255            wordComposerForLookup = wordComposer;
256        }
257
258        for (final String key : mDictionaries.keySet()) {
259            final Dictionary dictionary = mDictionaries.get(key);
260            suggestionsSet.addAll(dictionary.getSuggestions(wordComposerForLookup,
261                    prevWordForBigram, proximityInfo, blockOffensiveWords,
262                    additionalFeaturesOptions));
263        }
264
265        final String whitelistedWord;
266        if (suggestionsSet.isEmpty()) {
267            whitelistedWord = null;
268        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
269            whitelistedWord = null;
270        } else {
271            whitelistedWord = suggestionsSet.first().mWord;
272        }
273
274        // The word can be auto-corrected if it has a whitelist entry that is not itself,
275        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
276        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
277                && !whitelistedWord.equals(consideredWord))
278                || (consideredWord.length() > 1 && !AutoCorrectionUtils.isValidWord(this,
279                        consideredWord, wordComposer.isFirstCharCapitalized()));
280
281        final boolean hasAutoCorrection;
282        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
283        // any attempt to do auto-correction is already shielded with a test for this flag; at the
284        // same time, it feels wrong that the SuggestedWord object includes information about
285        // the current settings. It may also be useful to know, when the setting is off, whether
286        // the word *would* have been auto-corrected.
287        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
288                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
289                || wordComposer.isMostlyCaps() || wordComposer.isResumed() || !hasMainDictionary()
290                || SuggestedWordInfo.KIND_SHORTCUT == suggestionsSet.first().mKind) {
291            // If we don't have a main dictionary, we never want to auto-correct. The reason for
292            // this is, the user may have a contact whose name happens to match a valid word in
293            // their language, and it will unexpectedly auto-correct. For example, if the user
294            // types in English with no dictionary and has a "Will" in their contact list, "will"
295            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
296            // auto-correct.
297            // Also, shortcuts should never auto-correct unless they are whitelist entries.
298            // TODO: we may want to have shortcut-only entries auto-correct in the future.
299            hasAutoCorrection = false;
300        } else {
301            hasAutoCorrection = AutoCorrectionUtils.suggestionExceedsAutoCorrectionThreshold(
302                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
303        }
304
305        final ArrayList<SuggestedWordInfo> suggestionsContainer =
306                CollectionUtils.newArrayList(suggestionsSet);
307        final int suggestionsCount = suggestionsContainer.size();
308        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
309        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
310        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
311            for (int i = 0; i < suggestionsCount; ++i) {
312                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
313                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
314                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
315                        trailingSingleQuotesCount);
316                suggestionsContainer.set(i, transformedWordInfo);
317            }
318        }
319
320        for (int i = 0; i < suggestionsCount; ++i) {
321            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
322            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(),
323                    wordInfo.mSourceDict.mDictType);
324        }
325
326        if (!TextUtils.isEmpty(typedWord)) {
327            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
328                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
329                    Dictionary.DICTIONARY_USER_TYPED,
330                    SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
331                    SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */));
332        }
333        SuggestedWordInfo.removeDups(suggestionsContainer);
334
335        final ArrayList<SuggestedWordInfo> suggestionsList;
336        if (DBG && !suggestionsContainer.isEmpty()) {
337            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
338        } else {
339            suggestionsList = suggestionsContainer;
340        }
341
342        callback.onGetSuggestedWords(new SuggestedWords(suggestionsList,
343                // TODO: this first argument is lying. If this is a whitelisted word which is an
344                // actual word, it says typedWordValid = false, which looks wrong. We should either
345                // rename the attribute or change the value.
346                !allowsToBeAutoCorrected /* typedWordValid */,
347                hasAutoCorrection, /* willAutoCorrect */
348                false /* isPunctuationSuggestions */,
349                false /* isObsoleteSuggestions */,
350                !wordComposer.isComposingWord() /* isPrediction */));
351    }
352
353    // Retrieves suggestions for the batch input
354    // and calls the callback function with the suggestions.
355    private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
356            final String prevWordForBigram, final ProximityInfo proximityInfo,
357            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
358            final int sessionId, final OnGetSuggestedWordsCallback callback) {
359        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
360                MAX_SUGGESTIONS);
361
362        // At second character typed, search the unigrams (scores being affected by bigrams)
363        for (final String key : mDictionaries.keySet()) {
364            final Dictionary dictionary = mDictionaries.get(key);
365            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(wordComposer,
366                    prevWordForBigram, proximityInfo, blockOffensiveWords,
367                    additionalFeaturesOptions, sessionId));
368        }
369
370        for (SuggestedWordInfo wordInfo : suggestionsSet) {
371            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict.mDictType);
372        }
373
374        final ArrayList<SuggestedWordInfo> suggestionsContainer =
375                CollectionUtils.newArrayList(suggestionsSet);
376        final int suggestionsCount = suggestionsContainer.size();
377        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
378        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
379        if (isFirstCharCapitalized || isAllUpperCase) {
380            for (int i = 0; i < suggestionsCount; ++i) {
381                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
382                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
383                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
384                        0 /* trailingSingleQuotesCount */);
385                suggestionsContainer.set(i, transformedWordInfo);
386            }
387        }
388
389        if (suggestionsContainer.size() > 1 && TextUtils.equals(suggestionsContainer.get(0).mWord,
390                wordComposer.getRejectedBatchModeSuggestion())) {
391            final SuggestedWordInfo rejected = suggestionsContainer.remove(0);
392            suggestionsContainer.add(1, rejected);
393        }
394        SuggestedWordInfo.removeDups(suggestionsContainer);
395
396        // For some reason some suggestions with MIN_VALUE are making their way here.
397        // TODO: Find a more robust way to detect distractors.
398        for (int i = suggestionsContainer.size() - 1; i >= 0; --i) {
399            if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) {
400                suggestionsContainer.remove(i);
401            }
402        }
403
404        // In the batch input mode, the most relevant suggested word should act as a "typed word"
405        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
406        callback.onGetSuggestedWords(new SuggestedWords(suggestionsContainer,
407                true /* typedWordValid */,
408                false /* willAutoCorrect */,
409                false /* isPunctuationSuggestions */,
410                false /* isObsoleteSuggestions */,
411                false /* isPrediction */));
412    }
413
414    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
415            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
416        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
417        typedWordInfo.setDebugString("+");
418        final int suggestionsSize = suggestions.size();
419        final ArrayList<SuggestedWordInfo> suggestionsList =
420                CollectionUtils.newArrayList(suggestionsSize);
421        suggestionsList.add(typedWordInfo);
422        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
423        // than i because we added the typed word to mSuggestions without touching mScores.
424        for (int i = 0; i < suggestionsSize - 1; ++i) {
425            final SuggestedWordInfo cur = suggestions.get(i + 1);
426            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
427                    typedWord, cur.toString(), cur.mScore);
428            final String scoreInfoString;
429            if (normalizedScore > 0) {
430                scoreInfoString = String.format(
431                        Locale.ROOT, "%d (%4.2f)", cur.mScore, normalizedScore);
432            } else {
433                scoreInfoString = Integer.toString(cur.mScore);
434            }
435            cur.setDebugString(scoreInfoString);
436            suggestionsList.add(cur);
437        }
438        return suggestionsList;
439    }
440
441    private static final class SuggestedWordInfoComparator
442            implements Comparator<SuggestedWordInfo> {
443        // This comparator ranks the word info with the higher frequency first. That's because
444        // that's the order we want our elements in.
445        @Override
446        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
447            if (o1.mScore > o2.mScore) return -1;
448            if (o1.mScore < o2.mScore) return 1;
449            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
450            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
451            return o1.mWord.compareTo(o2.mWord);
452        }
453    }
454    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
455            new SuggestedWordInfoComparator();
456
457    /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
458            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
459            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
460        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
461        if (isAllUpperCase) {
462            sb.append(wordInfo.mWord.toUpperCase(locale));
463        } else if (isFirstCharCapitalized) {
464            sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale));
465        } else {
466            sb.append(wordInfo.mWord);
467        }
468        // Appending quotes is here to help people quote words. However, it's not helpful
469        // when they type words with quotes toward the end like "it's" or "didn't", where
470        // it's more likely the user missed the last character (or didn't type it yet).
471        final int quotesToAppend = trailingSingleQuotesCount
472                - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 0 : 1);
473        for (int i = quotesToAppend - 1; i >= 0; --i) {
474            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
475        }
476        return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
477                wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
478                SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */);
479    }
480
481    public void close() {
482        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
483        dictionaries.addAll(mDictionaries.values());
484        for (final Dictionary dictionary : dictionaries) {
485            dictionary.close();
486        }
487        mMainDictionary = null;
488    }
489}
490