Suggest.java revision 240871ecafde7834ebb4270cd7758fc904a5f3a7
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.content.Context;
20import android.text.TextUtils;
21
22import com.android.inputmethod.annotations.UsedForTesting;
23import com.android.inputmethod.keyboard.ProximityInfo;
24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25
26import java.io.File;
27import java.util.ArrayList;
28import java.util.Comparator;
29import java.util.HashSet;
30import java.util.Locale;
31import java.util.concurrent.ConcurrentHashMap;
32
33/**
34 * This class loads a dictionary and provides a list of suggestions for a given sequence of
35 * characters. This includes corrections and completions.
36 */
37public final class Suggest {
    // Simple class name of Suggest, exposed as a tag (presumably for log output).
    public static final String TAG = Suggest.class.getSimpleName();

    // Session id for
    // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}.
    // Within this class the id is only forwarded to the dictionaries on the batch input path.
    public static final int SESSION_TYPING = 0;
    public static final int SESSION_GESTURE = 1;

    // Correction modes. Neither constant is referenced inside this class.
    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;
49
50    public interface SuggestInitializationListener {
51        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
52    }
53
54    private static final boolean DBG = LatinImeLogger.sDBG;
55
56    private Dictionary mMainDictionary;
57    private ContactsBinaryDictionary mContactsDict;
58    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
59            CollectionUtils.newConcurrentHashMap();
60
61    public static final int MAX_SUGGESTIONS = 18;
62
63    private float mAutoCorrectionThreshold;
64
65    // Locale used for upper- and title-casing words
66    private final Locale mLocale;
67
68    public Suggest(final Context context, final Locale locale,
69            final SuggestInitializationListener listener) {
70        initAsynchronously(context, locale, listener);
71        mLocale = locale;
72    }
73
74    @UsedForTesting
75    Suggest(final Context context, final File dictionary,
76            final long startOffset, final long length, final Locale locale) {
77        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary,
78                startOffset, length /* useFullEditDistance */, false, locale);
79        mLocale = locale;
80        mMainDictionary = mainDict;
81        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
82    }
83
84    private void initAsynchronously(final Context context, final Locale locale,
85            final SuggestInitializationListener listener) {
86        resetMainDict(context, locale, listener);
87    }
88
89    private static void addOrReplaceDictionary(
90            final ConcurrentHashMap<String, Dictionary> dictionaries,
91            final String key, final Dictionary dict) {
92        final Dictionary oldDict = (dict == null)
93                ? dictionaries.remove(key)
94                : dictionaries.put(key, dict);
95        if (oldDict != null && dict != oldDict) {
96            oldDict.close();
97        }
98    }
99
100    public void resetMainDict(final Context context, final Locale locale,
101            final SuggestInitializationListener listener) {
102        mMainDictionary = null;
103        if (listener != null) {
104            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
105        }
106        new Thread("InitializeBinaryDictionary") {
107            @Override
108            public void run() {
109                final DictionaryCollection newMainDict =
110                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
111                addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
112                mMainDictionary = newMainDict;
113                if (listener != null) {
114                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
115                }
116            }
117        }.start();
118    }
119
120    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
121    // of this method.
122    public boolean hasMainDictionary() {
123        return null != mMainDictionary && mMainDictionary.isInitialized();
124    }
125
    /**
     * Returns the current main dictionary. This is null while no main dictionary is set, that
     * is between a call to resetMainDict() and the end of the background load, and after
     * close().
     */
    public Dictionary getMainDictionary() {
        return mMainDictionary;
    }
129
    /**
     * Returns the contacts dictionary last passed to setContactsDictionary(), or null if none
     * has been set or if it has been removed.
     */
    public ContactsBinaryDictionary getContactsDictionary() {
        return mContactsDict;
    }
133
    /**
     * Returns the map of all registered dictionaries, keyed by dictionary type. This is the
     * live map used by this object, not a copy.
     */
    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
        return mDictionaries;
    }
137
    /**
     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
     * before the main dictionary, if set. This refers to the system-managed user dictionary.
     * The user dictionary registered previously, if any, is closed.
     *
     * @param userDictionary the dictionary to register, or null to remove the current one
     */
    public void setUserDictionary(final UserBinaryDictionary userDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
    }
145
    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts
     * dictionary will be used. The contacts dictionary registered previously, if any, is closed.
     *
     * @param contactsDictionary the dictionary to register, or null to remove the current one
     */
    public void setContactsDictionary(final ContactsBinaryDictionary contactsDictionary) {
        // Keep a typed reference for getContactsDictionary() in addition to the map entry.
        mContactsDict = contactsDictionary;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
    }
155
    /**
     * Sets the user history dictionary. The one registered previously, if any, is closed. Note
     * that this dictionary is consulted for typing input only: the batch input lookup skips it.
     *
     * @param userHistoryDictionary the dictionary to register, or null to remove the current one
     */
    public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    }
159
160    public void setAutoCorrectionThreshold(float threshold) {
161        mAutoCorrectionThreshold = threshold;
162    }
163
164    public SuggestedWords getSuggestedWords(final WordComposer wordComposer,
165            final String prevWordForBigram, final ProximityInfo proximityInfo,
166            final boolean isCorrectionEnabled, final int sessionId) {
167        LatinImeLogger.onStartSuggestion(prevWordForBigram);
168        if (wordComposer.isBatchMode()) {
169            return getSuggestedWordsForBatchInput(
170                    wordComposer, prevWordForBigram, proximityInfo, sessionId);
171        } else {
172            return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
173                    isCorrectionEnabled);
174        }
175    }
176
177    // Retrieves suggestions for the typing input.
178    private SuggestedWords getSuggestedWordsForTypingInput(final WordComposer wordComposer,
179            final String prevWordForBigram, final ProximityInfo proximityInfo,
180            final boolean isCorrectionEnabled) {
181        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
182        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
183                MAX_SUGGESTIONS);
184
185        final String typedWord = wordComposer.getTypedWord();
186        final String consideredWord = trailingSingleQuotesCount > 0
187                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
188                : typedWord;
189        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
190
191        final WordComposer wordComposerForLookup;
192        if (trailingSingleQuotesCount > 0) {
193            wordComposerForLookup = new WordComposer(wordComposer);
194            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
195                wordComposerForLookup.deleteLast();
196            }
197        } else {
198            wordComposerForLookup = wordComposer;
199        }
200
201        for (final String key : mDictionaries.keySet()) {
202            final Dictionary dictionary = mDictionaries.get(key);
203            suggestionsSet.addAll(dictionary.getSuggestions(
204                    wordComposerForLookup, prevWordForBigram, proximityInfo));
205        }
206
207        final String whitelistedWord;
208        if (suggestionsSet.isEmpty()) {
209            whitelistedWord = null;
210        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
211            whitelistedWord = null;
212        } else {
213            whitelistedWord = suggestionsSet.first().mWord;
214        }
215
216        // The word can be auto-corrected if it has a whitelist entry that is not itself,
217        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
218        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
219                && !whitelistedWord.equals(consideredWord))
220                || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries,
221                        consideredWord, wordComposer.isFirstCharCapitalized()));
222
223        final boolean hasAutoCorrection;
224        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
225        // any attempt to do auto-correction is already shielded with a test for this flag; at the
226        // same time, it feels wrong that the SuggestedWord object includes information about
227        // the current settings. It may also be useful to know, when the setting is off, whether
228        // the word *would* have been auto-corrected.
229        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
230                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
231                || wordComposer.isMostlyCaps() || wordComposer.isResumed()
232                || !hasMainDictionary()) {
233            // If we don't have a main dictionary, we never want to auto-correct. The reason for
234            // this is, the user may have a contact whose name happens to match a valid word in
235            // their language, and it will unexpectedly auto-correct. For example, if the user
236            // types in English with no dictionary and has a "Will" in their contact list, "will"
237            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
238            // auto-correct.
239            hasAutoCorrection = false;
240        } else {
241            hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold(
242                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
243        }
244
245        final ArrayList<SuggestedWordInfo> suggestionsContainer =
246                CollectionUtils.newArrayList(suggestionsSet);
247        final int suggestionsCount = suggestionsContainer.size();
248        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
249        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
250        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
251            for (int i = 0; i < suggestionsCount; ++i) {
252                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
253                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
254                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
255                        trailingSingleQuotesCount);
256                suggestionsContainer.set(i, transformedWordInfo);
257            }
258        }
259
260        for (int i = 0; i < suggestionsCount; ++i) {
261            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
262            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
263        }
264
265        if (!TextUtils.isEmpty(typedWord)) {
266            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
267                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
268                    Dictionary.TYPE_USER_TYPED));
269        }
270        SuggestedWordInfo.removeDups(suggestionsContainer);
271
272        final ArrayList<SuggestedWordInfo> suggestionsList;
273        if (DBG && !suggestionsContainer.isEmpty()) {
274            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
275        } else {
276            suggestionsList = suggestionsContainer;
277        }
278
279        return new SuggestedWords(suggestionsList,
280                // TODO: this first argument is lying. If this is a whitelisted word which is an
281                // actual word, it says typedWordValid = false, which looks wrong. We should either
282                // rename the attribute or change the value.
283                !allowsToBeAutoCorrected /* typedWordValid */,
284                hasAutoCorrection, /* willAutoCorrect */
285                false /* isPunctuationSuggestions */,
286                false /* isObsoleteSuggestions */,
287                !wordComposer.isComposingWord() /* isPrediction */);
288    }
289
290    // Retrieves suggestions for the batch input.
291    private SuggestedWords getSuggestedWordsForBatchInput(final WordComposer wordComposer,
292            final String prevWordForBigram, final ProximityInfo proximityInfo,
293            final int sessionId) {
294        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
295                MAX_SUGGESTIONS);
296
297        // At second character typed, search the unigrams (scores being affected by bigrams)
298        for (final String key : mDictionaries.keySet()) {
299            // Skip User history dictionary for lookup
300            // TODO: The user history dictionary should just override getSuggestionsWithSessionId
301            // to make sure it doesn't return anything and we should remove this test
302            if (key.equals(Dictionary.TYPE_USER_HISTORY)) {
303                continue;
304            }
305            final Dictionary dictionary = mDictionaries.get(key);
306            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(
307                    wordComposer, prevWordForBigram, proximityInfo, sessionId));
308        }
309
310        for (SuggestedWordInfo wordInfo : suggestionsSet) {
311            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord, wordInfo.mSourceDict);
312        }
313
314        final ArrayList<SuggestedWordInfo> suggestionsContainer =
315                CollectionUtils.newArrayList(suggestionsSet);
316        final int suggestionsCount = suggestionsContainer.size();
317        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
318        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
319        if (isFirstCharCapitalized || isAllUpperCase) {
320            for (int i = 0; i < suggestionsCount; ++i) {
321                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
322                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
323                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
324                        0 /* trailingSingleQuotesCount */);
325                suggestionsContainer.set(i, transformedWordInfo);
326            }
327        }
328
329        SuggestedWordInfo.removeDups(suggestionsContainer);
330        // In the batch input mode, the most relevant suggested word should act as a "typed word"
331        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
332        return new SuggestedWords(suggestionsContainer,
333                true /* typedWordValid */,
334                false /* willAutoCorrect */,
335                false /* isPunctuationSuggestions */,
336                false /* isObsoleteSuggestions */,
337                false /* isPrediction */);
338    }
339
340    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
341            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
342        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
343        typedWordInfo.setDebugString("+");
344        final int suggestionsSize = suggestions.size();
345        final ArrayList<SuggestedWordInfo> suggestionsList =
346                CollectionUtils.newArrayList(suggestionsSize);
347        suggestionsList.add(typedWordInfo);
348        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
349        // than i because we added the typed word to mSuggestions without touching mScores.
350        for (int i = 0; i < suggestionsSize - 1; ++i) {
351            final SuggestedWordInfo cur = suggestions.get(i + 1);
352            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
353                    typedWord, cur.toString(), cur.mScore);
354            final String scoreInfoString;
355            if (normalizedScore > 0) {
356                scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
357            } else {
358                scoreInfoString = Integer.toString(cur.mScore);
359            }
360            cur.setDebugString(scoreInfoString);
361            suggestionsList.add(cur);
362        }
363        return suggestionsList;
364    }
365
366    private static final class SuggestedWordInfoComparator
367            implements Comparator<SuggestedWordInfo> {
368        // This comparator ranks the word info with the higher frequency first. That's because
369        // that's the order we want our elements in.
370        @Override
371        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
372            if (o1.mScore > o2.mScore) return -1;
373            if (o1.mScore < o2.mScore) return 1;
374            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
375            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
376            return o1.mWord.compareTo(o2.mWord);
377        }
378    }
379    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
380            new SuggestedWordInfoComparator();
381
382    private static SuggestedWordInfo getTransformedSuggestedWordInfo(
383            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
384            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
385        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
386        if (isAllUpperCase) {
387            sb.append(wordInfo.mWord.toUpperCase(locale));
388        } else if (isFirstCharCapitalized) {
389            sb.append(StringUtils.toTitleCase(wordInfo.mWord, locale));
390        } else {
391            sb.append(wordInfo.mWord);
392        }
393        for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
394            sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
395        }
396        return new SuggestedWordInfo(sb.toString(), wordInfo.mScore, wordInfo.mKind,
397                wordInfo.mSourceDict);
398    }
399
400    public void close() {
401        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
402        dictionaries.addAll(mDictionaries.values());
403        for (final Dictionary dictionary : dictionaries) {
404            dictionary.close();
405        }
406        mMainDictionary = null;
407    }
408}
409