Suggest.java revision e9a86e2cdb58dd8d5601138294521e966d164520
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.content.Context;
20import android.text.TextUtils;
21
22import com.android.inputmethod.keyboard.Keyboard;
23import com.android.inputmethod.keyboard.ProximityInfo;
24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25
26import java.io.File;
27import java.util.ArrayList;
28import java.util.Comparator;
29import java.util.HashSet;
30import java.util.Locale;
31import java.util.concurrent.ConcurrentHashMap;
32
33/**
34 * This class loads a dictionary and provides a list of suggestions for a given sequence of
35 * characters. This includes corrections and completions.
36 */
37public class Suggest {
38    public static final String TAG = Suggest.class.getSimpleName();
39
40    // TODO: rename this to CORRECTION_OFF
41    public static final int CORRECTION_NONE = 0;
42    // TODO: rename this to CORRECTION_ON
43    public static final int CORRECTION_FULL = 1;
44
45    private static final boolean DBG = LatinImeLogger.sDBG;
46
47    private Dictionary mMainDictionary;
48    private ContactsBinaryDictionary mContactsDict;
49    private WhitelistDictionary mWhiteListDictionary;
50    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
51            new ConcurrentHashMap<String, Dictionary>();
52
53    public static final int MAX_SUGGESTIONS = 18;
54
55    private float mAutoCorrectionThreshold;
56
57    // Locale used for upper- and title-casing words
58    final private Locale mLocale;
59
60    public Suggest(final Context context, final Locale locale) {
61        initAsynchronously(context, locale);
62        mLocale = locale;
63    }
64
65    /* package for test */ Suggest(final Context context, final File dictionary,
66            final long startOffset, final long length, final Locale locale) {
67        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary,
68                startOffset, length /* useFullEditDistance */, false, locale);
69        mLocale = locale;
70        mMainDictionary = mainDict;
71        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
72        initWhitelistAndAutocorrectAndPool(context, locale);
73    }
74
75    private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) {
76        mWhiteListDictionary = new WhitelistDictionary(context, locale);
77        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_WHITELIST, mWhiteListDictionary);
78    }
79
80    private void initAsynchronously(final Context context, final Locale locale) {
81        resetMainDict(context, locale);
82
83        // TODO: read the whitelist and init the pool asynchronously too.
84        // initPool should be done asynchronously now that the pool is thread-safe.
85        initWhitelistAndAutocorrectAndPool(context, locale);
86    }
87
88    private static void addOrReplaceDictionary(
89            final ConcurrentHashMap<String, Dictionary> dictionaries,
90            final String key, final Dictionary dict) {
91        final Dictionary oldDict = (dict == null)
92                ? dictionaries.remove(key)
93                : dictionaries.put(key, dict);
94        if (oldDict != null && dict != oldDict) {
95            oldDict.close();
96        }
97    }
98
99    public void resetMainDict(final Context context, final Locale locale) {
100        mMainDictionary = null;
101        new Thread("InitializeBinaryDictionary") {
102            @Override
103            public void run() {
104                final DictionaryCollection newMainDict =
105                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
106                addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
107                mMainDictionary = newMainDict;
108            }
109        }.start();
110    }
111
112    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
113    // of this method.
114    public boolean hasMainDictionary() {
115        return null != mMainDictionary && mMainDictionary.isInitialized();
116    }
117
118    public Dictionary getMainDictionary() {
119        return mMainDictionary;
120    }
121
122    public ContactsBinaryDictionary getContactsDictionary() {
123        return mContactsDict;
124    }
125
126    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
127        return mDictionaries;
128    }
129
130    /**
131     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
132     * before the main dictionary, if set. This refers to the system-managed user dictionary.
133     */
134    public void setUserDictionary(UserBinaryDictionary userDictionary) {
135        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
136    }
137
138    /**
139     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
140     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
141     * won't be used.
142     */
143    public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) {
144        mContactsDict = contactsDictionary;
145        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
146    }
147
148    public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) {
149        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
150    }
151
152    public void setAutoCorrectionThreshold(float threshold) {
153        mAutoCorrectionThreshold = threshold;
154    }
155
156    // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
157    public SuggestedWords getSuggestedWords(
158            final WordComposer wordComposer, CharSequence prevWordForBigram,
159            final ProximityInfo proximityInfo, final boolean isCorrectionEnabled,
160            // TODO: remove isPrediction parameter. It effectively means the same thing
161            // as wordComposer.size() <= 1
162            final boolean isPrediction) {
163        LatinImeLogger.onStartSuggestion(prevWordForBigram);
164        final boolean isFirstCharCapitalized =
165                !isPrediction && wordComposer.isFirstCharCapitalized();
166        final boolean isAllUpperCase = !isPrediction && wordComposer.isAllUpperCase();
167        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
168        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
169                MAX_SUGGESTIONS);
170
171        final String typedWord = wordComposer.getTypedWord();
172        final String consideredWord = trailingSingleQuotesCount > 0
173                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
174                : typedWord;
175        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
176
177        if (wordComposer.size() <= 1 && isCorrectionEnabled) {
178            // At first character typed, search only the bigrams
179            if (!TextUtils.isEmpty(prevWordForBigram)) {
180                for (final String key : mDictionaries.keySet()) {
181                    final Dictionary dictionary = mDictionaries.get(key);
182                    suggestionsSet.addAll(dictionary.getBigrams(wordComposer, prevWordForBigram));
183                }
184            }
185        } else if (wordComposer.size() > 1) {
186            final WordComposer wordComposerForLookup;
187            if (trailingSingleQuotesCount > 0) {
188                wordComposerForLookup = new WordComposer(wordComposer);
189                for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
190                    wordComposerForLookup.deleteLast();
191                }
192            } else {
193                wordComposerForLookup = wordComposer;
194            }
195            // At second character typed, search the unigrams (scores being affected by bigrams)
196            for (final String key : mDictionaries.keySet()) {
197                // Skip UserUnigramDictionary and WhitelistDictionary to lookup
198                if (key.equals(Dictionary.TYPE_USER_HISTORY)
199                        || key.equals(Dictionary.TYPE_WHITELIST))
200                    continue;
201                final Dictionary dictionary = mDictionaries.get(key);
202                suggestionsSet.addAll(dictionary.getWords(
203                        wordComposerForLookup, prevWordForBigram, proximityInfo));
204            }
205        }
206
207        // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid"
208        // but still autocorrected from - in the case the whitelist only capitalizes the word.
209        // The whitelist should be case-insensitive, so it's not possible to be consistent with
210        // a boolean flag. Right now this is handled with a slight hack in
211        // WhitelistDictionary#shouldForciblyAutoCorrectFrom.
212        final boolean allowsToBeAutoCorrected = AutoCorrection.isWhitelistedOrNotAWord(
213                mDictionaries, consideredWord, wordComposer.isFirstCharCapitalized());
214
215        final CharSequence whitelistedWord =
216                mWhiteListDictionary.getWhitelistedWord(consideredWord);
217
218        final boolean hasAutoCorrection;
219        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || wordComposer.isMostlyCaps()
220                || wordComposer.isResumed() || !hasMainDictionary()) {
221            // If we don't have a main dictionary, we never want to auto-correct. The reason for
222            // this is, the user may have a contact whose name happens to match a valid word in
223            // their language, and it will unexpectedly auto-correct. For example, if the user
224            // types in English with no dictionary and has a "Will" in their contact list, "will"
225            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
226            // auto-correct.
227            hasAutoCorrection = false;
228        } else if (null != whitelistedWord) {
229            hasAutoCorrection = true;
230        } else if (suggestionsSet.isEmpty()) {
231            hasAutoCorrection = false;
232        } else if (AutoCorrection.suggestionExceedsAutoCorrectionThreshold(suggestionsSet.first(),
233                consideredWord, mAutoCorrectionThreshold)) {
234            hasAutoCorrection = true;
235        } else {
236            hasAutoCorrection = false;
237        }
238
239        if (whitelistedWord != null) {
240            suggestionsSet.add(new SuggestedWordInfo(whitelistedWord,
241                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_WHITELIST,
242                    Dictionary.TYPE_WHITELIST));
243        }
244
245        final ArrayList<SuggestedWordInfo> suggestionsContainer =
246                new ArrayList<SuggestedWordInfo>(suggestionsSet);
247        final int suggestionsCount = suggestionsContainer.size();
248        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
249            for (int i = 0; i < suggestionsCount; ++i) {
250                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
251                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
252                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
253                        trailingSingleQuotesCount);
254                suggestionsContainer.set(i, transformedWordInfo);
255            }
256        }
257
258        for (int i = 0; i < suggestionsCount; ++i) {
259            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
260            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
261        }
262
263        if (!TextUtils.isEmpty(typedWord)) {
264            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
265                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
266                    Dictionary.TYPE_USER_TYPED));
267        }
268        SuggestedWordInfo.removeDups(suggestionsContainer);
269
270        final ArrayList<SuggestedWordInfo> suggestionsList;
271        if (DBG && !suggestionsContainer.isEmpty()) {
272            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
273        } else {
274            suggestionsList = suggestionsContainer;
275        }
276
277        return new SuggestedWords(suggestionsList,
278                // TODO: this first argument is lying. If this is a whitelisted word which is an
279                // actual word, it says typedWordValid = false, which looks wrong. We should either
280                // rename the attribute or change the value.
281                !isPrediction && !allowsToBeAutoCorrected /* typedWordValid */,
282                !isPrediction && hasAutoCorrection, /* willAutoCorrect */
283                false /* isPunctuationSuggestions */,
284                false /* isObsoleteSuggestions */,
285                isPrediction);
286    }
287
288    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
289            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
290        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
291        typedWordInfo.setDebugString("+");
292        final int suggestionsSize = suggestions.size();
293        final ArrayList<SuggestedWordInfo> suggestionsList =
294                new ArrayList<SuggestedWordInfo>(suggestionsSize);
295        suggestionsList.add(typedWordInfo);
296        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
297        // than i because we added the typed word to mSuggestions without touching mScores.
298        for (int i = 0; i < suggestionsSize - 1; ++i) {
299            final SuggestedWordInfo cur = suggestions.get(i + 1);
300            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
301                    typedWord, cur.toString(), cur.mScore);
302            final String scoreInfoString;
303            if (normalizedScore > 0) {
304                scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
305            } else {
306                scoreInfoString = Integer.toString(cur.mScore);
307            }
308            cur.setDebugString(scoreInfoString);
309            suggestionsList.add(cur);
310        }
311        return suggestionsList;
312    }
313
314    private static class SuggestedWordInfoComparator implements Comparator<SuggestedWordInfo> {
315        // This comparator ranks the word info with the higher frequency first. That's because
316        // that's the order we want our elements in.
317        @Override
318        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
319            if (o1.mScore > o2.mScore) return -1;
320            if (o1.mScore < o2.mScore) return 1;
321            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
322            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
323            return o1.mWord.toString().compareTo(o2.mWord.toString());
324        }
325    }
326    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
327            new SuggestedWordInfoComparator();
328
329    private static SuggestedWordInfo getTransformedSuggestedWordInfo(
330            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
331            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
332        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
333        if (isAllUpperCase) {
334            sb.append(wordInfo.mWord.toString().toUpperCase(locale));
335        } else if (isFirstCharCapitalized) {
336            sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale));
337        } else {
338            sb.append(wordInfo.mWord);
339        }
340        for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
341            sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
342        }
343        return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict);
344    }
345
346    public void close() {
347        final HashSet<Dictionary> dictionaries = new HashSet<Dictionary>();
348        dictionaries.addAll(mDictionaries.values());
349        for (final Dictionary dictionary : dictionaries) {
350            dictionary.close();
351        }
352        mMainDictionary = null;
353    }
354}
355