1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.content.Context;
20import android.text.TextUtils;
21
22import com.android.inputmethod.keyboard.Keyboard;
23import com.android.inputmethod.keyboard.ProximityInfo;
24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25
26import java.io.File;
27import java.util.ArrayList;
28import java.util.Comparator;
29import java.util.HashSet;
30import java.util.Locale;
31import java.util.concurrent.ConcurrentHashMap;
32
33/**
34 * This class loads a dictionary and provides a list of suggestions for a given sequence of
35 * characters. This includes corrections and completions.
36 */
37public final class Suggest {
    /** Log tag for this class: the simple name of {@link Suggest}. */
    public static final String TAG = Suggest.class.getSimpleName();

    // Session ids for
    // {@link #getSuggestedWords(WordComposer,CharSequence,ProximityInfo,boolean,int)}.
    // Within this class the session id is only forwarded to the dictionaries for batch input.
    public static final int SESSION_TYPING = 0;
    public static final int SESSION_GESTURE = 1;

    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;
49
50    public interface SuggestInitializationListener {
51        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
52    }
53
54    private static final boolean DBG = LatinImeLogger.sDBG;
55
56    private Dictionary mMainDictionary;
57    private ContactsBinaryDictionary mContactsDict;
58    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
59            CollectionUtils.newConcurrentHashMap();
60
61    public static final int MAX_SUGGESTIONS = 18;
62
63    private float mAutoCorrectionThreshold;
64
65    // Locale used for upper- and title-casing words
66    private final Locale mLocale;
67
68    public Suggest(final Context context, final Locale locale,
69            final SuggestInitializationListener listener) {
70        initAsynchronously(context, locale, listener);
71        mLocale = locale;
72    }
73
74    /* package for test */ Suggest(final Context context, final File dictionary,
75            final long startOffset, final long length, final Locale locale) {
76        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary,
77                startOffset, length /* useFullEditDistance */, false, locale);
78        mLocale = locale;
79        mMainDictionary = mainDict;
80        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
81    }
82
    /**
     * Kicks off loading of the main dictionary for the given locale. This simply delegates to
     * {@link #resetMainDict(Context, Locale, SuggestInitializationListener)}, which performs
     * the actual load on a separate thread.
     */
    private void initAsynchronously(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        resetMainDict(context, locale, listener);
    }
87
88    private static void addOrReplaceDictionary(
89            final ConcurrentHashMap<String, Dictionary> dictionaries,
90            final String key, final Dictionary dict) {
91        final Dictionary oldDict = (dict == null)
92                ? dictionaries.remove(key)
93                : dictionaries.put(key, dict);
94        if (oldDict != null && dict != oldDict) {
95            oldDict.close();
96        }
97    }
98
99    public void resetMainDict(final Context context, final Locale locale,
100            final SuggestInitializationListener listener) {
101        mMainDictionary = null;
102        if (listener != null) {
103            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
104        }
105        new Thread("InitializeBinaryDictionary") {
106            @Override
107            public void run() {
108                final DictionaryCollection newMainDict =
109                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
110                addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
111                mMainDictionary = newMainDict;
112                if (listener != null) {
113                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
114                }
115            }
116        }.start();
117    }
118
119    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
120    // of this method.
121    public boolean hasMainDictionary() {
122        return null != mMainDictionary && mMainDictionary.isInitialized();
123    }
124
    /**
     * Returns the current main dictionary. This is null after a reset until the background
     * load has finished, and after {@link #close()}.
     */
    public Dictionary getMainDictionary() {
        return mMainDictionary;
    }
128
    /**
     * Returns the contacts dictionary most recently passed to
     * {@link #setContactsDictionary(ContactsBinaryDictionary)}, or null if there is none.
     */
    public ContactsBinaryDictionary getContactsDictionary() {
        return mContactsDict;
    }
132
    /**
     * Returns the map of all registered dictionaries, keyed by dictionary type. This is the
     * internal map itself rather than a copy, so changes made to it affect this object.
     */
    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
        return mDictionaries;
    }
136
    /**
     * Sets an optional user dictionary resource to be loaded. This refers to the
     * system-managed user dictionary. Passing null removes the user dictionary. A previously
     * set instance is closed when it is replaced or removed.
     *
     * NOTE(review): this comment used to state that the user dictionary is consulted before
     * the main dictionary. The lookups in this class iterate over every registered dictionary
     * and merge the results by score, so no such ordering is visible here -- confirm.
     */
    public void setUserDictionary(UserBinaryDictionary userDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
    }
144
    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts
     * dictionary will be used. A previously set instance is closed when it is replaced or
     * removed.
     */
    public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) {
        // Keep a typed reference for getContactsDictionary() in addition to the map entry.
        mContactsDict = contactsDictionary;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
    }
154
    /**
     * Sets the user history dictionary, or removes it when null is passed. A previously set
     * instance is closed when it is replaced or removed. This dictionary is used for typing
     * input but skipped for batch input.
     */
    public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    }
158
    /**
     * Sets the threshold that is handed to the auto-correction check when deciding whether the
     * top suggestion for a typed word should be flagged as an auto-correction.
     */
    public void setAutoCorrectionThreshold(float threshold) {
        mAutoCorrectionThreshold = threshold;
    }
162
163    public SuggestedWords getSuggestedWords(
164            final WordComposer wordComposer, CharSequence prevWordForBigram,
165            final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, int sessionId) {
166        LatinImeLogger.onStartSuggestion(prevWordForBigram);
167        if (wordComposer.isBatchMode()) {
168            return getSuggestedWordsForBatchInput(
169                    wordComposer, prevWordForBigram, proximityInfo, sessionId);
170        } else {
171            return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
172                    isCorrectionEnabled);
173        }
174    }
175
176    // Retrieves suggestions for the typing input.
177    private SuggestedWords getSuggestedWordsForTypingInput(
178            final WordComposer wordComposer, CharSequence prevWordForBigram,
179            final ProximityInfo proximityInfo, final boolean isCorrectionEnabled) {
180        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
181        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
182                MAX_SUGGESTIONS);
183
184        final String typedWord = wordComposer.getTypedWord();
185        final String consideredWord = trailingSingleQuotesCount > 0
186                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
187                : typedWord;
188        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
189
190        final WordComposer wordComposerForLookup;
191        if (trailingSingleQuotesCount > 0) {
192            wordComposerForLookup = new WordComposer(wordComposer);
193            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
194                wordComposerForLookup.deleteLast();
195            }
196        } else {
197            wordComposerForLookup = wordComposer;
198        }
199
200        for (final String key : mDictionaries.keySet()) {
201            final Dictionary dictionary = mDictionaries.get(key);
202            suggestionsSet.addAll(dictionary.getSuggestions(
203                    wordComposerForLookup, prevWordForBigram, proximityInfo));
204        }
205
206        final CharSequence whitelistedWord;
207        if (suggestionsSet.isEmpty()) {
208            whitelistedWord = null;
209        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
210            whitelistedWord = null;
211        } else {
212            whitelistedWord = suggestionsSet.first().mWord;
213        }
214
215        // The word can be auto-corrected if it has a whitelist entry that is not itself,
216        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
217        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
218                && !whitelistedWord.equals(consideredWord))
219                || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries,
220                        consideredWord, wordComposer.isFirstCharCapitalized()));
221
222        final boolean hasAutoCorrection;
223        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
224        // any attempt to do auto-correction is already shielded with a test for this flag; at the
225        // same time, it feels wrong that the SuggestedWord object includes information about
226        // the current settings. It may also be useful to know, when the setting is off, whether
227        // the word *would* have been auto-corrected.
228        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
229                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
230                || wordComposer.isMostlyCaps() || wordComposer.isResumed()
231                || !hasMainDictionary()) {
232            // If we don't have a main dictionary, we never want to auto-correct. The reason for
233            // this is, the user may have a contact whose name happens to match a valid word in
234            // their language, and it will unexpectedly auto-correct. For example, if the user
235            // types in English with no dictionary and has a "Will" in their contact list, "will"
236            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
237            // auto-correct.
238            hasAutoCorrection = false;
239        } else {
240            hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold(
241                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
242        }
243
244        final ArrayList<SuggestedWordInfo> suggestionsContainer =
245                CollectionUtils.newArrayList(suggestionsSet);
246        final int suggestionsCount = suggestionsContainer.size();
247        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
248        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
249        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
250            for (int i = 0; i < suggestionsCount; ++i) {
251                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
252                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
253                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
254                        trailingSingleQuotesCount);
255                suggestionsContainer.set(i, transformedWordInfo);
256            }
257        }
258
259        for (int i = 0; i < suggestionsCount; ++i) {
260            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
261            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
262        }
263
264        if (!TextUtils.isEmpty(typedWord)) {
265            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
266                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
267                    Dictionary.TYPE_USER_TYPED));
268        }
269        SuggestedWordInfo.removeDups(suggestionsContainer);
270
271        final ArrayList<SuggestedWordInfo> suggestionsList;
272        if (DBG && !suggestionsContainer.isEmpty()) {
273            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
274        } else {
275            suggestionsList = suggestionsContainer;
276        }
277
278        return new SuggestedWords(suggestionsList,
279                // TODO: this first argument is lying. If this is a whitelisted word which is an
280                // actual word, it says typedWordValid = false, which looks wrong. We should either
281                // rename the attribute or change the value.
282                !allowsToBeAutoCorrected /* typedWordValid */,
283                hasAutoCorrection, /* willAutoCorrect */
284                false /* isPunctuationSuggestions */,
285                false /* isObsoleteSuggestions */,
286                !wordComposer.isComposingWord() /* isPrediction */);
287    }
288
289    // Retrieves suggestions for the batch input.
290    private SuggestedWords getSuggestedWordsForBatchInput(
291            final WordComposer wordComposer, CharSequence prevWordForBigram,
292            final ProximityInfo proximityInfo, int sessionId) {
293        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
294                MAX_SUGGESTIONS);
295
296        // At second character typed, search the unigrams (scores being affected by bigrams)
297        for (final String key : mDictionaries.keySet()) {
298            // Skip User history dictionary for lookup
299            // TODO: The user history dictionary should just override getSuggestionsWithSessionId
300            // to make sure it doesn't return anything and we should remove this test
301            if (key.equals(Dictionary.TYPE_USER_HISTORY)) {
302                continue;
303            }
304            final Dictionary dictionary = mDictionaries.get(key);
305            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(
306                    wordComposer, prevWordForBigram, proximityInfo, sessionId));
307        }
308
309        for (SuggestedWordInfo wordInfo : suggestionsSet) {
310            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
311        }
312
313        final ArrayList<SuggestedWordInfo> suggestionsContainer =
314                CollectionUtils.newArrayList(suggestionsSet);
315        final int suggestionsCount = suggestionsContainer.size();
316        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
317        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
318        if (isFirstCharCapitalized || isAllUpperCase) {
319            for (int i = 0; i < suggestionsCount; ++i) {
320                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
321                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
322                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
323                        0 /* trailingSingleQuotesCount */);
324                suggestionsContainer.set(i, transformedWordInfo);
325            }
326        }
327
328        SuggestedWordInfo.removeDups(suggestionsContainer);
329        // In the batch input mode, the most relevant suggested word should act as a "typed word"
330        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
331        return new SuggestedWords(suggestionsContainer,
332                true /* typedWordValid */,
333                false /* willAutoCorrect */,
334                false /* isPunctuationSuggestions */,
335                false /* isObsoleteSuggestions */,
336                false /* isPrediction */);
337    }
338
339    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
340            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
341        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
342        typedWordInfo.setDebugString("+");
343        final int suggestionsSize = suggestions.size();
344        final ArrayList<SuggestedWordInfo> suggestionsList =
345                CollectionUtils.newArrayList(suggestionsSize);
346        suggestionsList.add(typedWordInfo);
347        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
348        // than i because we added the typed word to mSuggestions without touching mScores.
349        for (int i = 0; i < suggestionsSize - 1; ++i) {
350            final SuggestedWordInfo cur = suggestions.get(i + 1);
351            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
352                    typedWord, cur.toString(), cur.mScore);
353            final String scoreInfoString;
354            if (normalizedScore > 0) {
355                scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
356            } else {
357                scoreInfoString = Integer.toString(cur.mScore);
358            }
359            cur.setDebugString(scoreInfoString);
360            suggestionsList.add(cur);
361        }
362        return suggestionsList;
363    }
364
365    private static final class SuggestedWordInfoComparator
366            implements Comparator<SuggestedWordInfo> {
367        // This comparator ranks the word info with the higher frequency first. That's because
368        // that's the order we want our elements in.
369        @Override
370        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
371            if (o1.mScore > o2.mScore) return -1;
372            if (o1.mScore < o2.mScore) return 1;
373            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
374            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
375            return o1.mWord.toString().compareTo(o2.mWord.toString());
376        }
377    }
378    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
379            new SuggestedWordInfoComparator();
380
381    private static SuggestedWordInfo getTransformedSuggestedWordInfo(
382            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
383            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
384        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
385        if (isAllUpperCase) {
386            sb.append(wordInfo.mWord.toString().toUpperCase(locale));
387        } else if (isFirstCharCapitalized) {
388            sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale));
389        } else {
390            sb.append(wordInfo.mWord);
391        }
392        for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
393            sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
394        }
395        return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict);
396    }
397
398    public void close() {
399        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
400        dictionaries.addAll(mDictionaries.values());
401        for (final Dictionary dictionary : dictionaries) {
402            dictionary.close();
403        }
404        mMainDictionary = null;
405    }
406}
407