Suggest.java revision 674ffcdf9361b3c90cc39daf02f3217fb6d870de
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin;
18
import android.content.Context;
import android.text.TextUtils;
import android.util.Log;

import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.ProximityInfo;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
34
35/**
36 * This class loads a dictionary and provides a list of suggestions for a given sequence of
37 * characters. This includes corrections and completions.
38 */
39public class Suggest implements Dictionary.WordCallback {
40
    /** Log tag for this class: the simple class name. */
    public static final String TAG = Suggest.class.getSimpleName();

    /**
     * Approximate upper bound on word length. Within this class it is used as the initial
     * capacity of the {@code StringBuilder}s that hold suggestions.
     */
    public static final int APPROX_MAX_WORD_LENGTH = 32;

    // Correction modes accepted by getSuggestedWordBuilder(). An auto-correction word is only
    // computed for the two FULL modes; CORRECTION_FULL_BIGRAM additionally enables the
    // bigram-only lookup while at most one character has been typed.
    public static final int CORRECTION_NONE = 0;
    public static final int CORRECTION_FULL = 1;
    public static final int CORRECTION_FULL_BIGRAM = 2;

    /**
     * Words that appear in both bigram and unigram data get a score multiplier ranging from
     * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX, interpolated linearly from the score found
     * in the bigram data (see addWord()).
     */
    public static final double BIGRAM_MULTIPLIER_MIN = 1.2;
    public static final double BIGRAM_MULTIPLIER_MAX = 1.5;

    /**
     * Maximum possible bigram frequency. Will depend on how many bits are being used in data
     * structure. Maximum bigram frequency will get the BIGRAM_MULTIPLIER_MAX as the multiplier.
     */
    public static final int MAXIMUM_BIGRAM_FREQUENCY = 127;

    // Dictionary type ids. It seems the following values are only used for logging.
    public static final int DIC_USER_TYPED = 0;
    public static final int DIC_MAIN = 1;
    public static final int DIC_USER = 2;
    public static final int DIC_USER_UNIGRAM = 3;
    public static final int DIC_CONTACTS = 4;
    public static final int DIC_USER_BIGRAM = 5;
    public static final int DIC_WHITELIST = 6;
    // If you add a type of dictionary, increment DIC_TYPE_LAST_ID
    // TODO: this value seems unused. Remove it?
    public static final int DIC_TYPE_LAST_ID = 6;
    // Keys under which dictionaries are registered in the unigram and bigram dictionary maps.
    public static final String DICT_KEY_MAIN = "main";
    public static final String DICT_KEY_CONTACTS = "contacts";
    // User dictionary, the system-managed one.
    public static final String DICT_KEY_USER = "user";
    // User unigram dictionary, internal to LatinIME
    public static final String DICT_KEY_USER_UNIGRAM = "user_unigram";
    // User bigram dictionary, internal to LatinIME
    public static final String DICT_KEY_USER_BIGRAM = "user_bigram";
    public static final String DICT_KEY_WHITELIST ="whitelist";

    // Debug switch copied from LatinImeLogger; when set, getSuggestedWordBuilder() attaches score
    // strings to the suggestions and the safety net logs edit distances.
    private static final boolean DBG = LatinImeLogger.sDBG;
85
86    private Dictionary mMainDict;
87    private ContactsDictionary mContactsDict;
88    private WhitelistDictionary mWhiteListDictionary;
89    private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>();
90    private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>();
91
    // Maximum number of entries kept in mSuggestions / mScores.
    private int mPrefMaxSuggestions = 18;

    // Maximum number of entries kept in mBigramSuggestions / mBigramScores.
    private static final int PREF_MAX_BIGRAMS = 60;

    // Threshold handed to AutoCorrection.computeAutoCorrectionWord(); set through
    // setAutoCorrectionThreshold().
    private double mAutoCorrectionThreshold;
    // Score arrays maintained by addWord() alongside the suggestion lists below; addWord()
    // inserts a score at the same position as the corresponding word.
    private int[] mScores = new int[mPrefMaxSuggestions];
    private int[] mBigramScores = new int[PREF_MAX_BIGRAMS];

    // Working lists for the lookup in progress; both are re-created at the start of each lookup.
    private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
    private ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>();
    // The typed word without its trailing single quotes; addWord() compares candidates to it.
    private CharSequence mConsideredWord;

    // TODO: Remove these member variables by passing more context to addWord() callback method
    // Capitalization and trailing-quote state of the word being composed, captured at the start
    // of a lookup and applied by addWord() to every candidate it stores.
    private boolean mIsFirstCharCapitalized;
    private boolean mIsAllUpperCase;
    private int mTrailingSingleQuotesCount;

    // Typed words shorter than this are never blocked by shouldBlockAutoCorrectionBySafetyNet().
    private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
110
111    public Suggest(final Context context, final int dictionaryResId, final Locale locale) {
112        initAsynchronously(context, dictionaryResId, locale);
113    }
114
115    /* package for test */ Suggest(final Context context, final File dictionary,
116            final long startOffset, final long length, final Flag[] flagArray,
117            final Locale locale) {
118        initSynchronously(context, DictionaryFactory.createDictionaryForTest(context, dictionary,
119                startOffset, length, flagArray), locale);
120    }
121
122    private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) {
123        mWhiteListDictionary = new WhitelistDictionary(context, locale);
124        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary);
125    }
126
127    private void initAsynchronously(final Context context, final int dictionaryResId,
128            final Locale locale) {
129        resetMainDict(context, dictionaryResId, locale);
130
131        // TODO: read the whitelist and init the pool asynchronously too.
132        // initPool should be done asynchronously now that the pool is thread-safe.
133        initWhitelistAndAutocorrectAndPool(context, locale);
134    }
135
136    private void initSynchronously(final Context context, final Dictionary mainDict,
137            final Locale locale) {
138        mMainDict = mainDict;
139        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, mainDict);
140        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, mainDict);
141        initWhitelistAndAutocorrectAndPool(context, locale);
142    }
143
144    private static void addOrReplaceDictionary(Map<String, Dictionary> dictionaries, String key,
145            Dictionary dict) {
146        final Dictionary oldDict = (dict == null)
147                ? dictionaries.remove(key)
148                : dictionaries.put(key, dict);
149        if (oldDict != null && dict != oldDict) {
150            oldDict.close();
151        }
152    }
153
154    public void resetMainDict(final Context context, final int dictionaryResId,
155            final Locale locale) {
156        mMainDict = null;
157        new Thread("InitializeBinaryDictionary") {
158            @Override
159            public void run() {
160                final Dictionary newMainDict = DictionaryFactory.createDictionaryFromManager(
161                        context, locale, dictionaryResId);
162                mMainDict = newMainDict;
163                addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, newMainDict);
164                addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, newMainDict);
165            }
166        }.start();
167    }
168
169    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
170    // of this method.
171    public boolean hasMainDictionary() {
172        return mMainDict != null;
173    }
174
175    public ContactsDictionary getContactsDictionary() {
176        return mContactsDict;
177    }
178
179    public Map<String, Dictionary> getUnigramDictionaries() {
180        return mUnigramDictionaries;
181    }
182
183    public static int getApproxMaxWordLength() {
184        return APPROX_MAX_WORD_LENGTH;
185    }
186
187    /**
188     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
189     * before the main dictionary, if set. This refers to the system-managed user dictionary.
190     */
191    public void setUserDictionary(Dictionary userDictionary) {
192        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER, userDictionary);
193    }
194
195    /**
196     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
197     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
198     * won't be used.
199     */
200    public void setContactsDictionary(ContactsDictionary contactsDictionary) {
201        mContactsDict = contactsDictionary;
202        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary);
203        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary);
204    }
205
206    public void setUserUnigramDictionary(Dictionary userUnigramDictionary) {
207        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_UNIGRAM, userUnigramDictionary);
208    }
209
210    public void setUserBigramDictionary(Dictionary userBigramDictionary) {
211        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_BIGRAM, userBigramDictionary);
212    }
213
214    public void setAutoCorrectionThreshold(double threshold) {
215        mAutoCorrectionThreshold = threshold;
216    }
217
218    private static CharSequence capitalizeWord(final boolean all, final boolean first,
219            final CharSequence word) {
220        if (TextUtils.isEmpty(word) || !(all || first)) return word;
221        final int wordLength = word.length();
222        final StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
223        // TODO: Must pay attention to locale when changing case.
224        if (all) {
225            sb.append(word.toString().toUpperCase());
226        } else if (first) {
227            sb.append(Character.toUpperCase(word.charAt(0)));
228            if (wordLength > 1) {
229                sb.append(word.subSequence(1, wordLength));
230            }
231        }
232        return sb;
233    }
234
235    protected void addBigramToSuggestions(CharSequence bigram) {
236        final StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
237        sb.append(bigram);
238        mSuggestions.add(sb);
239    }
240
241    private static final WordComposer sEmptyWordComposer = new WordComposer();
242    public SuggestedWords.Builder getBigramPredictionWordBuilder(CharSequence prevWordForBigram) {
243        LatinImeLogger.onStartSuggestion(prevWordForBigram);
244        mIsFirstCharCapitalized = false;
245        mIsAllUpperCase = false;
246        mTrailingSingleQuotesCount = 0;
247        mSuggestions = new ArrayList<CharSequence>(mPrefMaxSuggestions);
248        Arrays.fill(mScores, 0);
249
250        // Treating USER_TYPED as UNIGRAM suggestion for logging now.
251        LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM);
252        mConsideredWord = "";
253
254        Arrays.fill(mBigramScores, 0);
255        mBigramSuggestions = new ArrayList<CharSequence>(PREF_MAX_BIGRAMS);
256
257        CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
258        if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
259            prevWordForBigram = lowerPrevWord;
260        }
261        for (final Dictionary dictionary : mBigramDictionaries.values()) {
262            dictionary.getBigrams(sEmptyWordComposer, prevWordForBigram, this);
263        }
264        // Nothing entered: return all bigrams for the previous word
265        int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions);
266        for (int i = 0; i < insertCount; ++i) {
267            addBigramToSuggestions(mBigramSuggestions.get(i));
268        }
269
270        StringUtils.removeDupes(mSuggestions);
271
272        return new SuggestedWords.Builder()
273                .setWords(SuggestedWords.Builder.getFromCharSequenceList(mSuggestions))
274                .setAllowsToBeAutoCorrected(false)
275                .setHasAutoCorrection(false);
276    }
277
278    // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
279    public SuggestedWords.Builder getSuggestedWordBuilder(
280            final WordComposer wordComposer, CharSequence prevWordForBigram,
281            final ProximityInfo proximityInfo, final int correctionMode) {
282        LatinImeLogger.onStartSuggestion(prevWordForBigram);
283        mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
284        mIsAllUpperCase = wordComposer.isAllUpperCase();
285        mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
286        mSuggestions = new ArrayList<CharSequence>(mPrefMaxSuggestions);
287        Arrays.fill(mScores, 0);
288
289        final String typedWord = wordComposer.getTypedWord();
290        final String consideredWord = mTrailingSingleQuotesCount > 0
291                ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount)
292                : typedWord;
293        // Treating USER_TYPED as UNIGRAM suggestion for logging now.
294        LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED,
295                Dictionary.UNIGRAM);
296        mConsideredWord = consideredWord;
297
298        // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid"
299        // but still autocorrected from - in the case the whitelist only capitalizes the word.
300        // The whitelist should be case-insensitive, so it's not possible to be consistent with
301        // a boolean flag. Right now this is handled with a slight hack in
302        // WhitelistDictionary#shouldForciblyAutoCorrectFrom.
303        final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected(
304                getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized());
305
306        if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) {
307            // At first character typed, search only the bigrams
308            Arrays.fill(mBigramScores, 0);
309            mBigramSuggestions = new ArrayList<CharSequence>(PREF_MAX_BIGRAMS);
310
311            if (!TextUtils.isEmpty(prevWordForBigram)) {
312                CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
313                if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
314                    prevWordForBigram = lowerPrevWord;
315                }
316                for (final Dictionary dictionary : mBigramDictionaries.values()) {
317                    dictionary.getBigrams(wordComposer, prevWordForBigram, this);
318                }
319                if (TextUtils.isEmpty(consideredWord)) {
320                    // Nothing entered: return all bigrams for the previous word
321                    int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions);
322                    for (int i = 0; i < insertCount; ++i) {
323                        addBigramToSuggestions(mBigramSuggestions.get(i));
324                    }
325                } else {
326                    // Word entered: return only bigrams that match the first char of the typed word
327                    final char currentChar = consideredWord.charAt(0);
328                    // TODO: Must pay attention to locale when changing case.
329                    final char currentCharUpper = Character.toUpperCase(currentChar);
330                    int count = 0;
331                    final int bigramSuggestionSize = mBigramSuggestions.size();
332                    for (int i = 0; i < bigramSuggestionSize; i++) {
333                        final CharSequence bigramSuggestion = mBigramSuggestions.get(i);
334                        final char bigramSuggestionFirstChar = bigramSuggestion.charAt(0);
335                        if (bigramSuggestionFirstChar == currentChar
336                                || bigramSuggestionFirstChar == currentCharUpper) {
337                            addBigramToSuggestions(bigramSuggestion);
338                            if (++count > mPrefMaxSuggestions) break;
339                        }
340                    }
341                }
342            }
343
344        } else if (wordComposer.size() > 1) {
345            // At second character typed, search the unigrams (scores being affected by bigrams)
346            for (final String key : mUnigramDictionaries.keySet()) {
347                // Skip UserUnigramDictionary and WhitelistDictionary to lookup
348                if (key.equals(DICT_KEY_USER_UNIGRAM) || key.equals(DICT_KEY_WHITELIST))
349                    continue;
350                final Dictionary dictionary = mUnigramDictionaries.get(key);
351                if (mTrailingSingleQuotesCount > 0) {
352                    final WordComposer tmpWordComposer = new WordComposer(wordComposer);
353                    for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
354                        tmpWordComposer.deleteLast();
355                    }
356                    dictionary.getWords(tmpWordComposer, this, proximityInfo);
357                } else {
358                    dictionary.getWords(wordComposer, this, proximityInfo);
359                }
360            }
361        }
362
363        CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized,
364                mWhiteListDictionary.getWhitelistedWord(consideredWord));
365
366        final boolean hasAutoCorrection;
367        if (CORRECTION_FULL == correctionMode
368                || CORRECTION_FULL_BIGRAM == correctionMode) {
369            final CharSequence autoCorrection =
370                    AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer,
371                            mSuggestions, mScores, consideredWord, mAutoCorrectionThreshold,
372                            whitelistedWord);
373            hasAutoCorrection = (null != autoCorrection);
374        } else {
375            hasAutoCorrection = false;
376        }
377
378        if (whitelistedWord != null) {
379            if (mTrailingSingleQuotesCount > 0) {
380                final StringBuilder sb = new StringBuilder(whitelistedWord);
381                for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
382                    sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
383                }
384                mSuggestions.add(0, sb.toString());
385            } else {
386                mSuggestions.add(0, whitelistedWord);
387            }
388        }
389
390        mSuggestions.add(0, typedWord);
391        StringUtils.removeDupes(mSuggestions);
392
393        final SuggestedWords.Builder builder;
394        if (DBG) {
395            // TODO: this doesn't take into account the fact that removing dupes from mSuggestions
396            // may have made mScores[] and mSuggestions out of sync.
397            final CharSequence autoCorrectionSuggestion = mSuggestions.get(0);
398            final int autoCorrectionSuggestionScore = mScores[0];
399            double normalizedScore = BinaryDictionary.calcNormalizedScore(
400                    typedWord, autoCorrectionSuggestion.toString(),
401                    autoCorrectionSuggestionScore);
402            ArrayList<SuggestedWords.SuggestedWordInfo> scoreInfoList =
403                    new ArrayList<SuggestedWords.SuggestedWordInfo>();
404            scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(autoCorrectionSuggestion, "+",
405                    false));
406            final int suggestionsSize = mSuggestions.size();
407            // Note: i here is the index in mScores[], but the index in mSuggestions is one more
408            // than i because we added the typed word to mSuggestions without touching mScores.
409            for (int i = 0; i < mScores.length && i < suggestionsSize - 1; ++i) {
410                if (normalizedScore > 0) {
411                    final String scoreThreshold = String.format("%d (%4.2f)", mScores[i],
412                            normalizedScore);
413                    scoreInfoList.add(
414                            new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i + 1),
415                                    scoreThreshold, false));
416                    normalizedScore = 0.0;
417                } else {
418                    final String score = Integer.toString(mScores[i]);
419                    scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i + 1),
420                            score, false));
421                }
422            }
423            for (int i = mScores.length; i < suggestionsSize; ++i) {
424                scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i),
425                        "--", false));
426            }
427            builder = new SuggestedWords.Builder().setWords(scoreInfoList)
428                    .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected)
429                    .setHasAutoCorrection(hasAutoCorrection);
430        } else {
431            builder = new SuggestedWords.Builder()
432                    .setWords(SuggestedWords.Builder.getFromCharSequenceList(mSuggestions))
433                    .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected)
434                    .setHasAutoCorrection(hasAutoCorrection);
435        }
436
437        boolean autoCorrectionAvailable = hasAutoCorrection;
438        if (correctionMode == Suggest.CORRECTION_FULL
439                || correctionMode == Suggest.CORRECTION_FULL_BIGRAM) {
440            autoCorrectionAvailable |= !allowsToBeAutoCorrected;
441        }
442        // Don't auto-correct words with multiple capital letter
443        autoCorrectionAvailable &= !wordComposer.isMostlyCaps();
444        builder.setTypedWordValid(!allowsToBeAutoCorrected).setHasMinimalSuggestion(
445                autoCorrectionAvailable);
446        if (allowsToBeAutoCorrected && builder.size() > 1 && mAutoCorrectionThreshold > 0
447                && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord, builder.getWord(1))) {
448            builder.setShouldBlockAutoCorrectionBySafetyNet();
449        }
450        return builder;
451    }
452
453    @Override
454    public boolean addWord(final char[] word, final int offset, final int length, int score,
455            final int dicTypeId, final int dataType) {
456        int dataTypeForLog = dataType;
457        final ArrayList<CharSequence> suggestions;
458        final int[] sortedScores;
459        final int prefMaxSuggestions;
460        if (dataType == Dictionary.BIGRAM) {
461            suggestions = mBigramSuggestions;
462            sortedScores = mBigramScores;
463            prefMaxSuggestions = PREF_MAX_BIGRAMS;
464        } else {
465            suggestions = mSuggestions;
466            sortedScores = mScores;
467            prefMaxSuggestions = mPrefMaxSuggestions;
468        }
469
470        int pos = 0;
471
472        // Check if it's the same word, only caps are different
473        if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) {
474            // TODO: remove this surrounding if clause and move this logic to
475            // getSuggestedWordBuilder.
476            if (suggestions.size() > 0) {
477                final String currentHighestWord = suggestions.get(0).toString();
478                // If the current highest word is also equal to typed word, we need to compare
479                // frequency to determine the insertion position. This does not ensure strictly
480                // correct ordering, but ensures the top score is on top which is enough for
481                // removing duplicates correctly.
482                if (StringUtils.equalsIgnoreCase(currentHighestWord, word, offset, length)
483                        && score <= sortedScores[0]) {
484                    pos = 1;
485                }
486            }
487        } else {
488            if (dataType == Dictionary.UNIGRAM) {
489                // Check if the word was already added before (by bigram data)
490                int bigramSuggestion = searchBigramSuggestion(word,offset,length);
491                if(bigramSuggestion >= 0) {
492                    dataTypeForLog = Dictionary.BIGRAM;
493                    // turn freq from bigram into multiplier specified above
494                    double multiplier = (((double) mBigramScores[bigramSuggestion])
495                            / MAXIMUM_BIGRAM_FREQUENCY)
496                            * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN)
497                            + BIGRAM_MULTIPLIER_MIN;
498                    /* Log.d(TAG,"bigram num: " + bigramSuggestion
499                            + "  wordB: " + mBigramSuggestions.get(bigramSuggestion).toString()
500                            + "  currentScore: " + score + "  bigramScore: "
501                            + mBigramScores[bigramSuggestion]
502                            + "  multiplier: " + multiplier); */
503                    score = (int)Math.round((score * multiplier));
504                }
505            }
506
507            // Check the last one's score and bail
508            if (sortedScores[prefMaxSuggestions - 1] >= score) return true;
509            while (pos < prefMaxSuggestions) {
510                if (sortedScores[pos] < score
511                        || (sortedScores[pos] == score && length < suggestions.get(pos).length())) {
512                    break;
513                }
514                pos++;
515            }
516        }
517        if (pos >= prefMaxSuggestions) {
518            return true;
519        }
520
521        System.arraycopy(sortedScores, pos, sortedScores, pos + 1, prefMaxSuggestions - pos - 1);
522        sortedScores[pos] = score;
523        final StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
524        // TODO: Must pay attention to locale when changing case.
525        if (mIsAllUpperCase) {
526            sb.append(new String(word, offset, length).toUpperCase());
527        } else if (mIsFirstCharCapitalized) {
528            sb.append(Character.toUpperCase(word[offset]));
529            if (length > 1) {
530                sb.append(word, offset + 1, length - 1);
531            }
532        } else {
533            sb.append(word, offset, length);
534        }
535        for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
536            sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
537        }
538        suggestions.add(pos, sb);
539        if (suggestions.size() > prefMaxSuggestions) {
540            suggestions.remove(prefMaxSuggestions);
541        } else {
542            LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog);
543        }
544        return true;
545    }
546
547    private int searchBigramSuggestion(final char[] word, final int offset, final int length) {
548        // TODO This is almost O(n^2). Might need fix.
549        // search whether the word appeared in bigram data
550        int bigramSuggestSize = mBigramSuggestions.size();
551        for (int i = 0; i < bigramSuggestSize; i++) {
552            if (mBigramSuggestions.get(i).length() == length) {
553                boolean chk = true;
554                for (int j = 0; j < length; j++) {
555                    if (mBigramSuggestions.get(i).charAt(j) != word[offset+j]) {
556                        chk = false;
557                        break;
558                    }
559                }
560                if (chk) return i;
561            }
562        }
563
564        return -1;
565    }
566
567    public void close() {
568        final Set<Dictionary> dictionaries = new HashSet<Dictionary>();
569        dictionaries.addAll(mUnigramDictionaries.values());
570        dictionaries.addAll(mBigramDictionaries.values());
571        for (final Dictionary dictionary : dictionaries) {
572            dictionary.close();
573        }
574        mMainDict = null;
575    }
576
577    // TODO: Resolve the inconsistencies between the native auto correction algorithms and
578    // this safety net
579    public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord,
580            final CharSequence suggestion) {
581        // Safety net for auto correction.
582        // Actually if we hit this safety net, it's a bug.
583        // If user selected aggressive auto correction mode, there is no need to use the safety
584        // net.
585        // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH,
586        // we should not use net because relatively edit distance can be big.
587        final int typedWordLength = typedWord.length();
588        if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) {
589            return false;
590        }
591        final int maxEditDistanceOfNativeDictionary =
592                (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
593        final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString());
594        if (DBG) {
595            Log.d(TAG, "Autocorrected edit distance = " + distance
596                    + ", " + maxEditDistanceOfNativeDictionary);
597        }
598        if (distance > maxEditDistanceOfNativeDictionary) {
599            if (DBG) {
600                Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion);
601                Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
602                        + "Turning off auto-correction.");
603            }
604            return true;
605        } else {
606            return false;
607        }
608    }
609}
610