Suggest.java revision a3ee019331e2d8881e19185fba9ccebfeb170614
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.content.Context;
20import android.text.TextUtils;
21import android.util.Log;
22
23import com.android.inputmethod.keyboard.Keyboard;
24import com.android.inputmethod.keyboard.ProximityInfo;
25
26import java.io.File;
27import java.util.ArrayList;
28import java.util.Arrays;
29import java.util.HashMap;
30import java.util.HashSet;
31import java.util.Locale;
32import java.util.Map;
33import java.util.Set;
34
35/**
36 * This class loads a dictionary and provides a list of suggestions for a given sequence of
37 * characters. This includes corrections and completions.
38 */
39public class Suggest implements Dictionary.WordCallback {
40
41    public static final String TAG = Suggest.class.getSimpleName();
42
43    public static final int APPROX_MAX_WORD_LENGTH = 32;
44
45    public static final int CORRECTION_NONE = 0;
46    public static final int CORRECTION_FULL = 1;
47    public static final int CORRECTION_FULL_BIGRAM = 2;
48
    /**
     * Words that appear in both bigram and unigram data get a multiplier ranging from
     * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX, depending on the score from the
     * bigram data.
     */
54    public static final double BIGRAM_MULTIPLIER_MIN = 1.2;
55    public static final double BIGRAM_MULTIPLIER_MAX = 1.5;
56
    /**
     * Maximum possible bigram frequency. This depends on how many bits are used in the data
     * structure. The maximum bigram frequency gets BIGRAM_MULTIPLIER_MAX as its multiplier.
     */
61    public static final int MAXIMUM_BIGRAM_FREQUENCY = 127;
62
63    // It seems the following values are only used for logging.
64    public static final int DIC_USER_TYPED = 0;
65    public static final int DIC_MAIN = 1;
66    public static final int DIC_USER = 2;
67    public static final int DIC_USER_UNIGRAM = 3;
68    public static final int DIC_CONTACTS = 4;
69    public static final int DIC_USER_BIGRAM = 5;
70    public static final int DIC_WHITELIST = 6;
71    // If you add a type of dictionary, increment DIC_TYPE_LAST_ID
72    // TODO: this value seems unused. Remove it?
73    public static final int DIC_TYPE_LAST_ID = 6;
74    public static final String DICT_KEY_MAIN = "main";
75    public static final String DICT_KEY_CONTACTS = "contacts";
76    // User dictionary, the system-managed one.
77    public static final String DICT_KEY_USER = "user";
78    // User unigram dictionary, internal to LatinIME
79    public static final String DICT_KEY_USER_UNIGRAM = "user_unigram";
80    // User bigram dictionary, internal to LatinIME
81    public static final String DICT_KEY_USER_BIGRAM = "user_bigram";
82    public static final String DICT_KEY_WHITELIST ="whitelist";
83
84    private static final boolean DBG = LatinImeLogger.sDBG;
85
86    private Dictionary mMainDict;
87    private ContactsDictionary mContactsDict;
88    private WhitelistDictionary mWhiteListDictionary;
89    private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>();
90    private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>();
91
92    private int mPrefMaxSuggestions = 18;
93
94    private static final int PREF_MAX_BIGRAMS = 60;
95
96    private double mAutoCorrectionThreshold;
97    private int[] mScores = new int[mPrefMaxSuggestions];
98    private int[] mBigramScores = new int[PREF_MAX_BIGRAMS];
99
100    private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
101    ArrayList<CharSequence> mBigramSuggestions  = new ArrayList<CharSequence>();
102    private CharSequence mConsideredWord;
103
104    // TODO: Remove these member variables by passing more context to addWord() callback method
105    private boolean mIsFirstCharCapitalized;
106    private boolean mIsAllUpperCase;
107    private int mTrailingSingleQuotesCount;
108
109    private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;
110
111    public Suggest(final Context context, final int dictionaryResId, final Locale locale) {
112        initAsynchronously(context, dictionaryResId, locale);
113    }
114
115    /* package for test */ Suggest(final Context context, final File dictionary,
116            final long startOffset, final long length, final Flag[] flagArray,
117            final Locale locale) {
118        initSynchronously(context, DictionaryFactory.createDictionaryForTest(context, dictionary,
119                startOffset, length, flagArray), locale);
120    }
121
122    private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) {
123        mWhiteListDictionary = new WhitelistDictionary(context, locale);
124        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary);
125        StringBuilderPool.ensureCapacity(mPrefMaxSuggestions, getApproxMaxWordLength());
126    }
127
128    private void initAsynchronously(final Context context, final int dictionaryResId,
129            final Locale locale) {
130        resetMainDict(context, dictionaryResId, locale);
131
132        // TODO: read the whitelist and init the pool asynchronously too.
133        // initPool should be done asynchronously now that the pool is thread-safe.
134        initWhitelistAndAutocorrectAndPool(context, locale);
135    }
136
137    private void initSynchronously(final Context context, final Dictionary mainDict,
138            final Locale locale) {
139        mMainDict = mainDict;
140        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, mainDict);
141        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, mainDict);
142        initWhitelistAndAutocorrectAndPool(context, locale);
143    }
144
145    private static void addOrReplaceDictionary(Map<String, Dictionary> dictionaries, String key,
146            Dictionary dict) {
147        final Dictionary oldDict = (dict == null)
148                ? dictionaries.remove(key)
149                : dictionaries.put(key, dict);
150        if (oldDict != null && dict != oldDict) {
151            oldDict.close();
152        }
153    }
154
155    public void resetMainDict(final Context context, final int dictionaryResId,
156            final Locale locale) {
157        mMainDict = null;
158        new Thread("InitializeBinaryDictionary") {
159            @Override
160            public void run() {
161                final Dictionary newMainDict = DictionaryFactory.createDictionaryFromManager(
162                        context, locale, dictionaryResId);
163                mMainDict = newMainDict;
164                addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, newMainDict);
165                addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, newMainDict);
166            }
167        }.start();
168    }
169
170    // The main dictionary could have been loaded asynchronously.  Don't cache the return value
171    // of this method.
172    public boolean hasMainDictionary() {
173        return mMainDict != null;
174    }
175
176    public ContactsDictionary getContactsDictionary() {
177        return mContactsDict;
178    }
179
180    public Map<String, Dictionary> getUnigramDictionaries() {
181        return mUnigramDictionaries;
182    }
183
184    public int getApproxMaxWordLength() {
185        return APPROX_MAX_WORD_LENGTH;
186    }
187
188    /**
189     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
190     * before the main dictionary, if set. This refers to the system-managed user dictionary.
191     */
192    public void setUserDictionary(Dictionary userDictionary) {
193        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER, userDictionary);
194    }
195
196    /**
197     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
198     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
199     * won't be used.
200     */
201    public void setContactsDictionary(ContactsDictionary contactsDictionary) {
202        mContactsDict = contactsDictionary;
203        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary);
204        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary);
205    }
206
207    public void setUserUnigramDictionary(Dictionary userUnigramDictionary) {
208        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_UNIGRAM, userUnigramDictionary);
209    }
210
211    public void setUserBigramDictionary(Dictionary userBigramDictionary) {
212        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_BIGRAM, userBigramDictionary);
213    }
214
215    public void setAutoCorrectionThreshold(double threshold) {
216        mAutoCorrectionThreshold = threshold;
217    }
218
219    /**
220     * Number of suggestions to generate from the input key sequence. This has
221     * to be a number between 1 and 100 (inclusive).
222     * @param maxSuggestions
223     * @throws IllegalArgumentException if the number is out of range
224     */
225    public void setMaxSuggestions(int maxSuggestions) {
226        if (maxSuggestions < 1 || maxSuggestions > 100) {
227            throw new IllegalArgumentException("maxSuggestions must be between 1 and 100");
228        }
229        mPrefMaxSuggestions = maxSuggestions;
230        mScores = new int[mPrefMaxSuggestions];
231        mBigramScores = new int[PREF_MAX_BIGRAMS];
232        collectGarbage(mSuggestions, mPrefMaxSuggestions);
233        StringBuilderPool.ensureCapacity(mPrefMaxSuggestions, getApproxMaxWordLength());
234    }
235
236    private CharSequence capitalizeWord(boolean all, boolean first, CharSequence word) {
237        if (TextUtils.isEmpty(word) || !(all || first)) return word;
238        final int wordLength = word.length();
239        final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength());
240        // TODO: Must pay attention to locale when changing case.
241        if (all) {
242            sb.append(word.toString().toUpperCase());
243        } else if (first) {
244            sb.append(Character.toUpperCase(word.charAt(0)));
245            if (wordLength > 1) {
246                sb.append(word.subSequence(1, wordLength));
247            }
248        }
249        return sb;
250    }
251
252    protected void addBigramToSuggestions(CharSequence bigram) {
253        // TODO: Try to be a little more shrewd with resource allocation.
254        // At the moment we copy this object because the StringBuilders are pooled (see
255        // StringBuilderPool.java) and when we are finished using mSuggestions and
256        // mBigramSuggestions we will take everything from both and insert them back in the
257        // pool, so we can't allow the same object to be in both lists at the same time.
258        final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength());
259        sb.append(bigram);
260        mSuggestions.add(sb);
261    }
262
263    private static final WordComposer sEmptyWordComposer = new WordComposer();
264    public SuggestedWords.Builder getBigramPredictionWordBuilder(CharSequence prevWordForBigram,
265            final int correctionMode) {
266        LatinImeLogger.onStartSuggestion(prevWordForBigram);
267        mIsFirstCharCapitalized = false;
268        mIsAllUpperCase = false;
269        mTrailingSingleQuotesCount = 0;
270        collectGarbage(mSuggestions, mPrefMaxSuggestions);
271        Arrays.fill(mScores, 0);
272
273        // Treating USER_TYPED as UNIGRAM suggestion for logging now.
274        LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM);
275        mConsideredWord = "";
276
277        // Note that if correctionMode != CORRECTION_FULL_BIGRAM, we'll always return the
278        // same empty SuggestedWords.Builder, which has size() == 0
279        if (correctionMode == CORRECTION_FULL_BIGRAM) {
280            // At first character typed, search only the bigrams
281            Arrays.fill(mBigramScores, 0);
282            collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS);
283
284            if (!TextUtils.isEmpty(prevWordForBigram)) {
285                CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
286                if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
287                    prevWordForBigram = lowerPrevWord;
288                }
289                for (final Dictionary dictionary : mBigramDictionaries.values()) {
290                    dictionary.getBigrams(sEmptyWordComposer, prevWordForBigram, this);
291                }
292                // Nothing entered: return all bigrams for the previous word
293                int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions);
294                for (int i = 0; i < insertCount; ++i) {
295                    addBigramToSuggestions(mBigramSuggestions.get(i));
296                }
297            }
298        }
299
300        StringUtils.removeDupes(mSuggestions);
301
302        return new SuggestedWords.Builder().addWords(mSuggestions, null)
303                .setAllowsToBeAutoCorrected(false)
304                .setHasAutoCorrection(false);
305    }
306
307    // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
308    public SuggestedWords.Builder getSuggestedWordBuilder(
309            final WordComposer wordComposer, CharSequence prevWordForBigram,
310            final ProximityInfo proximityInfo, final int correctionMode) {
311        LatinImeLogger.onStartSuggestion(prevWordForBigram);
312        mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
313        mIsAllUpperCase = wordComposer.isAllUpperCase();
314        mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
315        collectGarbage(mSuggestions, mPrefMaxSuggestions);
316        Arrays.fill(mScores, 0);
317
318        final String typedWord = wordComposer.getTypedWord();
319        final String consideredWord = mTrailingSingleQuotesCount > 0
320                ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount)
321                : typedWord;
322        // Treating USER_TYPED as UNIGRAM suggestion for logging now.
323        LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED,
324                Dictionary.UNIGRAM);
325        mConsideredWord = consideredWord;
326
327        // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid"
328        // but still autocorrected from - in the case the whitelist only capitalizes the word.
329        // The whitelist should be case-insensitive, so it's not possible to be consistent with
330        // a boolean flag. Right now this is handled with a slight hack in
331        // WhitelistDictionary#shouldForciblyAutoCorrectFrom.
332        final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected(
333                getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized());
334
335        if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) {
336            // At first character typed, search only the bigrams
337            Arrays.fill(mBigramScores, 0);
338            collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS);
339
340            if (!TextUtils.isEmpty(prevWordForBigram)) {
341                CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase();
342                if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) {
343                    prevWordForBigram = lowerPrevWord;
344                }
345                for (final Dictionary dictionary : mBigramDictionaries.values()) {
346                    dictionary.getBigrams(wordComposer, prevWordForBigram, this);
347                }
348                if (TextUtils.isEmpty(consideredWord)) {
349                    // Nothing entered: return all bigrams for the previous word
350                    int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions);
351                    for (int i = 0; i < insertCount; ++i) {
352                        addBigramToSuggestions(mBigramSuggestions.get(i));
353                    }
354                } else {
355                    // Word entered: return only bigrams that match the first char of the typed word
356                    final char currentChar = consideredWord.charAt(0);
357                    // TODO: Must pay attention to locale when changing case.
358                    final char currentCharUpper = Character.toUpperCase(currentChar);
359                    int count = 0;
360                    final int bigramSuggestionSize = mBigramSuggestions.size();
361                    for (int i = 0; i < bigramSuggestionSize; i++) {
362                        final CharSequence bigramSuggestion = mBigramSuggestions.get(i);
363                        final char bigramSuggestionFirstChar = bigramSuggestion.charAt(0);
364                        if (bigramSuggestionFirstChar == currentChar
365                                || bigramSuggestionFirstChar == currentCharUpper) {
366                            addBigramToSuggestions(bigramSuggestion);
367                            if (++count > mPrefMaxSuggestions) break;
368                        }
369                    }
370                }
371            }
372
373        } else if (wordComposer.size() > 1) {
374            // At second character typed, search the unigrams (scores being affected by bigrams)
375            for (final String key : mUnigramDictionaries.keySet()) {
376                // Skip UserUnigramDictionary and WhitelistDictionary to lookup
377                if (key.equals(DICT_KEY_USER_UNIGRAM) || key.equals(DICT_KEY_WHITELIST))
378                    continue;
379                final Dictionary dictionary = mUnigramDictionaries.get(key);
380                if (mTrailingSingleQuotesCount > 0) {
381                    final WordComposer tmpWordComposer = new WordComposer(wordComposer);
382                    for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
383                        tmpWordComposer.deleteLast();
384                    }
385                    dictionary.getWords(tmpWordComposer, this, proximityInfo);
386                } else {
387                    dictionary.getWords(wordComposer, this, proximityInfo);
388                }
389            }
390        }
391
392        CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized,
393                mWhiteListDictionary.getWhitelistedWord(consideredWord));
394
395        final boolean hasAutoCorrection;
396        if (CORRECTION_FULL == correctionMode
397                || CORRECTION_FULL_BIGRAM == correctionMode) {
398            final CharSequence autoCorrection =
399                    AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer,
400                            mSuggestions, mScores, consideredWord, mAutoCorrectionThreshold,
401                            whitelistedWord);
402            hasAutoCorrection = (null != autoCorrection);
403        } else {
404            hasAutoCorrection = false;
405        }
406
407        if (whitelistedWord != null) {
408            if (mTrailingSingleQuotesCount > 0) {
409                final StringBuilder sb = new StringBuilder(whitelistedWord);
410                for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
411                    sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
412                }
413                mSuggestions.add(0, sb.toString());
414            } else {
415                mSuggestions.add(0, whitelistedWord);
416            }
417        }
418
419        mSuggestions.add(0, typedWord);
420        StringUtils.removeDupes(mSuggestions);
421
422        if (DBG) {
423            final CharSequence autoCorrectionSuggestion = mSuggestions.get(0);
424            final int autoCorrectionSuggestionScore = mScores[0];
425            double normalizedScore = BinaryDictionary.calcNormalizedScore(
426                    typedWord, autoCorrectionSuggestion.toString(),
427                    autoCorrectionSuggestionScore);
428            ArrayList<SuggestedWords.SuggestedWordInfo> scoreInfoList =
429                    new ArrayList<SuggestedWords.SuggestedWordInfo>();
430            scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false));
431            for (int i = 0; i < mScores.length; ++i) {
432                if (normalizedScore > 0) {
433                    final String scoreThreshold = String.format("%d (%4.2f)", mScores[i],
434                            normalizedScore);
435                    scoreInfoList.add(
436                            new SuggestedWords.SuggestedWordInfo(scoreThreshold, false));
437                    normalizedScore = 0.0;
438                } else {
439                    final String score = Integer.toString(mScores[i]);
440                    scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(score, false));
441                }
442            }
443            for (int i = mScores.length; i < mSuggestions.size(); ++i) {
444                scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false));
445            }
446            return new SuggestedWords.Builder().addWords(mSuggestions, scoreInfoList)
447                    .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected)
448                    .setHasAutoCorrection(hasAutoCorrection);
449        }
450        return new SuggestedWords.Builder().addWords(mSuggestions, null)
451                .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected)
452                .setHasAutoCorrection(hasAutoCorrection);
453    }
454
455    @Override
456    public boolean addWord(final char[] word, final int offset, final int length, int score,
457            final int dicTypeId, final int dataType) {
458        int dataTypeForLog = dataType;
459        final ArrayList<CharSequence> suggestions;
460        final int[] sortedScores;
461        final int prefMaxSuggestions;
462        if (dataType == Dictionary.BIGRAM) {
463            suggestions = mBigramSuggestions;
464            sortedScores = mBigramScores;
465            prefMaxSuggestions = PREF_MAX_BIGRAMS;
466        } else {
467            suggestions = mSuggestions;
468            sortedScores = mScores;
469            prefMaxSuggestions = mPrefMaxSuggestions;
470        }
471
472        int pos = 0;
473
474        // Check if it's the same word, only caps are different
475        if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) {
476            // TODO: remove this surrounding if clause and move this logic to
477            // getSuggestedWordBuilder.
478            if (suggestions.size() > 0) {
479                final String currentHighestWord = suggestions.get(0).toString();
480                // If the current highest word is also equal to typed word, we need to compare
481                // frequency to determine the insertion position. This does not ensure strictly
482                // correct ordering, but ensures the top score is on top which is enough for
483                // removing duplicates correctly.
484                if (StringUtils.equalsIgnoreCase(currentHighestWord, word, offset, length)
485                        && score <= sortedScores[0]) {
486                    pos = 1;
487                }
488            }
489        } else {
490            if (dataType == Dictionary.UNIGRAM) {
491                // Check if the word was already added before (by bigram data)
492                int bigramSuggestion = searchBigramSuggestion(word,offset,length);
493                if(bigramSuggestion >= 0) {
494                    dataTypeForLog = Dictionary.BIGRAM;
495                    // turn freq from bigram into multiplier specified above
496                    double multiplier = (((double) mBigramScores[bigramSuggestion])
497                            / MAXIMUM_BIGRAM_FREQUENCY)
498                            * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN)
499                            + BIGRAM_MULTIPLIER_MIN;
500                    /* Log.d(TAG,"bigram num: " + bigramSuggestion
501                            + "  wordB: " + mBigramSuggestions.get(bigramSuggestion).toString()
502                            + "  currentScore: " + score + "  bigramScore: "
503                            + mBigramScores[bigramSuggestion]
504                            + "  multiplier: " + multiplier); */
505                    score = (int)Math.round((score * multiplier));
506                }
507            }
508
509            // Check the last one's score and bail
510            if (sortedScores[prefMaxSuggestions - 1] >= score) return true;
511            while (pos < prefMaxSuggestions) {
512                if (sortedScores[pos] < score
513                        || (sortedScores[pos] == score && length < suggestions.get(pos).length())) {
514                    break;
515                }
516                pos++;
517            }
518        }
519        if (pos >= prefMaxSuggestions) {
520            return true;
521        }
522
523        System.arraycopy(sortedScores, pos, sortedScores, pos + 1, prefMaxSuggestions - pos - 1);
524        sortedScores[pos] = score;
525        final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength());
526        // TODO: Must pay attention to locale when changing case.
527        if (mIsAllUpperCase) {
528            sb.append(new String(word, offset, length).toUpperCase());
529        } else if (mIsFirstCharCapitalized) {
530            sb.append(Character.toUpperCase(word[offset]));
531            if (length > 1) {
532                sb.append(word, offset + 1, length - 1);
533            }
534        } else {
535            sb.append(word, offset, length);
536        }
537        for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
538            sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
539        }
540        suggestions.add(pos, sb);
541        if (suggestions.size() > prefMaxSuggestions) {
542            final CharSequence garbage = suggestions.remove(prefMaxSuggestions);
543            if (garbage instanceof StringBuilder) {
544                StringBuilderPool.recycle((StringBuilder)garbage);
545            }
546        } else {
547            LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog);
548        }
549        return true;
550    }
551
552    private int searchBigramSuggestion(final char[] word, final int offset, final int length) {
553        // TODO This is almost O(n^2). Might need fix.
554        // search whether the word appeared in bigram data
555        int bigramSuggestSize = mBigramSuggestions.size();
556        for(int i = 0; i < bigramSuggestSize; i++) {
557            if(mBigramSuggestions.get(i).length() == length) {
558                boolean chk = true;
559                for(int j = 0; j < length; j++) {
560                    if(mBigramSuggestions.get(i).charAt(j) != word[offset+j]) {
561                        chk = false;
562                        break;
563                    }
564                }
565                if(chk) return i;
566            }
567        }
568
569        return -1;
570    }
571
572    private static void collectGarbage(ArrayList<CharSequence> suggestions,
573            int prefMaxSuggestions) {
574        int poolSize = StringBuilderPool.getSize();
575        int garbageSize = suggestions.size();
576        while (poolSize < prefMaxSuggestions && garbageSize > 0) {
577            final CharSequence garbage = suggestions.get(garbageSize - 1);
578            if (garbage instanceof StringBuilder) {
579                StringBuilderPool.recycle((StringBuilder)garbage);
580                poolSize++;
581            }
582            garbageSize--;
583        }
584        if (poolSize == prefMaxSuggestions + 1) {
585            Log.w("Suggest", "String pool got too big: " + poolSize);
586        }
587        suggestions.clear();
588    }
589
590    public void close() {
591        final Set<Dictionary> dictionaries = new HashSet<Dictionary>();
592        dictionaries.addAll(mUnigramDictionaries.values());
593        dictionaries.addAll(mBigramDictionaries.values());
594        for (final Dictionary dictionary : dictionaries) {
595            dictionary.close();
596        }
597        mMainDict = null;
598    }
599
600    // TODO: Resolve the inconsistencies between the native auto correction algorithms and
601    // this safety net
602    public static boolean shouldBlockAutoCorrectionBySafetyNet(
603            final SuggestedWords.Builder suggestedWordsBuilder, final Suggest suggest,
604            final double autoCorrectionThreshold) {
605        // Safety net for auto correction.
606        // Actually if we hit this safety net, it's actually a bug.
607        if (suggestedWordsBuilder.size() <= 1 || suggestedWordsBuilder.isTypedWordValid()) {
608            return false;
609        }
610        // If user selected aggressive auto correction mode, there is no need to use the safety
611        // net.
612        if (0 == autoCorrectionThreshold) {
613            return false;
614        }
615        final CharSequence typedWord = suggestedWordsBuilder.getWord(0);
616        // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH,
617        // we should not use net because relatively edit distance can be big.
618        if (typedWord.length() < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) {
619            return false;
620        }
621        final CharSequence suggestionWord = suggestedWordsBuilder.getWord(1);
622        final int typedWordLength = typedWord.length();
623        final int maxEditDistanceOfNativeDictionary =
624                (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
625        final int distance = BinaryDictionary.editDistance(
626                typedWord.toString(), suggestionWord.toString());
627        if (DBG) {
628            Log.d(TAG, "Autocorrected edit distance = " + distance
629                    + ", " + maxEditDistanceOfNativeDictionary);
630        }
631        if (distance > maxEditDistanceOfNativeDictionary) {
632            if (DBG) {
633                Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestionWord);
634                Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
635                        + "Turning off auto-correction.");
636            }
637            return true;
638        } else {
639            return false;
640        }
641    }
642}
643