// Suggest.java revision 0142b997bf18f5d07e83b3fd403f0b3ea4736040
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.text.TextUtils; 21import android.util.Log; 22 23import com.android.inputmethod.keyboard.Keyboard; 24import com.android.inputmethod.keyboard.ProximityInfo; 25import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 26 27import java.io.File; 28import java.util.ArrayList; 29import java.util.HashMap; 30import java.util.HashSet; 31import java.util.Locale; 32 33/** 34 * This class loads a dictionary and provides a list of suggestions for a given sequence of 35 * characters. This includes corrections and completions. 36 */ 37public class Suggest implements Dictionary.WordCallback { 38 public static final String TAG = Suggest.class.getSimpleName(); 39 40 public static final int APPROX_MAX_WORD_LENGTH = 32; 41 42 public static final int CORRECTION_NONE = 0; 43 public static final int CORRECTION_FULL = 1; 44 public static final int CORRECTION_FULL_BIGRAM = 2; 45 46 /** 47 * Words that appear in both bigram and unigram data gets multiplier ranging from 48 * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the score from 49 * bigram data. 50 */ 51 public static final double BIGRAM_MULTIPLIER_MIN = 1.2; 52 public static final double BIGRAM_MULTIPLIER_MAX = 1.5; 53 54 /** 55 * Maximum possible bigram frequency. 
Will depend on how many bits are being used in data 56 * structure. Maximum bigram frequency will get the BIGRAM_MULTIPLIER_MAX as the multiplier. 57 */ 58 public static final int MAXIMUM_BIGRAM_FREQUENCY = 127; 59 60 // It seems the following values are only used for logging. 61 public static final int DIC_USER_TYPED = 0; 62 public static final int DIC_MAIN = 1; 63 public static final int DIC_USER = 2; 64 public static final int DIC_USER_HISTORY = 3; 65 public static final int DIC_CONTACTS = 4; 66 public static final int DIC_WHITELIST = 6; 67 // If you add a type of dictionary, increment DIC_TYPE_LAST_ID 68 // TODO: this value seems unused. Remove it? 69 public static final int DIC_TYPE_LAST_ID = 6; 70 public static final String DICT_KEY_MAIN = "main"; 71 public static final String DICT_KEY_CONTACTS = "contacts"; 72 // User dictionary, the system-managed one. 73 public static final String DICT_KEY_USER = "user"; 74 // User history dictionary for the unigram map, internal to LatinIME 75 public static final String DICT_KEY_USER_HISTORY_UNIGRAM = "history_unigram"; 76 // User history dictionary for the bigram map, internal to LatinIME 77 public static final String DICT_KEY_USER_HISTORY_BIGRAM = "history_bigram"; 78 public static final String DICT_KEY_WHITELIST ="whitelist"; 79 80 private static final boolean DBG = LatinImeLogger.sDBG; 81 82 private Dictionary mMainDict; 83 private Dictionary mContactsDict; 84 private WhitelistDictionary mWhiteListDictionary; 85 private final HashMap<String, Dictionary> mUnigramDictionaries = 86 new HashMap<String, Dictionary>(); 87 private final HashMap<String, Dictionary> mBigramDictionaries = 88 new HashMap<String, Dictionary>(); 89 90 private int mPrefMaxSuggestions = 18; 91 92 private static final int PREF_MAX_BIGRAMS = 60; 93 94 private double mAutoCorrectionThreshold; 95 96 private ArrayList<SuggestedWordInfo> mSuggestions = new ArrayList<SuggestedWordInfo>(); 97 private ArrayList<SuggestedWordInfo> mBigramSuggestions = new 
ArrayList<SuggestedWordInfo>(); 98 private CharSequence mConsideredWord; 99 100 // TODO: Remove these member variables by passing more context to addWord() callback method 101 private boolean mIsFirstCharCapitalized; 102 private boolean mIsAllUpperCase; 103 private int mTrailingSingleQuotesCount; 104 105 private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; 106 107 public Suggest(final Context context, final Locale locale) { 108 initAsynchronously(context, locale); 109 } 110 111 /* package for test */ Suggest(final Context context, final File dictionary, 112 final long startOffset, final long length, final Locale locale) { 113 initSynchronously(context, DictionaryFactory.createDictionaryForTest(context, dictionary, 114 startOffset, length /* useFullEditDistance */, false, locale), locale); 115 } 116 117 private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) { 118 mWhiteListDictionary = new WhitelistDictionary(context, locale); 119 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary); 120 } 121 122 private void initAsynchronously(final Context context, final Locale locale) { 123 resetMainDict(context, locale); 124 125 // TODO: read the whitelist and init the pool asynchronously too. 126 // initPool should be done asynchronously now that the pool is thread-safe. 127 initWhitelistAndAutocorrectAndPool(context, locale); 128 } 129 130 private void initSynchronously(final Context context, final Dictionary mainDict, 131 final Locale locale) { 132 mMainDict = mainDict; 133 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, mainDict); 134 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, mainDict); 135 initWhitelistAndAutocorrectAndPool(context, locale); 136 } 137 138 private static void addOrReplaceDictionary(HashMap<String, Dictionary> dictionaries, String key, 139 Dictionary dict) { 140 final Dictionary oldDict = (dict == null) 141 ? 
dictionaries.remove(key) 142 : dictionaries.put(key, dict); 143 if (oldDict != null && dict != oldDict) { 144 oldDict.close(); 145 } 146 } 147 148 public void resetMainDict(final Context context, final Locale locale) { 149 mMainDict = null; 150 new Thread("InitializeBinaryDictionary") { 151 @Override 152 public void run() { 153 final Dictionary newMainDict = DictionaryFactory.createDictionaryFromManager( 154 context, locale); 155 mMainDict = newMainDict; 156 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, newMainDict); 157 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, newMainDict); 158 } 159 }.start(); 160 } 161 162 // The main dictionary could have been loaded asynchronously. Don't cache the return value 163 // of this method. 164 public boolean hasMainDictionary() { 165 return mMainDict != null; 166 } 167 168 public Dictionary getContactsDictionary() { 169 return mContactsDict; 170 } 171 172 public HashMap<String, Dictionary> getUnigramDictionaries() { 173 return mUnigramDictionaries; 174 } 175 176 public static int getApproxMaxWordLength() { 177 return APPROX_MAX_WORD_LENGTH; 178 } 179 180 /** 181 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 182 * before the main dictionary, if set. This refers to the system-managed user dictionary. 183 */ 184 public void setUserDictionary(Dictionary userDictionary) { 185 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER, userDictionary); 186 } 187 188 /** 189 * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove 190 * the contacts dictionary by passing null to this method. In this case no contacts dictionary 191 * won't be used. 
192 */ 193 public void setContactsDictionary(Dictionary contactsDictionary) { 194 mContactsDict = contactsDictionary; 195 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); 196 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); 197 } 198 199 public void setUserHistoryDictionary(Dictionary userHistoryDictionary) { 200 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_HISTORY_UNIGRAM, 201 userHistoryDictionary); 202 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_HISTORY_BIGRAM, 203 userHistoryDictionary); 204 } 205 206 public void setAutoCorrectionThreshold(double threshold) { 207 mAutoCorrectionThreshold = threshold; 208 } 209 210 private static CharSequence capitalizeWord(final boolean all, final boolean first, 211 final CharSequence word) { 212 if (TextUtils.isEmpty(word) || !(all || first)) return word; 213 final int wordLength = word.length(); 214 final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); 215 // TODO: Must pay attention to locale when changing case. 216 if (all) { 217 sb.append(word.toString().toUpperCase()); 218 } else if (first) { 219 sb.append(Character.toUpperCase(word.charAt(0))); 220 if (wordLength > 1) { 221 sb.append(word.subSequence(1, wordLength)); 222 } 223 } 224 return sb; 225 } 226 227 protected void addBigramToSuggestions(SuggestedWordInfo bigram) { 228 mSuggestions.add(bigram); 229 } 230 231 private static final WordComposer sEmptyWordComposer = new WordComposer(); 232 public SuggestedWords getBigramPredictions(CharSequence prevWordForBigram) { 233 LatinImeLogger.onStartSuggestion(prevWordForBigram); 234 mIsFirstCharCapitalized = false; 235 mIsAllUpperCase = false; 236 mTrailingSingleQuotesCount = 0; 237 mSuggestions = new ArrayList<SuggestedWordInfo>(mPrefMaxSuggestions); 238 239 // Treating USER_TYPED as UNIGRAM suggestion for logging now. 
240 LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); 241 mConsideredWord = ""; 242 243 mBigramSuggestions = new ArrayList<SuggestedWordInfo>(PREF_MAX_BIGRAMS); 244 245 getAllBigrams(prevWordForBigram, sEmptyWordComposer); 246 247 // Nothing entered: return all bigrams for the previous word 248 int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); 249 for (int i = 0; i < insertCount; ++i) { 250 addBigramToSuggestions(mBigramSuggestions.get(i)); 251 } 252 253 SuggestedWordInfo.removeDups(mSuggestions); 254 255 return new SuggestedWords(mSuggestions, 256 false /* typedWordValid */, 257 false /* hasAutoCorrectionCandidate */, 258 false /* allowsToBeAutoCorrected */, 259 false /* isPunctuationSuggestions */, 260 false /* isObsoleteSuggestions */, 261 true /* isPrediction */); 262 } 263 264 // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder 265 public SuggestedWords getSuggestedWords( 266 final WordComposer wordComposer, CharSequence prevWordForBigram, 267 final ProximityInfo proximityInfo, final int correctionMode) { 268 LatinImeLogger.onStartSuggestion(prevWordForBigram); 269 mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); 270 mIsAllUpperCase = wordComposer.isAllUpperCase(); 271 mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); 272 mSuggestions = new ArrayList<SuggestedWordInfo>(mPrefMaxSuggestions); 273 274 final String typedWord = wordComposer.getTypedWord(); 275 final String consideredWord = mTrailingSingleQuotesCount > 0 276 ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount) 277 : typedWord; 278 // Treating USER_TYPED as UNIGRAM suggestion for logging now. 
279 LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); 280 mConsideredWord = consideredWord; 281 282 if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) { 283 // At first character typed, search only the bigrams 284 mBigramSuggestions = new ArrayList<SuggestedWordInfo>(PREF_MAX_BIGRAMS); 285 286 if (!TextUtils.isEmpty(prevWordForBigram)) { 287 getAllBigrams(prevWordForBigram, wordComposer); 288 if (TextUtils.isEmpty(consideredWord)) { 289 // Nothing entered: return all bigrams for the previous word 290 int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); 291 for (int i = 0; i < insertCount; ++i) { 292 addBigramToSuggestions(mBigramSuggestions.get(i)); 293 } 294 } else { 295 // Word entered: return only bigrams that match the first char of the typed word 296 final char currentChar = consideredWord.charAt(0); 297 // TODO: Must pay attention to locale when changing case. 298 // TODO: Use codepoint instead of char 299 final char currentCharUpper = Character.toUpperCase(currentChar); 300 int count = 0; 301 final int bigramSuggestionSize = mBigramSuggestions.size(); 302 for (int i = 0; i < bigramSuggestionSize; i++) { 303 final SuggestedWordInfo bigramSuggestion = mBigramSuggestions.get(i); 304 final char bigramSuggestionFirstChar = 305 (char)bigramSuggestion.codePointAt(0); 306 if (bigramSuggestionFirstChar == currentChar 307 || bigramSuggestionFirstChar == currentCharUpper) { 308 addBigramToSuggestions(bigramSuggestion); 309 if (++count > mPrefMaxSuggestions) break; 310 } 311 } 312 } 313 } 314 315 } else if (wordComposer.size() > 1) { 316 final WordComposer wordComposerForLookup; 317 if (mTrailingSingleQuotesCount > 0) { 318 wordComposerForLookup = new WordComposer(wordComposer); 319 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 320 wordComposerForLookup.deleteLast(); 321 } 322 } else { 323 wordComposerForLookup = wordComposer; 324 } 325 // At second character typed, 
search the unigrams (scores being affected by bigrams) 326 for (final String key : mUnigramDictionaries.keySet()) { 327 // Skip UserUnigramDictionary and WhitelistDictionary to lookup 328 if (key.equals(DICT_KEY_USER_HISTORY_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) 329 continue; 330 final Dictionary dictionary = mUnigramDictionaries.get(key); 331 dictionary.getWords(wordComposerForLookup, prevWordForBigram, this, proximityInfo); 332 } 333 } 334 335 final CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, 336 mIsFirstCharCapitalized, mWhiteListDictionary.getWhitelistedWord(consideredWord)); 337 338 final boolean hasAutoCorrection; 339 if (CORRECTION_FULL == correctionMode || CORRECTION_FULL_BIGRAM == correctionMode) { 340 final CharSequence autoCorrection = 341 AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer, 342 mSuggestions, consideredWord, mAutoCorrectionThreshold, 343 whitelistedWord); 344 hasAutoCorrection = (null != autoCorrection); 345 } else { 346 hasAutoCorrection = false; 347 } 348 349 if (whitelistedWord != null) { 350 if (mTrailingSingleQuotesCount > 0) { 351 final StringBuilder sb = new StringBuilder(whitelistedWord); 352 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 353 sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); 354 } 355 mSuggestions.add(0, new SuggestedWordInfo( 356 sb.toString(), SuggestedWordInfo.MAX_SCORE)); 357 } else { 358 mSuggestions.add(0, new SuggestedWordInfo( 359 whitelistedWord, SuggestedWordInfo.MAX_SCORE)); 360 } 361 } 362 363 mSuggestions.add(0, new SuggestedWordInfo(typedWord, SuggestedWordInfo.MAX_SCORE)); 364 SuggestedWordInfo.removeDups(mSuggestions); 365 366 final ArrayList<SuggestedWordInfo> suggestionsList; 367 if (DBG) { 368 suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, mSuggestions); 369 } else { 370 suggestionsList = mSuggestions; 371 } 372 373 // TODO: Change this scheme - a boolean is not enough. 
A whitelisted word may be "valid" 374 // but still autocorrected from - in the case the whitelist only capitalizes the word. 375 // The whitelist should be case-insensitive, so it's not possible to be consistent with 376 // a boolean flag. Right now this is handled with a slight hack in 377 // WhitelistDictionary#shouldForciblyAutoCorrectFrom. 378 final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected( 379 getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized()); 380 381 boolean autoCorrectionAvailable = hasAutoCorrection; 382 if (correctionMode == CORRECTION_FULL || correctionMode == CORRECTION_FULL_BIGRAM) { 383 autoCorrectionAvailable |= !allowsToBeAutoCorrected; 384 } 385 // Don't auto-correct words with multiple capital letter 386 autoCorrectionAvailable &= !wordComposer.isMostlyCaps(); 387 autoCorrectionAvailable &= !wordComposer.isResumed(); 388 if (allowsToBeAutoCorrected && suggestionsList.size() > 1 && mAutoCorrectionThreshold > 0 389 && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord, 390 suggestionsList.get(1).mWord)) { 391 autoCorrectionAvailable = false; 392 } 393 return new SuggestedWords(suggestionsList, 394 !allowsToBeAutoCorrected /* typedWordValid */, 395 autoCorrectionAvailable /* hasAutoCorrectionCandidate */, 396 allowsToBeAutoCorrected /* allowsToBeAutoCorrected */, 397 false /* isPunctuationSuggestions */, 398 false /* isObsoleteSuggestions */, 399 false /* isPrediction */); 400 } 401 402 /** 403 * Adds all bigram predictions for prevWord. Also checks the lower case version of prevWord if 404 * it contains any upper case characters. 405 */ 406 private void getAllBigrams(final CharSequence prevWord, final WordComposer wordComposer) { 407 if (StringUtils.hasUpperCase(prevWord)) { 408 // TODO: Must pay attention to locale when changing case. 
409 final CharSequence lowerPrevWord = prevWord.toString().toLowerCase(); 410 for (final Dictionary dictionary : mBigramDictionaries.values()) { 411 dictionary.getBigrams(wordComposer, lowerPrevWord, this); 412 } 413 } 414 for (final Dictionary dictionary : mBigramDictionaries.values()) { 415 dictionary.getBigrams(wordComposer, prevWord, this); 416 } 417 } 418 419 private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo( 420 final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) { 421 final SuggestedWordInfo typedWordInfo = suggestions.get(0); 422 typedWordInfo.setDebugString("+"); 423 double normalizedScore = BinaryDictionary.calcNormalizedScore( 424 typedWord, typedWordInfo.toString(), typedWordInfo.mScore); 425 final int suggestionsSize = suggestions.size(); 426 final ArrayList<SuggestedWordInfo> suggestionsList = 427 new ArrayList<SuggestedWordInfo>(suggestionsSize); 428 suggestionsList.add(typedWordInfo); 429 // Note: i here is the index in mScores[], but the index in mSuggestions is one more 430 // than i because we added the typed word to mSuggestions without touching mScores. 
431 for (int i = 0; i < suggestionsSize - 1; ++i) { 432 final SuggestedWordInfo cur = suggestions.get(i + 1); 433 final String scoreInfoString; 434 if (normalizedScore > 0) { 435 scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore); 436 normalizedScore = 0.0; 437 } else { 438 scoreInfoString = Integer.toString(cur.mScore); 439 } 440 cur.setDebugString(scoreInfoString); 441 suggestionsList.add(cur); 442 } 443 return suggestionsList; 444 } 445 446 // TODO: Use codepoint instead of char 447 @Override 448 public boolean addWord(final char[] word, final int offset, final int length, int score, 449 final int dicTypeId, final int dataType) { 450 int dataTypeForLog = dataType; 451 final ArrayList<SuggestedWordInfo> suggestions; 452 final int prefMaxSuggestions; 453 if (dataType == Dictionary.BIGRAM) { 454 suggestions = mBigramSuggestions; 455 prefMaxSuggestions = PREF_MAX_BIGRAMS; 456 } else { 457 suggestions = mSuggestions; 458 prefMaxSuggestions = mPrefMaxSuggestions; 459 } 460 461 int pos = 0; 462 463 // Check if it's the same word, only caps are different 464 if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) { 465 // TODO: remove this surrounding if clause and move this logic to 466 // getSuggestedWordBuilder. 467 if (suggestions.size() > 0) { 468 final SuggestedWordInfo currentHighestWord = suggestions.get(0); 469 // If the current highest word is also equal to typed word, we need to compare 470 // frequency to determine the insertion position. This does not ensure strictly 471 // correct ordering, but ensures the top score is on top which is enough for 472 // removing duplicates correctly. 
473 if (StringUtils.equalsIgnoreCase(currentHighestWord.mWord, word, offset, length) 474 && score <= currentHighestWord.mScore) { 475 pos = 1; 476 } 477 } 478 } else { 479 if (dataType == Dictionary.UNIGRAM) { 480 // Check if the word was already added before (by bigram data) 481 int bigramSuggestion = searchBigramSuggestion(word,offset,length); 482 if(bigramSuggestion >= 0) { 483 dataTypeForLog = Dictionary.BIGRAM; 484 // turn freq from bigram into multiplier specified above 485 double multiplier = (((double) mBigramSuggestions.get(bigramSuggestion).mScore) 486 / MAXIMUM_BIGRAM_FREQUENCY) 487 * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN) 488 + BIGRAM_MULTIPLIER_MIN; 489 /* Log.d(TAG,"bigram num: " + bigramSuggestion 490 + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString() 491 + " currentScore: " + score + " bigramScore: " 492 + mBigramScores[bigramSuggestion] 493 + " multiplier: " + multiplier); */ 494 score = (int)Math.round((score * multiplier)); 495 } 496 } 497 498 // Check the last one's score and bail 499 if (suggestions.size() >= prefMaxSuggestions 500 && suggestions.get(prefMaxSuggestions - 1).mScore >= score) return true; 501 while (pos < suggestions.size()) { 502 final int curScore = suggestions.get(pos).mScore; 503 if (curScore < score 504 || (curScore == score && length < suggestions.get(pos).codePointCount())) { 505 break; 506 } 507 pos++; 508 } 509 } 510 if (pos >= prefMaxSuggestions) { 511 return true; 512 } 513 514 final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); 515 // TODO: Must pay attention to locale when changing case. 
516 if (mIsAllUpperCase) { 517 sb.append(new String(word, offset, length).toUpperCase()); 518 } else if (mIsFirstCharCapitalized) { 519 sb.append(Character.toUpperCase(word[offset])); 520 if (length > 1) { 521 sb.append(word, offset + 1, length - 1); 522 } 523 } else { 524 sb.append(word, offset, length); 525 } 526 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 527 sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); 528 } 529 suggestions.add(pos, new SuggestedWordInfo(sb, score)); 530 if (suggestions.size() > prefMaxSuggestions) { 531 suggestions.remove(prefMaxSuggestions); 532 } else { 533 LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog); 534 } 535 return true; 536 } 537 538 // TODO: Use codepoint instead of char 539 private int searchBigramSuggestion(final char[] word, final int offset, final int length) { 540 // TODO This is almost O(n^2). Might need fix. 541 // search whether the word appeared in bigram data 542 int bigramSuggestSize = mBigramSuggestions.size(); 543 for (int i = 0; i < bigramSuggestSize; i++) { 544 if (mBigramSuggestions.get(i).codePointCount() == length) { 545 boolean chk = true; 546 for (int j = 0; j < length; j++) { 547 if (mBigramSuggestions.get(i).codePointAt(j) != word[offset+j]) { 548 chk = false; 549 break; 550 } 551 } 552 if (chk) return i; 553 } 554 } 555 556 return -1; 557 } 558 559 public void close() { 560 final HashSet<Dictionary> dictionaries = new HashSet<Dictionary>(); 561 dictionaries.addAll(mUnigramDictionaries.values()); 562 dictionaries.addAll(mBigramDictionaries.values()); 563 for (final Dictionary dictionary : dictionaries) { 564 dictionary.close(); 565 } 566 mMainDict = null; 567 } 568 569 // TODO: Resolve the inconsistencies between the native auto correction algorithms and 570 // this safety net 571 public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord, 572 final CharSequence suggestion) { 573 // Safety net for auto correction. 
574 // Actually if we hit this safety net, it's a bug. 575 // If user selected aggressive auto correction mode, there is no need to use the safety 576 // net. 577 // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH, 578 // we should not use net because relatively edit distance can be big. 579 final int typedWordLength = typedWord.length(); 580 if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) { 581 return false; 582 } 583 final int maxEditDistanceOfNativeDictionary = 584 (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; 585 final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString()); 586 if (DBG) { 587 Log.d(TAG, "Autocorrected edit distance = " + distance 588 + ", " + maxEditDistanceOfNativeDictionary); 589 } 590 if (distance > maxEditDistanceOfNativeDictionary) { 591 if (DBG) { 592 Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion); 593 Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. " 594 + "Turning off auto-correction."); 595 } 596 return true; 597 } else { 598 return false; 599 } 600 } 601} 602