Suggest.java revision 08adb09e149f920a63ecbc5c2f63d9cb7dbee0ab
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.text.TextUtils; 21import android.util.Log; 22 23import com.android.inputmethod.keyboard.Keyboard; 24import com.android.inputmethod.keyboard.ProximityInfo; 25 26import java.io.File; 27import java.util.ArrayList; 28import java.util.Arrays; 29import java.util.HashMap; 30import java.util.HashSet; 31import java.util.Locale; 32import java.util.Map; 33import java.util.Set; 34 35/** 36 * This class loads a dictionary and provides a list of suggestions for a given sequence of 37 * characters. This includes corrections and completions. 38 */ 39public class Suggest implements Dictionary.WordCallback { 40 41 public static final String TAG = Suggest.class.getSimpleName(); 42 43 public static final int APPROX_MAX_WORD_LENGTH = 32; 44 45 public static final int CORRECTION_NONE = 0; 46 public static final int CORRECTION_FULL = 1; 47 public static final int CORRECTION_FULL_BIGRAM = 2; 48 49 /** 50 * Words that appear in both bigram and unigram data gets multiplier ranging from 51 * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the score from 52 * bigram data. 53 */ 54 public static final double BIGRAM_MULTIPLIER_MIN = 1.2; 55 public static final double BIGRAM_MULTIPLIER_MAX = 1.5; 56 57 /** 58 * Maximum possible bigram frequency. Will depend on how many bits are being used in data 59 * structure. Maximum bigram frequency will get the BIGRAM_MULTIPLIER_MAX as the multiplier. 60 */ 61 public static final int MAXIMUM_BIGRAM_FREQUENCY = 127; 62 63 // It seems the following values are only used for logging. 64 public static final int DIC_USER_TYPED = 0; 65 public static final int DIC_MAIN = 1; 66 public static final int DIC_USER = 2; 67 public static final int DIC_USER_UNIGRAM = 3; 68 public static final int DIC_CONTACTS = 4; 69 public static final int DIC_USER_BIGRAM = 5; 70 public static final int DIC_WHITELIST = 6; 71 // If you add a type of dictionary, increment DIC_TYPE_LAST_ID 72 // TODO: this value seems unused. Remove it? 73 public static final int DIC_TYPE_LAST_ID = 6; 74 public static final String DICT_KEY_MAIN = "main"; 75 public static final String DICT_KEY_CONTACTS = "contacts"; 76 // User dictionary, the system-managed one. 77 public static final String DICT_KEY_USER = "user"; 78 // User unigram dictionary, internal to LatinIME 79 public static final String DICT_KEY_USER_UNIGRAM = "user_unigram"; 80 // User bigram dictionary, internal to LatinIME 81 public static final String DICT_KEY_USER_BIGRAM = "user_bigram"; 82 public static final String DICT_KEY_WHITELIST ="whitelist"; 83 84 private static final boolean DBG = LatinImeLogger.sDBG; 85 86 private Dictionary mMainDict; 87 private ContactsDictionary mContactsDict; 88 private WhitelistDictionary mWhiteListDictionary; 89 private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>(); 90 private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>(); 91 92 private int mPrefMaxSuggestions = 18; 93 94 private static final int PREF_MAX_BIGRAMS = 60; 95 96 private double mAutoCorrectionThreshold; 97 private int[] mScores = new int[mPrefMaxSuggestions]; 98 private int[] mBigramScores = new int[PREF_MAX_BIGRAMS]; 99 100 private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>(); 101 private ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>(); 102 private CharSequence mConsideredWord; 103 104 // TODO: Remove these member variables by passing more context to addWord() callback method 105 private boolean mIsFirstCharCapitalized; 106 private boolean mIsAllUpperCase; 107 private int mTrailingSingleQuotesCount; 108 109 private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; 110 111 public Suggest(final Context context, final int dictionaryResId, final Locale locale) { 112 initAsynchronously(context, dictionaryResId, locale); 113 } 114 115 /* package for test */ Suggest(final Context context, final File dictionary, 116 final long startOffset, final long length, final Flag[] flagArray, 117 final Locale locale) { 118 initSynchronously(context, DictionaryFactory.createDictionaryForTest(context, dictionary, 119 startOffset, length, flagArray), locale); 120 } 121 122 private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) { 123 mWhiteListDictionary = new WhitelistDictionary(context, locale); 124 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary); 125 } 126 127 private void initAsynchronously(final Context context, final int dictionaryResId, 128 final Locale locale) { 129 resetMainDict(context, dictionaryResId, locale); 130 131 // TODO: read the whitelist and init the pool asynchronously too. 132 // initPool should be done asynchronously now that the pool is thread-safe. 133 initWhitelistAndAutocorrectAndPool(context, locale); 134 } 135 136 private void initSynchronously(final Context context, final Dictionary mainDict, 137 final Locale locale) { 138 mMainDict = mainDict; 139 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, mainDict); 140 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, mainDict); 141 initWhitelistAndAutocorrectAndPool(context, locale); 142 } 143 144 private static void addOrReplaceDictionary(Map<String, Dictionary> dictionaries, String key, 145 Dictionary dict) { 146 final Dictionary oldDict = (dict == null) 147 ? dictionaries.remove(key) 148 : dictionaries.put(key, dict); 149 if (oldDict != null && dict != oldDict) { 150 oldDict.close(); 151 } 152 } 153 154 public void resetMainDict(final Context context, final int dictionaryResId, 155 final Locale locale) { 156 mMainDict = null; 157 new Thread("InitializeBinaryDictionary") { 158 @Override 159 public void run() { 160 final Dictionary newMainDict = DictionaryFactory.createDictionaryFromManager( 161 context, locale, dictionaryResId); 162 mMainDict = newMainDict; 163 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, newMainDict); 164 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, newMainDict); 165 } 166 }.start(); 167 } 168 169 // The main dictionary could have been loaded asynchronously. Don't cache the return value 170 // of this method. 171 public boolean hasMainDictionary() { 172 return mMainDict != null; 173 } 174 175 public ContactsDictionary getContactsDictionary() { 176 return mContactsDict; 177 } 178 179 public Map<String, Dictionary> getUnigramDictionaries() { 180 return mUnigramDictionaries; 181 } 182 183 public static int getApproxMaxWordLength() { 184 return APPROX_MAX_WORD_LENGTH; 185 } 186 187 /** 188 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 189 * before the main dictionary, if set. This refers to the system-managed user dictionary. 190 */ 191 public void setUserDictionary(Dictionary userDictionary) { 192 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER, userDictionary); 193 } 194 195 /** 196 * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove 197 * the contacts dictionary by passing null to this method. In this case no contacts dictionary 198 * won't be used. 199 */ 200 public void setContactsDictionary(ContactsDictionary contactsDictionary) { 201 mContactsDict = contactsDictionary; 202 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); 203 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); 204 } 205 206 public void setUserUnigramDictionary(Dictionary userUnigramDictionary) { 207 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_UNIGRAM, userUnigramDictionary); 208 } 209 210 public void setUserBigramDictionary(Dictionary userBigramDictionary) { 211 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_BIGRAM, userBigramDictionary); 212 } 213 214 public void setAutoCorrectionThreshold(double threshold) { 215 mAutoCorrectionThreshold = threshold; 216 } 217 218 private static CharSequence capitalizeWord(final boolean all, final boolean first, 219 final CharSequence word) { 220 if (TextUtils.isEmpty(word) || !(all || first)) return word; 221 final int wordLength = word.length(); 222 final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); 223 // TODO: Must pay attention to locale when changing case. 224 if (all) { 225 sb.append(word.toString().toUpperCase()); 226 } else if (first) { 227 sb.append(Character.toUpperCase(word.charAt(0))); 228 if (wordLength > 1) { 229 sb.append(word.subSequence(1, wordLength)); 230 } 231 } 232 return sb; 233 } 234 235 protected void addBigramToSuggestions(CharSequence bigram) { 236 final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); 237 sb.append(bigram); 238 mSuggestions.add(sb); 239 } 240 241 private static final WordComposer sEmptyWordComposer = new WordComposer(); 242 public SuggestedWords.Builder getBigramPredictionWordBuilder(CharSequence prevWordForBigram) { 243 LatinImeLogger.onStartSuggestion(prevWordForBigram); 244 mIsFirstCharCapitalized = false; 245 mIsAllUpperCase = false; 246 mTrailingSingleQuotesCount = 0; 247 mSuggestions = new ArrayList<CharSequence>(mPrefMaxSuggestions); 248 Arrays.fill(mScores, 0); 249 250 // Treating USER_TYPED as UNIGRAM suggestion for logging now. 251 LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); 252 mConsideredWord = ""; 253 254 Arrays.fill(mBigramScores, 0); 255 mBigramSuggestions = new ArrayList<CharSequence>(PREF_MAX_BIGRAMS); 256 257 CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); 258 if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) { 259 prevWordForBigram = lowerPrevWord; 260 } 261 for (final Dictionary dictionary : mBigramDictionaries.values()) { 262 dictionary.getBigrams(sEmptyWordComposer, prevWordForBigram, this); 263 } 264 // Nothing entered: return all bigrams for the previous word 265 int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); 266 for (int i = 0; i < insertCount; ++i) { 267 addBigramToSuggestions(mBigramSuggestions.get(i)); 268 } 269 270 StringUtils.removeDupes(mSuggestions); 271 272 return new SuggestedWords.Builder() 273 .setWords(SuggestedWords.Builder.getFromCharSequenceList(mSuggestions)) 274 .setAllowsToBeAutoCorrected(false); 275 } 276 277 // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder 278 public SuggestedWords.Builder getSuggestedWordBuilder( 279 final WordComposer wordComposer, CharSequence prevWordForBigram, 280 final ProximityInfo proximityInfo, final int correctionMode) { 281 LatinImeLogger.onStartSuggestion(prevWordForBigram); 282 mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); 283 mIsAllUpperCase = wordComposer.isAllUpperCase(); 284 mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); 285 mSuggestions = new ArrayList<CharSequence>(mPrefMaxSuggestions); 286 Arrays.fill(mScores, 0); 287 288 final String typedWord = wordComposer.getTypedWord(); 289 final String consideredWord = mTrailingSingleQuotesCount > 0 290 ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount) 291 : typedWord; 292 // Treating USER_TYPED as UNIGRAM suggestion for logging now. 293 LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED, 294 Dictionary.UNIGRAM); 295 mConsideredWord = consideredWord; 296 297 // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid" 298 // but still autocorrected from - in the case the whitelist only capitalizes the word. 299 // The whitelist should be case-insensitive, so it's not possible to be consistent with 300 // a boolean flag. Right now this is handled with a slight hack in 301 // WhitelistDictionary#shouldForciblyAutoCorrectFrom. 302 final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected( 303 getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized()); 304 305 if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) { 306 // At first character typed, search only the bigrams 307 Arrays.fill(mBigramScores, 0); 308 mBigramSuggestions = new ArrayList<CharSequence>(PREF_MAX_BIGRAMS); 309 310 if (!TextUtils.isEmpty(prevWordForBigram)) { 311 CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); 312 if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) { 313 prevWordForBigram = lowerPrevWord; 314 } 315 for (final Dictionary dictionary : mBigramDictionaries.values()) { 316 dictionary.getBigrams(wordComposer, prevWordForBigram, this); 317 } 318 if (TextUtils.isEmpty(consideredWord)) { 319 // Nothing entered: return all bigrams for the previous word 320 int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); 321 for (int i = 0; i < insertCount; ++i) { 322 addBigramToSuggestions(mBigramSuggestions.get(i)); 323 } 324 } else { 325 // Word entered: return only bigrams that match the first char of the typed word 326 final char currentChar = consideredWord.charAt(0); 327 // TODO: Must pay attention to locale when changing case. 328 final char currentCharUpper = Character.toUpperCase(currentChar); 329 int count = 0; 330 final int bigramSuggestionSize = mBigramSuggestions.size(); 331 for (int i = 0; i < bigramSuggestionSize; i++) { 332 final CharSequence bigramSuggestion = mBigramSuggestions.get(i); 333 final char bigramSuggestionFirstChar = bigramSuggestion.charAt(0); 334 if (bigramSuggestionFirstChar == currentChar 335 || bigramSuggestionFirstChar == currentCharUpper) { 336 addBigramToSuggestions(bigramSuggestion); 337 if (++count > mPrefMaxSuggestions) break; 338 } 339 } 340 } 341 } 342 343 } else if (wordComposer.size() > 1) { 344 // At second character typed, search the unigrams (scores being affected by bigrams) 345 for (final String key : mUnigramDictionaries.keySet()) { 346 // Skip UserUnigramDictionary and WhitelistDictionary to lookup 347 if (key.equals(DICT_KEY_USER_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) 348 continue; 349 final Dictionary dictionary = mUnigramDictionaries.get(key); 350 if (mTrailingSingleQuotesCount > 0) { 351 final WordComposer tmpWordComposer = new WordComposer(wordComposer); 352 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 353 tmpWordComposer.deleteLast(); 354 } 355 dictionary.getWords(tmpWordComposer, this, proximityInfo); 356 } else { 357 dictionary.getWords(wordComposer, this, proximityInfo); 358 } 359 } 360 } 361 362 CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized, 363 mWhiteListDictionary.getWhitelistedWord(consideredWord)); 364 365 final boolean hasAutoCorrection; 366 if (CORRECTION_FULL == correctionMode 367 || CORRECTION_FULL_BIGRAM == correctionMode) { 368 final CharSequence autoCorrection = 369 AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer, 370 mSuggestions, mScores, consideredWord, mAutoCorrectionThreshold, 371 whitelistedWord); 372 hasAutoCorrection = (null != autoCorrection); 373 } else { 374 hasAutoCorrection = false; 375 } 376 377 if (whitelistedWord != null) { 378 if (mTrailingSingleQuotesCount > 0) { 379 final StringBuilder sb = new StringBuilder(whitelistedWord); 380 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 381 sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); 382 } 383 mSuggestions.add(0, sb.toString()); 384 } else { 385 mSuggestions.add(0, whitelistedWord); 386 } 387 } 388 389 mSuggestions.add(0, typedWord); 390 StringUtils.removeDupes(mSuggestions); 391 392 final SuggestedWords.Builder builder; 393 if (DBG) { 394 // TODO: this doesn't take into account the fact that removing dupes from mSuggestions 395 // may have made mScores[] and mSuggestions out of sync. 396 final CharSequence autoCorrectionSuggestion = mSuggestions.get(0); 397 final int autoCorrectionSuggestionScore = mScores[0]; 398 double normalizedScore = BinaryDictionary.calcNormalizedScore( 399 typedWord, autoCorrectionSuggestion.toString(), 400 autoCorrectionSuggestionScore); 401 ArrayList<SuggestedWords.SuggestedWordInfo> scoreInfoList = 402 new ArrayList<SuggestedWords.SuggestedWordInfo>(); 403 scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(autoCorrectionSuggestion, "+", 404 false)); 405 final int suggestionsSize = mSuggestions.size(); 406 // Note: i here is the index in mScores[], but the index in mSuggestions is one more 407 // than i because we added the typed word to mSuggestions without touching mScores. 408 for (int i = 0; i < mScores.length && i < suggestionsSize - 1; ++i) { 409 if (normalizedScore > 0) { 410 final String scoreThreshold = String.format("%d (%4.2f)", mScores[i], 411 normalizedScore); 412 scoreInfoList.add( 413 new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i + 1), 414 scoreThreshold, false)); 415 normalizedScore = 0.0; 416 } else { 417 final String score = Integer.toString(mScores[i]); 418 scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i + 1), 419 score, false)); 420 } 421 } 422 for (int i = mScores.length; i < suggestionsSize; ++i) { 423 scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i), 424 "--", false)); 425 } 426 builder = new SuggestedWords.Builder().setWords(scoreInfoList) 427 .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected); 428 } else { 429 builder = new SuggestedWords.Builder() 430 .setWords(SuggestedWords.Builder.getFromCharSequenceList(mSuggestions)) 431 .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected); 432 } 433 434 boolean autoCorrectionAvailable = hasAutoCorrection; 435 if (correctionMode == Suggest.CORRECTION_FULL 436 || correctionMode == Suggest.CORRECTION_FULL_BIGRAM) { 437 autoCorrectionAvailable |= !allowsToBeAutoCorrected; 438 } 439 // Don't auto-correct words with multiple capital letter 440 autoCorrectionAvailable &= !wordComposer.isMostlyCaps(); 441 builder.setTypedWordValid(!allowsToBeAutoCorrected).setHasMinimalSuggestion( 442 autoCorrectionAvailable); 443 if (allowsToBeAutoCorrected && builder.size() > 1 && mAutoCorrectionThreshold > 0 444 && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord, builder.getWord(1))) { 445 builder.setShouldBlockAutoCorrectionBySafetyNet(); 446 } 447 return builder; 448 } 449 450 @Override 451 public boolean addWord(final char[] word, final int offset, final int length, int score, 452 final int dicTypeId, final int dataType) { 453 int dataTypeForLog = dataType; 454 final ArrayList<CharSequence> suggestions; 455 final int[] sortedScores; 456 final int prefMaxSuggestions; 457 if (dataType == Dictionary.BIGRAM) { 458 suggestions = mBigramSuggestions; 459 sortedScores = mBigramScores; 460 prefMaxSuggestions = PREF_MAX_BIGRAMS; 461 } else { 462 suggestions = mSuggestions; 463 sortedScores = mScores; 464 prefMaxSuggestions = mPrefMaxSuggestions; 465 } 466 467 int pos = 0; 468 469 // Check if it's the same word, only caps are different 470 if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) { 471 // TODO: remove this surrounding if clause and move this logic to 472 // getSuggestedWordBuilder. 473 if (suggestions.size() > 0) { 474 final String currentHighestWord = suggestions.get(0).toString(); 475 // If the current highest word is also equal to typed word, we need to compare 476 // frequency to determine the insertion position. This does not ensure strictly 477 // correct ordering, but ensures the top score is on top which is enough for 478 // removing duplicates correctly. 479 if (StringUtils.equalsIgnoreCase(currentHighestWord, word, offset, length) 480 && score <= sortedScores[0]) { 481 pos = 1; 482 } 483 } 484 } else { 485 if (dataType == Dictionary.UNIGRAM) { 486 // Check if the word was already added before (by bigram data) 487 int bigramSuggestion = searchBigramSuggestion(word,offset,length); 488 if(bigramSuggestion >= 0) { 489 dataTypeForLog = Dictionary.BIGRAM; 490 // turn freq from bigram into multiplier specified above 491 double multiplier = (((double) mBigramScores[bigramSuggestion]) 492 / MAXIMUM_BIGRAM_FREQUENCY) 493 * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN) 494 + BIGRAM_MULTIPLIER_MIN; 495 /* Log.d(TAG,"bigram num: " + bigramSuggestion 496 + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString() 497 + " currentScore: " + score + " bigramScore: " 498 + mBigramScores[bigramSuggestion] 499 + " multiplier: " + multiplier); */ 500 score = (int)Math.round((score * multiplier)); 501 } 502 } 503 504 // Check the last one's score and bail 505 if (sortedScores[prefMaxSuggestions - 1] >= score) return true; 506 while (pos < prefMaxSuggestions) { 507 if (sortedScores[pos] < score 508 || (sortedScores[pos] == score && length < suggestions.get(pos).length())) { 509 break; 510 } 511 pos++; 512 } 513 } 514 if (pos >= prefMaxSuggestions) { 515 return true; 516 } 517 518 System.arraycopy(sortedScores, pos, sortedScores, pos + 1, prefMaxSuggestions - pos - 1); 519 sortedScores[pos] = score; 520 final StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); 521 // TODO: Must pay attention to locale when changing case. 522 if (mIsAllUpperCase) { 523 sb.append(new String(word, offset, length).toUpperCase()); 524 } else if (mIsFirstCharCapitalized) { 525 sb.append(Character.toUpperCase(word[offset])); 526 if (length > 1) { 527 sb.append(word, offset + 1, length - 1); 528 } 529 } else { 530 sb.append(word, offset, length); 531 } 532 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 533 sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); 534 } 535 suggestions.add(pos, sb); 536 if (suggestions.size() > prefMaxSuggestions) { 537 suggestions.remove(prefMaxSuggestions); 538 } else { 539 LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog); 540 } 541 return true; 542 } 543 544 private int searchBigramSuggestion(final char[] word, final int offset, final int length) { 545 // TODO This is almost O(n^2). Might need fix. 546 // search whether the word appeared in bigram data 547 int bigramSuggestSize = mBigramSuggestions.size(); 548 for (int i = 0; i < bigramSuggestSize; i++) { 549 if (mBigramSuggestions.get(i).length() == length) { 550 boolean chk = true; 551 for (int j = 0; j < length; j++) { 552 if (mBigramSuggestions.get(i).charAt(j) != word[offset+j]) { 553 chk = false; 554 break; 555 } 556 } 557 if (chk) return i; 558 } 559 } 560 561 return -1; 562 } 563 564 public void close() { 565 final Set<Dictionary> dictionaries = new HashSet<Dictionary>(); 566 dictionaries.addAll(mUnigramDictionaries.values()); 567 dictionaries.addAll(mBigramDictionaries.values()); 568 for (final Dictionary dictionary : dictionaries) { 569 dictionary.close(); 570 } 571 mMainDict = null; 572 } 573 574 // TODO: Resolve the inconsistencies between the native auto correction algorithms and 575 // this safety net 576 public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord, 577 final CharSequence suggestion) { 578 // Safety net for auto correction. 579 // Actually if we hit this safety net, it's a bug. 580 // If user selected aggressive auto correction mode, there is no need to use the safety 581 // net. 582 // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH, 583 // we should not use net because relatively edit distance can be big. 584 final int typedWordLength = typedWord.length(); 585 if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) { 586 return false; 587 } 588 final int maxEditDistanceOfNativeDictionary = 589 (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; 590 final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString()); 591 if (DBG) { 592 Log.d(TAG, "Autocorrected edit distance = " + distance 593 + ", " + maxEditDistanceOfNativeDictionary); 594 } 595 if (distance > maxEditDistanceOfNativeDictionary) { 596 if (DBG) { 597 Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion); 598 Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. " 599 + "Turning off auto-correction."); 600 } 601 return true; 602 } else { 603 return false; 604 } 605 } 606} 607