Suggest.java revision 674ffcdf9361b3c90cc39daf02f3217fb6d870de
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;
import android.text.TextUtils;
import android.util.Log;

import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.ProximityInfo;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
 * This class loads a dictionary and provides a list of suggestions for a given sequence of
 * characters. This includes corrections and completions.
 *
 * <p>The main dictionary is loaded on a background thread (see {@link #resetMainDict}), so the
 * dictionary maps are concurrent maps and the main dictionary reference is volatile. The
 * per-request state (suggestion lists, score arrays, capitalization flags) is not synchronized.
 */
public class Suggest implements Dictionary.WordCallback {

    public static final String TAG = Suggest.class.getSimpleName();

    public static final int APPROX_MAX_WORD_LENGTH = 32;

    public static final int CORRECTION_NONE = 0;
    public static final int CORRECTION_FULL = 1;
    public static final int CORRECTION_FULL_BIGRAM = 2;

    /**
     * Words that appear in both bigram and unigram data get a multiplier ranging from
     * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the score from
     * bigram data.
     */
    public static final double BIGRAM_MULTIPLIER_MIN = 1.2;
    public static final double BIGRAM_MULTIPLIER_MAX = 1.5;

    /**
     * Maximum possible bigram frequency. Will depend on how many bits are being used in data
     * structure. Maximum bigram frequency will get the BIGRAM_MULTIPLIER_MAX as the multiplier.
     */
    public static final int MAXIMUM_BIGRAM_FREQUENCY = 127;

    // It seems the following values are only used for logging.
    public static final int DIC_USER_TYPED = 0;
    public static final int DIC_MAIN = 1;
    public static final int DIC_USER = 2;
    public static final int DIC_USER_UNIGRAM = 3;
    public static final int DIC_CONTACTS = 4;
    public static final int DIC_USER_BIGRAM = 5;
    public static final int DIC_WHITELIST = 6;
    // If you add a type of dictionary, increment DIC_TYPE_LAST_ID
    // TODO: this value seems unused. Remove it?
    public static final int DIC_TYPE_LAST_ID = 6;
    public static final String DICT_KEY_MAIN = "main";
    public static final String DICT_KEY_CONTACTS = "contacts";
    // User dictionary, the system-managed one.
    public static final String DICT_KEY_USER = "user";
    // User unigram dictionary, internal to LatinIME
    public static final String DICT_KEY_USER_UNIGRAM = "user_unigram";
    // User bigram dictionary, internal to LatinIME
    public static final String DICT_KEY_USER_BIGRAM = "user_bigram";
    public static final String DICT_KEY_WHITELIST = "whitelist";

    private static final boolean DBG = LatinImeLogger.sDBG;

    // Written by the loader thread started in resetMainDict() and read by the suggestion
    // methods, hence volatile. Always copy it to a local before a null-check-then-use.
    private volatile Dictionary mMainDict;
    private ContactsDictionary mContactsDict;
    private WhitelistDictionary mWhiteListDictionary;
    // These maps are modified by the loader thread started in resetMainDict() while the
    // suggestion methods iterate over them, so they must tolerate concurrent modification.
    private final Map<String, Dictionary> mUnigramDictionaries =
            new ConcurrentHashMap<String, Dictionary>();
    private final Map<String, Dictionary> mBigramDictionaries =
            new ConcurrentHashMap<String, Dictionary>();

    private int mPrefMaxSuggestions = 18;

    private static final int PREF_MAX_BIGRAMS = 60;

    private double mAutoCorrectionThreshold;
    private int[] mScores = new int[mPrefMaxSuggestions];
    private int[] mBigramScores = new int[PREF_MAX_BIGRAMS];

    private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>();
    private ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>();
    private CharSequence mConsideredWord;

    // TODO: Remove these member variables by passing more context to addWord() callback method
    private boolean mIsFirstCharCapitalized;
    private boolean mIsAllUpperCase;
    private int mTrailingSingleQuotesCount;

    private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4;

    /**
     * Creates a suggestion engine whose main dictionary is loaded on a background thread.
     *
     * @param context the context handed to the dictionary factory and the whitelist
     * @param dictionaryResId the resource id handed to the dictionary factory
     * @param locale the locale of the dictionaries to load
     */
    public Suggest(final Context context, final int dictionaryResId, final Locale locale) {
        initAsynchronously(context, dictionaryResId, locale);
    }

    /* package for test */ Suggest(final Context context, final File dictionary,
            final long startOffset, final long length, final Flag[] flagArray,
            final Locale locale) {
        initSynchronously(context, DictionaryFactory.createDictionaryForTest(context, dictionary,
                startOffset, length, flagArray), locale);
    }

    // Creates the whitelist dictionary and registers it among the unigram dictionaries.
    private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) {
        mWhiteListDictionary = new WhitelistDictionary(context, locale);
        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary);
    }

    private void initAsynchronously(final Context context, final int dictionaryResId,
            final Locale locale) {
        resetMainDict(context, dictionaryResId, locale);

        // TODO: read the whitelist and init the pool asynchronously too.
        // initPool should be done asynchronously now that the pool is thread-safe.
        initWhitelistAndAutocorrectAndPool(context, locale);
    }

    private void initSynchronously(final Context context, final Dictionary mainDict,
            final Locale locale) {
        mMainDict = mainDict;
        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, mainDict);
        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, mainDict);
        initWhitelistAndAutocorrectAndPool(context, locale);
    }

    /**
     * Registers {@code dict} under {@code key}, or removes the entry when {@code dict} is null.
     * The dictionary previously stored under the key, if it is a different object, is closed.
     */
    private static void addOrReplaceDictionary(Map<String, Dictionary> dictionaries, String key,
            Dictionary dict) {
        final Dictionary oldDict = (dict == null)
                ? dictionaries.remove(key)
                : dictionaries.put(key, dict);
        if (oldDict != null && dict != oldDict) {
            oldDict.close();
        }
    }

    /**
     * Drops the current main dictionary reference and loads a new main dictionary on a
     * background thread. Until the thread finishes, {@link #hasMainDictionary()} returns false.
     *
     * @param context the context handed to the dictionary factory
     * @param dictionaryResId the resource id handed to the dictionary factory
     * @param locale the locale of the dictionary to load
     */
    public void resetMainDict(final Context context, final int dictionaryResId,
            final Locale locale) {
        mMainDict = null;
        new Thread("InitializeBinaryDictionary") {
            @Override
            public void run() {
                final Dictionary newMainDict = DictionaryFactory.createDictionaryFromManager(
                        context, locale, dictionaryResId);
                mMainDict = newMainDict;
                addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, newMainDict);
                addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, newMainDict);
            }
        }.start();
    }

    // The main dictionary could have been loaded asynchronously. Don't cache the return value
    // of this method.
    public boolean hasMainDictionary() {
        return mMainDict != null;
    }

    /** Returns the contacts dictionary last passed to {@link #setContactsDictionary}. */
    public ContactsDictionary getContactsDictionary() {
        return mContactsDict;
    }

    /** Returns the live map of unigram dictionaries, keyed by the {@code DICT_KEY_*} names. */
    public Map<String, Dictionary> getUnigramDictionaries() {
        return mUnigramDictionaries;
    }

    public static int getApproxMaxWordLength() {
        return APPROX_MAX_WORD_LENGTH;
    }

    /**
     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
     * before the main dictionary, if set. This refers to the system-managed user dictionary.
     */
    public void setUserDictionary(Dictionary userDictionary) {
        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER, userDictionary);
    }

    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
     * will be used.
     */
    public void setContactsDictionary(ContactsDictionary contactsDictionary) {
        mContactsDict = contactsDictionary;
        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary);
        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary);
    }

    /** Sets (or, with null, removes) the LatinIME-internal user unigram dictionary. */
    public void setUserUnigramDictionary(Dictionary userUnigramDictionary) {
        addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_UNIGRAM, userUnigramDictionary);
    }

    /** Sets (or, with null, removes) the LatinIME-internal user bigram dictionary. */
    public void setUserBigramDictionary(Dictionary userBigramDictionary) {
        addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_BIGRAM, userBigramDictionary);
    }

    /** Sets the threshold handed to the auto-correction computation. */
    public void setAutoCorrectionThreshold(double threshold) {
        mAutoCorrectionThreshold = threshold;
    }

    /**
     * Returns {@code word} upper-cased entirely when {@code all} is set, or with only its first
     * character upper-cased when {@code first} is set. A null or empty word, or a word for which
     * neither flag is set, is returned as is.
     */
    private static CharSequence capitalizeWord(final boolean all, final boolean first,
            final CharSequence word) {
        if (TextUtils.isEmpty(word) || !(all || first)) return word;
        final int wordLength = word.length();
        final StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
        // TODO: Must pay attention to locale when changing case.
        if (all) {
            sb.append(word.toString().toUpperCase());
        } else {
            // Only "first" can be set here because of the early return above.
            sb.append(Character.toUpperCase(word.charAt(0)));
            if (wordLength > 1) {
                sb.append(word.subSequence(1, wordLength));
            }
        }
        return sb;
    }

    /** Appends a copy of {@code bigram} to the current suggestion list. */
    protected void addBigramToSuggestions(CharSequence bigram) {
        final StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
        sb.append(bigram);
        mSuggestions.add(sb);
    }

    /**
     * Asks every bigram dictionary for the bigrams following {@code prevWord}; the results come
     * back through {@link #addWord}. The lower-cased form of the previous word is used for the
     * lookup when the main dictionary reports it as a valid word.
     *
     * @param composer the composer handed to the dictionaries
     * @param prevWord the previous word; must not be null
     */
    private void collectBigrams(final WordComposer composer, final CharSequence prevWord) {
        CharSequence lookupWord = prevWord;
        // TODO: Must pay attention to locale when changing case.
        final CharSequence lowerPrevWord = prevWord.toString().toLowerCase();
        // Read the volatile field once so that it cannot become null between check and use.
        final Dictionary mainDict = mMainDict;
        if (mainDict != null && mainDict.isValidWord(lowerPrevWord)) {
            lookupWord = lowerPrevWord;
        }
        for (final Dictionary dictionary : mBigramDictionaries.values()) {
            dictionary.getBigrams(composer, lookupWord, this);
        }
    }

    private static final WordComposer sEmptyWordComposer = new WordComposer();

    /**
     * Builds the list of predictions that follow {@code prevWordForBigram} when nothing has been
     * typed yet. The result never carries an auto-correction.
     *
     * @param prevWordForBigram the previous word; when null or empty no dictionary is consulted
     *        and the resulting builder holds no words
     * @return a builder holding at most {@code mPrefMaxSuggestions} bigram predictions
     */
    public SuggestedWords.Builder getBigramPredictionWordBuilder(CharSequence prevWordForBigram) {
        LatinImeLogger.onStartSuggestion(prevWordForBigram);
        mIsFirstCharCapitalized = false;
        mIsAllUpperCase = false;
        mTrailingSingleQuotesCount = 0;
        mSuggestions = new ArrayList<CharSequence>(mPrefMaxSuggestions);
        Arrays.fill(mScores, 0);

        // Treating USER_TYPED as UNIGRAM suggestion for logging now.
        LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM);
        mConsideredWord = "";

        Arrays.fill(mBigramScores, 0);
        mBigramSuggestions = new ArrayList<CharSequence>(PREF_MAX_BIGRAMS);

        // Same guard as in getSuggestedWordBuilder(): without a previous word there is nothing
        // to look up, and a null previous word must not be dereferenced.
        if (!TextUtils.isEmpty(prevWordForBigram)) {
            collectBigrams(sEmptyWordComposer, prevWordForBigram);
        }
        // Nothing entered: return all bigrams for the previous word
        final int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions);
        for (int i = 0; i < insertCount; ++i) {
            addBigramToSuggestions(mBigramSuggestions.get(i));
        }

        StringUtils.removeDupes(mSuggestions);

        return new SuggestedWords.Builder()
                .setWords(SuggestedWords.Builder.getFromCharSequenceList(mSuggestions))
                .setAllowsToBeAutoCorrected(false)
                .setHasAutoCorrection(false);
    }

    /**
     * Builds the suggestions for the word currently being composed.
     *
     * <p>With at most one character typed and {@code CORRECTION_FULL_BIGRAM}, only bigrams of the
     * previous word are proposed. With more than one character typed, the unigram dictionaries
     * are searched (scores being boosted for words that also appear in the bigram results). The
     * typed word is always placed first, followed by the whitelisted word if there is one.
     *
     * @param wordComposer the state of the word being typed
     * @param prevWordForBigram the previous word, or null/empty if there is none
     * @param proximityInfo the proximity information handed to the dictionaries
     * @param correctionMode one of {@link #CORRECTION_NONE}, {@link #CORRECTION_FULL} and
     *        {@link #CORRECTION_FULL_BIGRAM}
     * @return a builder holding the suggestions and the auto-correction flags
     */
    // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder
    public SuggestedWords.Builder getSuggestedWordBuilder(
            final WordComposer wordComposer, CharSequence prevWordForBigram,
            final ProximityInfo proximityInfo, final int correctionMode) {
        LatinImeLogger.onStartSuggestion(prevWordForBigram);
        mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
        mIsAllUpperCase = wordComposer.isAllUpperCase();
        mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
        mSuggestions = new ArrayList<CharSequence>(mPrefMaxSuggestions);
        Arrays.fill(mScores, 0);

        final String typedWord = wordComposer.getTypedWord();
        final String consideredWord = mTrailingSingleQuotesCount > 0
                ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount)
                : typedWord;
        // Treating USER_TYPED as UNIGRAM suggestion for logging now.
        LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED,
                Dictionary.UNIGRAM);
        mConsideredWord = consideredWord;

        // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid"
        // but still autocorrected from - in the case the whitelist only capitalizes the word.
        // The whitelist should be case-insensitive, so it's not possible to be consistent with
        // a boolean flag. Right now this is handled with a slight hack in
        // WhitelistDictionary#shouldForciblyAutoCorrectFrom.
        final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected(
                getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized());

        if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) {
            // At first character typed, search only the bigrams
            Arrays.fill(mBigramScores, 0);
            mBigramSuggestions = new ArrayList<CharSequence>(PREF_MAX_BIGRAMS);

            if (!TextUtils.isEmpty(prevWordForBigram)) {
                collectBigrams(wordComposer, prevWordForBigram);
                if (TextUtils.isEmpty(consideredWord)) {
                    // Nothing entered: return all bigrams for the previous word
                    final int insertCount =
                            Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions);
                    for (int i = 0; i < insertCount; ++i) {
                        addBigramToSuggestions(mBigramSuggestions.get(i));
                    }
                } else {
                    // Word entered: return only bigrams that match the first char of the typed word
                    final char currentChar = consideredWord.charAt(0);
                    // TODO: Must pay attention to locale when changing case.
                    final char currentCharUpper = Character.toUpperCase(currentChar);
                    int count = 0;
                    final int bigramSuggestionSize = mBigramSuggestions.size();
                    for (int i = 0; i < bigramSuggestionSize; i++) {
                        final CharSequence bigramSuggestion = mBigramSuggestions.get(i);
                        final char bigramSuggestionFirstChar = bigramSuggestion.charAt(0);
                        if (bigramSuggestionFirstChar == currentChar
                                || bigramSuggestionFirstChar == currentCharUpper) {
                            addBigramToSuggestions(bigramSuggestion);
                            // Stop once exactly mPrefMaxSuggestions bigrams were added, which
                            // is the same cap as in the "nothing entered" branch above.
                            if (++count >= mPrefMaxSuggestions) break;
                        }
                    }
                }
            }

        } else if (wordComposer.size() > 1) {
            // At second character typed, search the unigrams (scores being affected by bigrams)
            // The dictionaries must not see the trailing single quotes, so look up with a copy
            // of the composer from which they were removed. The copy is the same for every
            // dictionary, so it is built only once.
            final WordComposer lookupComposer;
            if (mTrailingSingleQuotesCount > 0) {
                lookupComposer = new WordComposer(wordComposer);
                for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
                    lookupComposer.deleteLast();
                }
            } else {
                lookupComposer = wordComposer;
            }
            for (final Map.Entry<String, Dictionary> entry : mUnigramDictionaries.entrySet()) {
                final String key = entry.getKey();
                // Skip UserUnigramDictionary and WhitelistDictionary to lookup
                if (key.equals(DICT_KEY_USER_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) {
                    continue;
                }
                entry.getValue().getWords(lookupComposer, this, proximityInfo);
            }
        }

        final CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase,
                mIsFirstCharCapitalized, mWhiteListDictionary.getWhitelistedWord(consideredWord));

        final boolean hasAutoCorrection;
        if (CORRECTION_FULL == correctionMode
                || CORRECTION_FULL_BIGRAM == correctionMode) {
            final CharSequence autoCorrection =
                    AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer,
                            mSuggestions, mScores, consideredWord, mAutoCorrectionThreshold,
                            whitelistedWord);
            hasAutoCorrection = (null != autoCorrection);
        } else {
            hasAutoCorrection = false;
        }

        if (whitelistedWord != null) {
            if (mTrailingSingleQuotesCount > 0) {
                // Put back the single quotes that were stripped from the considered word.
                final StringBuilder sb = new StringBuilder(whitelistedWord);
                for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
                    sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
                }
                mSuggestions.add(0, sb.toString());
            } else {
                mSuggestions.add(0, whitelistedWord);
            }
        }

        mSuggestions.add(0, typedWord);
        StringUtils.removeDupes(mSuggestions);

        final SuggestedWords.Builder builder;
        if (DBG) {
            // TODO: this doesn't take into account the fact that removing dupes from mSuggestions
            // may have made mScores[] and mSuggestions out of sync.
            final CharSequence autoCorrectionSuggestion = mSuggestions.get(0);
            final int autoCorrectionSuggestionScore = mScores[0];
            double normalizedScore = BinaryDictionary.calcNormalizedScore(
                    typedWord, autoCorrectionSuggestion.toString(),
                    autoCorrectionSuggestionScore);
            ArrayList<SuggestedWords.SuggestedWordInfo> scoreInfoList =
                    new ArrayList<SuggestedWords.SuggestedWordInfo>();
            scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(autoCorrectionSuggestion, "+",
                    false));
            final int suggestionsSize = mSuggestions.size();
            // Note: i here is the index in mScores[], but the index in mSuggestions is one more
            // than i because we added the typed word to mSuggestions without touching mScores.
            for (int i = 0; i < mScores.length && i < suggestionsSize - 1; ++i) {
                if (normalizedScore > 0) {
                    final String scoreThreshold = String.format("%d (%4.2f)", mScores[i],
                            normalizedScore);
                    scoreInfoList.add(
                            new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i + 1),
                                    scoreThreshold, false));
                    normalizedScore = 0.0;
                } else {
                    final String score = Integer.toString(mScores[i]);
                    scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i + 1),
                            score, false));
                }
            }
            // The loop above already covered mSuggestions[1..mScores.length], so the words
            // without a score start at index mScores.length + 1.
            for (int i = mScores.length + 1; i < suggestionsSize; ++i) {
                scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(mSuggestions.get(i),
                        "--", false));
            }
            builder = new SuggestedWords.Builder().setWords(scoreInfoList)
                    .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected)
                    .setHasAutoCorrection(hasAutoCorrection);
        } else {
            builder = new SuggestedWords.Builder()
                    .setWords(SuggestedWords.Builder.getFromCharSequenceList(mSuggestions))
                    .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected)
                    .setHasAutoCorrection(hasAutoCorrection);
        }

        boolean autoCorrectionAvailable = hasAutoCorrection;
        if (correctionMode == Suggest.CORRECTION_FULL
                || correctionMode == Suggest.CORRECTION_FULL_BIGRAM) {
            autoCorrectionAvailable |= !allowsToBeAutoCorrected;
        }
        // Don't auto-correct words with multiple capital letter
        autoCorrectionAvailable &= !wordComposer.isMostlyCaps();
        builder.setTypedWordValid(!allowsToBeAutoCorrected).setHasMinimalSuggestion(
                autoCorrectionAvailable);
        if (allowsToBeAutoCorrected && builder.size() > 1 && mAutoCorrectionThreshold > 0
                && Suggest.shouldBlockAutoCorrectionBySafetyNet(typedWord, builder.getWord(1))) {
            builder.setShouldBlockAutoCorrectionBySafetyNet();
        }
        return builder;
    }

    /**
     * Dictionary callback: inserts a candidate word into the bigram list (for
     * {@code Dictionary.BIGRAM} data) or the suggestion list (otherwise), keeping the list and
     * its score array ordered by decreasing score and capped at the list's maximum size.
     *
     * <p>The stored word is capitalized according to the typed word and gets the typed word's
     * trailing single quotes appended.
     *
     * @param word the buffer holding the candidate word
     * @param offset the index of the first character of the word in {@code word}
     * @param length the number of characters of the word
     * @param score the score of the word; a unigram that is also among the bigram results gets
     *        its score multiplied
     * @param dicTypeId the dictionary type, only used for logging
     * @param dataType {@code Dictionary.UNIGRAM} or {@code Dictionary.BIGRAM}
     * @return always true
     */
    @Override
    public boolean addWord(final char[] word, final int offset, final int length, int score,
            final int dicTypeId, final int dataType) {
        int dataTypeForLog = dataType;
        final ArrayList<CharSequence> suggestions;
        final int[] sortedScores;
        final int prefMaxSuggestions;
        if (dataType == Dictionary.BIGRAM) {
            suggestions = mBigramSuggestions;
            sortedScores = mBigramScores;
            prefMaxSuggestions = PREF_MAX_BIGRAMS;
        } else {
            suggestions = mSuggestions;
            sortedScores = mScores;
            prefMaxSuggestions = mPrefMaxSuggestions;
        }

        int pos = 0;

        // Check if it's the same word, only caps are different
        if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) {
            // TODO: remove this surrounding if clause and move this logic to
            // getSuggestedWordBuilder.
            if (suggestions.size() > 0) {
                final String currentHighestWord = suggestions.get(0).toString();
                // If the current highest word is also equal to typed word, we need to compare
                // frequency to determine the insertion position. This does not ensure strictly
                // correct ordering, but ensures the top score is on top which is enough for
                // removing duplicates correctly.
                if (StringUtils.equalsIgnoreCase(currentHighestWord, word, offset, length)
                        && score <= sortedScores[0]) {
                    pos = 1;
                }
            }
        } else {
            if (dataType == Dictionary.UNIGRAM) {
                // Check if the word was already added before (by bigram data)
                final int bigramSuggestion = searchBigramSuggestion(word, offset, length);
                if (bigramSuggestion >= 0) {
                    dataTypeForLog = Dictionary.BIGRAM;
                    // turn freq from bigram into multiplier specified above
                    final double multiplier = (((double) mBigramScores[bigramSuggestion])
                            / MAXIMUM_BIGRAM_FREQUENCY)
                            * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN)
                            + BIGRAM_MULTIPLIER_MIN;
                    /* Log.d(TAG,"bigram num: " + bigramSuggestion
                            + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString()
                            + " currentScore: " + score + " bigramScore: "
                            + mBigramScores[bigramSuggestion]
                            + " multiplier: " + multiplier); */
                    score = (int) Math.round((score * multiplier));
                }
            }

            // Check the last one's score and bail
            if (sortedScores[prefMaxSuggestions - 1] >= score) return true;
            // Find the first slot with a lower score, or with the same score and a longer word.
            while (pos < prefMaxSuggestions) {
                if (sortedScores[pos] < score
                        || (sortedScores[pos] == score && length < suggestions.get(pos).length())) {
                    break;
                }
                pos++;
            }
        }
        if (pos >= prefMaxSuggestions) {
            return true;
        }

        // Shift the lower scores down by one slot; the last score falls off the array.
        System.arraycopy(sortedScores, pos, sortedScores, pos + 1, prefMaxSuggestions - pos - 1);
        sortedScores[pos] = score;
        final StringBuilder sb = new StringBuilder(getApproxMaxWordLength());
        // TODO: Must pay attention to locale when changing case.
        if (mIsAllUpperCase) {
            sb.append(new String(word, offset, length).toUpperCase());
        } else if (mIsFirstCharCapitalized) {
            sb.append(Character.toUpperCase(word[offset]));
            if (length > 1) {
                sb.append(word, offset + 1, length - 1);
            }
        } else {
            sb.append(word, offset, length);
        }
        for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) {
            sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
        }
        suggestions.add(pos, sb);
        if (suggestions.size() > prefMaxSuggestions) {
            suggestions.remove(prefMaxSuggestions);
        } else {
            LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog);
        }
        return true;
    }

    /**
     * Returns the index in the bigram suggestion list of the entry that is exactly (case
     * sensitively) equal to the given word, or -1 if there is none.
     */
    private int searchBigramSuggestion(final char[] word, final int offset, final int length) {
        // TODO This is almost O(n^2). Might need fix.
        // search whether the word appeared in bigram data
        final int bigramSuggestSize = mBigramSuggestions.size();
        for (int i = 0; i < bigramSuggestSize; i++) {
            final CharSequence candidate = mBigramSuggestions.get(i);
            if (candidate.length() != length) continue;
            boolean matches = true;
            for (int j = 0; j < length; j++) {
                if (candidate.charAt(j) != word[offset + j]) {
                    matches = false;
                    break;
                }
            }
            if (matches) return i;
        }

        return -1;
    }

    /**
     * Closes every registered dictionary once (a dictionary registered both as unigram and as
     * bigram dictionary is closed a single time) and drops the main dictionary reference.
     */
    public void close() {
        final Set<Dictionary> dictionaries = new HashSet<Dictionary>();
        dictionaries.addAll(mUnigramDictionaries.values());
        dictionaries.addAll(mBigramDictionaries.values());
        for (final Dictionary dictionary : dictionaries) {
            dictionary.close();
        }
        mMainDict = null;
    }

    /**
     * Tells whether auto-correcting {@code typedWord} into {@code suggestion} must be blocked
     * because the edit distance between the two exceeds the safety limit.
     *
     * @param typedWord the word as typed
     * @param suggestion the auto-correction candidate
     * @return true if the correction must be blocked; always false for typed words shorter than
     *         {@code MINIMUM_SAFETY_NET_CHAR_LENGTH}
     */
    // TODO: Resolve the inconsistencies between the native auto correction algorithms and
    // this safety net
    public static boolean shouldBlockAutoCorrectionBySafetyNet(final String typedWord,
            final CharSequence suggestion) {
        // Safety net for auto correction.
        // Actually if we hit this safety net, it's a bug.
        // If user selected aggressive auto correction mode, there is no need to use the safety
        // net.
        // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH,
        // we should not use net because relatively edit distance can be big.
        final int typedWordLength = typedWord.length();
        if (typedWordLength < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) {
            return false;
        }
        final int maxEditDistanceOfNativeDictionary =
                (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1;
        final int distance = BinaryDictionary.editDistance(typedWord, suggestion.toString());
        if (DBG) {
            Log.d(TAG, "Autocorrected edit distance = " + distance
                    + ", " + maxEditDistanceOfNativeDictionary);
        }
        if (distance > maxEditDistanceOfNativeDictionary) {
            if (DBG) {
                Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestion);
                Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. "
                        + "Turning off auto-correction.");
            }
            return true;
        }
        return false;
    }
}