Suggest.java revision a3ee019331e2d8881e19185fba9ccebfeb170614
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.text.TextUtils; 21import android.util.Log; 22 23import com.android.inputmethod.keyboard.Keyboard; 24import com.android.inputmethod.keyboard.ProximityInfo; 25 26import java.io.File; 27import java.util.ArrayList; 28import java.util.Arrays; 29import java.util.HashMap; 30import java.util.HashSet; 31import java.util.Locale; 32import java.util.Map; 33import java.util.Set; 34 35/** 36 * This class loads a dictionary and provides a list of suggestions for a given sequence of 37 * characters. This includes corrections and completions. 38 */ 39public class Suggest implements Dictionary.WordCallback { 40 41 public static final String TAG = Suggest.class.getSimpleName(); 42 43 public static final int APPROX_MAX_WORD_LENGTH = 32; 44 45 public static final int CORRECTION_NONE = 0; 46 public static final int CORRECTION_FULL = 1; 47 public static final int CORRECTION_FULL_BIGRAM = 2; 48 49 /** 50 * Words that appear in both bigram and unigram data gets multiplier ranging from 51 * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the score from 52 * bigram data. 53 */ 54 public static final double BIGRAM_MULTIPLIER_MIN = 1.2; 55 public static final double BIGRAM_MULTIPLIER_MAX = 1.5; 56 57 /** 58 * Maximum possible bigram frequency. Will depend on how many bits are being used in data 59 * structure. Maximum bigram frequency will get the BIGRAM_MULTIPLIER_MAX as the multiplier. 60 */ 61 public static final int MAXIMUM_BIGRAM_FREQUENCY = 127; 62 63 // It seems the following values are only used for logging. 64 public static final int DIC_USER_TYPED = 0; 65 public static final int DIC_MAIN = 1; 66 public static final int DIC_USER = 2; 67 public static final int DIC_USER_UNIGRAM = 3; 68 public static final int DIC_CONTACTS = 4; 69 public static final int DIC_USER_BIGRAM = 5; 70 public static final int DIC_WHITELIST = 6; 71 // If you add a type of dictionary, increment DIC_TYPE_LAST_ID 72 // TODO: this value seems unused. Remove it? 73 public static final int DIC_TYPE_LAST_ID = 6; 74 public static final String DICT_KEY_MAIN = "main"; 75 public static final String DICT_KEY_CONTACTS = "contacts"; 76 // User dictionary, the system-managed one. 77 public static final String DICT_KEY_USER = "user"; 78 // User unigram dictionary, internal to LatinIME 79 public static final String DICT_KEY_USER_UNIGRAM = "user_unigram"; 80 // User bigram dictionary, internal to LatinIME 81 public static final String DICT_KEY_USER_BIGRAM = "user_bigram"; 82 public static final String DICT_KEY_WHITELIST ="whitelist"; 83 84 private static final boolean DBG = LatinImeLogger.sDBG; 85 86 private Dictionary mMainDict; 87 private ContactsDictionary mContactsDict; 88 private WhitelistDictionary mWhiteListDictionary; 89 private final Map<String, Dictionary> mUnigramDictionaries = new HashMap<String, Dictionary>(); 90 private final Map<String, Dictionary> mBigramDictionaries = new HashMap<String, Dictionary>(); 91 92 private int mPrefMaxSuggestions = 18; 93 94 private static final int PREF_MAX_BIGRAMS = 60; 95 96 private double mAutoCorrectionThreshold; 97 private int[] mScores = new int[mPrefMaxSuggestions]; 98 private int[] mBigramScores = new int[PREF_MAX_BIGRAMS]; 99 100 private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>(); 101 ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>(); 102 private CharSequence mConsideredWord; 103 104 // TODO: Remove these member variables by passing more context to addWord() callback method 105 private boolean mIsFirstCharCapitalized; 106 private boolean mIsAllUpperCase; 107 private int mTrailingSingleQuotesCount; 108 109 private static final int MINIMUM_SAFETY_NET_CHAR_LENGTH = 4; 110 111 public Suggest(final Context context, final int dictionaryResId, final Locale locale) { 112 initAsynchronously(context, dictionaryResId, locale); 113 } 114 115 /* package for test */ Suggest(final Context context, final File dictionary, 116 final long startOffset, final long length, final Flag[] flagArray, 117 final Locale locale) { 118 initSynchronously(context, DictionaryFactory.createDictionaryForTest(context, dictionary, 119 startOffset, length, flagArray), locale); 120 } 121 122 private void initWhitelistAndAutocorrectAndPool(final Context context, final Locale locale) { 123 mWhiteListDictionary = new WhitelistDictionary(context, locale); 124 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_WHITELIST, mWhiteListDictionary); 125 StringBuilderPool.ensureCapacity(mPrefMaxSuggestions, getApproxMaxWordLength()); 126 } 127 128 private void initAsynchronously(final Context context, final int dictionaryResId, 129 final Locale locale) { 130 resetMainDict(context, dictionaryResId, locale); 131 132 // TODO: read the whitelist and init the pool asynchronously too. 133 // initPool should be done asynchronously now that the pool is thread-safe. 134 initWhitelistAndAutocorrectAndPool(context, locale); 135 } 136 137 private void initSynchronously(final Context context, final Dictionary mainDict, 138 final Locale locale) { 139 mMainDict = mainDict; 140 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, mainDict); 141 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, mainDict); 142 initWhitelistAndAutocorrectAndPool(context, locale); 143 } 144 145 private static void addOrReplaceDictionary(Map<String, Dictionary> dictionaries, String key, 146 Dictionary dict) { 147 final Dictionary oldDict = (dict == null) 148 ? dictionaries.remove(key) 149 : dictionaries.put(key, dict); 150 if (oldDict != null && dict != oldDict) { 151 oldDict.close(); 152 } 153 } 154 155 public void resetMainDict(final Context context, final int dictionaryResId, 156 final Locale locale) { 157 mMainDict = null; 158 new Thread("InitializeBinaryDictionary") { 159 @Override 160 public void run() { 161 final Dictionary newMainDict = DictionaryFactory.createDictionaryFromManager( 162 context, locale, dictionaryResId); 163 mMainDict = newMainDict; 164 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_MAIN, newMainDict); 165 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_MAIN, newMainDict); 166 } 167 }.start(); 168 } 169 170 // The main dictionary could have been loaded asynchronously. Don't cache the return value 171 // of this method. 172 public boolean hasMainDictionary() { 173 return mMainDict != null; 174 } 175 176 public ContactsDictionary getContactsDictionary() { 177 return mContactsDict; 178 } 179 180 public Map<String, Dictionary> getUnigramDictionaries() { 181 return mUnigramDictionaries; 182 } 183 184 public int getApproxMaxWordLength() { 185 return APPROX_MAX_WORD_LENGTH; 186 } 187 188 /** 189 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 190 * before the main dictionary, if set. This refers to the system-managed user dictionary. 191 */ 192 public void setUserDictionary(Dictionary userDictionary) { 193 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER, userDictionary); 194 } 195 196 /** 197 * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove 198 * the contacts dictionary by passing null to this method. In this case no contacts dictionary 199 * won't be used. 200 */ 201 public void setContactsDictionary(ContactsDictionary contactsDictionary) { 202 mContactsDict = contactsDictionary; 203 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); 204 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_CONTACTS, contactsDictionary); 205 } 206 207 public void setUserUnigramDictionary(Dictionary userUnigramDictionary) { 208 addOrReplaceDictionary(mUnigramDictionaries, DICT_KEY_USER_UNIGRAM, userUnigramDictionary); 209 } 210 211 public void setUserBigramDictionary(Dictionary userBigramDictionary) { 212 addOrReplaceDictionary(mBigramDictionaries, DICT_KEY_USER_BIGRAM, userBigramDictionary); 213 } 214 215 public void setAutoCorrectionThreshold(double threshold) { 216 mAutoCorrectionThreshold = threshold; 217 } 218 219 /** 220 * Number of suggestions to generate from the input key sequence. This has 221 * to be a number between 1 and 100 (inclusive). 222 * @param maxSuggestions 223 * @throws IllegalArgumentException if the number is out of range 224 */ 225 public void setMaxSuggestions(int maxSuggestions) { 226 if (maxSuggestions < 1 || maxSuggestions > 100) { 227 throw new IllegalArgumentException("maxSuggestions must be between 1 and 100"); 228 } 229 mPrefMaxSuggestions = maxSuggestions; 230 mScores = new int[mPrefMaxSuggestions]; 231 mBigramScores = new int[PREF_MAX_BIGRAMS]; 232 collectGarbage(mSuggestions, mPrefMaxSuggestions); 233 StringBuilderPool.ensureCapacity(mPrefMaxSuggestions, getApproxMaxWordLength()); 234 } 235 236 private CharSequence capitalizeWord(boolean all, boolean first, CharSequence word) { 237 if (TextUtils.isEmpty(word) || !(all || first)) return word; 238 final int wordLength = word.length(); 239 final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength()); 240 // TODO: Must pay attention to locale when changing case. 241 if (all) { 242 sb.append(word.toString().toUpperCase()); 243 } else if (first) { 244 sb.append(Character.toUpperCase(word.charAt(0))); 245 if (wordLength > 1) { 246 sb.append(word.subSequence(1, wordLength)); 247 } 248 } 249 return sb; 250 } 251 252 protected void addBigramToSuggestions(CharSequence bigram) { 253 // TODO: Try to be a little more shrewd with resource allocation. 254 // At the moment we copy this object because the StringBuilders are pooled (see 255 // StringBuilderPool.java) and when we are finished using mSuggestions and 256 // mBigramSuggestions we will take everything from both and insert them back in the 257 // pool, so we can't allow the same object to be in both lists at the same time. 258 final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength()); 259 sb.append(bigram); 260 mSuggestions.add(sb); 261 } 262 263 private static final WordComposer sEmptyWordComposer = new WordComposer(); 264 public SuggestedWords.Builder getBigramPredictionWordBuilder(CharSequence prevWordForBigram, 265 final int correctionMode) { 266 LatinImeLogger.onStartSuggestion(prevWordForBigram); 267 mIsFirstCharCapitalized = false; 268 mIsAllUpperCase = false; 269 mTrailingSingleQuotesCount = 0; 270 collectGarbage(mSuggestions, mPrefMaxSuggestions); 271 Arrays.fill(mScores, 0); 272 273 // Treating USER_TYPED as UNIGRAM suggestion for logging now. 274 LatinImeLogger.onAddSuggestedWord("", Suggest.DIC_USER_TYPED, Dictionary.UNIGRAM); 275 mConsideredWord = ""; 276 277 // Note that if correctionMode != CORRECTION_FULL_BIGRAM, we'll always return the 278 // same empty SuggestedWords.Builder, which has size() == 0 279 if (correctionMode == CORRECTION_FULL_BIGRAM) { 280 // At first character typed, search only the bigrams 281 Arrays.fill(mBigramScores, 0); 282 collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS); 283 284 if (!TextUtils.isEmpty(prevWordForBigram)) { 285 CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); 286 if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) { 287 prevWordForBigram = lowerPrevWord; 288 } 289 for (final Dictionary dictionary : mBigramDictionaries.values()) { 290 dictionary.getBigrams(sEmptyWordComposer, prevWordForBigram, this); 291 } 292 // Nothing entered: return all bigrams for the previous word 293 int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); 294 for (int i = 0; i < insertCount; ++i) { 295 addBigramToSuggestions(mBigramSuggestions.get(i)); 296 } 297 } 298 } 299 300 StringUtils.removeDupes(mSuggestions); 301 302 return new SuggestedWords.Builder().addWords(mSuggestions, null) 303 .setAllowsToBeAutoCorrected(false) 304 .setHasAutoCorrection(false); 305 } 306 307 // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder 308 public SuggestedWords.Builder getSuggestedWordBuilder( 309 final WordComposer wordComposer, CharSequence prevWordForBigram, 310 final ProximityInfo proximityInfo, final int correctionMode) { 311 LatinImeLogger.onStartSuggestion(prevWordForBigram); 312 mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); 313 mIsAllUpperCase = wordComposer.isAllUpperCase(); 314 mTrailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount(); 315 collectGarbage(mSuggestions, mPrefMaxSuggestions); 316 Arrays.fill(mScores, 0); 317 318 final String typedWord = wordComposer.getTypedWord(); 319 final String consideredWord = mTrailingSingleQuotesCount > 0 320 ? typedWord.substring(0, typedWord.length() - mTrailingSingleQuotesCount) 321 : typedWord; 322 // Treating USER_TYPED as UNIGRAM suggestion for logging now. 323 LatinImeLogger.onAddSuggestedWord(typedWord, Suggest.DIC_USER_TYPED, 324 Dictionary.UNIGRAM); 325 mConsideredWord = consideredWord; 326 327 // TODO: Change this scheme - a boolean is not enough. A whitelisted word may be "valid" 328 // but still autocorrected from - in the case the whitelist only capitalizes the word. 329 // The whitelist should be case-insensitive, so it's not possible to be consistent with 330 // a boolean flag. Right now this is handled with a slight hack in 331 // WhitelistDictionary#shouldForciblyAutoCorrectFrom. 332 final boolean allowsToBeAutoCorrected = AutoCorrection.allowsToBeAutoCorrected( 333 getUnigramDictionaries(), consideredWord, wordComposer.isFirstCharCapitalized()); 334 335 if (wordComposer.size() <= 1 && (correctionMode == CORRECTION_FULL_BIGRAM)) { 336 // At first character typed, search only the bigrams 337 Arrays.fill(mBigramScores, 0); 338 collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS); 339 340 if (!TextUtils.isEmpty(prevWordForBigram)) { 341 CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); 342 if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) { 343 prevWordForBigram = lowerPrevWord; 344 } 345 for (final Dictionary dictionary : mBigramDictionaries.values()) { 346 dictionary.getBigrams(wordComposer, prevWordForBigram, this); 347 } 348 if (TextUtils.isEmpty(consideredWord)) { 349 // Nothing entered: return all bigrams for the previous word 350 int insertCount = Math.min(mBigramSuggestions.size(), mPrefMaxSuggestions); 351 for (int i = 0; i < insertCount; ++i) { 352 addBigramToSuggestions(mBigramSuggestions.get(i)); 353 } 354 } else { 355 // Word entered: return only bigrams that match the first char of the typed word 356 final char currentChar = consideredWord.charAt(0); 357 // TODO: Must pay attention to locale when changing case. 358 final char currentCharUpper = Character.toUpperCase(currentChar); 359 int count = 0; 360 final int bigramSuggestionSize = mBigramSuggestions.size(); 361 for (int i = 0; i < bigramSuggestionSize; i++) { 362 final CharSequence bigramSuggestion = mBigramSuggestions.get(i); 363 final char bigramSuggestionFirstChar = bigramSuggestion.charAt(0); 364 if (bigramSuggestionFirstChar == currentChar 365 || bigramSuggestionFirstChar == currentCharUpper) { 366 addBigramToSuggestions(bigramSuggestion); 367 if (++count > mPrefMaxSuggestions) break; 368 } 369 } 370 } 371 } 372 373 } else if (wordComposer.size() > 1) { 374 // At second character typed, search the unigrams (scores being affected by bigrams) 375 for (final String key : mUnigramDictionaries.keySet()) { 376 // Skip UserUnigramDictionary and WhitelistDictionary to lookup 377 if (key.equals(DICT_KEY_USER_UNIGRAM) || key.equals(DICT_KEY_WHITELIST)) 378 continue; 379 final Dictionary dictionary = mUnigramDictionaries.get(key); 380 if (mTrailingSingleQuotesCount > 0) { 381 final WordComposer tmpWordComposer = new WordComposer(wordComposer); 382 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 383 tmpWordComposer.deleteLast(); 384 } 385 dictionary.getWords(tmpWordComposer, this, proximityInfo); 386 } else { 387 dictionary.getWords(wordComposer, this, proximityInfo); 388 } 389 } 390 } 391 392 CharSequence whitelistedWord = capitalizeWord(mIsAllUpperCase, mIsFirstCharCapitalized, 393 mWhiteListDictionary.getWhitelistedWord(consideredWord)); 394 395 final boolean hasAutoCorrection; 396 if (CORRECTION_FULL == correctionMode 397 || CORRECTION_FULL_BIGRAM == correctionMode) { 398 final CharSequence autoCorrection = 399 AutoCorrection.computeAutoCorrectionWord(mUnigramDictionaries, wordComposer, 400 mSuggestions, mScores, consideredWord, mAutoCorrectionThreshold, 401 whitelistedWord); 402 hasAutoCorrection = (null != autoCorrection); 403 } else { 404 hasAutoCorrection = false; 405 } 406 407 if (whitelistedWord != null) { 408 if (mTrailingSingleQuotesCount > 0) { 409 final StringBuilder sb = new StringBuilder(whitelistedWord); 410 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 411 sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); 412 } 413 mSuggestions.add(0, sb.toString()); 414 } else { 415 mSuggestions.add(0, whitelistedWord); 416 } 417 } 418 419 mSuggestions.add(0, typedWord); 420 StringUtils.removeDupes(mSuggestions); 421 422 if (DBG) { 423 final CharSequence autoCorrectionSuggestion = mSuggestions.get(0); 424 final int autoCorrectionSuggestionScore = mScores[0]; 425 double normalizedScore = BinaryDictionary.calcNormalizedScore( 426 typedWord, autoCorrectionSuggestion.toString(), 427 autoCorrectionSuggestionScore); 428 ArrayList<SuggestedWords.SuggestedWordInfo> scoreInfoList = 429 new ArrayList<SuggestedWords.SuggestedWordInfo>(); 430 scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false)); 431 for (int i = 0; i < mScores.length; ++i) { 432 if (normalizedScore > 0) { 433 final String scoreThreshold = String.format("%d (%4.2f)", mScores[i], 434 normalizedScore); 435 scoreInfoList.add( 436 new SuggestedWords.SuggestedWordInfo(scoreThreshold, false)); 437 normalizedScore = 0.0; 438 } else { 439 final String score = Integer.toString(mScores[i]); 440 scoreInfoList.add(new SuggestedWords.SuggestedWordInfo(score, false)); 441 } 442 } 443 for (int i = mScores.length; i < mSuggestions.size(); ++i) { 444 scoreInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false)); 445 } 446 return new SuggestedWords.Builder().addWords(mSuggestions, scoreInfoList) 447 .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected) 448 .setHasAutoCorrection(hasAutoCorrection); 449 } 450 return new SuggestedWords.Builder().addWords(mSuggestions, null) 451 .setAllowsToBeAutoCorrected(allowsToBeAutoCorrected) 452 .setHasAutoCorrection(hasAutoCorrection); 453 } 454 455 @Override 456 public boolean addWord(final char[] word, final int offset, final int length, int score, 457 final int dicTypeId, final int dataType) { 458 int dataTypeForLog = dataType; 459 final ArrayList<CharSequence> suggestions; 460 final int[] sortedScores; 461 final int prefMaxSuggestions; 462 if (dataType == Dictionary.BIGRAM) { 463 suggestions = mBigramSuggestions; 464 sortedScores = mBigramScores; 465 prefMaxSuggestions = PREF_MAX_BIGRAMS; 466 } else { 467 suggestions = mSuggestions; 468 sortedScores = mScores; 469 prefMaxSuggestions = mPrefMaxSuggestions; 470 } 471 472 int pos = 0; 473 474 // Check if it's the same word, only caps are different 475 if (StringUtils.equalsIgnoreCase(mConsideredWord, word, offset, length)) { 476 // TODO: remove this surrounding if clause and move this logic to 477 // getSuggestedWordBuilder. 478 if (suggestions.size() > 0) { 479 final String currentHighestWord = suggestions.get(0).toString(); 480 // If the current highest word is also equal to typed word, we need to compare 481 // frequency to determine the insertion position. This does not ensure strictly 482 // correct ordering, but ensures the top score is on top which is enough for 483 // removing duplicates correctly. 484 if (StringUtils.equalsIgnoreCase(currentHighestWord, word, offset, length) 485 && score <= sortedScores[0]) { 486 pos = 1; 487 } 488 } 489 } else { 490 if (dataType == Dictionary.UNIGRAM) { 491 // Check if the word was already added before (by bigram data) 492 int bigramSuggestion = searchBigramSuggestion(word,offset,length); 493 if(bigramSuggestion >= 0) { 494 dataTypeForLog = Dictionary.BIGRAM; 495 // turn freq from bigram into multiplier specified above 496 double multiplier = (((double) mBigramScores[bigramSuggestion]) 497 / MAXIMUM_BIGRAM_FREQUENCY) 498 * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN) 499 + BIGRAM_MULTIPLIER_MIN; 500 /* Log.d(TAG,"bigram num: " + bigramSuggestion 501 + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString() 502 + " currentScore: " + score + " bigramScore: " 503 + mBigramScores[bigramSuggestion] 504 + " multiplier: " + multiplier); */ 505 score = (int)Math.round((score * multiplier)); 506 } 507 } 508 509 // Check the last one's score and bail 510 if (sortedScores[prefMaxSuggestions - 1] >= score) return true; 511 while (pos < prefMaxSuggestions) { 512 if (sortedScores[pos] < score 513 || (sortedScores[pos] == score && length < suggestions.get(pos).length())) { 514 break; 515 } 516 pos++; 517 } 518 } 519 if (pos >= prefMaxSuggestions) { 520 return true; 521 } 522 523 System.arraycopy(sortedScores, pos, sortedScores, pos + 1, prefMaxSuggestions - pos - 1); 524 sortedScores[pos] = score; 525 final StringBuilder sb = StringBuilderPool.getStringBuilder(getApproxMaxWordLength()); 526 // TODO: Must pay attention to locale when changing case. 527 if (mIsAllUpperCase) { 528 sb.append(new String(word, offset, length).toUpperCase()); 529 } else if (mIsFirstCharCapitalized) { 530 sb.append(Character.toUpperCase(word[offset])); 531 if (length > 1) { 532 sb.append(word, offset + 1, length - 1); 533 } 534 } else { 535 sb.append(word, offset, length); 536 } 537 for (int i = mTrailingSingleQuotesCount - 1; i >= 0; --i) { 538 sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE); 539 } 540 suggestions.add(pos, sb); 541 if (suggestions.size() > prefMaxSuggestions) { 542 final CharSequence garbage = suggestions.remove(prefMaxSuggestions); 543 if (garbage instanceof StringBuilder) { 544 StringBuilderPool.recycle((StringBuilder)garbage); 545 } 546 } else { 547 LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog); 548 } 549 return true; 550 } 551 552 private int searchBigramSuggestion(final char[] word, final int offset, final int length) { 553 // TODO This is almost O(n^2). Might need fix. 554 // search whether the word appeared in bigram data 555 int bigramSuggestSize = mBigramSuggestions.size(); 556 for(int i = 0; i < bigramSuggestSize; i++) { 557 if(mBigramSuggestions.get(i).length() == length) { 558 boolean chk = true; 559 for(int j = 0; j < length; j++) { 560 if(mBigramSuggestions.get(i).charAt(j) != word[offset+j]) { 561 chk = false; 562 break; 563 } 564 } 565 if(chk) return i; 566 } 567 } 568 569 return -1; 570 } 571 572 private static void collectGarbage(ArrayList<CharSequence> suggestions, 573 int prefMaxSuggestions) { 574 int poolSize = StringBuilderPool.getSize(); 575 int garbageSize = suggestions.size(); 576 while (poolSize < prefMaxSuggestions && garbageSize > 0) { 577 final CharSequence garbage = suggestions.get(garbageSize - 1); 578 if (garbage instanceof StringBuilder) { 579 StringBuilderPool.recycle((StringBuilder)garbage); 580 poolSize++; 581 } 582 garbageSize--; 583 } 584 if (poolSize == prefMaxSuggestions + 1) { 585 Log.w("Suggest", "String pool got too big: " + poolSize); 586 } 587 suggestions.clear(); 588 } 589 590 public void close() { 591 final Set<Dictionary> dictionaries = new HashSet<Dictionary>(); 592 dictionaries.addAll(mUnigramDictionaries.values()); 593 dictionaries.addAll(mBigramDictionaries.values()); 594 for (final Dictionary dictionary : dictionaries) { 595 dictionary.close(); 596 } 597 mMainDict = null; 598 } 599 600 // TODO: Resolve the inconsistencies between the native auto correction algorithms and 601 // this safety net 602 public static boolean shouldBlockAutoCorrectionBySafetyNet( 603 final SuggestedWords.Builder suggestedWordsBuilder, final Suggest suggest, 604 final double autoCorrectionThreshold) { 605 // Safety net for auto correction. 606 // Actually if we hit this safety net, it's actually a bug. 607 if (suggestedWordsBuilder.size() <= 1 || suggestedWordsBuilder.isTypedWordValid()) { 608 return false; 609 } 610 // If user selected aggressive auto correction mode, there is no need to use the safety 611 // net. 612 if (0 == autoCorrectionThreshold) { 613 return false; 614 } 615 final CharSequence typedWord = suggestedWordsBuilder.getWord(0); 616 // If the length of typed word is less than MINIMUM_SAFETY_NET_CHAR_LENGTH, 617 // we should not use net because relatively edit distance can be big. 618 if (typedWord.length() < Suggest.MINIMUM_SAFETY_NET_CHAR_LENGTH) { 619 return false; 620 } 621 final CharSequence suggestionWord = suggestedWordsBuilder.getWord(1); 622 final int typedWordLength = typedWord.length(); 623 final int maxEditDistanceOfNativeDictionary = 624 (typedWordLength < 5 ? 2 : typedWordLength / 2) + 1; 625 final int distance = BinaryDictionary.editDistance( 626 typedWord.toString(), suggestionWord.toString()); 627 if (DBG) { 628 Log.d(TAG, "Autocorrected edit distance = " + distance 629 + ", " + maxEditDistanceOfNativeDictionary); 630 } 631 if (distance > maxEditDistanceOfNativeDictionary) { 632 if (DBG) { 633 Log.e(TAG, "Safety net: before = " + typedWord + ", after = " + suggestionWord); 634 Log.e(TAG, "(Error) The edit distance of this correction exceeds limit. " 635 + "Turning off auto-correction."); 636 } 637 return true; 638 } else { 639 return false; 640 } 641 } 642} 643