Suggest.java revision 9f67e12a0e3f77985fb8bafe0db4c00e32317b9a
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.text.AutoText; 21import android.text.TextUtils; 22import android.util.Log; 23import android.view.View; 24 25import java.io.File; 26import java.util.ArrayList; 27import java.util.Arrays; 28 29/** 30 * This class loads a dictionary and provides a list of suggestions for a given sequence of 31 * characters. This includes corrections and completions. 32 */ 33public class Suggest implements Dictionary.WordCallback { 34 35 public static final String TAG = Suggest.class.getSimpleName(); 36 37 public static final int APPROX_MAX_WORD_LENGTH = 32; 38 39 public static final int CORRECTION_NONE = 0; 40 public static final int CORRECTION_BASIC = 1; 41 public static final int CORRECTION_FULL = 2; 42 public static final int CORRECTION_FULL_BIGRAM = 3; 43 44 /** 45 * Words that appear in both bigram and unigram data gets multiplier ranging from 46 * BIGRAM_MULTIPLIER_MIN to BIGRAM_MULTIPLIER_MAX depending on the frequency score from 47 * bigram data. 48 */ 49 public static final double BIGRAM_MULTIPLIER_MIN = 1.2; 50 public static final double BIGRAM_MULTIPLIER_MAX = 1.5; 51 52 /** 53 * Maximum possible bigram frequency. Will depend on how many bits are being used in data 54 * structure. Maximum bigram freqeuncy will get the BIGRAM_MULTIPLIER_MAX as the multiplier. 55 */ 56 public static final int MAXIMUM_BIGRAM_FREQUENCY = 127; 57 58 public static final int DIC_USER_TYPED = 0; 59 public static final int DIC_MAIN = 1; 60 public static final int DIC_USER = 2; 61 public static final int DIC_AUTO = 3; 62 public static final int DIC_CONTACTS = 4; 63 // If you add a type of dictionary, increment DIC_TYPE_LAST_ID 64 public static final int DIC_TYPE_LAST_ID = 4; 65 66 static final int LARGE_DICTIONARY_THRESHOLD = 200 * 1000; 67 68 private static final boolean DBG = LatinImeLogger.sDBG; 69 70 private AutoCorrection mAutoCorrection; 71 72 private BinaryDictionary mMainDict; 73 74 private Dictionary mUserDictionary; 75 76 private Dictionary mAutoDictionary; 77 78 private Dictionary mContactsDictionary; 79 80 private Dictionary mUserBigramDictionary; 81 82 private int mPrefMaxSuggestions = 12; 83 84 private static final int PREF_MAX_BIGRAMS = 60; 85 86 private boolean mQuickFixesEnabled; 87 88 private double mAutoCorrectionThreshold; 89 private int[] mPriorities = new int[mPrefMaxSuggestions]; 90 private int[] mBigramPriorities = new int[PREF_MAX_BIGRAMS]; 91 92 private ArrayList<CharSequence> mSuggestions = new ArrayList<CharSequence>(); 93 ArrayList<CharSequence> mBigramSuggestions = new ArrayList<CharSequence>(); 94 private ArrayList<CharSequence> mStringPool = new ArrayList<CharSequence>(); 95 private String mLowerOriginalWord; 96 97 // TODO: Remove these member variables by passing more context to addWord() callback method 98 private boolean mIsFirstCharCapitalized; 99 private boolean mIsAllUpperCase; 100 101 private int mCorrectionMode = CORRECTION_BASIC; 102 103 public Suggest(Context context, int dictionaryResId) { 104 mMainDict = BinaryDictionary.initDictionary(context, dictionaryResId, DIC_MAIN); 105 init(); 106 } 107 108 /* package for test */ Suggest(File dictionary, long startOffset, long length) { 109 mMainDict = BinaryDictionary.initDictionary(dictionary, startOffset, length, DIC_MAIN); 110 init(); 111 } 112 113 private void init() { 114 mAutoCorrection = new AutoCorrection(); 115 initPool(); 116 } 117 118 private void initPool() { 119 for (int i = 0; i < mPrefMaxSuggestions; i++) { 120 StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); 121 mStringPool.add(sb); 122 } 123 } 124 125 public void setQuickFixesEnabled(boolean enabled) { 126 mQuickFixesEnabled = enabled; 127 } 128 129 public int getCorrectionMode() { 130 return mCorrectionMode; 131 } 132 133 public void setCorrectionMode(int mode) { 134 mCorrectionMode = mode; 135 } 136 137 public boolean hasMainDictionary() { 138 return mMainDict != null && mMainDict.getSize() > LARGE_DICTIONARY_THRESHOLD; 139 } 140 141 public int getApproxMaxWordLength() { 142 return APPROX_MAX_WORD_LENGTH; 143 } 144 145 /** 146 * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted 147 * before the main dictionary, if set. 148 */ 149 public void setUserDictionary(Dictionary userDictionary) { 150 mUserDictionary = userDictionary; 151 } 152 153 /** 154 * Sets an optional contacts dictionary resource to be loaded. 155 */ 156 public void setContactsDictionary(Dictionary userDictionary) { 157 mContactsDictionary = userDictionary; 158 } 159 160 public void setAutoDictionary(Dictionary autoDictionary) { 161 mAutoDictionary = autoDictionary; 162 } 163 164 public void setUserBigramDictionary(Dictionary userBigramDictionary) { 165 mUserBigramDictionary = userBigramDictionary; 166 } 167 168 public void setAutoCorrectionThreshold(double threshold) { 169 mAutoCorrectionThreshold = threshold; 170 } 171 172 public boolean isAggressiveAutoCorrectionMode() { 173 return (mAutoCorrectionThreshold == 0); 174 } 175 176 /** 177 * Number of suggestions to generate from the input key sequence. This has 178 * to be a number between 1 and 100 (inclusive). 179 * @param maxSuggestions 180 * @throws IllegalArgumentException if the number is out of range 181 */ 182 public void setMaxSuggestions(int maxSuggestions) { 183 if (maxSuggestions < 1 || maxSuggestions > 100) { 184 throw new IllegalArgumentException("maxSuggestions must be between 1 and 100"); 185 } 186 mPrefMaxSuggestions = maxSuggestions; 187 mPriorities = new int[mPrefMaxSuggestions]; 188 mBigramPriorities = new int[PREF_MAX_BIGRAMS]; 189 collectGarbage(mSuggestions, mPrefMaxSuggestions); 190 while (mStringPool.size() < mPrefMaxSuggestions) { 191 StringBuilder sb = new StringBuilder(getApproxMaxWordLength()); 192 mStringPool.add(sb); 193 } 194 } 195 196 /** 197 * Returns a object which represents suggested words that match the list of character codes 198 * passed in. This object contents will be overwritten the next time this function is called. 199 * @param view a view for retrieving the context for AutoText 200 * @param wordComposer contains what is currently being typed 201 * @param prevWordForBigram previous word (used only for bigram) 202 * @return suggested words object. 203 */ 204 public SuggestedWords getSuggestions(View view, WordComposer wordComposer, 205 CharSequence prevWordForBigram) { 206 return getSuggestedWordBuilder(view, wordComposer, prevWordForBigram).build(); 207 } 208 209 // TODO: cleanup dictionaries looking up and suggestions building with SuggestedWords.Builder 210 public SuggestedWords.Builder getSuggestedWordBuilder(View view, WordComposer wordComposer, 211 CharSequence prevWordForBigram) { 212 LatinImeLogger.onStartSuggestion(prevWordForBigram); 213 mAutoCorrection.init(); 214 mIsFirstCharCapitalized = wordComposer.isFirstCharCapitalized(); 215 mIsAllUpperCase = wordComposer.isAllUpperCase(); 216 collectGarbage(mSuggestions, mPrefMaxSuggestions); 217 Arrays.fill(mPriorities, 0); 218 219 // Save a lowercase version of the original word 220 CharSequence typedWord = wordComposer.getTypedWord(); 221 if (typedWord != null) { 222 final String typedWordString = typedWord.toString(); 223 typedWord = typedWordString; 224 mLowerOriginalWord = typedWordString.toLowerCase(); 225 // Treating USER_TYPED as UNIGRAM suggestion for logging now. 226 LatinImeLogger.onAddSuggestedWord(typedWordString, Suggest.DIC_USER_TYPED, 227 Dictionary.DataType.UNIGRAM); 228 } else { 229 mLowerOriginalWord = ""; 230 } 231 232 if (wordComposer.size() == 1 && (mCorrectionMode == CORRECTION_FULL_BIGRAM 233 || mCorrectionMode == CORRECTION_BASIC)) { 234 // At first character typed, search only the bigrams 235 Arrays.fill(mBigramPriorities, 0); 236 collectGarbage(mBigramSuggestions, PREF_MAX_BIGRAMS); 237 238 if (!TextUtils.isEmpty(prevWordForBigram)) { 239 CharSequence lowerPrevWord = prevWordForBigram.toString().toLowerCase(); 240 if (mMainDict != null && mMainDict.isValidWord(lowerPrevWord)) { 241 prevWordForBigram = lowerPrevWord; 242 } 243 if (mUserBigramDictionary != null) { 244 mUserBigramDictionary.getBigrams(wordComposer, prevWordForBigram, this); 245 } 246 if (mContactsDictionary != null) { 247 mContactsDictionary.getBigrams(wordComposer, prevWordForBigram, this); 248 } 249 if (mMainDict != null) { 250 mMainDict.getBigrams(wordComposer, prevWordForBigram, this); 251 } 252 char currentChar = wordComposer.getTypedWord().charAt(0); 253 char currentCharUpper = Character.toUpperCase(currentChar); 254 int count = 0; 255 int bigramSuggestionSize = mBigramSuggestions.size(); 256 for (int i = 0; i < bigramSuggestionSize; i++) { 257 if (mBigramSuggestions.get(i).charAt(0) == currentChar 258 || mBigramSuggestions.get(i).charAt(0) == currentCharUpper) { 259 int poolSize = mStringPool.size(); 260 StringBuilder sb = poolSize > 0 ? 261 (StringBuilder) mStringPool.remove(poolSize - 1) 262 : new StringBuilder(getApproxMaxWordLength()); 263 sb.setLength(0); 264 sb.append(mBigramSuggestions.get(i)); 265 mSuggestions.add(count++, sb); 266 if (count > mPrefMaxSuggestions) break; 267 } 268 } 269 } 270 271 } else if (wordComposer.size() > 1) { 272 // At second character typed, search the unigrams (scores being affected by bigrams) 273 if (mUserDictionary != null || mContactsDictionary != null) { 274 if (mUserDictionary != null) { 275 mUserDictionary.getWords(wordComposer, this); 276 } 277 if (mContactsDictionary != null) { 278 mContactsDictionary.getWords(wordComposer, this); 279 } 280 } 281 if (mMainDict != null) mMainDict.getWords(wordComposer, this); 282 } 283 CharSequence autoText = null; 284 final String typedWordString = typedWord == null ? null : typedWord.toString(); 285 if (typedWord != null) { 286 // Apply quick fix only for the typed word. 287 if (mQuickFixesEnabled) { 288 final String lowerCaseTypedWord = typedWordString.toLowerCase(); 289 CharSequence tempAutoText = 290 AutoText.get(lowerCaseTypedWord, 0, lowerCaseTypedWord.length(), view); 291 // Is there an AutoText (also known as Quick Fixes) correction? 292 // Capitalize as needed 293 if (!TextUtils.isEmpty(tempAutoText) 294 && (mIsAllUpperCase || mIsFirstCharCapitalized)) { 295 final int tempAutoTextLength = tempAutoText.length(); 296 final int poolSize = mStringPool.size(); 297 final StringBuilder sb = 298 poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) 299 : new StringBuilder(getApproxMaxWordLength()); 300 sb.setLength(0); 301 if (mIsAllUpperCase) { 302 sb.append(tempAutoText.toString().toUpperCase()); 303 } else if (mIsFirstCharCapitalized) { 304 sb.append(Character.toUpperCase(tempAutoText.charAt(0))); 305 if (tempAutoTextLength > 1) { 306 sb.append(tempAutoText.subSequence(1, tempAutoTextLength)); 307 } 308 } 309 tempAutoText = sb.toString(); 310 } 311 boolean canAdd = tempAutoText != null; 312 // Is that correction already the current prediction (or original word)? 313 canAdd &= !TextUtils.equals(tempAutoText, typedWord); 314 // Is that correction already the next predicted word? 315 if (canAdd && mSuggestions.size() > 0 && mCorrectionMode != CORRECTION_BASIC) { 316 canAdd &= !TextUtils.equals(tempAutoText, mSuggestions.get(0)); 317 } 318 if (canAdd) { 319 if (DBG) { 320 Log.d(TAG, "Auto corrected by AUTOTEXT."); 321 } 322 autoText = tempAutoText; 323 } 324 } 325 } 326 327 mAutoCorrection.updateAutoCorrectionStatus(this, wordComposer, mSuggestions, mPriorities, 328 typedWord, mAutoCorrectionThreshold, mCorrectionMode, autoText); 329 330 if (autoText != null) { 331 mSuggestions.add(0, autoText); 332 } 333 334 if (typedWord != null) { 335 mSuggestions.add(0, typedWordString); 336 } 337 removeDupes(); 338 339 if (DBG) { 340 double normalizedScore = mAutoCorrection.getNormalizedScore(); 341 ArrayList<SuggestedWords.SuggestedWordInfo> frequencyInfoList = 342 new ArrayList<SuggestedWords.SuggestedWordInfo>(); 343 frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("+", false)); 344 final int priorityLength = mPriorities.length; 345 for (int i = 0; i < priorityLength; ++i) { 346 if (normalizedScore > 0) { 347 final String priorityThreshold = Integer.toString(mPriorities[i]) + " (" + 348 normalizedScore + ")"; 349 frequencyInfoList.add( 350 new SuggestedWords.SuggestedWordInfo(priorityThreshold, false)); 351 normalizedScore = 0.0; 352 } else { 353 final String priority = Integer.toString(mPriorities[i]); 354 frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo(priority, false)); 355 } 356 } 357 for (int i = priorityLength; i < mSuggestions.size(); ++i) { 358 frequencyInfoList.add(new SuggestedWords.SuggestedWordInfo("--", false)); 359 } 360 return new SuggestedWords.Builder().addWords(mSuggestions, frequencyInfoList); 361 } 362 return new SuggestedWords.Builder().addWords(mSuggestions, null); 363 } 364 365 private void removeDupes() { 366 final ArrayList<CharSequence> suggestions = mSuggestions; 367 if (suggestions.size() < 2) return; 368 int i = 1; 369 // Don't cache suggestions.size(), since we may be removing items 370 while (i < suggestions.size()) { 371 final CharSequence cur = suggestions.get(i); 372 // Compare each candidate with each previous candidate 373 for (int j = 0; j < i; j++) { 374 CharSequence previous = suggestions.get(j); 375 if (TextUtils.equals(cur, previous)) { 376 removeFromSuggestions(i); 377 i--; 378 break; 379 } 380 } 381 i++; 382 } 383 } 384 385 private void removeFromSuggestions(int index) { 386 CharSequence garbage = mSuggestions.remove(index); 387 if (garbage != null && garbage instanceof StringBuilder) { 388 mStringPool.add(garbage); 389 } 390 } 391 392 public boolean hasAutoCorrection() { 393 return mAutoCorrection.hasAutoCorrection(); 394 } 395 396 private static boolean compareCaseInsensitive(final String lowerOriginalWord, 397 final char[] word, final int offset, final int length) { 398 final int originalLength = lowerOriginalWord.length(); 399 if (originalLength == length && Character.isUpperCase(word[offset])) { 400 for (int i = 0; i < originalLength; i++) { 401 if (lowerOriginalWord.charAt(i) != Character.toLowerCase(word[offset+i])) { 402 return false; 403 } 404 } 405 return true; 406 } 407 return false; 408 } 409 410 @Override 411 public boolean addWord(final char[] word, final int offset, final int length, int freq, 412 final int dicTypeId, final Dictionary.DataType dataType) { 413 Dictionary.DataType dataTypeForLog = dataType; 414 ArrayList<CharSequence> suggestions; 415 int[] priorities; 416 int prefMaxSuggestions; 417 if(dataType == Dictionary.DataType.BIGRAM) { 418 suggestions = mBigramSuggestions; 419 priorities = mBigramPriorities; 420 prefMaxSuggestions = PREF_MAX_BIGRAMS; 421 } else { 422 suggestions = mSuggestions; 423 priorities = mPriorities; 424 prefMaxSuggestions = mPrefMaxSuggestions; 425 } 426 427 int pos = 0; 428 429 // Check if it's the same word, only caps are different 430 if (compareCaseInsensitive(mLowerOriginalWord, word, offset, length)) { 431 pos = 0; 432 } else { 433 if (dataType == Dictionary.DataType.UNIGRAM) { 434 // Check if the word was already added before (by bigram data) 435 int bigramSuggestion = searchBigramSuggestion(word,offset,length); 436 if(bigramSuggestion >= 0) { 437 dataTypeForLog = Dictionary.DataType.BIGRAM; 438 // turn freq from bigram into multiplier specified above 439 double multiplier = (((double) mBigramPriorities[bigramSuggestion]) 440 / MAXIMUM_BIGRAM_FREQUENCY) 441 * (BIGRAM_MULTIPLIER_MAX - BIGRAM_MULTIPLIER_MIN) 442 + BIGRAM_MULTIPLIER_MIN; 443 /* Log.d(TAG,"bigram num: " + bigramSuggestion 444 + " wordB: " + mBigramSuggestions.get(bigramSuggestion).toString() 445 + " currentPriority: " + freq + " bigramPriority: " 446 + mBigramPriorities[bigramSuggestion] 447 + " multiplier: " + multiplier); */ 448 freq = (int)Math.round((freq * multiplier)); 449 } 450 } 451 452 // Check the last one's priority and bail 453 if (priorities[prefMaxSuggestions - 1] >= freq) return true; 454 while (pos < prefMaxSuggestions) { 455 if (priorities[pos] < freq 456 || (priorities[pos] == freq && length < suggestions.get(pos).length())) { 457 break; 458 } 459 pos++; 460 } 461 } 462 if (pos >= prefMaxSuggestions) { 463 return true; 464 } 465 466 System.arraycopy(priorities, pos, priorities, pos + 1, prefMaxSuggestions - pos - 1); 467 priorities[pos] = freq; 468 int poolSize = mStringPool.size(); 469 StringBuilder sb = poolSize > 0 ? (StringBuilder) mStringPool.remove(poolSize - 1) 470 : new StringBuilder(getApproxMaxWordLength()); 471 sb.setLength(0); 472 if (mIsAllUpperCase) { 473 sb.append(new String(word, offset, length).toUpperCase()); 474 } else if (mIsFirstCharCapitalized) { 475 sb.append(Character.toUpperCase(word[offset])); 476 if (length > 1) { 477 sb.append(word, offset + 1, length - 1); 478 } 479 } else { 480 sb.append(word, offset, length); 481 } 482 suggestions.add(pos, sb); 483 if (suggestions.size() > prefMaxSuggestions) { 484 CharSequence garbage = suggestions.remove(prefMaxSuggestions); 485 if (garbage instanceof StringBuilder) { 486 mStringPool.add(garbage); 487 } 488 } else { 489 LatinImeLogger.onAddSuggestedWord(sb.toString(), dicTypeId, dataTypeForLog); 490 } 491 return true; 492 } 493 494 private int searchBigramSuggestion(final char[] word, final int offset, final int length) { 495 // TODO This is almost O(n^2). Might need fix. 496 // search whether the word appeared in bigram data 497 int bigramSuggestSize = mBigramSuggestions.size(); 498 for(int i = 0; i < bigramSuggestSize; i++) { 499 if(mBigramSuggestions.get(i).length() == length) { 500 boolean chk = true; 501 for(int j = 0; j < length; j++) { 502 if(mBigramSuggestions.get(i).charAt(j) != word[offset+j]) { 503 chk = false; 504 break; 505 } 506 } 507 if(chk) return i; 508 } 509 } 510 511 return -1; 512 } 513 514 public boolean isValidWord(final CharSequence word) { 515 if (word == null || word.length() == 0 || mMainDict == null) { 516 return false; 517 } 518 return mMainDict.isValidWord(word) 519 || (mUserDictionary != null && mUserDictionary.isValidWord(word)) 520 || (mAutoDictionary != null && mAutoDictionary.isValidWord(word)) 521 || (mContactsDictionary != null && mContactsDictionary.isValidWord(word)); 522 } 523 524 private void collectGarbage(ArrayList<CharSequence> suggestions, int prefMaxSuggestions) { 525 int poolSize = mStringPool.size(); 526 int garbageSize = suggestions.size(); 527 while (poolSize < prefMaxSuggestions && garbageSize > 0) { 528 CharSequence garbage = suggestions.get(garbageSize - 1); 529 if (garbage != null && garbage instanceof StringBuilder) { 530 mStringPool.add(garbage); 531 poolSize++; 532 } 533 garbageSize--; 534 } 535 if (poolSize == prefMaxSuggestions + 1) { 536 Log.w("Suggest", "String pool got too big: " + poolSize); 537 } 538 suggestions.clear(); 539 } 540 541 public void close() { 542 if (mMainDict != null) { 543 mMainDict.close(); 544 mMainDict = null; 545 } 546 if (mUserDictionary != null) { 547 mUserDictionary.close(); 548 mUserDictionary = null; 549 } 550 if (mUserBigramDictionary != null) { 551 mUserBigramDictionary.close(); 552 mUserBigramDictionary = null; 553 } 554 if (mContactsDictionary != null) { 555 mContactsDictionary.close(); 556 mContactsDictionary = null; 557 } 558 if (mAutoDictionary != null) { 559 mAutoDictionary.close(); 560 mAutoDictionary = null; 561 } 562 } 563} 564