// BinaryDictionary.java revision d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.text.TextUtils; 20import android.util.Log; 21import android.util.SparseArray; 22 23import com.android.inputmethod.annotations.UsedForTesting; 24import com.android.inputmethod.keyboard.ProximityInfo; 25import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 26import com.android.inputmethod.latin.makedict.DictionaryHeader; 27import com.android.inputmethod.latin.makedict.FormatSpec; 28import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; 29import com.android.inputmethod.latin.makedict.UnsupportedFormatException; 30import com.android.inputmethod.latin.makedict.WordProperty; 31import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion; 32import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 33import com.android.inputmethod.latin.utils.FileUtils; 34import com.android.inputmethod.latin.utils.JniUtils; 35import com.android.inputmethod.latin.utils.LanguageModelParam; 36import com.android.inputmethod.latin.utils.StringUtils; 37 38import java.io.File; 39import java.util.ArrayList; 40import java.util.Arrays; 41import java.util.HashMap; 42import java.util.Locale; 43import java.util.Map; 44 45import javax.annotation.Nonnull; 46 47/** 48 * Implements a static, compacted, binary dictionary of standard words. 
49 */ 50// TODO: All methods which should be locked need to have a suffix "Locked". 51public final class BinaryDictionary extends Dictionary { 52 private static final String TAG = BinaryDictionary.class.getSimpleName(); 53 54 // The cutoff returned by native for auto-commit confidence. 55 // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h 56 private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000; 57 58 @UsedForTesting 59 public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; 60 @UsedForTesting 61 public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT"; 62 @UsedForTesting 63 public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT"; 64 @UsedForTesting 65 public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT"; 66 67 public static final int NOT_A_VALID_TIMESTAMP = -1; 68 69 // Format to get unigram flags from native side via getWordPropertyNative(). 70 private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5; 71 private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0; 72 private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1; 73 private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2; 74 private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; 75 private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4; 76 77 // Format to get probability and historical info from native side via getWordPropertyNative(). 
78 public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4; 79 public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0; 80 public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1; 81 public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2; 82 public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3; 83 84 public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate"; 85 public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating"; 86 87 private long mNativeDict; 88 private final long mDictSize; 89 private final String mDictFilePath; 90 private final boolean mUseFullEditDistance; 91 private final boolean mIsUpdatable; 92 private boolean mHasUpdated; 93 94 private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>(); 95 96 // TODO: There should be a way to remove used DicTraverseSession objects from 97 // {@code mDicTraverseSessions}. 98 private DicTraverseSession getTraverseSession(final int traverseSessionId) { 99 synchronized(mDicTraverseSessions) { 100 DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId); 101 if (traverseSession == null) { 102 traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize); 103 mDicTraverseSessions.put(traverseSessionId, traverseSession); 104 } 105 return traverseSession; 106 } 107 } 108 109 /** 110 * Constructs binary dictionary using existing dictionary file. 111 * @param filename the name of the file to read through native code. 112 * @param offset the offset of the dictionary data within the file. 113 * @param length the length of the binary data. 114 * @param useFullEditDistance whether to use the full edit distance in suggestions 115 * @param dictType the dictionary type, as a human-readable string 116 * @param isUpdatable whether to open the dictionary file in writable mode. 
117 */ 118 public BinaryDictionary(final String filename, final long offset, final long length, 119 final boolean useFullEditDistance, final Locale locale, final String dictType, 120 final boolean isUpdatable) { 121 super(dictType, locale); 122 mDictSize = length; 123 mDictFilePath = filename; 124 mIsUpdatable = isUpdatable; 125 mHasUpdated = false; 126 mUseFullEditDistance = useFullEditDistance; 127 loadDictionary(filename, offset, length, isUpdatable); 128 } 129 130 /** 131 * Constructs binary dictionary on memory. 132 * @param filename the name of the file used to flush. 133 * @param useFullEditDistance whether to use the full edit distance in suggestions 134 * @param dictType the dictionary type, as a human-readable string 135 * @param formatVersion the format version of the dictionary 136 * @param attributeMap the attributes of the dictionary 137 */ 138 public BinaryDictionary(final String filename, final boolean useFullEditDistance, 139 final Locale locale, final String dictType, final long formatVersion, 140 final Map<String, String> attributeMap) { 141 super(dictType, locale); 142 mDictSize = 0; 143 mDictFilePath = filename; 144 // On memory dictionary is always updatable. 
145 mIsUpdatable = true; 146 mHasUpdated = false; 147 mUseFullEditDistance = useFullEditDistance; 148 final String[] keyArray = new String[attributeMap.size()]; 149 final String[] valueArray = new String[attributeMap.size()]; 150 int index = 0; 151 for (final String key : attributeMap.keySet()) { 152 keyArray[index] = key; 153 valueArray[index] = attributeMap.get(key); 154 index++; 155 } 156 mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray); 157 } 158 159 160 static { 161 JniUtils.loadNativeLibrary(); 162 } 163 164 private static native long openNative(String sourceDir, long dictOffset, long dictSize, 165 boolean isUpdatable); 166 private static native long createOnMemoryNative(long formatVersion, 167 String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray); 168 private static native void getHeaderInfoNative(long dict, int[] outHeaderSize, 169 int[] outFormatVersion, ArrayList<int[]> outAttributeKeys, 170 ArrayList<int[]> outAttributeValues); 171 private static native boolean flushNative(long dict, String filePath); 172 private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC); 173 private static native boolean flushWithGCNative(long dict, String filePath); 174 private static native void closeNative(long dict); 175 private static native int getFormatVersionNative(long dict); 176 private static native int getProbabilityNative(long dict, int[] word); 177 private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word); 178 private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays, 179 boolean[] isBeginningOfSentenceArray, int[] word); 180 private static native void getWordPropertyNative(long dict, int[] word, 181 boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags, 182 int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray, 183 ArrayList<boolean[]> 
outNgramPrevWordIsBeginningOfSentenceArray, 184 ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo, 185 ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities); 186 private static native int getNextWordNative(long dict, int token, int[] outCodePoints, 187 boolean[] outIsBeginningOfSentence); 188 private static native void getSuggestionsNative(long dict, long proximityInfo, 189 long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times, 190 int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions, 191 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, 192 int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints, 193 int[] outputScores, int[] outputIndices, int[] outputTypes, 194 int[] outputAutoCommitFirstWordConfidence, 195 float[] inOutWeightOfLangModelVsSpatialModel); 196 private static native boolean addUnigramEntryNative(long dict, int[] word, int probability, 197 int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence, 198 boolean isNotAWord, boolean isPossiblyOffensive, int timestamp); 199 private static native boolean removeUnigramEntryNative(long dict, int[] word); 200 private static native boolean addNgramEntryNative(long dict, 201 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, 202 int[] word, int probability, int timestamp); 203 private static native boolean removeNgramEntryNative(long dict, 204 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word); 205 private static native boolean updateEntriesForWordWithNgramContextNative(long dict, 206 int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, 207 int[] word, boolean isValidWord, int count, int timestamp); 208 private static native int addMultipleDictionaryEntriesNative(long dict, 209 LanguageModelParam[] languageModelParams, int startIndex); 210 private static native String getPropertyNative(long 
dict, String query); 211 private static native boolean isCorruptedNative(long dict); 212 private static native boolean migrateNative(long dict, String dictFilePath, 213 long newFormatVersion); 214 215 // TODO: Move native dict into session 216 private final void loadDictionary(final String path, final long startOffset, 217 final long length, final boolean isUpdatable) { 218 mHasUpdated = false; 219 mNativeDict = openNative(path, startOffset, length, isUpdatable); 220 } 221 222 // TODO: Check isCorrupted() for main dictionaries. 223 public boolean isCorrupted() { 224 if (!isValidDictionary()) { 225 return false; 226 } 227 if (!isCorruptedNative(mNativeDict)) { 228 return false; 229 } 230 // TODO: Record the corruption. 231 Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted."); 232 Log.e(TAG, "locale: " + mLocale); 233 Log.e(TAG, "dict size: " + mDictSize); 234 Log.e(TAG, "updatable: " + mIsUpdatable); 235 return true; 236 } 237 238 public DictionaryHeader getHeader() throws UnsupportedFormatException { 239 if (mNativeDict == 0) { 240 return null; 241 } 242 final int[] outHeaderSize = new int[1]; 243 final int[] outFormatVersion = new int[1]; 244 final ArrayList<int[]> outAttributeKeys = new ArrayList<>(); 245 final ArrayList<int[]> outAttributeValues = new ArrayList<>(); 246 getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys, 247 outAttributeValues); 248 final HashMap<String, String> attributes = new HashMap<>(); 249 for (int i = 0; i < outAttributeKeys.size(); i++) { 250 final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray( 251 outAttributeKeys.get(i)); 252 final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray( 253 outAttributeValues.get(i)); 254 attributes.put(attributeKey, attributeValue); 255 } 256 final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals( 257 attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY)); 258 return new 
DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes), 259 new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo)); 260 } 261 262 @Override 263 public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer, 264 final NgramContext ngramContext, final ProximityInfo proximityInfo, 265 final SettingsValuesForSuggestion settingsValuesForSuggestion, 266 final int sessionId, final float weightForLocale, 267 final float[] inOutWeightOfLangModelVsSpatialModel) { 268 if (!isValidDictionary()) { 269 return null; 270 } 271 final DicTraverseSession session = getTraverseSession(sessionId); 272 Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE); 273 ngramContext.outputToArray(session.mPrevWordCodePointArrays, 274 session.mIsBeginningOfSentenceArray); 275 final InputPointers inputPointers = composer.getInputPointers(); 276 final boolean isGesture = composer.isBatchMode(); 277 final int inputSize; 278 if (!isGesture) { 279 inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount( 280 session.mInputCodePoints); 281 if (inputSize < 0) { 282 return null; 283 } 284 } else { 285 inputSize = inputPointers.getPointerSize(); 286 } 287 session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance); 288 session.mNativeSuggestOptions.setIsGesture(isGesture); 289 session.mNativeSuggestOptions.setBlockOffensiveWords( 290 settingsValuesForSuggestion.mBlockPotentiallyOffensive); 291 session.mNativeSuggestOptions.setSpaceAwareGestureEnabled( 292 settingsValuesForSuggestion.mSpaceAwareGestureEnabled); 293 session.mNativeSuggestOptions.setAdditionalFeaturesOptions( 294 settingsValuesForSuggestion.mAdditionalFeaturesSettingValues); 295 session.mNativeSuggestOptions.setWeightForLocale(weightForLocale); 296 if (inOutWeightOfLangModelVsSpatialModel != null) { 297 session.mInputOutputWeightOfLangModelVsSpatialModel[0] = 298 inOutWeightOfLangModelVsSpatialModel[0]; 299 } else { 300 
session.mInputOutputWeightOfLangModelVsSpatialModel[0] = 301 Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL; 302 } 303 // TOOD: Pass multiple previous words information for n-gram. 304 getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), 305 getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(), 306 inputPointers.getYCoordinates(), inputPointers.getTimes(), 307 inputPointers.getPointerIds(), session.mInputCodePoints, inputSize, 308 session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays, 309 session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(), 310 session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores, 311 session.mSpaceIndices, session.mOutputTypes, 312 session.mOutputAutoCommitFirstWordConfidence, 313 session.mInputOutputWeightOfLangModelVsSpatialModel); 314 if (inOutWeightOfLangModelVsSpatialModel != null) { 315 inOutWeightOfLangModelVsSpatialModel[0] = 316 session.mInputOutputWeightOfLangModelVsSpatialModel[0]; 317 } 318 final int count = session.mOutputSuggestionCount[0]; 319 final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>(); 320 for (int j = 0; j < count; ++j) { 321 final int start = j * Constants.DICTIONARY_MAX_WORD_LENGTH; 322 int len = 0; 323 while (len < Constants.DICTIONARY_MAX_WORD_LENGTH 324 && session.mOutputCodePoints[start + len] != 0) { 325 ++len; 326 } 327 if (len > 0) { 328 suggestions.add(new SuggestedWordInfo( 329 new String(session.mOutputCodePoints, start, len), 330 (int)(session.mOutputScores[j] * weightForLocale), session.mOutputTypes[j], 331 this /* sourceDict */, 332 session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */, 333 session.mOutputAutoCommitFirstWordConfidence[0])); 334 } 335 } 336 return suggestions; 337 } 338 339 public boolean isValidDictionary() { 340 return mNativeDict != 0; 341 } 342 343 public int getFormatVersion() { 344 return getFormatVersionNative(mNativeDict); 345 } 346 347 
@Override 348 public boolean isInDictionary(final String word) { 349 return getFrequency(word) != NOT_A_PROBABILITY; 350 } 351 352 @Override 353 public int getFrequency(final String word) { 354 if (TextUtils.isEmpty(word)) { 355 return NOT_A_PROBABILITY; 356 } 357 final int[] codePoints = StringUtils.toCodePointArray(word); 358 return getProbabilityNative(mNativeDict, codePoints); 359 } 360 361 @Override 362 public int getMaxFrequencyOfExactMatches(final String word) { 363 if (TextUtils.isEmpty(word)) { 364 return NOT_A_PROBABILITY; 365 } 366 final int[] codePoints = StringUtils.toCodePointArray(word); 367 return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints); 368 } 369 370 @UsedForTesting 371 public boolean isValidNgram(final NgramContext ngramContext, final String word) { 372 return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY; 373 } 374 375 public int getNgramProbability(final NgramContext ngramContext, final String word) { 376 if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { 377 return NOT_A_PROBABILITY; 378 } 379 final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; 380 final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; 381 ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 382 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 383 return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays, 384 isBeginningOfSentenceArray, wordCodePoints); 385 } 386 387 public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) { 388 if (word == null) { 389 return null; 390 } 391 final int[] codePoints = StringUtils.toCodePointArray(word); 392 final int[] outCodePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH]; 393 final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT]; 394 final int[] outProbabilityInfo = 395 new 
int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT]; 396 final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>(); 397 final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray = 398 new ArrayList<>(); 399 final ArrayList<int[]> outNgramTargets = new ArrayList<>(); 400 final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>(); 401 final ArrayList<int[]> outShortcutTargets = new ArrayList<>(); 402 final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>(); 403 getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints, 404 outFlags, outProbabilityInfo, outNgramPrevWordsArray, 405 outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets, 406 outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities); 407 return new WordProperty(codePoints, 408 outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX], 409 outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX], 410 outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX], 411 outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX], 412 outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo, 413 outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray, 414 outNgramTargets, outNgramProbabilityInfo, outShortcutTargets, 415 outShortcutProbabilities); 416 } 417 418 public static class GetNextWordPropertyResult { 419 public WordProperty mWordProperty; 420 public int mNextToken; 421 422 public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) { 423 mWordProperty = wordProperty; 424 mNextToken = nextToken; 425 } 426 } 427 428 /** 429 * Method to iterate all words in the dictionary for makedict. 430 * If token is 0, this method newly starts iterating the dictionary. 
431 */ 432 public GetNextWordPropertyResult getNextWordProperty(final int token) { 433 final int[] codePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH]; 434 final boolean[] isBeginningOfSentence = new boolean[1]; 435 final int nextToken = getNextWordNative(mNativeDict, token, codePoints, 436 isBeginningOfSentence); 437 final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); 438 return new GetNextWordPropertyResult( 439 getWordProperty(word, isBeginningOfSentence[0]), nextToken); 440 } 441 442 // Add a unigram entry to binary dictionary with unigram attributes in native code. 443 public boolean addUnigramEntry(final String word, final int probability, 444 final String shortcutTarget, final int shortcutProbability, 445 final boolean isBeginningOfSentence, final boolean isNotAWord, 446 final boolean isPossiblyOffensive, final int timestamp) { 447 if (word == null || (word.isEmpty() && !isBeginningOfSentence)) { 448 return false; 449 } 450 final int[] codePoints = StringUtils.toCodePointArray(word); 451 final int[] shortcutTargetCodePoints = (shortcutTarget != null) ? 452 StringUtils.toCodePointArray(shortcutTarget) : null; 453 if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints, 454 shortcutProbability, isBeginningOfSentence, isNotAWord, isPossiblyOffensive, 455 timestamp)) { 456 return false; 457 } 458 mHasUpdated = true; 459 return true; 460 } 461 462 // Remove a unigram entry from the binary dictionary in native code. 463 public boolean removeUnigramEntry(final String word) { 464 if (TextUtils.isEmpty(word)) { 465 return false; 466 } 467 final int[] codePoints = StringUtils.toCodePointArray(word); 468 if (!removeUnigramEntryNative(mNativeDict, codePoints)) { 469 return false; 470 } 471 mHasUpdated = true; 472 return true; 473 } 474 475 // Add an n-gram entry to the binary dictionary with timestamp in native code. 
476 public boolean addNgramEntry(final NgramContext ngramContext, final String word, 477 final int probability, final int timestamp) { 478 if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { 479 return false; 480 } 481 final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; 482 final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; 483 ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 484 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 485 if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays, 486 isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) { 487 return false; 488 } 489 mHasUpdated = true; 490 return true; 491 } 492 493 // Remove an n-gram entry from the binary dictionary in native code. 494 public boolean removeNgramEntry(final NgramContext ngramContext, final String word) { 495 if (!ngramContext.isValid() || TextUtils.isEmpty(word)) { 496 return false; 497 } 498 final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; 499 final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; 500 ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 501 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 502 if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays, 503 isBeginningOfSentenceArray, wordCodePoints)) { 504 return false; 505 } 506 mHasUpdated = true; 507 return true; 508 } 509 510 // Update entries for the word occurrence with the ngramContext. 
511 public boolean updateEntriesForWordWithNgramContext(@Nonnull final NgramContext ngramContext, 512 final String word, final boolean isValidWord, final int count, final int timestamp) { 513 if (TextUtils.isEmpty(word)) { 514 return false; 515 } 516 final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][]; 517 final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()]; 518 ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray); 519 final int[] wordCodePoints = StringUtils.toCodePointArray(word); 520 if (!updateEntriesForWordWithNgramContextNative(mNativeDict, prevWordCodePointArrays, 521 isBeginningOfSentenceArray, wordCodePoints, isValidWord, count, timestamp)) { 522 return false; 523 } 524 mHasUpdated = true; 525 return true; 526 } 527 528 @UsedForTesting 529 public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) { 530 if (!isValidDictionary()) { 531 return; 532 } 533 int processedParamCount = 0; 534 while (processedParamCount < languageModelParams.length) { 535 if (needsToRunGC(true /* mindsBlockByGC */)) { 536 flushWithGC(); 537 } 538 processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict, 539 languageModelParams, processedParamCount); 540 mHasUpdated = true; 541 if (processedParamCount <= 0) { 542 return; 543 } 544 } 545 } 546 547 private void reopen() { 548 close(); 549 final File dictFile = new File(mDictFilePath); 550 // WARNING: Because we pass 0 as the offset and file.length() as the length, this can 551 // only be called for actual files. Right now it's only called by the flush() family of 552 // functions, which require an updatable dictionary, so it's okay. But beware. 553 loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, 554 dictFile.length(), mIsUpdatable); 555 } 556 557 // Flush to dict file if the dictionary has been updated. 
558 public boolean flush() { 559 if (!isValidDictionary()) { 560 return false; 561 } 562 if (mHasUpdated) { 563 if (!flushNative(mNativeDict, mDictFilePath)) { 564 return false; 565 } 566 reopen(); 567 } 568 return true; 569 } 570 571 // Run GC and flush to dict file if the dictionary has been updated. 572 public boolean flushWithGCIfHasUpdated() { 573 if (mHasUpdated) { 574 return flushWithGC(); 575 } 576 return true; 577 } 578 579 // Run GC and flush to dict file. 580 public boolean flushWithGC() { 581 if (!isValidDictionary()) { 582 return false; 583 } 584 if (!flushWithGCNative(mNativeDict, mDictFilePath)) { 585 return false; 586 } 587 reopen(); 588 return true; 589 } 590 591 /** 592 * Checks whether GC is needed to run or not. 593 * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about 594 * the blocking in some situations such as in idle time or just before closing. 595 * @return whether GC is needed to run or not. 596 */ 597 public boolean needsToRunGC(final boolean mindsBlockByGC) { 598 if (!isValidDictionary()) { 599 return false; 600 } 601 return needsToRunGCNative(mNativeDict, mindsBlockByGC); 602 } 603 604 public boolean migrateTo(final int newFormatVersion) { 605 if (!isValidDictionary()) { 606 return false; 607 } 608 final File isMigratingDir = 609 new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION); 610 if (isMigratingDir.exists()) { 611 isMigratingDir.delete(); 612 Log.e(TAG, "Previous migration attempt failed probably due to a crash. 
" 613 + "Giving up using the old dictionary (" + mDictFilePath + ")."); 614 return false; 615 } 616 if (!isMigratingDir.mkdir()) { 617 Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath() 618 + ") to record migration."); 619 return false; 620 } 621 try { 622 final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION; 623 if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) { 624 return false; 625 } 626 close(); 627 final File dictFile = new File(mDictFilePath); 628 final File tmpDictFile = new File(tmpDictFilePath); 629 if (!FileUtils.deleteRecursively(dictFile)) { 630 return false; 631 } 632 if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) { 633 return false; 634 } 635 loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */, 636 dictFile.length(), mIsUpdatable); 637 return true; 638 } finally { 639 isMigratingDir.delete(); 640 } 641 } 642 643 @UsedForTesting 644 public String getPropertyForGettingStats(final String query) { 645 if (!isValidDictionary()) { 646 return ""; 647 } 648 return getPropertyNative(mNativeDict, query); 649 } 650 651 @Override 652 public boolean shouldAutoCommit(final SuggestedWordInfo candidate) { 653 return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT; 654 } 655 656 @Override 657 public void close() { 658 synchronized (mDicTraverseSessions) { 659 final int sessionsSize = mDicTraverseSessions.size(); 660 for (int index = 0; index < sessionsSize; ++index) { 661 final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index); 662 if (traverseSession != null) { 663 traverseSession.close(); 664 } 665 } 666 mDicTraverseSessions.clear(); 667 } 668 closeInternalLocked(); 669 } 670 671 private synchronized void closeInternalLocked() { 672 if (mNativeDict != 0) { 673 closeNative(mNativeDict); 674 mNativeDict = 0; 675 } 676 } 677 678 // TODO: Manage BinaryDictionary instances without using WeakReference or something. 
679 @Override 680 protected void finalize() throws Throwable { 681 try { 682 closeInternalLocked(); 683 } finally { 684 super.finalize(); 685 } 686 } 687} 688