BinaryDictionary.java revision 05172bf1a5693c2e108e91436b98ecd35d2dadad
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20import android.util.Log;
21import android.util.SparseArray;
22
23import com.android.inputmethod.annotations.UsedForTesting;
24import com.android.inputmethod.keyboard.ProximityInfo;
25import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
26import com.android.inputmethod.latin.makedict.DictionaryHeader;
27import com.android.inputmethod.latin.makedict.FormatSpec;
28import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
29import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
30import com.android.inputmethod.latin.makedict.WordProperty;
31import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
32import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
33import com.android.inputmethod.latin.utils.FileUtils;
34import com.android.inputmethod.latin.utils.JniUtils;
35import com.android.inputmethod.latin.utils.LanguageModelParam;
36import com.android.inputmethod.latin.utils.StringUtils;
37
38import java.io.File;
39import java.util.ArrayList;
40import java.util.Arrays;
41import java.util.HashMap;
42import java.util.Locale;
43import java.util.Map;
44
45import javax.annotation.Nonnull;
46
47/**
48 * Implements a static, compacted, binary dictionary of standard words.
49 */
50// TODO: All methods which should be locked need to have a suffix "Locked".
51public final class BinaryDictionary extends Dictionary {
    // Tag used for the Log.e() calls in this class.
    private static final String TAG = BinaryDictionary.class.getSimpleName();

    // The cutoff returned by native for auto-commit confidence.
    // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;

    // Query strings, presumably meant to be passed to getPropertyForGettingStats(); which
    // queries are actually understood is decided on the native side (not visible here).
    @UsedForTesting
    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
    @UsedForTesting
    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";

    public static final int NOT_A_VALID_TIMESTAMP = -1;

    // Format to get unigram flags from native side via getWordPropertyNative().
    // The *_INDEX constants are positions in the boolean[] of length
    // FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT that getWordProperty() hands to native code.
    private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
    private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
    private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1;
    private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2;
    private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
    private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;

    // Format to get probability and historical info from native side via getWordPropertyNative().
    // The *_INDEX constants are positions in the int[] of length
    // FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT that getWordProperty() allocates.
    public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
    public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
    public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
    public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
    public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;

    // Suffixes appended to the dictionary file path by migrateTo(): the first names the
    // temporary migrated file, the second the marker directory that records a migration in
    // progress.
    public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate";
    public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating";

    // Handle returned by openNative() / createOnMemoryNative(). 0 means no native dictionary
    // is open (see isValidDictionary() and closeInternalLocked()).
    private long mNativeDict;
    // Length given to the file-backed constructor; 0 for an on-memory dictionary.
    private final long mDictSize;
    // Path the dictionary is flushed to and reopened from.
    private final String mDictFilePath;
    // Copied into the native suggest options on every getSuggestions() call.
    private final boolean mUseFullEditDistance;
    // Passed to openNative() whenever the dictionary file is (re)loaded.
    private final boolean mIsUpdatable;
    // Set to true after a successful mutating native call; reset by loadDictionary().
    private boolean mHasUpdated;

    // Traverse sessions keyed by session id. All accesses in this class synchronize on the
    // array itself.
    private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>();
95
96    // TODO: There should be a way to remove used DicTraverseSession objects from
97    // {@code mDicTraverseSessions}.
98    private DicTraverseSession getTraverseSession(final int traverseSessionId) {
99        synchronized(mDicTraverseSessions) {
100            DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
101            if (traverseSession == null) {
102                traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
103                mDicTraverseSessions.put(traverseSessionId, traverseSession);
104            }
105            return traverseSession;
106        }
107    }
108
109    /**
110     * Constructs binary dictionary using existing dictionary file.
111     * @param filename the name of the file to read through native code.
112     * @param offset the offset of the dictionary data within the file.
113     * @param length the length of the binary data.
114     * @param useFullEditDistance whether to use the full edit distance in suggestions
115     * @param dictType the dictionary type, as a human-readable string
116     * @param isUpdatable whether to open the dictionary file in writable mode.
117     */
118    public BinaryDictionary(final String filename, final long offset, final long length,
119            final boolean useFullEditDistance, final Locale locale, final String dictType,
120            final boolean isUpdatable) {
121        super(dictType, locale);
122        mDictSize = length;
123        mDictFilePath = filename;
124        mIsUpdatable = isUpdatable;
125        mHasUpdated = false;
126        mUseFullEditDistance = useFullEditDistance;
127        loadDictionary(filename, offset, length, isUpdatable);
128    }
129
130    /**
131     * Constructs binary dictionary on memory.
132     * @param filename the name of the file used to flush.
133     * @param useFullEditDistance whether to use the full edit distance in suggestions
134     * @param dictType the dictionary type, as a human-readable string
135     * @param formatVersion the format version of the dictionary
136     * @param attributeMap the attributes of the dictionary
137     */
138    public BinaryDictionary(final String filename, final boolean useFullEditDistance,
139            final Locale locale, final String dictType, final long formatVersion,
140            final Map<String, String> attributeMap) {
141        super(dictType, locale);
142        mDictSize = 0;
143        mDictFilePath = filename;
144        // On memory dictionary is always updatable.
145        mIsUpdatable = true;
146        mHasUpdated = false;
147        mUseFullEditDistance = useFullEditDistance;
148        final String[] keyArray = new String[attributeMap.size()];
149        final String[] valueArray = new String[attributeMap.size()];
150        int index = 0;
151        for (final String key : attributeMap.keySet()) {
152            keyArray[index] = key;
153            valueArray[index] = attributeMap.get(key);
154            index++;
155        }
156        mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
157    }
158
159
    // Runs once at class initialization, before any of the native methods below can be
    // invoked. Presumably the library loaded here is the one implementing them.
    static {
        JniUtils.loadNativeLibrary();
    }

    // Throughout this class the {@code dict} argument of the methods below is always the
    // handle stored in mNativeDict.

    // Creation of the native dictionary: the returned long is stored in mNativeDict.
    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
            boolean isUpdatable);
    private static native long createOnMemoryNative(long formatVersion,
            String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray);
    // Header, persistence and lifecycle. getHeader() passes one-element arrays and empty
    // lists as the out* arguments and reads the results back from them.
    private static native void getHeaderInfoNative(long dict, int[] outHeaderSize,
            int[] outFormatVersion, ArrayList<int[]> outAttributeKeys,
            ArrayList<int[]> outAttributeValues);
    private static native boolean flushNative(long dict, String filePath);
    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
    private static native boolean flushWithGCNative(long dict, String filePath);
    private static native void closeNative(long dict);
    private static native int getFormatVersionNative(long dict);
    // Lookups. Words are passed as code point arrays.
    private static native int getProbabilityNative(long dict, int[] word);
    private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word);
    private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays,
            boolean[] isBeginningOfSentenceArray, int[] word);
    // The out* arguments are allocated by getWordProperty() and read back after the call.
    private static native void getWordPropertyNative(long dict, int[] word,
            boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
            int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray,
            ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
            ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo,
            ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
    // The int returned is handed back by getNextWordProperty() as the next token.
    private static native int getNextWordNative(long dict, int token, int[] outCodePoints,
            boolean[] outIsBeginningOfSentence);
    // getSuggestions() supplies every array argument from the buffers of a DicTraverseSession.
    private static native void getSuggestionsNative(long dict, long proximityInfo,
            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
            int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints,
            int[] outputScores, int[] outputIndices, int[] outputTypes,
            int[] outputAutoCommitFirstWordConfidence,
            float[] inOutWeightOfLangModelVsSpatialModel);
    // Mutators. The Java wrappers set mHasUpdated when these return true.
    private static native boolean addUnigramEntryNative(long dict, int[] word, int probability,
            int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
            boolean isNotAWord, boolean isPossiblyOffensive, int timestamp);
    private static native boolean removeUnigramEntryNative(long dict, int[] word);
    private static native boolean addNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, int probability, int timestamp);
    private static native boolean removeNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word);
    private static native boolean updateEntriesForWordWithNgramContextNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, boolean isValidWord, int count, int timestamp);
    // The int returned is fed back as startIndex by addMultipleDictionaryEntries(); a value
    // <= 0 makes that method stop.
    private static native int addMultipleDictionaryEntriesNative(long dict,
            LanguageModelParam[] languageModelParams, int startIndex);
    // Statistics, corruption check and format migration.
    private static native String getPropertyNative(long dict, String query);
    private static native boolean isCorruptedNative(long dict);
    private static native boolean migrateNative(long dict, String dictFilePath,
            long newFormatVersion);
214
215    // TODO: Move native dict into session
216    private final void loadDictionary(final String path, final long startOffset,
217            final long length, final boolean isUpdatable) {
218        mHasUpdated = false;
219        mNativeDict = openNative(path, startOffset, length, isUpdatable);
220    }
221
222    // TODO: Check isCorrupted() for main dictionaries.
223    public boolean isCorrupted() {
224        if (!isValidDictionary()) {
225            return false;
226        }
227        if (!isCorruptedNative(mNativeDict)) {
228            return false;
229        }
230        // TODO: Record the corruption.
231        Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted.");
232        Log.e(TAG, "locale: " + mLocale);
233        Log.e(TAG, "dict size: " + mDictSize);
234        Log.e(TAG, "updatable: " + mIsUpdatable);
235        return true;
236    }
237
238    public DictionaryHeader getHeader() throws UnsupportedFormatException {
239        if (mNativeDict == 0) {
240            return null;
241        }
242        final int[] outHeaderSize = new int[1];
243        final int[] outFormatVersion = new int[1];
244        final ArrayList<int[]> outAttributeKeys = new ArrayList<>();
245        final ArrayList<int[]> outAttributeValues = new ArrayList<>();
246        getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys,
247                outAttributeValues);
248        final HashMap<String, String> attributes = new HashMap<>();
249        for (int i = 0; i < outAttributeKeys.size(); i++) {
250            final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray(
251                    outAttributeKeys.get(i));
252            final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray(
253                    outAttributeValues.get(i));
254            attributes.put(attributeKey, attributeValue);
255        }
256        final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals(
257                attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY));
258        return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes),
259                new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo));
260    }
261
262    @Override
263    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
264            final NgramContext ngramContext, final ProximityInfo proximityInfo,
265            final SettingsValuesForSuggestion settingsValuesForSuggestion,
266            final int sessionId, final float weightForLocale,
267            final float[] inOutWeightOfLangModelVsSpatialModel) {
268        if (!isValidDictionary()) {
269            return null;
270        }
271        final DicTraverseSession session = getTraverseSession(sessionId);
272        Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE);
273        ngramContext.outputToArray(session.mPrevWordCodePointArrays,
274                session.mIsBeginningOfSentenceArray);
275        final InputPointers inputPointers = composer.getInputPointers();
276        final boolean isGesture = composer.isBatchMode();
277        final int inputSize;
278        if (!isGesture) {
279            inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount(
280                    session.mInputCodePoints);
281            if (inputSize < 0) {
282                return null;
283            }
284        } else {
285            inputSize = inputPointers.getPointerSize();
286        }
287        session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance);
288        session.mNativeSuggestOptions.setIsGesture(isGesture);
289        session.mNativeSuggestOptions.setBlockOffensiveWords(
290                settingsValuesForSuggestion.mBlockPotentiallyOffensive);
291        session.mNativeSuggestOptions.setSpaceAwareGestureEnabled(
292                settingsValuesForSuggestion.mSpaceAwareGestureEnabled);
293        session.mNativeSuggestOptions.setAdditionalFeaturesOptions(
294                settingsValuesForSuggestion.mAdditionalFeaturesSettingValues);
295        session.mNativeSuggestOptions.setWeightForLocale(weightForLocale);
296        if (inOutWeightOfLangModelVsSpatialModel != null) {
297            session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
298                    inOutWeightOfLangModelVsSpatialModel[0];
299        } else {
300            session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
301                    Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL;
302        }
303        // TOOD: Pass multiple previous words information for n-gram.
304        getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
305                getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
306                inputPointers.getYCoordinates(), inputPointers.getTimes(),
307                inputPointers.getPointerIds(), session.mInputCodePoints, inputSize,
308                session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays,
309                session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(),
310                session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores,
311                session.mSpaceIndices, session.mOutputTypes,
312                session.mOutputAutoCommitFirstWordConfidence,
313                session.mInputOutputWeightOfLangModelVsSpatialModel);
314        if (inOutWeightOfLangModelVsSpatialModel != null) {
315            inOutWeightOfLangModelVsSpatialModel[0] =
316                    session.mInputOutputWeightOfLangModelVsSpatialModel[0];
317        }
318        final int count = session.mOutputSuggestionCount[0];
319        final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>();
320        for (int j = 0; j < count; ++j) {
321            final int start = j * Constants.DICTIONARY_MAX_WORD_LENGTH;
322            int len = 0;
323            while (len < Constants.DICTIONARY_MAX_WORD_LENGTH
324                    && session.mOutputCodePoints[start + len] != 0) {
325                ++len;
326            }
327            if (len > 0) {
328                suggestions.add(new SuggestedWordInfo(
329                        new String(session.mOutputCodePoints, start, len),
330                        (int)(session.mOutputScores[j] * weightForLocale), session.mOutputTypes[j],
331                        this /* sourceDict */,
332                        session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
333                        session.mOutputAutoCommitFirstWordConfidence[0]));
334            }
335        }
336        return suggestions;
337    }
338
339    public boolean isValidDictionary() {
340        return mNativeDict != 0;
341    }
342
    /**
     * Returns the format version that native code reports for the current native handle.
     */
    // NOTE(review): unlike most public methods in this class there is no isValidDictionary()
    // guard here, so a 0 handle is passed straight to native code — confirm the native side
    // tolerates that.
    public int getFormatVersion() {
        return getFormatVersionNative(mNativeDict);
    }
346
347    @Override
348    public boolean isInDictionary(final String word) {
349        return getFrequency(word) != NOT_A_PROBABILITY;
350    }
351
352    @Override
353    public int getFrequency(final String word) {
354        if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY;
355        int[] codePoints = StringUtils.toCodePointArray(word);
356        return getProbabilityNative(mNativeDict, codePoints);
357    }
358
359    @Override
360    public int getMaxFrequencyOfExactMatches(final String word) {
361        if (TextUtils.isEmpty(word)) return NOT_A_PROBABILITY;
362        int[] codePoints = StringUtils.toCodePointArray(word);
363        return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints);
364    }
365
366    @UsedForTesting
367    public boolean isValidNgram(final NgramContext ngramContext, final String word) {
368        return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY;
369    }
370
371    public int getNgramProbability(final NgramContext ngramContext, final String word) {
372        if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
373            return NOT_A_PROBABILITY;
374        }
375        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
376        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
377        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
378        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
379        return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays,
380                isBeginningOfSentenceArray, wordCodePoints);
381    }
382
383    public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) {
384        if (word == null) {
385            return null;
386        }
387        final int[] codePoints = StringUtils.toCodePointArray(word);
388        final int[] outCodePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH];
389        final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
390        final int[] outProbabilityInfo =
391                new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
392        final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>();
393        final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray =
394                new ArrayList<>();
395        final ArrayList<int[]> outNgramTargets = new ArrayList<>();
396        final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>();
397        final ArrayList<int[]> outShortcutTargets = new ArrayList<>();
398        final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>();
399        getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints,
400                outFlags, outProbabilityInfo, outNgramPrevWordsArray,
401                outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets,
402                outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
403        return new WordProperty(codePoints,
404                outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
405                outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX],
406                outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX],
407                outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX],
408                outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
409                outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
410                outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
411                outShortcutProbabilities);
412    }
413
414    public static class GetNextWordPropertyResult {
415        public WordProperty mWordProperty;
416        public int mNextToken;
417
418        public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) {
419            mWordProperty = wordProperty;
420            mNextToken = nextToken;
421        }
422    }
423
424    /**
425     * Method to iterate all words in the dictionary for makedict.
426     * If token is 0, this method newly starts iterating the dictionary.
427     */
428    public GetNextWordPropertyResult getNextWordProperty(final int token) {
429        final int[] codePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH];
430        final boolean[] isBeginningOfSentence = new boolean[1];
431        final int nextToken = getNextWordNative(mNativeDict, token, codePoints,
432                isBeginningOfSentence);
433        final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
434        return new GetNextWordPropertyResult(
435                getWordProperty(word, isBeginningOfSentence[0]), nextToken);
436    }
437
438    // Add a unigram entry to binary dictionary with unigram attributes in native code.
439    public boolean addUnigramEntry(final String word, final int probability,
440            final String shortcutTarget, final int shortcutProbability,
441            final boolean isBeginningOfSentence, final boolean isNotAWord,
442            final boolean isPossiblyOffensive, final int timestamp) {
443        if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
444            return false;
445        }
446        final int[] codePoints = StringUtils.toCodePointArray(word);
447        final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
448                StringUtils.toCodePointArray(shortcutTarget) : null;
449        if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
450                shortcutProbability, isBeginningOfSentence, isNotAWord, isPossiblyOffensive,
451                timestamp)) {
452            return false;
453        }
454        mHasUpdated = true;
455        return true;
456    }
457
458    // Remove a unigram entry from the binary dictionary in native code.
459    public boolean removeUnigramEntry(final String word) {
460        if (TextUtils.isEmpty(word)) {
461            return false;
462        }
463        final int[] codePoints = StringUtils.toCodePointArray(word);
464        if (!removeUnigramEntryNative(mNativeDict, codePoints)) {
465            return false;
466        }
467        mHasUpdated = true;
468        return true;
469    }
470
471    // Add an n-gram entry to the binary dictionary with timestamp in native code.
472    public boolean addNgramEntry(final NgramContext ngramContext, final String word,
473            final int probability, final int timestamp) {
474        if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
475            return false;
476        }
477        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
478        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
479        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
480        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
481        if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays,
482                isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) {
483            return false;
484        }
485        mHasUpdated = true;
486        return true;
487    }
488
489    // Remove an n-gram entry from the binary dictionary in native code.
490    public boolean removeNgramEntry(final NgramContext ngramContext, final String word) {
491        if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
492            return false;
493        }
494        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
495        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
496        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
497        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
498        if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays,
499                isBeginningOfSentenceArray, wordCodePoints)) {
500            return false;
501        }
502        mHasUpdated = true;
503        return true;
504    }
505
506    // Update entries for the word occurrence with the ngramContext.
507    public boolean updateEntriesForWordWithNgramContext(@Nonnull final NgramContext ngramContext,
508            final String word, final boolean isValidWord, final int count, final int timestamp) {
509        if (TextUtils.isEmpty(word)) {
510            return false;
511        }
512        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
513        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
514        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
515        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
516        if (!updateEntriesForWordWithNgramContextNative(mNativeDict, prevWordCodePointArrays,
517                isBeginningOfSentenceArray, wordCodePoints, isValidWord, count, timestamp)) {
518            return false;
519        }
520        mHasUpdated = true;
521        return true;
522    }
523
524    @UsedForTesting
525    public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
526        if (!isValidDictionary()) return;
527        int processedParamCount = 0;
528        while (processedParamCount < languageModelParams.length) {
529            if (needsToRunGC(true /* mindsBlockByGC */)) {
530                flushWithGC();
531            }
532            processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict,
533                    languageModelParams, processedParamCount);
534            mHasUpdated = true;
535            if (processedParamCount <= 0) {
536                return;
537            }
538        }
539    }
540
541    private void reopen() {
542        close();
543        final File dictFile = new File(mDictFilePath);
544        // WARNING: Because we pass 0 as the offset and file.length() as the length, this can
545        // only be called for actual files. Right now it's only called by the flush() family of
546        // functions, which require an updatable dictionary, so it's okay. But beware.
547        loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
548                dictFile.length(), mIsUpdatable);
549    }
550
551    // Flush to dict file if the dictionary has been updated.
552    public boolean flush() {
553        if (!isValidDictionary()) return false;
554        if (mHasUpdated) {
555            if (!flushNative(mNativeDict, mDictFilePath)) {
556                return false;
557            }
558            reopen();
559        }
560        return true;
561    }
562
563    // Run GC and flush to dict file if the dictionary has been updated.
564    public boolean flushWithGCIfHasUpdated() {
565        if (mHasUpdated) {
566            return flushWithGC();
567        }
568        return true;
569    }
570
571    // Run GC and flush to dict file.
572    public boolean flushWithGC() {
573        if (!isValidDictionary()) return false;
574        if (!flushWithGCNative(mNativeDict, mDictFilePath)) {
575            return false;
576        }
577        reopen();
578        return true;
579    }
580
581    /**
582     * Checks whether GC is needed to run or not.
583     * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
584     * the blocking in some situations such as in idle time or just before closing.
585     * @return whether GC is needed to run or not.
586     */
587    public boolean needsToRunGC(final boolean mindsBlockByGC) {
588        if (!isValidDictionary()) return false;
589        return needsToRunGCNative(mNativeDict, mindsBlockByGC);
590    }
591
592    public boolean migrateTo(final int newFormatVersion) {
593        if (!isValidDictionary()) {
594            return false;
595        }
596        final File isMigratingDir =
597                new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION);
598        if (isMigratingDir.exists()) {
599            isMigratingDir.delete();
600            Log.e(TAG, "Previous migration attempt failed probably due to a crash. "
601                        + "Giving up using the old dictionary (" + mDictFilePath + ").");
602            return false;
603        }
604        if (!isMigratingDir.mkdir()) {
605            Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath()
606                    + ") to record migration.");
607            return false;
608        }
609        try {
610            final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION;
611            if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) {
612                return false;
613            }
614            close();
615            final File dictFile = new File(mDictFilePath);
616            final File tmpDictFile = new File(tmpDictFilePath);
617            if (!FileUtils.deleteRecursively(dictFile)) {
618                return false;
619            }
620            if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) {
621                return false;
622            }
623            loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
624                    dictFile.length(), mIsUpdatable);
625            return true;
626        } finally {
627            isMigratingDir.delete();
628        }
629    }
630
631    @UsedForTesting
632    public String getPropertyForGettingStats(final String query) {
633        if (!isValidDictionary()) return "";
634        return getPropertyNative(mNativeDict, query);
635    }
636
637    @Override
638    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
639        return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
640    }
641
642    @Override
643    public void close() {
644        synchronized (mDicTraverseSessions) {
645            final int sessionsSize = mDicTraverseSessions.size();
646            for (int index = 0; index < sessionsSize; ++index) {
647                final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
648                if (traverseSession != null) {
649                    traverseSession.close();
650                }
651            }
652            mDicTraverseSessions.clear();
653        }
654        closeInternalLocked();
655    }
656
657    private synchronized void closeInternalLocked() {
658        if (mNativeDict != 0) {
659            closeNative(mNativeDict);
660            mNativeDict = 0;
661        }
662    }
663
    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
    /**
     * Releases the native dictionary, if still open, when this object is finalized. Only
     * closeInternalLocked() is called here; the traverse sessions held in
     * mDicTraverseSessions are not closed by this method.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            closeInternalLocked();
        } finally {
            // Always chain to the superclass finalizer, even if closing throws.
            super.finalize();
        }
    }
673}
674