BinaryDictionary.java revision d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20import android.util.Log;
21import android.util.SparseArray;
22
23import com.android.inputmethod.annotations.UsedForTesting;
24import com.android.inputmethod.keyboard.ProximityInfo;
25import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
26import com.android.inputmethod.latin.makedict.DictionaryHeader;
27import com.android.inputmethod.latin.makedict.FormatSpec;
28import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
29import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
30import com.android.inputmethod.latin.makedict.WordProperty;
31import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
32import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
33import com.android.inputmethod.latin.utils.FileUtils;
34import com.android.inputmethod.latin.utils.JniUtils;
35import com.android.inputmethod.latin.utils.LanguageModelParam;
36import com.android.inputmethod.latin.utils.StringUtils;
37
38import java.io.File;
39import java.util.ArrayList;
40import java.util.Arrays;
41import java.util.HashMap;
42import java.util.Locale;
43import java.util.Map;
44
45import javax.annotation.Nonnull;
46
47/**
48 * Implements a static, compacted, binary dictionary of standard words.
49 */
50// TODO: All methods which should be locked need to have a suffix "Locked".
51public final class BinaryDictionary extends Dictionary {
    // Tag used for the Log.e() calls in this class.
    private static final String TAG = BinaryDictionary.class.getSimpleName();

    // The cutoff returned by native for auto-commit confidence.
    // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
    // shouldAutoCommit() requires a confidence strictly greater than this value.
    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;

    // Query strings exposed for tests.
    // NOTE(review): presumably these are the keys accepted by getPropertyForGettingStats();
    // confirm against the native implementation of getPropertyNative().
    @UsedForTesting
    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
    @UsedForTesting
    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
    @UsedForTesting
    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";

    // NOTE(review): looks like a sentinel for "no timestamp" in the timestamp parameters of the
    // add/update methods; not referenced elsewhere in this class - confirm with callers.
    public static final int NOT_A_VALID_TIMESTAMP = -1;

    // Format to get unigram flags from native side via getWordPropertyNative().
    // FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT is the size of the flag array handed to native
    // code; the *_INDEX constants are the positions read back from it in getWordProperty().
    private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
    private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
    private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1;
    private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2;
    private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
    private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;

    // Format to get probability and historical info from native side via getWordPropertyNative().
    // FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT is the size of the probability info
    // array allocated in getWordProperty(); the *_INDEX constants name its slots.
    public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
    public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
    public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
    public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
    public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;

    // Appended to the dictionary file path to name the temporary file written by migrateTo().
    public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate";
    // Appended to the dictionary file path to name the marker directory that migrateTo()
    // creates while a migration is in progress.
    public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating";
86
    // Handle returned by openNative() / createOnMemoryNative(). 0 means there is no usable
    // native dictionary (see isValidDictionary()); reset to 0 by closeInternalLocked().
    private long mNativeDict;
    // Length passed to the file-backed constructor; 0 for on-memory dictionaries.
    private final long mDictSize;
    // Path of the dictionary file; used for flushing, reopening and migration.
    private final String mDictFilePath;
    // Forwarded to the native suggest options on every getSuggestions() call.
    private final boolean mUseFullEditDistance;
    // Whether the dictionary is opened in writable mode; always true for on-memory dictionaries.
    private final boolean mIsUpdatable;
    // Set to true after a successful modification; cleared whenever loadDictionary() runs.
    private boolean mHasUpdated;

    // Traverse sessions keyed by session id. Accesses are synchronized on this object.
    private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>();
95
96    // TODO: There should be a way to remove used DicTraverseSession objects from
97    // {@code mDicTraverseSessions}.
98    private DicTraverseSession getTraverseSession(final int traverseSessionId) {
99        synchronized(mDicTraverseSessions) {
100            DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
101            if (traverseSession == null) {
102                traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
103                mDicTraverseSessions.put(traverseSessionId, traverseSession);
104            }
105            return traverseSession;
106        }
107    }
108
109    /**
110     * Constructs binary dictionary using existing dictionary file.
111     * @param filename the name of the file to read through native code.
112     * @param offset the offset of the dictionary data within the file.
113     * @param length the length of the binary data.
114     * @param useFullEditDistance whether to use the full edit distance in suggestions
115     * @param dictType the dictionary type, as a human-readable string
116     * @param isUpdatable whether to open the dictionary file in writable mode.
117     */
118    public BinaryDictionary(final String filename, final long offset, final long length,
119            final boolean useFullEditDistance, final Locale locale, final String dictType,
120            final boolean isUpdatable) {
121        super(dictType, locale);
122        mDictSize = length;
123        mDictFilePath = filename;
124        mIsUpdatable = isUpdatable;
125        mHasUpdated = false;
126        mUseFullEditDistance = useFullEditDistance;
127        loadDictionary(filename, offset, length, isUpdatable);
128    }
129
130    /**
131     * Constructs binary dictionary on memory.
132     * @param filename the name of the file used to flush.
133     * @param useFullEditDistance whether to use the full edit distance in suggestions
134     * @param dictType the dictionary type, as a human-readable string
135     * @param formatVersion the format version of the dictionary
136     * @param attributeMap the attributes of the dictionary
137     */
138    public BinaryDictionary(final String filename, final boolean useFullEditDistance,
139            final Locale locale, final String dictType, final long formatVersion,
140            final Map<String, String> attributeMap) {
141        super(dictType, locale);
142        mDictSize = 0;
143        mDictFilePath = filename;
144        // On memory dictionary is always updatable.
145        mIsUpdatable = true;
146        mHasUpdated = false;
147        mUseFullEditDistance = useFullEditDistance;
148        final String[] keyArray = new String[attributeMap.size()];
149        final String[] valueArray = new String[attributeMap.size()];
150        int index = 0;
151        for (final String key : attributeMap.keySet()) {
152            keyArray[index] = key;
153            valueArray[index] = attributeMap.get(key);
154            index++;
155        }
156        mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
157    }
158
159
    // Asks JniUtils to load the native library when this class is initialized; the native
    // methods declared below are implemented there.
    static {
        JniUtils.loadNativeLibrary();
    }
163
    // Native entry points. openNative() and createOnMemoryNative() return the handle that is
    // stored in mNativeDict; every other method takes that handle as its first argument
    // ("dict"). Parameters prefixed with "out" are filled in by the native side.

    // Opens a dictionary from a file region; the result is stored in mNativeDict.
    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
            boolean isUpdatable);
    // Creates an on-memory dictionary; attribute keys and values are parallel arrays.
    private static native long createOnMemoryNative(long formatVersion,
            String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray);
    // Header size and format version are returned in one-element arrays; attribute keys and
    // values come back as code point arrays (see getHeader()).
    private static native void getHeaderInfoNative(long dict, int[] outHeaderSize,
            int[] outFormatVersion, ArrayList<int[]> outAttributeKeys,
            ArrayList<int[]> outAttributeValues);
    private static native boolean flushNative(long dict, String filePath);
    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
    private static native boolean flushWithGCNative(long dict, String filePath);
    private static native void closeNative(long dict);
    private static native int getFormatVersionNative(long dict);
    private static native int getProbabilityNative(long dict, int[] word);
    private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word);
    private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays,
            boolean[] isBeginningOfSentenceArray, int[] word);
    // outFlags and outProbabilityInfo are indexed by the FORMAT_WORD_PROPERTY_* constants.
    private static native void getWordPropertyNative(long dict, int[] word,
            boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
            int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray,
            ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
            ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo,
            ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
    // Returns the token to pass to the next call (see getNextWordProperty()).
    private static native int getNextWordNative(long dict, int token, int[] outCodePoints,
            boolean[] outIsBeginningOfSentence);
    // Results are written into the output arrays owned by the DicTraverseSession
    // (see getSuggestions()).
    private static native void getSuggestionsNative(long dict, long proximityInfo,
            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
            int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints,
            int[] outputScores, int[] outputIndices, int[] outputTypes,
            int[] outputAutoCommitFirstWordConfidence,
            float[] inOutWeightOfLangModelVsSpatialModel);
    // The add/remove/update methods below return a boolean that the Java wrappers treat as
    // success: mHasUpdated is only set when they return true.
    private static native boolean addUnigramEntryNative(long dict, int[] word, int probability,
            int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
            boolean isNotAWord, boolean isPossiblyOffensive, int timestamp);
    private static native boolean removeUnigramEntryNative(long dict, int[] word);
    private static native boolean addNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, int probability, int timestamp);
    private static native boolean removeNgramEntryNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word);
    private static native boolean updateEntriesForWordWithNgramContextNative(long dict,
            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
            int[] word, boolean isValidWord, int count, int timestamp);
    // The return value is used by addMultipleDictionaryEntries() as the start index of the
    // next call; a value <= 0 stops the loop there.
    private static native int addMultipleDictionaryEntriesNative(long dict,
            LanguageModelParam[] languageModelParams, int startIndex);
    private static native String getPropertyNative(long dict, String query);
    private static native boolean isCorruptedNative(long dict);
    // Writes the migrated dictionary to dictFilePath (migrateTo() passes a temporary path).
    private static native boolean migrateNative(long dict, String dictFilePath,
            long newFormatVersion);
214
215    // TODO: Move native dict into session
216    private final void loadDictionary(final String path, final long startOffset,
217            final long length, final boolean isUpdatable) {
218        mHasUpdated = false;
219        mNativeDict = openNative(path, startOffset, length, isUpdatable);
220    }
221
222    // TODO: Check isCorrupted() for main dictionaries.
223    public boolean isCorrupted() {
224        if (!isValidDictionary()) {
225            return false;
226        }
227        if (!isCorruptedNative(mNativeDict)) {
228            return false;
229        }
230        // TODO: Record the corruption.
231        Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted.");
232        Log.e(TAG, "locale: " + mLocale);
233        Log.e(TAG, "dict size: " + mDictSize);
234        Log.e(TAG, "updatable: " + mIsUpdatable);
235        return true;
236    }
237
238    public DictionaryHeader getHeader() throws UnsupportedFormatException {
239        if (mNativeDict == 0) {
240            return null;
241        }
242        final int[] outHeaderSize = new int[1];
243        final int[] outFormatVersion = new int[1];
244        final ArrayList<int[]> outAttributeKeys = new ArrayList<>();
245        final ArrayList<int[]> outAttributeValues = new ArrayList<>();
246        getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys,
247                outAttributeValues);
248        final HashMap<String, String> attributes = new HashMap<>();
249        for (int i = 0; i < outAttributeKeys.size(); i++) {
250            final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray(
251                    outAttributeKeys.get(i));
252            final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray(
253                    outAttributeValues.get(i));
254            attributes.put(attributeKey, attributeValue);
255        }
256        final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals(
257                attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY));
258        return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes),
259                new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo));
260    }
261
262    @Override
263    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
264            final NgramContext ngramContext, final ProximityInfo proximityInfo,
265            final SettingsValuesForSuggestion settingsValuesForSuggestion,
266            final int sessionId, final float weightForLocale,
267            final float[] inOutWeightOfLangModelVsSpatialModel) {
268        if (!isValidDictionary()) {
269            return null;
270        }
271        final DicTraverseSession session = getTraverseSession(sessionId);
272        Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE);
273        ngramContext.outputToArray(session.mPrevWordCodePointArrays,
274                session.mIsBeginningOfSentenceArray);
275        final InputPointers inputPointers = composer.getInputPointers();
276        final boolean isGesture = composer.isBatchMode();
277        final int inputSize;
278        if (!isGesture) {
279            inputSize = composer.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount(
280                    session.mInputCodePoints);
281            if (inputSize < 0) {
282                return null;
283            }
284        } else {
285            inputSize = inputPointers.getPointerSize();
286        }
287        session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance);
288        session.mNativeSuggestOptions.setIsGesture(isGesture);
289        session.mNativeSuggestOptions.setBlockOffensiveWords(
290                settingsValuesForSuggestion.mBlockPotentiallyOffensive);
291        session.mNativeSuggestOptions.setSpaceAwareGestureEnabled(
292                settingsValuesForSuggestion.mSpaceAwareGestureEnabled);
293        session.mNativeSuggestOptions.setAdditionalFeaturesOptions(
294                settingsValuesForSuggestion.mAdditionalFeaturesSettingValues);
295        session.mNativeSuggestOptions.setWeightForLocale(weightForLocale);
296        if (inOutWeightOfLangModelVsSpatialModel != null) {
297            session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
298                    inOutWeightOfLangModelVsSpatialModel[0];
299        } else {
300            session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
301                    Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL;
302        }
303        // TOOD: Pass multiple previous words information for n-gram.
304        getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
305                getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
306                inputPointers.getYCoordinates(), inputPointers.getTimes(),
307                inputPointers.getPointerIds(), session.mInputCodePoints, inputSize,
308                session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays,
309                session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(),
310                session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores,
311                session.mSpaceIndices, session.mOutputTypes,
312                session.mOutputAutoCommitFirstWordConfidence,
313                session.mInputOutputWeightOfLangModelVsSpatialModel);
314        if (inOutWeightOfLangModelVsSpatialModel != null) {
315            inOutWeightOfLangModelVsSpatialModel[0] =
316                    session.mInputOutputWeightOfLangModelVsSpatialModel[0];
317        }
318        final int count = session.mOutputSuggestionCount[0];
319        final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>();
320        for (int j = 0; j < count; ++j) {
321            final int start = j * Constants.DICTIONARY_MAX_WORD_LENGTH;
322            int len = 0;
323            while (len < Constants.DICTIONARY_MAX_WORD_LENGTH
324                    && session.mOutputCodePoints[start + len] != 0) {
325                ++len;
326            }
327            if (len > 0) {
328                suggestions.add(new SuggestedWordInfo(
329                        new String(session.mOutputCodePoints, start, len),
330                        (int)(session.mOutputScores[j] * weightForLocale), session.mOutputTypes[j],
331                        this /* sourceDict */,
332                        session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
333                        session.mOutputAutoCommitFirstWordConfidence[0]));
334            }
335        }
336        return suggestions;
337    }
338
    /**
     * Tells whether this object currently holds a native dictionary handle.
     * @return true if {@code mNativeDict} is non-zero.
     */
    public boolean isValidDictionary() {
        return mNativeDict != 0;
    }
342
    /**
     * Returns the format version reported by native code for this dictionary.
     * Note that the handle is passed to native code as is, without an isValidDictionary() check.
     */
    public int getFormatVersion() {
        return getFormatVersionNative(mNativeDict);
    }
346
    /**
     * Checks whether the word is in the dictionary.
     * @return true if getFrequency() returns anything other than NOT_A_PROBABILITY.
     */
    @Override
    public boolean isInDictionary(final String word) {
        return getFrequency(word) != NOT_A_PROBABILITY;
    }
351
352    @Override
353    public int getFrequency(final String word) {
354        if (TextUtils.isEmpty(word)) {
355            return NOT_A_PROBABILITY;
356        }
357        final int[] codePoints = StringUtils.toCodePointArray(word);
358        return getProbabilityNative(mNativeDict, codePoints);
359    }
360
361    @Override
362    public int getMaxFrequencyOfExactMatches(final String word) {
363        if (TextUtils.isEmpty(word)) {
364            return NOT_A_PROBABILITY;
365        }
366        final int[] codePoints = StringUtils.toCodePointArray(word);
367        return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints);
368    }
369
    /**
     * Checks whether an n-gram entry exists for the word in the given context.
     * @return true if getNgramProbability() returns anything other than NOT_A_PROBABILITY.
     */
    @UsedForTesting
    public boolean isValidNgram(final NgramContext ngramContext, final String word) {
        return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY;
    }
374
375    public int getNgramProbability(final NgramContext ngramContext, final String word) {
376        if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
377            return NOT_A_PROBABILITY;
378        }
379        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
380        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
381        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
382        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
383        return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays,
384                isBeginningOfSentenceArray, wordCodePoints);
385    }
386
    /**
     * Fetches the properties of a word from native code.
     *
     * @param word the word to look up.
     * @param isBeginningOfSentence passed through to native code together with the word.
     * @return a WordProperty built from the values native code wrote into the output buffers,
     * or null if {@code word} is null.
     */
    public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) {
        if (word == null) {
            return null;
        }
        final int[] codePoints = StringUtils.toCodePointArray(word);
        // Output buffers filled in by getWordPropertyNative(). The flag and probability info
        // arrays are sized and indexed by the FORMAT_WORD_PROPERTY_* constants.
        final int[] outCodePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH];
        final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
        final int[] outProbabilityInfo =
                new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
        final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>();
        final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray =
                new ArrayList<>();
        final ArrayList<int[]> outNgramTargets = new ArrayList<>();
        final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>();
        final ArrayList<int[]> outShortcutTargets = new ArrayList<>();
        final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>();
        getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints,
                outFlags, outProbabilityInfo, outNgramPrevWordsArray,
                outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets,
                outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
        // The WordProperty is built from the input code points; outCodePoints is only handed
        // to native code and not read back here.
        return new WordProperty(codePoints,
                outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
                outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX],
                outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX],
                outFlags[FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX],
                outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
                outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
                outNgramTargets, outNgramProbabilityInfo, outShortcutTargets,
                outShortcutProbabilities);
    }
417
    /**
     * Result of getNextWordProperty(): the property of the word that was read plus the token
     * to pass to the next call.
     */
    public static class GetNextWordPropertyResult {
        // Property of the word found at the requested token.
        public WordProperty mWordProperty;
        // Token returned by native code for continuing the iteration.
        public int mNextToken;

        public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) {
            mWordProperty = wordProperty;
            mNextToken = nextToken;
        }
    }
427
428    /**
429     * Method to iterate all words in the dictionary for makedict.
430     * If token is 0, this method newly starts iterating the dictionary.
431     */
432    public GetNextWordPropertyResult getNextWordProperty(final int token) {
433        final int[] codePoints = new int[Constants.DICTIONARY_MAX_WORD_LENGTH];
434        final boolean[] isBeginningOfSentence = new boolean[1];
435        final int nextToken = getNextWordNative(mNativeDict, token, codePoints,
436                isBeginningOfSentence);
437        final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
438        return new GetNextWordPropertyResult(
439                getWordProperty(word, isBeginningOfSentence[0]), nextToken);
440    }
441
442    // Add a unigram entry to binary dictionary with unigram attributes in native code.
443    public boolean addUnigramEntry(final String word, final int probability,
444            final String shortcutTarget, final int shortcutProbability,
445            final boolean isBeginningOfSentence, final boolean isNotAWord,
446            final boolean isPossiblyOffensive, final int timestamp) {
447        if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
448            return false;
449        }
450        final int[] codePoints = StringUtils.toCodePointArray(word);
451        final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
452                StringUtils.toCodePointArray(shortcutTarget) : null;
453        if (!addUnigramEntryNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
454                shortcutProbability, isBeginningOfSentence, isNotAWord, isPossiblyOffensive,
455                timestamp)) {
456            return false;
457        }
458        mHasUpdated = true;
459        return true;
460    }
461
462    // Remove a unigram entry from the binary dictionary in native code.
463    public boolean removeUnigramEntry(final String word) {
464        if (TextUtils.isEmpty(word)) {
465            return false;
466        }
467        final int[] codePoints = StringUtils.toCodePointArray(word);
468        if (!removeUnigramEntryNative(mNativeDict, codePoints)) {
469            return false;
470        }
471        mHasUpdated = true;
472        return true;
473    }
474
475    // Add an n-gram entry to the binary dictionary with timestamp in native code.
476    public boolean addNgramEntry(final NgramContext ngramContext, final String word,
477            final int probability, final int timestamp) {
478        if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
479            return false;
480        }
481        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
482        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
483        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
484        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
485        if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays,
486                isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) {
487            return false;
488        }
489        mHasUpdated = true;
490        return true;
491    }
492
493    // Remove an n-gram entry from the binary dictionary in native code.
494    public boolean removeNgramEntry(final NgramContext ngramContext, final String word) {
495        if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
496            return false;
497        }
498        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
499        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
500        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
501        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
502        if (!removeNgramEntryNative(mNativeDict, prevWordCodePointArrays,
503                isBeginningOfSentenceArray, wordCodePoints)) {
504            return false;
505        }
506        mHasUpdated = true;
507        return true;
508    }
509
510    // Update entries for the word occurrence with the ngramContext.
511    public boolean updateEntriesForWordWithNgramContext(@Nonnull final NgramContext ngramContext,
512            final String word, final boolean isValidWord, final int count, final int timestamp) {
513        if (TextUtils.isEmpty(word)) {
514            return false;
515        }
516        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
517        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
518        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
519        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
520        if (!updateEntriesForWordWithNgramContextNative(mNativeDict, prevWordCodePointArrays,
521                isBeginningOfSentenceArray, wordCodePoints, isValidWord, count, timestamp)) {
522            return false;
523        }
524        mHasUpdated = true;
525        return true;
526    }
527
528    @UsedForTesting
529    public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
530        if (!isValidDictionary()) {
531            return;
532        }
533        int processedParamCount = 0;
534        while (processedParamCount < languageModelParams.length) {
535            if (needsToRunGC(true /* mindsBlockByGC */)) {
536                flushWithGC();
537            }
538            processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict,
539                    languageModelParams, processedParamCount);
540            mHasUpdated = true;
541            if (processedParamCount <= 0) {
542                return;
543            }
544        }
545    }
546
547    private void reopen() {
548        close();
549        final File dictFile = new File(mDictFilePath);
550        // WARNING: Because we pass 0 as the offset and file.length() as the length, this can
551        // only be called for actual files. Right now it's only called by the flush() family of
552        // functions, which require an updatable dictionary, so it's okay. But beware.
553        loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
554                dictFile.length(), mIsUpdatable);
555    }
556
557    // Flush to dict file if the dictionary has been updated.
558    public boolean flush() {
559        if (!isValidDictionary()) {
560            return false;
561        }
562        if (mHasUpdated) {
563            if (!flushNative(mNativeDict, mDictFilePath)) {
564                return false;
565            }
566            reopen();
567        }
568        return true;
569    }
570
571    // Run GC and flush to dict file if the dictionary has been updated.
572    public boolean flushWithGCIfHasUpdated() {
573        if (mHasUpdated) {
574            return flushWithGC();
575        }
576        return true;
577    }
578
579    // Run GC and flush to dict file.
580    public boolean flushWithGC() {
581        if (!isValidDictionary()) {
582            return false;
583        }
584        if (!flushWithGCNative(mNativeDict, mDictFilePath)) {
585            return false;
586        }
587        reopen();
588        return true;
589    }
590
591    /**
592     * Checks whether GC is needed to run or not.
593     * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
594     * the blocking in some situations such as in idle time or just before closing.
595     * @return whether GC is needed to run or not.
596     */
597    public boolean needsToRunGC(final boolean mindsBlockByGC) {
598        if (!isValidDictionary()) {
599            return false;
600        }
601        return needsToRunGCNative(mNativeDict, mindsBlockByGC);
602    }
603
    /**
     * Migrates the dictionary file to a new format version and reloads it.
     *
     * A marker directory named after the dictionary file is created for the duration of the
     * migration and deleted when the method leaves the try block, so a marker found on entry
     * is taken as the sign of a previous attempt that did not finish.
     *
     * @param newFormatVersion the format version to migrate to.
     * @return true if the migrated dictionary replaced the old file and was reloaded; false if
     * the dictionary is not valid, a leftover marker was found, the marker could not be created,
     * or any migration step failed.
     */
    public boolean migrateTo(final int newFormatVersion) {
        if (!isValidDictionary()) {
            return false;
        }
        final File isMigratingDir =
                new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION);
        if (isMigratingDir.exists()) {
            // Leftover marker: remove it and refuse to migrate this time.
            isMigratingDir.delete();
            Log.e(TAG, "Previous migration attempt failed probably due to a crash. "
                        + "Giving up using the old dictionary (" + mDictFilePath + ").");
            return false;
        }
        if (!isMigratingDir.mkdir()) {
            Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath()
                    + ") to record migration.");
            return false;
        }
        try {
            // Native code writes the migrated dictionary to a temporary path next to the
            // original file.
            final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION;
            if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) {
                return false;
            }
            // The native dictionary is closed before the old file is replaced. If one of the
            // following steps fails, this object is left closed.
            close();
            final File dictFile = new File(mDictFilePath);
            final File tmpDictFile = new File(tmpDictFilePath);
            if (!FileUtils.deleteRecursively(dictFile)) {
                return false;
            }
            if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) {
                return false;
            }
            loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
                    dictFile.length(), mIsUpdatable);
            return true;
        } finally {
            // Remove the marker on every exit path of the try block.
            isMigratingDir.delete();
        }
    }
642
643    @UsedForTesting
644    public String getPropertyForGettingStats(final String query) {
645        if (!isValidDictionary()) {
646            return "";
647        }
648        return getPropertyNative(mNativeDict, query);
649    }
650
    /**
     * Tells whether the candidate is confident enough to be auto-committed.
     * @return true if the candidate's auto-commit confidence for the first word is strictly
     * greater than CONFIDENCE_TO_AUTO_COMMIT.
     */
    @Override
    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
        return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
    }
655
656    @Override
657    public void close() {
658        synchronized (mDicTraverseSessions) {
659            final int sessionsSize = mDicTraverseSessions.size();
660            for (int index = 0; index < sessionsSize; ++index) {
661                final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
662                if (traverseSession != null) {
663                    traverseSession.close();
664                }
665            }
666            mDicTraverseSessions.clear();
667        }
668        closeInternalLocked();
669    }
670
671    private synchronized void closeInternalLocked() {
672        if (mNativeDict != 0) {
673            closeNative(mNativeDict);
674            mNativeDict = 0;
675        }
676    }
677
    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
    /**
     * Closes the native dictionary, if it is still open, when this object is finalized.
     * Only the native dictionary handle is closed here; the traverse sessions are not touched.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            closeInternalLocked();
        } finally {
            // Always chain to the superclass finalizer, even if closing throws.
            super.finalize();
        }
    }
687}
688