BinaryDictionary.java revision f4686092232588781910cc4e64406c4958577e86
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20import android.util.Log;
21import android.util.SparseArray;
22
23import com.android.inputmethod.annotations.UsedForTesting;
24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25import com.android.inputmethod.latin.common.ComposedData;
26import com.android.inputmethod.latin.common.Constants;
27import com.android.inputmethod.latin.common.FileUtils;
28import com.android.inputmethod.latin.common.InputPointers;
29import com.android.inputmethod.latin.common.StringUtils;
30import com.android.inputmethod.latin.define.DecoderSpecificConstants;
31import com.android.inputmethod.latin.makedict.DictionaryHeader;
32import com.android.inputmethod.latin.makedict.FormatSpec;
33import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
34import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
35import com.android.inputmethod.latin.makedict.WordProperty;
36import com.android.inputmethod.latin.settings.SettingsValuesForSuggestion;
37import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
38import com.android.inputmethod.latin.utils.JniUtils;
39import com.android.inputmethod.latin.utils.WordInputEventForPersonalization;
40
41import java.io.File;
42import java.util.ArrayList;
43import java.util.Arrays;
44import java.util.HashMap;
45import java.util.Locale;
46import java.util.Map;
47
48import javax.annotation.Nonnull;
49
/**
 * Implements a static, compacted, binary dictionary of standard words.
 */
// TODO: All methods which should be locked need to have a suffix "Locked".
54public final class BinaryDictionary extends Dictionary {
55    private static final String TAG = BinaryDictionary.class.getSimpleName();
56
57    // The cutoff returned by native for auto-commit confidence.
58    // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
59    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;
60
61    public static final int DICTIONARY_MAX_WORD_LENGTH = 48;
62    public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;
63
64    @UsedForTesting
65    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
66    @UsedForTesting
67    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
68    @UsedForTesting
69    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
70    @UsedForTesting
71    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
72
73    public static final int NOT_A_VALID_TIMESTAMP = -1;
74
75    // Format to get unigram flags from native side via getWordPropertyNative().
76    private static final int FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT = 5;
77    private static final int FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
78    private static final int FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX = 1;
79    private static final int FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX = 2;
80    private static final int FORMAT_WORD_PROPERTY_HAS_SHORTCUTS_INDEX = 3; // DEPRECATED
81    private static final int FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX = 4;
82
83    // Format to get probability and historical info from native side via getWordPropertyNative().
84    public static final int FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT = 4;
85    public static final int FORMAT_WORD_PROPERTY_PROBABILITY_INDEX = 0;
86    public static final int FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX = 1;
87    public static final int FORMAT_WORD_PROPERTY_LEVEL_INDEX = 2;
88    public static final int FORMAT_WORD_PROPERTY_COUNT_INDEX = 3;
89
90    public static final String DICT_FILE_NAME_SUFFIX_FOR_MIGRATION = ".migrate";
91    public static final String DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION = ".migrating";
92
93    private long mNativeDict;
94    private final long mDictSize;
95    private final String mDictFilePath;
96    private final boolean mUseFullEditDistance;
97    private final boolean mIsUpdatable;
98    private boolean mHasUpdated;
99
100    private final SparseArray<DicTraverseSession> mDicTraverseSessions = new SparseArray<>();
101
102    // TODO: There should be a way to remove used DicTraverseSession objects from
103    // {@code mDicTraverseSessions}.
104    private DicTraverseSession getTraverseSession(final int traverseSessionId) {
105        synchronized(mDicTraverseSessions) {
106            DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
107            if (traverseSession == null) {
108                traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
109                mDicTraverseSessions.put(traverseSessionId, traverseSession);
110            }
111            return traverseSession;
112        }
113    }
114
115    /**
116     * Constructs binary dictionary using existing dictionary file.
117     * @param filename the name of the file to read through native code.
118     * @param offset the offset of the dictionary data within the file.
119     * @param length the length of the binary data.
120     * @param useFullEditDistance whether to use the full edit distance in suggestions
121     * @param dictType the dictionary type, as a human-readable string
122     * @param isUpdatable whether to open the dictionary file in writable mode.
123     */
124    public BinaryDictionary(final String filename, final long offset, final long length,
125            final boolean useFullEditDistance, final Locale locale, final String dictType,
126            final boolean isUpdatable) {
127        super(dictType, locale);
128        mDictSize = length;
129        mDictFilePath = filename;
130        mIsUpdatable = isUpdatable;
131        mHasUpdated = false;
132        mUseFullEditDistance = useFullEditDistance;
133        loadDictionary(filename, offset, length, isUpdatable);
134    }
135
136    /**
137     * Constructs binary dictionary on memory.
138     * @param filename the name of the file used to flush.
139     * @param useFullEditDistance whether to use the full edit distance in suggestions
140     * @param dictType the dictionary type, as a human-readable string
141     * @param formatVersion the format version of the dictionary
142     * @param attributeMap the attributes of the dictionary
143     */
144    public BinaryDictionary(final String filename, final boolean useFullEditDistance,
145            final Locale locale, final String dictType, final long formatVersion,
146            final Map<String, String> attributeMap) {
147        super(dictType, locale);
148        mDictSize = 0;
149        mDictFilePath = filename;
150        // On memory dictionary is always updatable.
151        mIsUpdatable = true;
152        mHasUpdated = false;
153        mUseFullEditDistance = useFullEditDistance;
154        final String[] keyArray = new String[attributeMap.size()];
155        final String[] valueArray = new String[attributeMap.size()];
156        int index = 0;
157        for (final String key : attributeMap.keySet()) {
158            keyArray[index] = key;
159            valueArray[index] = attributeMap.get(key);
160            index++;
161        }
162        mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
163    }
164
165
166    static {
167        JniUtils.loadNativeLibrary();
168    }
169
170    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
171            boolean isUpdatable);
172    private static native long createOnMemoryNative(long formatVersion,
173            String locale, String[] attributeKeyStringArray, String[] attributeValueStringArray);
174    private static native void getHeaderInfoNative(long dict, int[] outHeaderSize,
175            int[] outFormatVersion, ArrayList<int[]> outAttributeKeys,
176            ArrayList<int[]> outAttributeValues);
177    private static native boolean flushNative(long dict, String filePath);
178    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
179    private static native boolean flushWithGCNative(long dict, String filePath);
180    private static native void closeNative(long dict);
181    private static native int getFormatVersionNative(long dict);
182    private static native int getProbabilityNative(long dict, int[] word);
183    private static native int getMaxProbabilityOfExactMatchesNative(long dict, int[] word);
184    private static native int getNgramProbabilityNative(long dict, int[][] prevWordCodePointArrays,
185            boolean[] isBeginningOfSentenceArray, int[] word);
186    private static native void getWordPropertyNative(long dict, int[] word,
187            boolean isBeginningOfSentence, int[] outCodePoints, boolean[] outFlags,
188            int[] outProbabilityInfo, ArrayList<int[][]> outNgramPrevWordsArray,
189            ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray,
190            ArrayList<int[]> outNgramTargets, ArrayList<int[]> outNgramProbabilityInfo,
191            ArrayList<int[]> outShortcutTargets, ArrayList<Integer> outShortcutProbabilities);
192    private static native int getNextWordNative(long dict, int token, int[] outCodePoints,
193            boolean[] outIsBeginningOfSentence);
194    private static native void getSuggestionsNative(long dict, long proximityInfo,
195            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
196            int[] pointerIds, int[] inputCodePoints, int inputSize, int[] suggestOptions,
197            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
198            int prevWordCount, int[] outputSuggestionCount, int[] outputCodePoints,
199            int[] outputScores, int[] outputIndices, int[] outputTypes,
200            int[] outputAutoCommitFirstWordConfidence,
201            float[] inOutWeightOfLangModelVsSpatialModel);
202    private static native boolean addUnigramEntryNative(long dict, int[] word, int probability,
203            int[] shortcutTarget, int shortcutProbability, boolean isBeginningOfSentence,
204            boolean isNotAWord, boolean isPossiblyOffensive, int timestamp);
205    private static native boolean removeUnigramEntryNative(long dict, int[] word);
206    private static native boolean addNgramEntryNative(long dict,
207            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
208            int[] word, int probability, int timestamp);
209    private static native boolean removeNgramEntryNative(long dict,
210            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray, int[] word);
211    private static native boolean updateEntriesForWordWithNgramContextNative(long dict,
212            int[][] prevWordCodePointArrays, boolean[] isBeginningOfSentenceArray,
213            int[] word, boolean isValidWord, int count, int timestamp);
214    private static native int updateEntriesForInputEventsNative(long dict,
215            WordInputEventForPersonalization[] inputEvents, int startIndex);
216    private static native String getPropertyNative(long dict, String query);
217    private static native boolean isCorruptedNative(long dict);
218    private static native boolean migrateNative(long dict, String dictFilePath,
219            long newFormatVersion);
220
221    // TODO: Move native dict into session
222    private void loadDictionary(final String path, final long startOffset,
223            final long length, final boolean isUpdatable) {
224        mHasUpdated = false;
225        mNativeDict = openNative(path, startOffset, length, isUpdatable);
226    }
227
228    // TODO: Check isCorrupted() for main dictionaries.
229    public boolean isCorrupted() {
230        if (!isValidDictionary()) {
231            return false;
232        }
233        if (!isCorruptedNative(mNativeDict)) {
234            return false;
235        }
236        // TODO: Record the corruption.
237        Log.e(TAG, "BinaryDictionary (" + mDictFilePath + ") is corrupted.");
238        Log.e(TAG, "locale: " + mLocale);
239        Log.e(TAG, "dict size: " + mDictSize);
240        Log.e(TAG, "updatable: " + mIsUpdatable);
241        return true;
242    }
243
244    public DictionaryHeader getHeader() throws UnsupportedFormatException {
245        if (mNativeDict == 0) {
246            return null;
247        }
248        final int[] outHeaderSize = new int[1];
249        final int[] outFormatVersion = new int[1];
250        final ArrayList<int[]> outAttributeKeys = new ArrayList<>();
251        final ArrayList<int[]> outAttributeValues = new ArrayList<>();
252        getHeaderInfoNative(mNativeDict, outHeaderSize, outFormatVersion, outAttributeKeys,
253                outAttributeValues);
254        final HashMap<String, String> attributes = new HashMap<>();
255        for (int i = 0; i < outAttributeKeys.size(); i++) {
256            final String attributeKey = StringUtils.getStringFromNullTerminatedCodePointArray(
257                    outAttributeKeys.get(i));
258            final String attributeValue = StringUtils.getStringFromNullTerminatedCodePointArray(
259                    outAttributeValues.get(i));
260            attributes.put(attributeKey, attributeValue);
261        }
262        final boolean hasHistoricalInfo = DictionaryHeader.ATTRIBUTE_VALUE_TRUE.equals(
263                attributes.get(DictionaryHeader.HAS_HISTORICAL_INFO_KEY));
264        return new DictionaryHeader(outHeaderSize[0], new DictionaryOptions(attributes),
265                new FormatSpec.FormatOptions(outFormatVersion[0], hasHistoricalInfo));
266    }
267
268    @Override
269    public ArrayList<SuggestedWordInfo> getSuggestions(final ComposedData composedData,
270            final NgramContext ngramContext, final long proximityInfoHandle,
271            final SettingsValuesForSuggestion settingsValuesForSuggestion,
272            final int sessionId, final float weightForLocale,
273            final float[] inOutWeightOfLangModelVsSpatialModel) {
274        if (!isValidDictionary()) {
275            return null;
276        }
277        final DicTraverseSession session = getTraverseSession(sessionId);
278        Arrays.fill(session.mInputCodePoints, Constants.NOT_A_CODE);
279        ngramContext.outputToArray(session.mPrevWordCodePointArrays,
280                session.mIsBeginningOfSentenceArray);
281        final InputPointers inputPointers = composedData.mInputPointers;
282        final boolean isGesture = composedData.mIsBatchMode;
283        final int inputSize;
284        if (!isGesture) {
285            inputSize =
286                    composedData.copyCodePointsExceptTrailingSingleQuotesAndReturnCodePointCount(
287                        session.mInputCodePoints);
288            if (inputSize < 0) {
289                return null;
290            }
291        } else {
292            inputSize = inputPointers.getPointerSize();
293        }
294        session.mNativeSuggestOptions.setUseFullEditDistance(mUseFullEditDistance);
295        session.mNativeSuggestOptions.setIsGesture(isGesture);
296        session.mNativeSuggestOptions.setBlockOffensiveWords(
297                settingsValuesForSuggestion.mBlockPotentiallyOffensive);
298        session.mNativeSuggestOptions.setWeightForLocale(weightForLocale);
299        if (inOutWeightOfLangModelVsSpatialModel != null) {
300            session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
301                    inOutWeightOfLangModelVsSpatialModel[0];
302        } else {
303            session.mInputOutputWeightOfLangModelVsSpatialModel[0] =
304                    Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL;
305        }
306        // TOOD: Pass multiple previous words information for n-gram.
307        getSuggestionsNative(mNativeDict, proximityInfoHandle,
308                getTraverseSession(sessionId).getSession(), inputPointers.getXCoordinates(),
309                inputPointers.getYCoordinates(), inputPointers.getTimes(),
310                inputPointers.getPointerIds(), session.mInputCodePoints, inputSize,
311                session.mNativeSuggestOptions.getOptions(), session.mPrevWordCodePointArrays,
312                session.mIsBeginningOfSentenceArray, ngramContext.getPrevWordCount(),
313                session.mOutputSuggestionCount, session.mOutputCodePoints, session.mOutputScores,
314                session.mSpaceIndices, session.mOutputTypes,
315                session.mOutputAutoCommitFirstWordConfidence,
316                session.mInputOutputWeightOfLangModelVsSpatialModel);
317        if (inOutWeightOfLangModelVsSpatialModel != null) {
318            inOutWeightOfLangModelVsSpatialModel[0] =
319                    session.mInputOutputWeightOfLangModelVsSpatialModel[0];
320        }
321        final int count = session.mOutputSuggestionCount[0];
322        final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<>();
323        for (int j = 0; j < count; ++j) {
324            final int start = j * DICTIONARY_MAX_WORD_LENGTH;
325            int len = 0;
326            while (len < DICTIONARY_MAX_WORD_LENGTH
327                    && session.mOutputCodePoints[start + len] != 0) {
328                ++len;
329            }
330            if (len > 0) {
331                suggestions.add(new SuggestedWordInfo(
332                        new String(session.mOutputCodePoints, start, len),
333                        (int)(session.mOutputScores[j] * weightForLocale), session.mOutputTypes[j],
334                        this /* sourceDict */,
335                        session.mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
336                        session.mOutputAutoCommitFirstWordConfidence[0]));
337            }
338        }
339        return suggestions;
340    }
341
342    public boolean isValidDictionary() {
343        return mNativeDict != 0;
344    }
345
346    public int getFormatVersion() {
347        return getFormatVersionNative(mNativeDict);
348    }
349
350    @Override
351    public boolean isInDictionary(final String word) {
352        return getFrequency(word) != NOT_A_PROBABILITY;
353    }
354
355    @Override
356    public int getFrequency(final String word) {
357        if (TextUtils.isEmpty(word)) {
358            return NOT_A_PROBABILITY;
359        }
360        final int[] codePoints = StringUtils.toCodePointArray(word);
361        return getProbabilityNative(mNativeDict, codePoints);
362    }
363
364    @Override
365    public int getMaxFrequencyOfExactMatches(final String word) {
366        if (TextUtils.isEmpty(word)) {
367            return NOT_A_PROBABILITY;
368        }
369        final int[] codePoints = StringUtils.toCodePointArray(word);
370        return getMaxProbabilityOfExactMatchesNative(mNativeDict, codePoints);
371    }
372
373    @UsedForTesting
374    public boolean isValidNgram(final NgramContext ngramContext, final String word) {
375        return getNgramProbability(ngramContext, word) != NOT_A_PROBABILITY;
376    }
377
378    public int getNgramProbability(final NgramContext ngramContext, final String word) {
379        if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
380            return NOT_A_PROBABILITY;
381        }
382        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
383        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
384        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
385        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
386        return getNgramProbabilityNative(mNativeDict, prevWordCodePointArrays,
387                isBeginningOfSentenceArray, wordCodePoints);
388    }
389
390    public WordProperty getWordProperty(final String word, final boolean isBeginningOfSentence) {
391        if (word == null) {
392            return null;
393        }
394        final int[] codePoints = StringUtils.toCodePointArray(word);
395        final int[] outCodePoints = new int[DICTIONARY_MAX_WORD_LENGTH];
396        final boolean[] outFlags = new boolean[FORMAT_WORD_PROPERTY_OUTPUT_FLAG_COUNT];
397        final int[] outProbabilityInfo =
398                new int[FORMAT_WORD_PROPERTY_OUTPUT_PROBABILITY_INFO_COUNT];
399        final ArrayList<int[][]> outNgramPrevWordsArray = new ArrayList<>();
400        final ArrayList<boolean[]> outNgramPrevWordIsBeginningOfSentenceArray =
401                new ArrayList<>();
402        final ArrayList<int[]> outNgramTargets = new ArrayList<>();
403        final ArrayList<int[]> outNgramProbabilityInfo = new ArrayList<>();
404        final ArrayList<int[]> outShortcutTargets = new ArrayList<>();
405        final ArrayList<Integer> outShortcutProbabilities = new ArrayList<>();
406        getWordPropertyNative(mNativeDict, codePoints, isBeginningOfSentence, outCodePoints,
407                outFlags, outProbabilityInfo, outNgramPrevWordsArray,
408                outNgramPrevWordIsBeginningOfSentenceArray, outNgramTargets,
409                outNgramProbabilityInfo, outShortcutTargets, outShortcutProbabilities);
410        return new WordProperty(codePoints,
411                outFlags[FORMAT_WORD_PROPERTY_IS_NOT_A_WORD_INDEX],
412                outFlags[FORMAT_WORD_PROPERTY_IS_POSSIBLY_OFFENSIVE_INDEX],
413                outFlags[FORMAT_WORD_PROPERTY_HAS_NGRAMS_INDEX],
414                outFlags[FORMAT_WORD_PROPERTY_IS_BEGINNING_OF_SENTENCE_INDEX], outProbabilityInfo,
415                outNgramPrevWordsArray, outNgramPrevWordIsBeginningOfSentenceArray,
416                outNgramTargets, outNgramProbabilityInfo);
417    }
418
419    public static class GetNextWordPropertyResult {
420        public WordProperty mWordProperty;
421        public int mNextToken;
422
423        public GetNextWordPropertyResult(final WordProperty wordProperty, final int nextToken) {
424            mWordProperty = wordProperty;
425            mNextToken = nextToken;
426        }
427    }
428
429    /**
430     * Method to iterate all words in the dictionary for makedict.
431     * If token is 0, this method newly starts iterating the dictionary.
432     */
433    public GetNextWordPropertyResult getNextWordProperty(final int token) {
434        final int[] codePoints = new int[DICTIONARY_MAX_WORD_LENGTH];
435        final boolean[] isBeginningOfSentence = new boolean[1];
436        final int nextToken = getNextWordNative(mNativeDict, token, codePoints,
437                isBeginningOfSentence);
438        final String word = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
439        return new GetNextWordPropertyResult(
440                getWordProperty(word, isBeginningOfSentence[0]), nextToken);
441    }
442
443    // Add a unigram entry to binary dictionary with unigram attributes in native code.
444    public boolean addUnigramEntry(
445            final String word, final int probability, final boolean isBeginningOfSentence,
446            final boolean isNotAWord, final boolean isPossiblyOffensive, final int timestamp) {
447        if (word == null || (word.isEmpty() && !isBeginningOfSentence)) {
448            return false;
449        }
450        final int[] codePoints = StringUtils.toCodePointArray(word);
451        if (!addUnigramEntryNative(mNativeDict, codePoints, probability,
452                null /* shortcutTargetCodePoints */, 0 /* shortcutProbability */,
453                isBeginningOfSentence, isNotAWord, isPossiblyOffensive, timestamp)) {
454            return false;
455        }
456        mHasUpdated = true;
457        return true;
458    }
459
460    // Remove a unigram entry from the binary dictionary in native code.
461    public boolean removeUnigramEntry(final String word) {
462        if (TextUtils.isEmpty(word)) {
463            return false;
464        }
465        final int[] codePoints = StringUtils.toCodePointArray(word);
466        if (!removeUnigramEntryNative(mNativeDict, codePoints)) {
467            return false;
468        }
469        mHasUpdated = true;
470        return true;
471    }
472
473    // Add an n-gram entry to the binary dictionary with timestamp in native code.
474    public boolean addNgramEntry(final NgramContext ngramContext, final String word,
475            final int probability, final int timestamp) {
476        if (!ngramContext.isValid() || TextUtils.isEmpty(word)) {
477            return false;
478        }
479        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
480        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
481        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
482        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
483        if (!addNgramEntryNative(mNativeDict, prevWordCodePointArrays,
484                isBeginningOfSentenceArray, wordCodePoints, probability, timestamp)) {
485            return false;
486        }
487        mHasUpdated = true;
488        return true;
489    }
490
491    // Update entries for the word occurrence with the ngramContext.
492    public boolean updateEntriesForWordWithNgramContext(@Nonnull final NgramContext ngramContext,
493            final String word, final boolean isValidWord, final int count, final int timestamp) {
494        if (TextUtils.isEmpty(word)) {
495            return false;
496        }
497        final int[][] prevWordCodePointArrays = new int[ngramContext.getPrevWordCount()][];
498        final boolean[] isBeginningOfSentenceArray = new boolean[ngramContext.getPrevWordCount()];
499        ngramContext.outputToArray(prevWordCodePointArrays, isBeginningOfSentenceArray);
500        final int[] wordCodePoints = StringUtils.toCodePointArray(word);
501        if (!updateEntriesForWordWithNgramContextNative(mNativeDict, prevWordCodePointArrays,
502                isBeginningOfSentenceArray, wordCodePoints, isValidWord, count, timestamp)) {
503            return false;
504        }
505        mHasUpdated = true;
506        return true;
507    }
508
509    @UsedForTesting
510    public void updateEntriesForInputEvents(final WordInputEventForPersonalization[] inputEvents) {
511        if (!isValidDictionary()) {
512            return;
513        }
514        int processedEventCount = 0;
515        while (processedEventCount < inputEvents.length) {
516            if (needsToRunGC(true /* mindsBlockByGC */)) {
517                flushWithGC();
518            }
519            processedEventCount = updateEntriesForInputEventsNative(mNativeDict, inputEvents,
520                    processedEventCount);
521            mHasUpdated = true;
522            if (processedEventCount <= 0) {
523                return;
524            }
525        }
526    }
527
528    private void reopen() {
529        close();
530        final File dictFile = new File(mDictFilePath);
531        // WARNING: Because we pass 0 as the offset and file.length() as the length, this can
532        // only be called for actual files. Right now it's only called by the flush() family of
533        // functions, which require an updatable dictionary, so it's okay. But beware.
534        loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
535                dictFile.length(), mIsUpdatable);
536    }
537
538    // Flush to dict file if the dictionary has been updated.
539    public boolean flush() {
540        if (!isValidDictionary()) {
541            return false;
542        }
543        if (mHasUpdated) {
544            if (!flushNative(mNativeDict, mDictFilePath)) {
545                return false;
546            }
547            reopen();
548        }
549        return true;
550    }
551
552    // Run GC and flush to dict file if the dictionary has been updated.
553    public boolean flushWithGCIfHasUpdated() {
554        if (mHasUpdated) {
555            return flushWithGC();
556        }
557        return true;
558    }
559
560    // Run GC and flush to dict file.
561    public boolean flushWithGC() {
562        if (!isValidDictionary()) {
563            return false;
564        }
565        if (!flushWithGCNative(mNativeDict, mDictFilePath)) {
566            return false;
567        }
568        reopen();
569        return true;
570    }
571
572    /**
573     * Checks whether GC is needed to run or not.
574     * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
575     * the blocking in some situations such as in idle time or just before closing.
576     * @return whether GC is needed to run or not.
577     */
578    public boolean needsToRunGC(final boolean mindsBlockByGC) {
579        if (!isValidDictionary()) {
580            return false;
581        }
582        return needsToRunGCNative(mNativeDict, mindsBlockByGC);
583    }
584
585    public boolean migrateTo(final int newFormatVersion) {
586        if (!isValidDictionary()) {
587            return false;
588        }
589        final File isMigratingDir =
590                new File(mDictFilePath + DIR_NAME_SUFFIX_FOR_RECORD_MIGRATION);
591        if (isMigratingDir.exists()) {
592            isMigratingDir.delete();
593            Log.e(TAG, "Previous migration attempt failed probably due to a crash. "
594                        + "Giving up using the old dictionary (" + mDictFilePath + ").");
595            return false;
596        }
597        if (!isMigratingDir.mkdir()) {
598            Log.e(TAG, "Cannot create a dir (" + isMigratingDir.getAbsolutePath()
599                    + ") to record migration.");
600            return false;
601        }
602        try {
603            final String tmpDictFilePath = mDictFilePath + DICT_FILE_NAME_SUFFIX_FOR_MIGRATION;
604            if (!migrateNative(mNativeDict, tmpDictFilePath, newFormatVersion)) {
605                return false;
606            }
607            close();
608            final File dictFile = new File(mDictFilePath);
609            final File tmpDictFile = new File(tmpDictFilePath);
610            if (!FileUtils.deleteRecursively(dictFile)) {
611                return false;
612            }
613            if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) {
614                return false;
615            }
616            loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
617                    dictFile.length(), mIsUpdatable);
618            return true;
619        } finally {
620            isMigratingDir.delete();
621        }
622    }
623
624    @UsedForTesting
625    public String getPropertyForGettingStats(final String query) {
626        if (!isValidDictionary()) {
627            return "";
628        }
629        return getPropertyNative(mNativeDict, query);
630    }
631
632    @Override
633    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
634        return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
635    }
636
637    @Override
638    public void close() {
639        synchronized (mDicTraverseSessions) {
640            final int sessionsSize = mDicTraverseSessions.size();
641            for (int index = 0; index < sessionsSize; ++index) {
642                final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
643                if (traverseSession != null) {
644                    traverseSession.close();
645                }
646            }
647            mDicTraverseSessions.clear();
648        }
649        closeInternalLocked();
650    }
651
652    private synchronized void closeInternalLocked() {
653        if (mNativeDict != 0) {
654            closeNative(mNativeDict);
655            mNativeDict = 0;
656        }
657    }
658
659    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
660    @Override
661    protected void finalize() throws Throwable {
662        try {
663            closeInternalLocked();
664        } finally {
665            super.finalize();
666        }
667    }
668}
669