BinaryDictionary.java revision d78a447d107ae60d2bb8f16a1b9797f5ebad2277
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20import android.util.SparseArray;
21
22import com.android.inputmethod.annotations.UsedForTesting;
23import com.android.inputmethod.keyboard.ProximityInfo;
24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25import com.android.inputmethod.latin.settings.NativeSuggestOptions;
26import com.android.inputmethod.latin.utils.CollectionUtils;
27import com.android.inputmethod.latin.utils.JniUtils;
28import com.android.inputmethod.latin.utils.LanguageModelParam;
29import com.android.inputmethod.latin.utils.StringUtils;
30import com.android.inputmethod.latin.utils.UnigramProperty;
31
32import java.io.File;
33import java.util.ArrayList;
34import java.util.Arrays;
35import java.util.Locale;
36import java.util.Map;
37
38/**
39 * Implements a static, compacted, binary dictionary of standard words.
40 */
41// TODO: All methods which should be locked need to have a suffix "Locked".
42public final class BinaryDictionary extends Dictionary {
43    private static final String TAG = BinaryDictionary.class.getSimpleName();
44
45    // Must be equal to MAX_WORD_LENGTH in native/jni/src/defines.h
46    private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
47    // Must be equal to MAX_RESULTS in native/jni/src/defines.h
48    private static final int MAX_RESULTS = 18;
49    // The cutoff returned by native for auto-commit confidence.
50    // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
51    private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;
52
53    @UsedForTesting
54    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
55    @UsedForTesting
56    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
57    @UsedForTesting
58    public static final String MAX_UNIGRAM_COUNT_QUERY = "MAX_UNIGRAM_COUNT";
59    @UsedForTesting
60    public static final String MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_COUNT";
61
62    public static final int NOT_A_VALID_TIMESTAMP = -1;
63
64    // Format to get unigram flags from native side via getUnigramPropertyNative().
65    private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT = 4;
66    private static final int FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX = 0;
67    private static final int FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX = 1;
68    private static final int FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX = 2;
69    private static final int FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX = 3;
70
71    // Format to get unigram historical info from native side via getUnigramPropertyNative().
72    private static final int FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT = 3;
73    private static final int FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX = 0;
74    private static final int FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX = 1;
75    private static final int FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX = 2;
76
77    private long mNativeDict;
78    private final Locale mLocale;
79    private final long mDictSize;
80    private final String mDictFilePath;
81    private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
82    private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
83    private final int[] mSpaceIndices = new int[MAX_RESULTS];
84    private final int[] mOutputScores = new int[MAX_RESULTS];
85    private final int[] mOutputTypes = new int[MAX_RESULTS];
86    // Only one result is ever used
87    private final int[] mOutputAutoCommitFirstWordConfidence = new int[1];
88
89    private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions();
90
91    private final SparseArray<DicTraverseSession> mDicTraverseSessions =
92            CollectionUtils.newSparseArray();
93
94    // TODO: There should be a way to remove used DicTraverseSession objects from
95    // {@code mDicTraverseSessions}.
96    private DicTraverseSession getTraverseSession(final int traverseSessionId) {
97        synchronized(mDicTraverseSessions) {
98            DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
99            if (traverseSession == null) {
100                traverseSession = mDicTraverseSessions.get(traverseSessionId);
101                if (traverseSession == null) {
102                    traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
103                    mDicTraverseSessions.put(traverseSessionId, traverseSession);
104                }
105            }
106            return traverseSession;
107        }
108    }
109
110    /**
111     * Constructor for the binary dictionary. This is supposed to be called from the
112     * dictionary factory.
113     * @param filename the name of the file to read through native code.
114     * @param offset the offset of the dictionary data within the file.
115     * @param length the length of the binary data.
116     * @param useFullEditDistance whether to use the full edit distance in suggestions
117     * @param dictType the dictionary type, as a human-readable string
118     * @param isUpdatable whether to open the dictionary file in writable mode.
119     */
120    public BinaryDictionary(final String filename, final long offset, final long length,
121            final boolean useFullEditDistance, final Locale locale, final String dictType,
122            final boolean isUpdatable) {
123        super(dictType);
124        mLocale = locale;
125        mDictSize = length;
126        mDictFilePath = filename;
127        mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
128        loadDictionary(filename, offset, length, isUpdatable);
129    }
130
131    static {
132        JniUtils.loadNativeLibrary();
133    }
134
135    private static native boolean createEmptyDictFileNative(String filePath, long dictVersion,
136            String[] attributeKeyStringArray, String[] attributeValueStringArray);
137    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
138            boolean isUpdatable);
139    private static native void flushNative(long dict, String filePath);
140    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
141    private static native void flushWithGCNative(long dict, String filePath);
142    private static native void closeNative(long dict);
143    private static native int getFormatVersionNative(long dict);
144    private static native int getProbabilityNative(long dict, int[] word);
145    private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
146    private static native void getUnigramPropertyNative(long dict, int[] word,
147            int[] outCodePoints, boolean[] outFlags, int[] outProbability,
148            int[] outHistoricalInfo, ArrayList<int[]> outShortcutTargets,
149            ArrayList<Integer> outShortcutProbabilities);
150    private static native int getSuggestionsNative(long dict, long proximityInfo,
151            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
152            int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
153            int[] suggestOptions, int[] prevWordCodePointArray,
154            int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes,
155            int[] outputAutoCommitFirstWordConfidence);
156    private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
157    private static native int editDistanceNative(int[] before, int[] after);
158    private static native void addUnigramWordNative(long dict, int[] word, int probability,
159            int[] shortcutTarget, int shortcutProbability, boolean isNotAWord,
160            boolean isBlacklisted, int timestamp);
161    private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
162            int probability, int timestamp);
163    private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
164    private static native int addMultipleDictionaryEntriesNative(long dict,
165            LanguageModelParam[] languageModelParams, int startIndex);
166    private static native int calculateProbabilityNative(long dict, int unigramProbability,
167            int bigramProbability);
168    private static native int setCurrentTimeForTestNative(int currentTime);
169    private static native String getPropertyNative(long dict, String query);
170
171    @UsedForTesting
172    public static boolean createEmptyDictFile(final String filePath, final long dictVersion,
173            final Map<String, String> attributeMap) {
174        final String[] keyArray = new String[attributeMap.size()];
175        final String[] valueArray = new String[attributeMap.size()];
176        int index = 0;
177        for (final String key : attributeMap.keySet()) {
178            keyArray[index] = key;
179            valueArray[index] = attributeMap.get(key);
180            index++;
181        }
182        return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray);
183    }
184
185    // TODO: Move native dict into session
186    private final void loadDictionary(final String path, final long startOffset,
187            final long length, final boolean isUpdatable) {
188        mNativeDict = openNative(path, startOffset, length, isUpdatable);
189    }
190
191    @Override
192    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
193            final String prevWord, final ProximityInfo proximityInfo,
194            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) {
195        return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords,
196                additionalFeaturesOptions, 0 /* sessionId */);
197    }
198
199    @Override
200    public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer,
201            final String prevWord, final ProximityInfo proximityInfo,
202            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
203            final int sessionId) {
204        if (!isValidDictionary()) return null;
205
206        Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE);
207        // TODO: toLowerCase in the native code
208        final int[] prevWordCodePointArray = (null == prevWord)
209                ? null : StringUtils.toCodePointArray(prevWord);
210        final int composerSize = composer.size();
211
212        final boolean isGesture = composer.isBatchMode();
213        if (composerSize <= 1 || !isGesture) {
214            if (composerSize > MAX_WORD_LENGTH - 1) return null;
215            for (int i = 0; i < composerSize; i++) {
216                mInputCodePoints[i] = composer.getCodeAt(i);
217            }
218        }
219
220        final InputPointers ips = composer.getInputPointers();
221        final int inputSize = isGesture ? ips.getPointerSize() : composerSize;
222        mNativeSuggestOptions.setIsGesture(isGesture);
223        mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions);
224        // proximityInfo and/or prevWordForBigrams may not be null.
225        final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
226                getTraverseSession(sessionId).getSession(), ips.getXCoordinates(),
227                ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints,
228                inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(),
229                prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices,
230                mOutputTypes, mOutputAutoCommitFirstWordConfidence);
231        final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
232        for (int j = 0; j < count; ++j) {
233            final int start = j * MAX_WORD_LENGTH;
234            int len = 0;
235            while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
236                ++len;
237            }
238            if (len > 0) {
239                final int flags = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_FLAGS;
240                if (blockOffensiveWords
241                        && 0 != (flags & SuggestedWordInfo.KIND_FLAG_POSSIBLY_OFFENSIVE)
242                        && 0 == (flags & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH)) {
243                    // If we block potentially offensive words, and if the word is possibly
244                    // offensive, then we don't output it unless it's also an exact match.
245                    continue;
246                }
247                final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND;
248                final int score = SuggestedWordInfo.KIND_WHITELIST == kind
249                        ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
250                // TODO: check that all users of the `kind' parameter are ready to accept
251                // flags too and pass mOutputTypes[j] instead of kind
252                suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
253                        score, kind, this /* sourceDict */,
254                        mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
255                        mOutputAutoCommitFirstWordConfidence[0]));
256            }
257        }
258        return suggestions;
259    }
260
261    public boolean isValidDictionary() {
262        return mNativeDict != 0;
263    }
264
265    public int getFormatVersion() {
266        return getFormatVersionNative(mNativeDict);
267    }
268
269    public static float calcNormalizedScore(final String before, final String after,
270            final int score) {
271        return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
272                StringUtils.toCodePointArray(after), score);
273    }
274
275    public static int editDistance(final String before, final String after) {
276        if (before == null || after == null) {
277            throw new IllegalArgumentException();
278        }
279        return editDistanceNative(StringUtils.toCodePointArray(before),
280                StringUtils.toCodePointArray(after));
281    }
282
283    @Override
284    public boolean isValidWord(final String word) {
285        return getFrequency(word) != NOT_A_PROBABILITY;
286    }
287
288    @Override
289    public int getFrequency(final String word) {
290        if (word == null) return NOT_A_PROBABILITY;
291        int[] codePoints = StringUtils.toCodePointArray(word);
292        return getProbabilityNative(mNativeDict, codePoints);
293    }
294
295    // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
296    // calls when checking for changes in an entire dictionary.
297    public boolean isValidBigram(final String word0, final String word1) {
298        return getBigramProbability(word0, word1) != NOT_A_PROBABILITY;
299    }
300
301    public int getBigramProbability(final String word0, final String word1) {
302        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY;
303        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
304        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
305        return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
306    }
307
308    @UsedForTesting
309    public UnigramProperty getUnigramProperty(final String word) {
310        if (TextUtils.isEmpty(word)) {
311            return null;
312        }
313        final int[] codePoints = StringUtils.toCodePointArray(word);
314        final int[] outCodePoints = new int[MAX_WORD_LENGTH];
315        final boolean[] outFlags = new boolean[FORMAT_UNIGRAM_PROPERTY_OUTPUT_FLAG_COUNT];
316        final int[] outProbability = new int[1];
317        final int[] outHistoricalInfo =
318                new int[FORMAT_UNIGRAM_PROPERTY_OUTPUT_HISTORICAL_INFO_COUNT];
319        final ArrayList<int[]> outShortcutTargets = CollectionUtils.newArrayList();
320        final ArrayList<Integer> outShortcutProbabilities = CollectionUtils.newArrayList();
321        getUnigramPropertyNative(mNativeDict, codePoints, outCodePoints, outFlags, outProbability,
322                outHistoricalInfo, outShortcutTargets, outShortcutProbabilities);
323        return new UnigramProperty(codePoints,
324                outFlags[FORMAT_UNIGRAM_PROPERTY_IS_NOT_A_WORD_INDEX],
325                outFlags[FORMAT_UNIGRAM_PROPERTY_IS_BLACKLISTED_INDEX],
326                outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_BIGRAMS_INDEX],
327                outFlags[FORMAT_UNIGRAM_PROPERTY_HAS_SHORTCUTS_INDEX], outProbability[0],
328                outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_TIMESTAMP_INDEX],
329                outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_LEVEL_INDEX],
330                outHistoricalInfo[FORMAT_UNIGRAM_PROPERTY_COUNT_INDEX],
331                outShortcutTargets, outShortcutProbabilities);
332    }
333
334    // Add a unigram entry to binary dictionary with unigram attributes in native code.
335    public void addUnigramWord(final String word, final int probability,
336            final String shortcutTarget, final int shortcutProbability, final boolean isNotAWord,
337            final boolean isBlacklisted, final int timestamp) {
338        if (TextUtils.isEmpty(word)) {
339            return;
340        }
341        final int[] codePoints = StringUtils.toCodePointArray(word);
342        final int[] shortcutTargetCodePoints = (shortcutTarget != null) ?
343                StringUtils.toCodePointArray(shortcutTarget) : null;
344        addUnigramWordNative(mNativeDict, codePoints, probability, shortcutTargetCodePoints,
345                shortcutProbability, isNotAWord, isBlacklisted, timestamp);
346    }
347
348    // Add a bigram entry to binary dictionary with timestamp in native code.
349    public void addBigramWords(final String word0, final String word1, final int probability,
350            final int timestamp) {
351        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
352            return;
353        }
354        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
355        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
356        addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability, timestamp);
357    }
358
359    // Remove a bigram entry form binary dictionary in native code.
360    public void removeBigramWords(final String word0, final String word1) {
361        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
362            return;
363        }
364        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
365        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
366        removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
367    }
368
369    public void addMultipleDictionaryEntries(final LanguageModelParam[] languageModelParams) {
370        if (!isValidDictionary()) return;
371        int processedParamCount = 0;
372        while (processedParamCount < languageModelParams.length) {
373            if (needsToRunGC(true /* mindsBlockByGC */)) {
374                flushWithGC();
375            }
376            processedParamCount = addMultipleDictionaryEntriesNative(mNativeDict,
377                    languageModelParams, processedParamCount);
378            if (processedParamCount <= 0) {
379                return;
380            }
381        }
382
383    }
384
385    private void reopen() {
386        close();
387        final File dictFile = new File(mDictFilePath);
388        // WARNING: Because we pass 0 as the offset and file.length() as the length, this can
389        // only be called for actual files. Right now it's only called by the flush() family of
390        // functions, which require an updatable dictionary, so it's okay. But beware.
391        loadDictionary(dictFile.getAbsolutePath(), 0 /* startOffset */,
392                dictFile.length(), true /* isUpdatable */);
393    }
394
395    public void flush() {
396        if (!isValidDictionary()) return;
397        flushNative(mNativeDict, mDictFilePath);
398        reopen();
399    }
400
401    public void flushWithGC() {
402        if (!isValidDictionary()) return;
403        flushWithGCNative(mNativeDict, mDictFilePath);
404        reopen();
405    }
406
407    /**
408     * Checks whether GC is needed to run or not.
409     * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
410     * the blocking in some situations such as in idle time or just before closing.
411     * @return whether GC is needed to run or not.
412     */
413    public boolean needsToRunGC(final boolean mindsBlockByGC) {
414        if (!isValidDictionary()) return false;
415        return needsToRunGCNative(mNativeDict, mindsBlockByGC);
416    }
417
418    @UsedForTesting
419    public int calculateProbability(final int unigramProbability, final int bigramProbability) {
420        if (!isValidDictionary()) return NOT_A_PROBABILITY;
421        return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
422    }
423
424    /**
425     * Control the current time to be used in the native code. If currentTime >= 0, this method sets
426     * the current time and gets into test mode.
427     * In test mode, set timestamp is used as the current time in the native code.
428     * If currentTime < 0, quit the test mode and returns to using time() to get the current time.
429     *
430     * @param currentTime seconds since the unix epoch
431     * @return current time got in the native code.
432     */
433    @UsedForTesting
434    public static int setCurrentTimeForTest(final int currentTime) {
435        return setCurrentTimeForTestNative(currentTime);
436    }
437
438    @UsedForTesting
439    public String getPropertyForTest(final String query) {
440        if (!isValidDictionary()) return "";
441        return getPropertyNative(mNativeDict, query);
442    }
443
444    @Override
445    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
446        return candidate.mAutoCommitFirstWordConfidence > CONFIDENCE_TO_AUTO_COMMIT;
447    }
448
449    @Override
450    public void close() {
451        synchronized (mDicTraverseSessions) {
452            final int sessionsSize = mDicTraverseSessions.size();
453            for (int index = 0; index < sessionsSize; ++index) {
454                final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
455                if (traverseSession != null) {
456                    traverseSession.close();
457                }
458            }
459            mDicTraverseSessions.clear();
460        }
461        closeInternalLocked();
462    }
463
464    private synchronized void closeInternalLocked() {
465        if (mNativeDict != 0) {
466            closeNative(mNativeDict);
467            mNativeDict = 0;
468        }
469    }
470
471    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
472    @Override
473    protected void finalize() throws Throwable {
474        try {
475            closeInternalLocked();
476        } finally {
477            super.finalize();
478        }
479    }
480}
481