BinaryDictionary.java revision 03cb8f751a7f35e9159c724a2d25528b86287b57
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20import android.util.SparseArray;
21
22import com.android.inputmethod.annotations.UsedForTesting;
23import com.android.inputmethod.keyboard.ProximityInfo;
24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25import com.android.inputmethod.latin.settings.NativeSuggestOptions;
26import com.android.inputmethod.latin.utils.CollectionUtils;
27import com.android.inputmethod.latin.utils.JniUtils;
28import com.android.inputmethod.latin.utils.StringUtils;
29
30import java.io.File;
31import java.util.ArrayList;
32import java.util.Arrays;
33import java.util.Locale;
34import java.util.Map;
35
36/**
37 * Implements a static, compacted, binary dictionary of standard words.
38 */
39// TODO: All methods which should be locked need to have a suffix "Locked".
40public final class BinaryDictionary extends Dictionary {
    // Simple class name. NOTE(review): presumably a logging tag; no use is visible in this file.
    private static final String TAG = BinaryDictionary.class.getSimpleName();

    // Must be equal to MAX_WORD_LENGTH in native/jni/src/defines.h
    private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
    // Must be equal to MAX_RESULTS in native/jni/src/defines.h
    private static final int MAX_RESULTS = 18;
    // Required space count for auto commit.
    // TODO: Remove this heuristic.
    private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3;

    // NOTE(review): presumably query strings for getPropertyForTests() - confirm against the
    // tests that use them.
    @UsedForTesting
    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
    @UsedForTesting
    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";

    // Handle returned by openNative(). 0 means no dictionary is open; see isValidDictionary().
    private long mNativeDict;
    // Locale given to the constructor; handed to each DicTraverseSession this object creates.
    private final Locale mLocale;
    // Length of the dictionary data, as given to the constructor.
    private final long mDictSize;
    // File name given to the constructor; used again when flushing and reopening.
    private final String mDictFilePath;
    // Buffers exchanged with getSuggestionsNative(). They are instance fields, so every call
    // to getSuggestionsWithSessionId() reuses the same arrays.
    private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
    // Result words are read back in fixed slots of MAX_WORD_LENGTH code points each.
    private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
    // Passed as the native "outputIndices" argument and reported per suggestion as
    // indexOfTouchPointOfSecondWord.
    private final int[] mSpaceIndices = new int[MAX_RESULTS];
    private final int[] mOutputScores = new int[MAX_RESULTS];
    // Per-result value combining a kind (KIND_MASK_KIND) and flags (KIND_MASK_FLAGS).
    private final int[] mOutputTypes = new int[MAX_RESULTS];
    // Only index 0 of this array is read by getSuggestionsWithSessionId().
    private final int[] mOutputAutoCommitFirstWordConfidence = new int[MAX_RESULTS];

    // Options handed to getSuggestionsNative() through getOptions().
    private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions();

    // Traverse sessions keyed by session id. Also serves as the lock guarding its own contents.
    private final SparseArray<DicTraverseSession> mDicTraverseSessions =
            CollectionUtils.newSparseArray();
71
72    // TODO: There should be a way to remove used DicTraverseSession objects from
73    // {@code mDicTraverseSessions}.
74    private DicTraverseSession getTraverseSession(final int traverseSessionId) {
75        synchronized(mDicTraverseSessions) {
76            DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
77            if (traverseSession == null) {
78                traverseSession = mDicTraverseSessions.get(traverseSessionId);
79                if (traverseSession == null) {
80                    traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
81                    mDicTraverseSessions.put(traverseSessionId, traverseSession);
82                }
83            }
84            return traverseSession;
85        }
86    }
87
88    /**
89     * Constructor for the binary dictionary. This is supposed to be called from the
90     * dictionary factory.
91     * @param filename the name of the file to read through native code.
92     * @param offset the offset of the dictionary data within the file.
93     * @param length the length of the binary data.
94     * @param useFullEditDistance whether to use the full edit distance in suggestions
95     * @param dictType the dictionary type, as a human-readable string
96     * @param isUpdatable whether to open the dictionary file in writable mode.
97     */
98    public BinaryDictionary(final String filename, final long offset, final long length,
99            final boolean useFullEditDistance, final Locale locale, final String dictType,
100            final boolean isUpdatable) {
101        super(dictType);
102        mLocale = locale;
103        mDictSize = length;
104        mDictFilePath = filename;
105        mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
106        loadDictionary(filename, offset, length, isUpdatable);
107    }
108
    // Runs once, when this class is initialized.
    // NOTE(review): presumably this loads the JNI library that implements the native methods
    // of this class - confirm in JniUtils.
    static {
        JniUtils.loadNativeLibrary();
    }
112
    // Native entry points. Every method that takes a "long dict" receives the handle stored in
    // mNativeDict, which is the value returned by openNative().

    // Backs createEmptyDictFile(); attributes arrive as parallel key and value arrays.
    private static native boolean createEmptyDictFileNative(String filePath, long dictVersion,
            String[] attributeKeyStringArray, String[] attributeValueStringArray);
    // Returns the handle stored in mNativeDict; 0 is treated as "no dictionary".
    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
            boolean isUpdatable);
    private static native void flushNative(long dict, String filePath);
    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
    private static native void flushWithGCNative(long dict, String filePath);
    private static native void closeNative(long dict);
    // Words are passed as arrays of code points.
    private static native int getProbabilityNative(long dict, int[] word);
    private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
    // The return value is used as the number of results to read back from the output arrays.
    private static native int getSuggestionsNative(long dict, long proximityInfo,
            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
            int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
            int[] suggestOptions, int[] prevWordCodePointArray,
            int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes,
            int[] outputAutoCommitFirstWordConfidence);
    // The next two take no dictionary handle; they back the static helpers
    // calcNormalizedScore() and editDistance().
    private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
    private static native int editDistanceNative(int[] before, int[] after);
    private static native void addUnigramWordNative(long dict, int[] word, int probability);
    private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
            int probability);
    private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
    private static native int calculateProbabilityNative(long dict, int unigramProbability,
            int bigramProbability);
    private static native String getPropertyNative(long dict, String query);
138
139    @UsedForTesting
140    public static boolean createEmptyDictFile(final String filePath, final long dictVersion,
141            final Map<String, String> attributeMap) {
142        final String[] keyArray = new String[attributeMap.size()];
143        final String[] valueArray = new String[attributeMap.size()];
144        int index = 0;
145        for (final String key : attributeMap.keySet()) {
146            keyArray[index] = key;
147            valueArray[index] = attributeMap.get(key);
148            index++;
149        }
150        return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray);
151    }
152
153    // TODO: Move native dict into session
154    private final void loadDictionary(final String path, final long startOffset,
155            final long length, final boolean isUpdatable) {
156        mNativeDict = openNative(path, startOffset, length, isUpdatable);
157    }
158
159    @Override
160    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
161            final String prevWord, final ProximityInfo proximityInfo,
162            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) {
163        return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords,
164                additionalFeaturesOptions, 0 /* sessionId */);
165    }
166
167    @Override
168    public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer,
169            final String prevWord, final ProximityInfo proximityInfo,
170            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
171            final int sessionId) {
172        if (!isValidDictionary()) return null;
173
174        Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE);
175        // TODO: toLowerCase in the native code
176        final int[] prevWordCodePointArray = (null == prevWord)
177                ? null : StringUtils.toCodePointArray(prevWord);
178        final int composerSize = composer.size();
179
180        final boolean isGesture = composer.isBatchMode();
181        if (composerSize <= 1 || !isGesture) {
182            if (composerSize > MAX_WORD_LENGTH - 1) return null;
183            for (int i = 0; i < composerSize; i++) {
184                mInputCodePoints[i] = composer.getCodeAt(i);
185            }
186        }
187
188        final InputPointers ips = composer.getInputPointers();
189        final int inputSize = isGesture ? ips.getPointerSize() : composerSize;
190        mNativeSuggestOptions.setIsGesture(isGesture);
191        mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions);
192        // proximityInfo and/or prevWordForBigrams may not be null.
193        final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
194                getTraverseSession(sessionId).getSession(), ips.getXCoordinates(),
195                ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints,
196                inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(),
197                prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices,
198                mOutputTypes, mOutputAutoCommitFirstWordConfidence);
199        final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
200        for (int j = 0; j < count; ++j) {
201            final int start = j * MAX_WORD_LENGTH;
202            int len = 0;
203            while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
204                ++len;
205            }
206            if (len > 0) {
207                final int flags = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_FLAGS;
208                if (blockOffensiveWords
209                        && 0 != (flags & SuggestedWordInfo.KIND_FLAG_POSSIBLY_OFFENSIVE)
210                        && 0 == (flags & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH)) {
211                    // If we block potentially offensive words, and if the word is possibly
212                    // offensive, then we don't output it unless it's also an exact match.
213                    continue;
214                }
215                final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND;
216                final int score = SuggestedWordInfo.KIND_WHITELIST == kind
217                        ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
218                // TODO: check that all users of the `kind' parameter are ready to accept
219                // flags too and pass mOutputTypes[j] instead of kind
220                suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
221                        score, kind, this /* sourceDict */,
222                        mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
223                        mOutputAutoCommitFirstWordConfidence[0]));
224            }
225        }
226        return suggestions;
227    }
228
229    public boolean isValidDictionary() {
230        return mNativeDict != 0;
231    }
232
233    public static float calcNormalizedScore(final String before, final String after,
234            final int score) {
235        return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
236                StringUtils.toCodePointArray(after), score);
237    }
238
239    public static int editDistance(final String before, final String after) {
240        if (before == null || after == null) {
241            throw new IllegalArgumentException();
242        }
243        return editDistanceNative(StringUtils.toCodePointArray(before),
244                StringUtils.toCodePointArray(after));
245    }
246
247    @Override
248    public boolean isValidWord(final String word) {
249        return getFrequency(word) != NOT_A_PROBABILITY;
250    }
251
252    @Override
253    public int getFrequency(final String word) {
254        if (word == null) return NOT_A_PROBABILITY;
255        int[] codePoints = StringUtils.toCodePointArray(word);
256        return getProbabilityNative(mNativeDict, codePoints);
257    }
258
259    // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
260    // calls when checking for changes in an entire dictionary.
261    public boolean isValidBigram(final String word0, final String word1) {
262        return getBigramProbability(word0, word1) != NOT_A_PROBABILITY;
263    }
264
265    public int getBigramProbability(final String word0, final String word1) {
266        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY;
267        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
268        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
269        return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
270    }
271
272    // Add a unigram entry to binary dictionary in native code.
273    public void addUnigramWord(final String word, final int probability) {
274        if (TextUtils.isEmpty(word)) {
275            return;
276        }
277        final int[] codePoints = StringUtils.toCodePointArray(word);
278        addUnigramWordNative(mNativeDict, codePoints, probability);
279    }
280
281    // Add a bigram entry to binary dictionary in native code.
282    public void addBigramWords(final String word0, final String word1, final int probability) {
283        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
284            return;
285        }
286        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
287        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
288        addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability);
289    }
290
291    // Remove a bigram entry form binary dictionary in native code.
292    public void removeBigramWords(final String word0, final String word1) {
293        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
294            return;
295        }
296        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
297        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
298        removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
299    }
300
301    private void reopen() {
302        close();
303        final File dictFile = new File(mDictFilePath);
304        mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */,
305                dictFile.length(), true /* isUpdatable */);
306    }
307
308    public void flush() {
309        if (!isValidDictionary()) return;
310        flushNative(mNativeDict, mDictFilePath);
311        reopen();
312    }
313
314    public void flushWithGC() {
315        if (!isValidDictionary()) return;
316        flushWithGCNative(mNativeDict, mDictFilePath);
317        reopen();
318    }
319
320    /**
321     * Checks whether GC is needed to run or not.
322     * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
323     * the blocking in some situations such as in idle time or just before closing.
324     * @return whether GC is needed to run or not.
325     */
326    public boolean needsToRunGC(final boolean mindsBlockByGC) {
327        if (!isValidDictionary()) return false;
328        return needsToRunGCNative(mNativeDict, mindsBlockByGC);
329    }
330
331    @UsedForTesting
332    public int calculateProbability(final int unigramProbability, final int bigramProbability) {
333        if (!isValidDictionary()) return NOT_A_PROBABILITY;
334        return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
335    }
336
337    @UsedForTesting
338    public String getPropertyForTests(String query) {
339        if (!isValidDictionary()) return "";
340        return getPropertyNative(mNativeDict, query);
341    }
342
343    @Override
344    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
345        // TODO: actually use the confidence rather than use this completely broken heuristic
346        final String word = candidate.mWord;
347        final int length = word.length();
348        int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT;
349        for (int i = 0; i < length; ++i) {
350            // This is okay because no low-surrogate and no high-surrogate can ever match the
351            // space character, so we don't need to take care of iterating on code points.
352            if (Constants.CODE_SPACE == word.charAt(i)) {
353                if (0 >= --remainingSpaces) return true;
354            }
355        }
356        return false;
357    }
358
359    @Override
360    public void close() {
361        synchronized (mDicTraverseSessions) {
362            final int sessionsSize = mDicTraverseSessions.size();
363            for (int index = 0; index < sessionsSize; ++index) {
364                final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
365                if (traverseSession != null) {
366                    traverseSession.close();
367                }
368            }
369            mDicTraverseSessions.clear();
370        }
371        closeInternalLocked();
372    }
373
374    private synchronized void closeInternalLocked() {
375        if (mNativeDict != 0) {
376            closeNative(mNativeDict);
377            mNativeDict = 0;
378        }
379    }
380
    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
    /**
     * Closes the native dictionary if it is still open when this object is finalized.
     * Only the native dictionary handle is closed here; the traverse sessions are not touched.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            closeInternalLocked();
        } finally {
            // Always chain to the superclass finalizer, even if closing threw.
            super.finalize();
        }
    }
390}
391