BinaryDictionary.java revision 060917ed9abff0e777e6b99e43cc4b826d1ac0c8
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20import android.util.SparseArray;
21
22import com.android.inputmethod.annotations.UsedForTesting;
23import com.android.inputmethod.keyboard.ProximityInfo;
24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25import com.android.inputmethod.latin.settings.NativeSuggestOptions;
26import com.android.inputmethod.latin.utils.CollectionUtils;
27import com.android.inputmethod.latin.utils.JniUtils;
28import com.android.inputmethod.latin.utils.StringUtils;
29
30import java.util.ArrayList;
31import java.util.Arrays;
32import java.util.Locale;
33
34/**
35 * Implements a static, compacted, binary dictionary of standard words.
36 */
37public final class BinaryDictionary extends Dictionary {
38    private static final String TAG = BinaryDictionary.class.getSimpleName();
39
40    // Must be equal to MAX_WORD_LENGTH in native/jni/src/defines.h
41    private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
42    // Must be equal to MAX_RESULTS in native/jni/src/defines.h
43    private static final int MAX_RESULTS = 18;
44    // Required space count for auto commit.
45    // TODO: Remove this heuristic.
46    private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3;
47
48    private long mNativeDict;
49    private final Locale mLocale;
50    private final long mDictSize;
51    private final String mDictFilePath;
52    private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
53    private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
54    private final int[] mSpaceIndices = new int[MAX_RESULTS];
55    private final int[] mOutputScores = new int[MAX_RESULTS];
56    private final int[] mOutputTypes = new int[MAX_RESULTS];
57    private final int[] mOutputAutoCommitFirstWordConfidence = new int[MAX_RESULTS];
58
59    private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions();
60
61    private final SparseArray<DicTraverseSession> mDicTraverseSessions =
62            CollectionUtils.newSparseArray();
63
64    // TODO: There should be a way to remove used DicTraverseSession objects from
65    // {@code mDicTraverseSessions}.
66    private DicTraverseSession getTraverseSession(final int traverseSessionId) {
67        synchronized(mDicTraverseSessions) {
68            DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
69            if (traverseSession == null) {
70                traverseSession = mDicTraverseSessions.get(traverseSessionId);
71                if (traverseSession == null) {
72                    traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
73                    mDicTraverseSessions.put(traverseSessionId, traverseSession);
74                }
75            }
76            return traverseSession;
77        }
78    }
79
80    /**
81     * Constructor for the binary dictionary. This is supposed to be called from the
82     * dictionary factory.
83     * @param filename the name of the file to read through native code.
84     * @param offset the offset of the dictionary data within the file.
85     * @param length the length of the binary data.
86     * @param useFullEditDistance whether to use the full edit distance in suggestions
87     * @param dictType the dictionary type, as a human-readable string
88     * @param isUpdatable whether to open the dictionary file in writable mode.
89     */
90    public BinaryDictionary(final String filename, final long offset, final long length,
91            final boolean useFullEditDistance, final Locale locale, final String dictType,
92            final boolean isUpdatable) {
93        super(dictType);
94        mLocale = locale;
95        mDictSize = length;
96        mDictFilePath = filename;
97        mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
98        loadDictionary(filename, offset, length, isUpdatable);
99    }
100
101    static {
102        JniUtils.loadNativeLibrary();
103    }
104
105    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
106            boolean isUpdatable);
107    private static native void flushNative(long dict, String filePath);
108    private static native boolean needsToRunGCNative(long dict);
109    private static native void flushWithGCNative(long dict, String filePath);
110    private static native void closeNative(long dict);
111    private static native int getProbabilityNative(long dict, int[] word);
112    private static native boolean isValidBigramNative(long dict, int[] word0, int[] word1);
113    private static native int getSuggestionsNative(long dict, long proximityInfo,
114            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
115            int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
116            int[] suggestOptions, int[] prevWordCodePointArray,
117            int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes,
118            int[] outputAutoCommitFirstWordConfidence);
119    private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
120    private static native int editDistanceNative(int[] before, int[] after);
121    private static native void addUnigramWordNative(long dict, int[] word, int probability);
122    private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
123            int probability);
124    private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
125
126    // TODO: Move native dict into session
127    private final void loadDictionary(final String path, final long startOffset,
128            final long length, final boolean isUpdatable) {
129        mNativeDict = openNative(path, startOffset, length, isUpdatable);
130    }
131
132    @Override
133    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
134            final String prevWord, final ProximityInfo proximityInfo,
135            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) {
136        return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords,
137                additionalFeaturesOptions, 0 /* sessionId */);
138    }
139
140    @Override
141    public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer,
142            final String prevWord, final ProximityInfo proximityInfo,
143            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
144            final int sessionId) {
145        if (!isValidDictionary()) return null;
146
147        Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE);
148        // TODO: toLowerCase in the native code
149        final int[] prevWordCodePointArray = (null == prevWord)
150                ? null : StringUtils.toCodePointArray(prevWord);
151        final int composerSize = composer.size();
152
153        final boolean isGesture = composer.isBatchMode();
154        if (composerSize <= 1 || !isGesture) {
155            if (composerSize > MAX_WORD_LENGTH - 1) return null;
156            for (int i = 0; i < composerSize; i++) {
157                mInputCodePoints[i] = composer.getCodeAt(i);
158            }
159        }
160
161        final InputPointers ips = composer.getInputPointers();
162        final int inputSize = isGesture ? ips.getPointerSize() : composerSize;
163        mNativeSuggestOptions.setIsGesture(isGesture);
164        mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions);
165        // proximityInfo and/or prevWordForBigrams may not be null.
166        final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
167                getTraverseSession(sessionId).getSession(), ips.getXCoordinates(),
168                ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints,
169                inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(),
170                prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices,
171                mOutputTypes, mOutputAutoCommitFirstWordConfidence);
172        final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
173        for (int j = 0; j < count; ++j) {
174            final int start = j * MAX_WORD_LENGTH;
175            int len = 0;
176            while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
177                ++len;
178            }
179            if (len > 0) {
180                final int flags = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_FLAGS;
181                if (blockOffensiveWords
182                        && 0 != (flags & SuggestedWordInfo.KIND_FLAG_POSSIBLY_OFFENSIVE)
183                        && 0 == (flags & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH)) {
184                    // If we block potentially offensive words, and if the word is possibly
185                    // offensive, then we don't output it unless it's also an exact match.
186                    continue;
187                }
188                final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND;
189                final int score = SuggestedWordInfo.KIND_WHITELIST == kind
190                        ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
191                // TODO: check that all users of the `kind' parameter are ready to accept
192                // flags too and pass mOutputTypes[j] instead of kind
193                suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
194                        score, kind, this /* sourceDict */,
195                        mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
196                        mOutputAutoCommitFirstWordConfidence[0]));
197            }
198        }
199        return suggestions;
200    }
201
202    public boolean isValidDictionary() {
203        return mNativeDict != 0;
204    }
205
206    public static float calcNormalizedScore(final String before, final String after,
207            final int score) {
208        return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
209                StringUtils.toCodePointArray(after), score);
210    }
211
212    public static int editDistance(final String before, final String after) {
213        if (before == null || after == null) {
214            throw new IllegalArgumentException();
215        }
216        return editDistanceNative(StringUtils.toCodePointArray(before),
217                StringUtils.toCodePointArray(after));
218    }
219
220    @Override
221    public boolean isValidWord(final String word) {
222        return getFrequency(word) >= 0;
223    }
224
225    @Override
226    public int getFrequency(final String word) {
227        if (word == null) return -1;
228        int[] codePoints = StringUtils.toCodePointArray(word);
229        return getProbabilityNative(mNativeDict, codePoints);
230    }
231
232    // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
233    // calls when checking for changes in an entire dictionary.
234    public boolean isValidBigram(final String word0, final String word1) {
235        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return false;
236        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
237        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
238        return isValidBigramNative(mNativeDict, codePoints0, codePoints1);
239    }
240
241    // Add a unigram entry to binary dictionary in native code.
242    public void addUnigramWord(final String word, final int probability) {
243        if (TextUtils.isEmpty(word)) {
244            return;
245        }
246        final int[] codePoints = StringUtils.toCodePointArray(word);
247        addUnigramWordNative(mNativeDict, codePoints, probability);
248    }
249
250    // Add a bigram entry to binary dictionary in native code.
251    public void addBigramWords(final String word0, final String word1, final int probability) {
252        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
253            return;
254        }
255        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
256        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
257        addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability);
258    }
259
260    // Remove a bigram entry form binary dictionary in native code.
261    public void removeBigramWords(final String word0, final String word1) {
262        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
263            return;
264        }
265        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
266        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
267        removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
268    }
269
270    @UsedForTesting
271    public void flush() {
272        if (!isValidDictionary()) return;
273        flushNative(mNativeDict, mDictFilePath);
274    }
275
276    @UsedForTesting
277    public void flushWithGC() {
278        if (!isValidDictionary()) return;
279        flushWithGCNative(mNativeDict, mDictFilePath);
280    }
281
282    @UsedForTesting
283    public boolean needsToRunGC() {
284        if (!isValidDictionary()) return false;
285        return needsToRunGCNative(mNativeDict);
286    }
287
288    @Override
289    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
290        // TODO: actually use the confidence rather than use this completely broken heuristic
291        final String word = candidate.mWord;
292        final int length = word.length();
293        int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT;
294        for (int i = 0; i < length; ++i) {
295            // This is okay because no low-surrogate and no high-surrogate can ever match the
296            // space character, so we don't need to take care of iterating on code points.
297            if (Constants.CODE_SPACE == word.charAt(i)) {
298                if (0 >= --remainingSpaces) return true;
299            }
300        }
301        return false;
302    }
303
304    @Override
305    public void close() {
306        synchronized (mDicTraverseSessions) {
307            final int sessionsSize = mDicTraverseSessions.size();
308            for (int index = 0; index < sessionsSize; ++index) {
309                final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
310                if (traverseSession != null) {
311                    traverseSession.close();
312                }
313            }
314        }
315        closeInternal();
316    }
317
318    private synchronized void closeInternal() {
319        if (mNativeDict != 0) {
320            closeNative(mNativeDict);
321            mNativeDict = 0;
322        }
323    }
324
325    @Override
326    protected void finalize() throws Throwable {
327        try {
328            closeInternal();
329        } finally {
330            super.finalize();
331        }
332    }
333}
334