BinaryDictionary.java revision bb57090f1da9d1fc5a0eda9b627d3f8c8b25ab42
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.text.TextUtils;
20import android.util.SparseArray;
21
22import com.android.inputmethod.annotations.UsedForTesting;
23import com.android.inputmethod.keyboard.ProximityInfo;
24import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25import com.android.inputmethod.latin.settings.NativeSuggestOptions;
26import com.android.inputmethod.latin.utils.CollectionUtils;
27import com.android.inputmethod.latin.utils.JniUtils;
28import com.android.inputmethod.latin.utils.StringUtils;
29
30import java.io.File;
31import java.util.ArrayList;
32import java.util.Arrays;
33import java.util.Locale;
34import java.util.Map;
35
36/**
37 * Implements a static, compacted, binary dictionary of standard words.
38 */
39// TODO: All methods which should be locked need to have a suffix "Locked".
40public final class BinaryDictionary extends Dictionary {
41    private static final String TAG = BinaryDictionary.class.getSimpleName();
42
43    // Must be equal to MAX_WORD_LENGTH in native/jni/src/defines.h
44    private static final int MAX_WORD_LENGTH = Constants.DICTIONARY_MAX_WORD_LENGTH;
45    // Must be equal to MAX_RESULTS in native/jni/src/defines.h
46    private static final int MAX_RESULTS = 18;
47    // Required space count for auto commit.
48    // TODO: Remove this heuristic.
49    private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3;
50
51    @UsedForTesting
52    public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
53    @UsedForTesting
54    public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
55
56    private long mNativeDict;
57    private final Locale mLocale;
58    private final long mDictSize;
59    private final String mDictFilePath;
60    private final int[] mInputCodePoints = new int[MAX_WORD_LENGTH];
61    private final int[] mOutputCodePoints = new int[MAX_WORD_LENGTH * MAX_RESULTS];
62    private final int[] mSpaceIndices = new int[MAX_RESULTS];
63    private final int[] mOutputScores = new int[MAX_RESULTS];
64    private final int[] mOutputTypes = new int[MAX_RESULTS];
65    // Only one result is ever used
66    private final int[] mOutputAutoCommitFirstWordConfidence = new int[1];
67
68    private final NativeSuggestOptions mNativeSuggestOptions = new NativeSuggestOptions();
69
70    private final SparseArray<DicTraverseSession> mDicTraverseSessions =
71            CollectionUtils.newSparseArray();
72
73    // TODO: There should be a way to remove used DicTraverseSession objects from
74    // {@code mDicTraverseSessions}.
75    private DicTraverseSession getTraverseSession(final int traverseSessionId) {
76        synchronized(mDicTraverseSessions) {
77            DicTraverseSession traverseSession = mDicTraverseSessions.get(traverseSessionId);
78            if (traverseSession == null) {
79                traverseSession = mDicTraverseSessions.get(traverseSessionId);
80                if (traverseSession == null) {
81                    traverseSession = new DicTraverseSession(mLocale, mNativeDict, mDictSize);
82                    mDicTraverseSessions.put(traverseSessionId, traverseSession);
83                }
84            }
85            return traverseSession;
86        }
87    }
88
89    /**
90     * Constructor for the binary dictionary. This is supposed to be called from the
91     * dictionary factory.
92     * @param filename the name of the file to read through native code.
93     * @param offset the offset of the dictionary data within the file.
94     * @param length the length of the binary data.
95     * @param useFullEditDistance whether to use the full edit distance in suggestions
96     * @param dictType the dictionary type, as a human-readable string
97     * @param isUpdatable whether to open the dictionary file in writable mode.
98     */
99    public BinaryDictionary(final String filename, final long offset, final long length,
100            final boolean useFullEditDistance, final Locale locale, final String dictType,
101            final boolean isUpdatable) {
102        super(dictType);
103        mLocale = locale;
104        mDictSize = length;
105        mDictFilePath = filename;
106        mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
107        loadDictionary(filename, offset, length, isUpdatable);
108    }
109
110    static {
111        JniUtils.loadNativeLibrary();
112    }
113
114    private static native boolean createEmptyDictFileNative(String filePath, long dictVersion,
115            String[] attributeKeyStringArray, String[] attributeValueStringArray);
116    private static native long openNative(String sourceDir, long dictOffset, long dictSize,
117            boolean isUpdatable);
118    private static native void flushNative(long dict, String filePath);
119    private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
120    private static native void flushWithGCNative(long dict, String filePath);
121    private static native void closeNative(long dict);
122    private static native int getProbabilityNative(long dict, int[] word);
123    private static native int getBigramProbabilityNative(long dict, int[] word0, int[] word1);
124    private static native int getSuggestionsNative(long dict, long proximityInfo,
125            long traverseSession, int[] xCoordinates, int[] yCoordinates, int[] times,
126            int[] pointerIds, int[] inputCodePoints, int inputSize, int commitPoint,
127            int[] suggestOptions, int[] prevWordCodePointArray,
128            int[] outputCodePoints, int[] outputScores, int[] outputIndices, int[] outputTypes,
129            int[] outputAutoCommitFirstWordConfidence);
130    private static native float calcNormalizedScoreNative(int[] before, int[] after, int score);
131    private static native int editDistanceNative(int[] before, int[] after);
132    private static native void addUnigramWordNative(long dict, int[] word, int probability);
133    private static native void addBigramWordsNative(long dict, int[] word0, int[] word1,
134            int probability);
135    private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
136    private static native int calculateProbabilityNative(long dict, int unigramProbability,
137            int bigramProbability);
138    private static native String getPropertyNative(long dict, String query);
139
140    @UsedForTesting
141    public static boolean createEmptyDictFile(final String filePath, final long dictVersion,
142            final Map<String, String> attributeMap) {
143        final String[] keyArray = new String[attributeMap.size()];
144        final String[] valueArray = new String[attributeMap.size()];
145        int index = 0;
146        for (final String key : attributeMap.keySet()) {
147            keyArray[index] = key;
148            valueArray[index] = attributeMap.get(key);
149            index++;
150        }
151        return createEmptyDictFileNative(filePath, dictVersion, keyArray, valueArray);
152    }
153
154    // TODO: Move native dict into session
155    private final void loadDictionary(final String path, final long startOffset,
156            final long length, final boolean isUpdatable) {
157        mNativeDict = openNative(path, startOffset, length, isUpdatable);
158    }
159
160    @Override
161    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
162            final String prevWord, final ProximityInfo proximityInfo,
163            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) {
164        return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords,
165                additionalFeaturesOptions, 0 /* sessionId */);
166    }
167
168    @Override
169    public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer,
170            final String prevWord, final ProximityInfo proximityInfo,
171            final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
172            final int sessionId) {
173        if (!isValidDictionary()) return null;
174
175        Arrays.fill(mInputCodePoints, Constants.NOT_A_CODE);
176        // TODO: toLowerCase in the native code
177        final int[] prevWordCodePointArray = (null == prevWord)
178                ? null : StringUtils.toCodePointArray(prevWord);
179        final int composerSize = composer.size();
180
181        final boolean isGesture = composer.isBatchMode();
182        if (composerSize <= 1 || !isGesture) {
183            if (composerSize > MAX_WORD_LENGTH - 1) return null;
184            for (int i = 0; i < composerSize; i++) {
185                mInputCodePoints[i] = composer.getCodeAt(i);
186            }
187        }
188
189        final InputPointers ips = composer.getInputPointers();
190        final int inputSize = isGesture ? ips.getPointerSize() : composerSize;
191        mNativeSuggestOptions.setIsGesture(isGesture);
192        mNativeSuggestOptions.setAdditionalFeaturesOptions(additionalFeaturesOptions);
193        // proximityInfo and/or prevWordForBigrams may not be null.
194        final int count = getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(),
195                getTraverseSession(sessionId).getSession(), ips.getXCoordinates(),
196                ips.getYCoordinates(), ips.getTimes(), ips.getPointerIds(), mInputCodePoints,
197                inputSize, 0 /* commitPoint */, mNativeSuggestOptions.getOptions(),
198                prevWordCodePointArray, mOutputCodePoints, mOutputScores, mSpaceIndices,
199                mOutputTypes, mOutputAutoCommitFirstWordConfidence);
200        final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
201        for (int j = 0; j < count; ++j) {
202            final int start = j * MAX_WORD_LENGTH;
203            int len = 0;
204            while (len < MAX_WORD_LENGTH && mOutputCodePoints[start + len] != 0) {
205                ++len;
206            }
207            if (len > 0) {
208                final int flags = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_FLAGS;
209                if (blockOffensiveWords
210                        && 0 != (flags & SuggestedWordInfo.KIND_FLAG_POSSIBLY_OFFENSIVE)
211                        && 0 == (flags & SuggestedWordInfo.KIND_FLAG_EXACT_MATCH)) {
212                    // If we block potentially offensive words, and if the word is possibly
213                    // offensive, then we don't output it unless it's also an exact match.
214                    continue;
215                }
216                final int kind = mOutputTypes[j] & SuggestedWordInfo.KIND_MASK_KIND;
217                final int score = SuggestedWordInfo.KIND_WHITELIST == kind
218                        ? SuggestedWordInfo.MAX_SCORE : mOutputScores[j];
219                // TODO: check that all users of the `kind' parameter are ready to accept
220                // flags too and pass mOutputTypes[j] instead of kind
221                suggestions.add(new SuggestedWordInfo(new String(mOutputCodePoints, start, len),
222                        score, kind, this /* sourceDict */,
223                        mSpaceIndices[j] /* indexOfTouchPointOfSecondWord */,
224                        mOutputAutoCommitFirstWordConfidence[0]));
225            }
226        }
227        return suggestions;
228    }
229
230    public boolean isValidDictionary() {
231        return mNativeDict != 0;
232    }
233
234    public static float calcNormalizedScore(final String before, final String after,
235            final int score) {
236        return calcNormalizedScoreNative(StringUtils.toCodePointArray(before),
237                StringUtils.toCodePointArray(after), score);
238    }
239
240    public static int editDistance(final String before, final String after) {
241        if (before == null || after == null) {
242            throw new IllegalArgumentException();
243        }
244        return editDistanceNative(StringUtils.toCodePointArray(before),
245                StringUtils.toCodePointArray(after));
246    }
247
248    @Override
249    public boolean isValidWord(final String word) {
250        return getFrequency(word) != NOT_A_PROBABILITY;
251    }
252
253    @Override
254    public int getFrequency(final String word) {
255        if (word == null) return NOT_A_PROBABILITY;
256        int[] codePoints = StringUtils.toCodePointArray(word);
257        return getProbabilityNative(mNativeDict, codePoints);
258    }
259
260    // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni
261    // calls when checking for changes in an entire dictionary.
262    public boolean isValidBigram(final String word0, final String word1) {
263        return getBigramProbability(word0, word1) != NOT_A_PROBABILITY;
264    }
265
266    public int getBigramProbability(final String word0, final String word1) {
267        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) return NOT_A_PROBABILITY;
268        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
269        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
270        return getBigramProbabilityNative(mNativeDict, codePoints0, codePoints1);
271    }
272
273    private void runGCIfRequired() {
274        if (needsToRunGC(true /* mindsBlockByGC */)) {
275            flushWithGC();
276        }
277    }
278
279    // Add a unigram entry to binary dictionary in native code.
280    public void addUnigramWord(final String word, final int probability) {
281        if (TextUtils.isEmpty(word)) {
282            return;
283        }
284        runGCIfRequired();
285        final int[] codePoints = StringUtils.toCodePointArray(word);
286        addUnigramWordNative(mNativeDict, codePoints, probability);
287    }
288
289    // Add a bigram entry to binary dictionary in native code.
290    public void addBigramWords(final String word0, final String word1, final int probability) {
291        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
292            return;
293        }
294        runGCIfRequired();
295        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
296        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
297        addBigramWordsNative(mNativeDict, codePoints0, codePoints1, probability);
298    }
299
300    // Remove a bigram entry form binary dictionary in native code.
301    public void removeBigramWords(final String word0, final String word1) {
302        if (TextUtils.isEmpty(word0) || TextUtils.isEmpty(word1)) {
303            return;
304        }
305        runGCIfRequired();
306        final int[] codePoints0 = StringUtils.toCodePointArray(word0);
307        final int[] codePoints1 = StringUtils.toCodePointArray(word1);
308        removeBigramWordsNative(mNativeDict, codePoints0, codePoints1);
309    }
310
311    private void reopen() {
312        close();
313        final File dictFile = new File(mDictFilePath);
314        mNativeDict = openNative(dictFile.getAbsolutePath(), 0 /* startOffset */,
315                dictFile.length(), true /* isUpdatable */);
316    }
317
318    public void flush() {
319        if (!isValidDictionary()) return;
320        flushNative(mNativeDict, mDictFilePath);
321        reopen();
322    }
323
324    public void flushWithGC() {
325        if (!isValidDictionary()) return;
326        flushWithGCNative(mNativeDict, mDictFilePath);
327        reopen();
328    }
329
330    /**
331     * Checks whether GC is needed to run or not.
332     * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
333     * the blocking in some situations such as in idle time or just before closing.
334     * @return whether GC is needed to run or not.
335     */
336    public boolean needsToRunGC(final boolean mindsBlockByGC) {
337        if (!isValidDictionary()) return false;
338        return needsToRunGCNative(mNativeDict, mindsBlockByGC);
339    }
340
341    @UsedForTesting
342    public int calculateProbability(final int unigramProbability, final int bigramProbability) {
343        if (!isValidDictionary()) return NOT_A_PROBABILITY;
344        return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
345    }
346
347    @UsedForTesting
348    public String getPropertyForTests(String query) {
349        if (!isValidDictionary()) return "";
350        return getPropertyNative(mNativeDict, query);
351    }
352
353    @Override
354    public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
355        // TODO: actually use the confidence rather than use this completely broken heuristic
356        final String word = candidate.mWord;
357        final int length = word.length();
358        int remainingSpaces = SPACE_COUNT_FOR_AUTO_COMMIT;
359        for (int i = 0; i < length; ++i) {
360            // This is okay because no low-surrogate and no high-surrogate can ever match the
361            // space character, so we don't need to take care of iterating on code points.
362            if (Constants.CODE_SPACE == word.charAt(i)) {
363                if (0 >= --remainingSpaces) return true;
364            }
365        }
366        return false;
367    }
368
369    @Override
370    public void close() {
371        synchronized (mDicTraverseSessions) {
372            final int sessionsSize = mDicTraverseSessions.size();
373            for (int index = 0; index < sessionsSize; ++index) {
374                final DicTraverseSession traverseSession = mDicTraverseSessions.valueAt(index);
375                if (traverseSession != null) {
376                    traverseSession.close();
377                }
378            }
379            mDicTraverseSessions.clear();
380        }
381        closeInternalLocked();
382    }
383
384    private synchronized void closeInternalLocked() {
385        if (mNativeDict != 0) {
386            closeNative(mNativeDict);
387            mNativeDict = 0;
388        }
389    }
390
391    // TODO: Manage BinaryDictionary instances without using WeakReference or something.
392    @Override
393    protected void finalize() throws Throwable {
394        try {
395            closeInternalLocked();
396        } finally {
397            super.finalize();
398        }
399    }
400}
401