DecayingExpandableBinaryDictionaryBase.java revision 8bfc8c46ffc755752dbf11a105ef40b8fc5ae390
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.personalization;
18
19import android.content.Context;
20import android.util.Log;
21
22import com.android.inputmethod.annotations.UsedForTesting;
23import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
24import com.android.inputmethod.latin.Constants;
25import com.android.inputmethod.latin.Dictionary;
26import com.android.inputmethod.latin.ExpandableBinaryDictionary;
27import com.android.inputmethod.latin.makedict.DictDecoder;
28import com.android.inputmethod.latin.makedict.FormatSpec;
29import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
30import com.android.inputmethod.latin.utils.CollectionUtils;
31import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
32import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;
33
34import java.io.File;
35import java.io.IOException;
36import java.util.ArrayList;
37import java.util.HashMap;
38import java.util.Locale;
39import java.util.Map;
40import java.util.concurrent.TimeUnit;
41
42/**
43 * This class is a base class of a dictionary that supports decaying for the personalized language
44 * model.
45 */
46public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary {
47    private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName();
48    public static final boolean DBG_SAVE_RESTORE = false;
49    private static final boolean DBG_DUMP_ON_CLOSE = false;
50
51    /** Any pair being typed or picked */
52    public static final int FREQUENCY_FOR_TYPED = 2;
53
54    public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
55    public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;
56
57    public static final int REQUIRED_BINARY_DICTIONARY_VERSION = 4;
58
59    /** Locale for which this user history dictionary is storing words */
60    private final Locale mLocale;
61
62    private final String mFileName;
63
64    private final ArrayList<PersonalizationDictionaryUpdateSession> mSessions =
65            CollectionUtils.newArrayList();
66
67    // Should always be false except when we use this class for test
68    @UsedForTesting boolean mIsTest = false;
69
70    /* package */ DecayingExpandableBinaryDictionaryBase(final Context context,
71            final Locale locale, final String dictionaryType, final String fileName) {
72        super(context, fileName, locale, dictionaryType, true);
73        mLocale = locale;
74        mFileName = fileName;
75        if (mLocale != null && mLocale.toString().length() > 1) {
76            reloadDictionaryIfRequired();
77        }
78    }
79
80    @Override
81    public void close() {
82        if (DBG_DUMP_ON_CLOSE) {
83            dumpAllWordsForDebug();
84        }
85        // Flush pending writes.
86        // TODO: Remove after this class become to use a dynamic binary dictionary.
87        asyncFlashAllBinaryDictionary();
88    }
89
90    @Override
91    protected Map<String, String> getHeaderAttributeMap() {
92        HashMap<String, String> attributeMap = new HashMap<String, String>();
93        attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
94                FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
95        attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
96                FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
97        attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_ATTRIBUTE, mFileName);
98        attributeMap.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_ATTRIBUTE, mLocale.toString());
99        attributeMap.put(FormatSpec.FileHeader.DICTIONARY_VERSION_ATTRIBUTE,
100                String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
101        return attributeMap;
102    }
103
104    @Override
105    protected boolean hasContentChanged() {
106        return false;
107    }
108
109    @Override
110    protected boolean needsToReloadBeforeWriting() {
111        return false;
112    }
113
114    @Override
115    protected boolean isValidBinaryDictFormatVersion(final int formatVersion) {
116        return formatVersion >= REQUIRED_BINARY_DICTIONARY_VERSION;
117    }
118
119    @Override
120    protected String getFileNameExtentionToOpenDict() {
121        return "/" + FormatSpec.TRIE_FILE_EXTENSION;
122    }
123
124    public void addMultipleDictionaryEntriesToDictionary(
125            final ArrayList<LanguageModelParam> languageModelParams,
126            final ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback callback) {
127        if (languageModelParams == null || languageModelParams.isEmpty()) {
128            if (callback != null) {
129                callback.onFinished();
130            }
131            return;
132        }
133        addMultipleDictionaryEntriesDynamically(languageModelParams, callback);
134    }
135
136    /**
137     * Pair will be added to the decaying dictionary.
138     *
139     * The first word may be null. That means we don't know the context, in other words,
140     * it's only a unigram. The first word may also be an empty string : this means start
141     * context, as in beginning of a sentence for example.
142     * The second word may not be null (a NullPointerException would be thrown).
143     */
144    public void addToDictionary(final String word0, final String word1, final boolean isValid) {
145        if (word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH ||
146                (word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
147            return;
148        }
149        final int frequency = isValid ?
150                FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS;
151        addWordDynamically(word1, null /* shortcutTarget */, frequency, 0 /* shortcutFreq */,
152                false /* isNotAWord */);
153        // Do not insert a word as a bigram of itself
154        if (word1.equals(word0)) {
155            return;
156        }
157        if (null != word0) {
158            addBigramDynamically(word0, word1, frequency);
159        }
160    }
161
162    public void cancelAddingUserHistory(final String word0, final String word1) {
163        removeBigramDynamically(word0, word1);
164    }
165
166    @Override
167    protected void loadDictionaryAsync() {
168        // Never loaded to memory in Java side.
169    }
170
171    public void registerUpdateSession(PersonalizationDictionaryUpdateSession session) {
172        session.setPredictionDictionary(this);
173        mSessions.add(session);
174        session.onDictionaryReady();
175    }
176
177    public void unRegisterUpdateSession(PersonalizationDictionaryUpdateSession session) {
178        mSessions.remove(session);
179    }
180
181    @UsedForTesting
182    public void dumpAllWordsForDebug() {
183        runAfterGcForDebug(new Runnable() {
184            @Override
185            public void run() {
186                dumpAllWordsForDebugLocked();
187            }
188        });
189    }
190
191    private void dumpAllWordsForDebugLocked() {
192        Log.d(TAG, "dumpAllWordsForDebug started.");
193        final OnAddWordListener listener = new OnAddWordListener() {
194            @Override
195            public void setUnigram(final String word, final String shortcutTarget,
196                    final int frequency, final int shortcutFreq) {
197                Log.d(TAG, "load unigram: " + word + "," + frequency);
198            }
199
200            @Override
201            public void setBigram(final String word0, final String word1, final int frequency) {
202                if (word0.length() < Constants.DICTIONARY_MAX_WORD_LENGTH
203                        && word1.length() < Constants.DICTIONARY_MAX_WORD_LENGTH) {
204                    Log.d(TAG, "load bigram: " + word0 + "," + word1 + "," + frequency);
205                } else {
206                    Log.d(TAG, "Skip inserting a too long bigram: " + word0 + "," + word1 + ","
207                            + frequency);
208                }
209            }
210        };
211
212        // Load the dictionary from binary file
213        final File dictFile = new File(mContext.getFilesDir(), mFileName);
214        final DictDecoder dictDecoder = FormatSpec.getDictDecoder(dictFile,
215                DictDecoder.USE_BYTEARRAY);
216        if (dictDecoder == null) {
217            // This is an expected condition: we don't have a user history dictionary for this
218            // language yet. It will be created sometime later.
219            return;
220        }
221
222        try {
223            dictDecoder.openDictBuffer();
224            UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener);
225        } catch (IOException e) {
226            Log.d(TAG, "IOException on opening a bytebuffer", e);
227        } catch (UnsupportedFormatException e) {
228            Log.d(TAG, "Unsupported format, can't read the dictionary", e);
229        }
230    }
231
232    @UsedForTesting
233    public void clearAndFlushDictionary() {
234        // Clear the node structure on memory
235        clear();
236        // Then flush the cleared state of the dictionary on disk.
237        asyncFlashAllBinaryDictionary();
238    }
239
240    /* package */ void decayIfNeeded() {
241        runGCIfRequired(false /* mindsBlockByGC */);
242    }
243}
244