DecayingExpandableBinaryDictionaryBase.java revision 8bfc8c46ffc755752dbf11a105ef40b8fc5ae390
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.personalization;

import android.content.Context;
import android.util.Log;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary.LanguageModelParam;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import com.android.inputmethod.latin.makedict.DictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.OnAddWordListener;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Base class for dictionaries that support decaying and back the personalized language model.
 */
public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableBinaryDictionary {
    private static final String TAG = DecayingExpandableBinaryDictionaryBase.class.getSimpleName();
    public static final boolean DBG_SAVE_RESTORE = false;
    private static final boolean DBG_DUMP_ON_CLOSE = false;

    /** Any pair being typed or picked */
    public static final int FREQUENCY_FOR_TYPED = 2;

    /** Frequency used by {@link #addToDictionary} when the entry is flagged as valid. */
    public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
    /** Frequency used by {@link #addToDictionary} when the entry is not flagged as valid. */
    public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;

    /** Lowest binary dictionary format version accepted by this class. */
    public static final int REQUIRED_BINARY_DICTIONARY_VERSION = 4;

    /** Locale for which this user history dictionary is storing words */
    private final Locale mLocale;

    /** File name handed to the superclass; also written as the dictionary id in the header. */
    private final String mFileName;

    /** Update sessions currently registered with this dictionary. */
    private final ArrayList<PersonalizationDictionaryUpdateSession> mSessions =
            CollectionUtils.newArrayList();

    // Should always be false except when we use this class for test
    @UsedForTesting boolean mIsTest = false;

    /**
     * Creates the dictionary and, when the locale string is longer than one character,
     * asks the superclass to reload it if required.
     *
     * @param context the context forwarded to the superclass
     * @param locale the locale this dictionary stores words for; may be null
     * @param dictionaryType the dictionary type forwarded to the superclass
     * @param fileName the file name forwarded to the superclass and kept for the header
     */
    /* package */ DecayingExpandableBinaryDictionaryBase(final Context context,
            final Locale locale, final String dictionaryType, final String fileName) {
        super(context, fileName, locale, dictionaryType, true);
        mLocale = locale;
        mFileName = fileName;
        final boolean hasUsableLocale = mLocale != null && mLocale.toString().length() > 1;
        if (hasUsableLocale) {
            reloadDictionaryIfRequired();
        }
    }

    /**
     * Optionally dumps the content for debugging, then requests an asynchronous flush.
     */
    @Override
    public void close() {
        if (DBG_DUMP_ON_CLOSE) {
            dumpAllWordsForDebug();
        }
        // Flush pending writes.
        // TODO: Remove once this class uses a dynamic binary dictionary.
        asyncFlashAllBinaryDictionary();
    }

    /**
     * Builds the header attributes for this dictionary: dynamic-update and forgetting-curve
     * flags set to true, the file name as id, the locale, and the current time in seconds as
     * version.
     *
     * @return a freshly created attribute map
     */
    @Override
    protected Map<String, String> getHeaderAttributeMap() {
        final String enabled = FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE;
        final HashMap<String, String> header = new HashMap<String, String>();
        header.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, enabled);
        header.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE, enabled);
        header.put(FormatSpec.FileHeader.DICTIONARY_ID_ATTRIBUTE, mFileName);
        header.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_ATTRIBUTE, mLocale.toString());
        final long nowInSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
        header.put(FormatSpec.FileHeader.DICTIONARY_VERSION_ATTRIBUTE,
                String.valueOf(nowInSeconds));
        return header;
    }

    /** Always reports {@code false}. */
    @Override
    protected boolean hasContentChanged() {
        return false;
    }

    /** Always reports {@code false}. */
    @Override
    protected boolean needsToReloadBeforeWriting() {
        return false;
    }

    /**
     * Accepts any format version that is at least {@link #REQUIRED_BINARY_DICTIONARY_VERSION}.
     */
    @Override
    protected boolean isValidBinaryDictFormatVersion(final int formatVersion) {
        return REQUIRED_BINARY_DICTIONARY_VERSION <= formatVersion;
    }

    /** Returns {@code "/"} followed by the trie file extension. */
    @Override
    protected String getFileNameExtentionToOpenDict() {
        return "/" + FormatSpec.TRIE_FILE_EXTENSION;
    }

    /**
     * Adds a batch of entries dynamically. When the batch is null or empty nothing is added and
     * the callback, if any, is notified right away.
     *
     * @param languageModelParams the entries to add; may be null or empty
     * @param callback notified on completion; may be null
     */
    public void addMultipleDictionaryEntriesToDictionary(
            final ArrayList<LanguageModelParam> languageModelParams,
            final ExpandableBinaryDictionary.AddMultipleDictionaryEntriesCallback callback) {
        final boolean hasEntries = languageModelParams != null && !languageModelParams.isEmpty();
        if (hasEntries) {
            addMultipleDictionaryEntriesDynamically(languageModelParams, callback);
            return;
        }
        if (callback != null) {
            callback.onFinished();
        }
    }

    /**
     * Pair will be added to the decaying dictionary.
     *
     * The first word may be null. That means we don't know the context, in other words,
     * it's only a unigram. The first word may also be an empty string : this means start
     * context, as in beginning of a sentence for example.
     * The second word may not be null (a NullPointerException would be thrown).
     *
     * Nothing is added when either word has a length of at least
     * {@code Constants.DICTIONARY_MAX_WORD_LENGTH}.
     */
    public void addToDictionary(final String word0, final String word1, final boolean isValid) {
        if (word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) {
            return;
        }
        if (word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH) {
            return;
        }
        final int frequency;
        if (isValid) {
            frequency = FREQUENCY_FOR_WORDS_IN_DICTS;
        } else {
            frequency = FREQUENCY_FOR_WORDS_NOT_IN_DICTS;
        }
        addWordDynamically(word1, null /* shortcutTarget */, frequency, 0 /* shortcutFreq */,
                false /* isNotAWord */);
        // A bigram needs a known context, and a word is never inserted as a bigram of itself.
        if (null == word0 || word1.equals(word0)) {
            return;
        }
        addBigramDynamically(word0, word1, frequency);
    }

    /** Removes the bigram made of the two given words. */
    public void cancelAddingUserHistory(final String word0, final String word1) {
        removeBigramDynamically(word0, word1);
    }

    @Override
    protected void loadDictionaryAsync() {
        // Never loaded to memory in Java side.
    }

    /**
     * Attaches this dictionary to the session, records the session, then tells it the
     * dictionary is ready.
     */
    public void registerUpdateSession(PersonalizationDictionaryUpdateSession session) {
        session.setPredictionDictionary(this);
        mSessions.add(session);
        session.onDictionaryReady();
    }

    /** Forgets a previously registered session. */
    public void unRegisterUpdateSession(PersonalizationDictionaryUpdateSession session) {
        mSessions.remove(session);
    }

    /** Schedules a debug dump of the dictionary through {@code runAfterGcForDebug}. */
    @UsedForTesting
    public void dumpAllWordsForDebug() {
        final Runnable dumpTask = new Runnable() {
            @Override
            public void run() {
                dumpAllWordsForDebugLocked();
            }
        };
        runAfterGcForDebug(dumpTask);
    }

    /**
     * Reads the binary dictionary file and logs every unigram and bigram it contains.
     * Returns quietly when no decoder can be obtained for the file.
     */
    private void dumpAllWordsForDebugLocked() {
        Log.d(TAG, "dumpAllWordsForDebug started.");

        // Load the dictionary from binary file
        final File binaryFile = new File(mContext.getFilesDir(), mFileName);
        final DictDecoder decoder = FormatSpec.getDictDecoder(binaryFile,
                DictDecoder.USE_BYTEARRAY);
        if (null == decoder) {
            // This is an expected condition: we don't have a user history dictionary for this
            // language yet. It will be created sometime later.
            return;
        }

        final OnAddWordListener dumpListener = newDebugDumpListener();
        try {
            decoder.openDictBuffer();
            UserHistoryDictIOUtils.readDictionaryBinary(decoder, dumpListener);
        } catch (IOException e) {
            Log.d(TAG, "IOException on opening a bytebuffer", e);
        } catch (UnsupportedFormatException e) {
            Log.d(TAG, "Unsupported format, can't read the dictionary", e);
        }
    }

    /** Creates the listener that logs each entry read during a debug dump. */
    private OnAddWordListener newDebugDumpListener() {
        return new OnAddWordListener() {
            @Override
            public void setUnigram(final String word, final String shortcutTarget,
                    final int frequency, final int shortcutFreq) {
                Log.d(TAG, "load unigram: " + word + "," + frequency);
            }

            @Override
            public void setBigram(final String word0, final String word1, final int frequency) {
                final boolean tooLong = word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH
                        || word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH;
                if (tooLong) {
                    Log.d(TAG, "Skip inserting a too long bigram: " + word0 + "," + word1 + ","
                            + frequency);
                    return;
                }
                Log.d(TAG, "load bigram: " + word0 + "," + word1 + "," + frequency);
            }
        };
    }

    /** Clears the dictionary, then requests an asynchronous flush of the cleared state. */
    @UsedForTesting
    public void clearAndFlushDictionary() {
        // Clear the node structure on memory
        clear();
        // Then flush the cleared state of the dictionary on disk.
        asyncFlashAllBinaryDictionary();
    }

    /** Runs {@code runGCIfRequired} without minding being blocked by GC. */
    /* package */ void decayIfNeeded() {
        runGCIfRequired(false /* mindsBlockByGC */);
    }
}