DynamicPersonalizationDictionaryWriter.java revision 87a72f50c23a4ef357ae623eabc2af16d02466ae
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.personalization;

import android.content.Context;

import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.AbstractDictionaryWriter;
import com.android.inputmethod.latin.ExpandableDictionary;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.ExpandableDictionary.NextWord;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.makedict.DictEncoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
import com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface;
import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils;
import com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams;

import java.io.IOException;
import java.util.ArrayList;

// Currently this class is used to implement the dynamic prediction dictionary.
// TODO: Move to native code.
public class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWriter {
    private static final String TAG = DynamicPersonalizationDictionaryWriter.class.getSimpleName();
    /** Maximum number of pairs. Pruning will start when the database goes above this number. */
    public static final int MAX_HISTORY_BIGRAMS = 10000;

    /** Any pair being typed or picked */
    private static final int FREQUENCY_FOR_TYPED = 2;

    private static final int BINARY_DICT_VERSION = 3;
    private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
            new FormatSpec.FormatOptions(BINARY_DICT_VERSION, true /* supportsDynamicUpdate */);

    // Every word/pair is recorded in both structures below: the bigram list is what gets
    // enumerated when the dictionary is written, the expandable dictionary answers lookups.
    private final UserHistoryDictionaryBigramList mBigramList =
            new UserHistoryDictionaryBigramList();
    private final ExpandableDictionary mExpandableDictionary;

    /**
     * Creates a writer backed by a fresh {@link ExpandableDictionary} of the given type.
     *
     * @param context the context, forwarded to the superclass and to the backing dictionary
     * @param dictType the dictionary type, forwarded likewise
     */
    public DynamicPersonalizationDictionaryWriter(final Context context, final String dictType) {
        super(context, dictType);
        mExpandableDictionary = new ExpandableDictionary(context, dictType);
    }

    /** Empties both the bigram list and the backing expandable dictionary. */
    @Override
    public void clear() {
        mBigramList.evictAll();
        mExpandableDictionary.clearDictionary();
    }

    /**
     * Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes
     * are done to update the binary dictionary.
     *
     * The unigram is also recorded in the bigram list as a pair whose first word is
     * {@code null}. Note that {@code isNotAWord} is not used by this implementation.
     */
    @Override
    public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
            final boolean isNotAWord) {
        mExpandableDictionary.addWord(word, shortcutTarget, frequency);
        // The frequency is narrowed to a byte for storage in the bigram list.
        mBigramList.addBigram(null, word, (byte)frequency);
    }

    /**
     * Registers the pair (word0, word1) in the expandable dictionary and in the bigram list.
     *
     * When {@code lastModifiedTime} is positive the forgetting curve parameters are built from
     * {@code frequency}, the current time and {@code lastModifiedTime}; otherwise they are built
     * from {@code isValid} alone.
     */
    @Override
    public void addBigramWords(final String word0, final String word1, final int frequency,
            final boolean isValid, final long lastModifiedTime) {
        final ForgettingCurveParams fcParams = (lastModifiedTime > 0)
                ? new ForgettingCurveParams(frequency, System.currentTimeMillis(),
                        lastModifiedTime)
                : new ForgettingCurveParams(isValid);
        mExpandableDictionary.setBigramAndGetFrequency(word0, word1, fcParams);
        mBigramList.addBigram(word0, word1, (byte)frequency);
    }

    /**
     * Removes the pair (word0, word1). The expandable dictionary is only touched when the
     * bigram list reports that it actually removed the pair.
     */
    @Override
    public void removeBigramWords(final String word0, final String word1) {
        if (!mBigramList.removeBigram(word0, word1)) {
            return;
        }
        mExpandableDictionary.removeBigram(word0, word1);
    }

    /**
     * Hands the bigram list to {@link UserHistoryDictIOUtils#writeDictionary} together with a
     * {@link FrequencyProvider} that supplies the frequency for each entry.
     */
    @Override
    protected void writeDictionary(final DictEncoder dictEncoder)
            throws IOException, UnsupportedFormatException {
        final FrequencyProvider frequencyProvider =
                new FrequencyProvider(mBigramList, mExpandableDictionary);
        UserHistoryDictIOUtils.writeDictionary(dictEncoder, frequencyProvider, mBigramList,
                FORMAT_OPTIONS);
    }

    /** Supplies the frequency to store for each unigram/bigram when the dictionary is written. */
    private static class FrequencyProvider implements BigramDictionaryInterface {
        private final UserHistoryDictionaryBigramList mBigramList;
        private final ExpandableDictionary mExpandableDictionary;

        public FrequencyProvider(final UserHistoryDictionaryBigramList bigramList,
                final ExpandableDictionary expandableDictionary) {
            mBigramList = bigramList;
            mExpandableDictionary = expandableDictionary;
        }

        /**
         * Returns the frequency to store for the given entry, or -1 when the entry should be
         * deleted.
         *
         * @param word0 the first word of the pair, or {@code null} for a unigram
         * @param word1 the second word of the pair (the word itself for a unigram)
         */
        @Override
        public int getFrequency(final String word0, final String word1) {
            if (word0 == null) {
                // Unigram.
                return FREQUENCY_FOR_TYPED;
            }
            // Bigram.
            final NextWord nextWord = mExpandableDictionary.getBigramWord(word0, word1);
            if (nextWord == null) {
                // Delete this entry
                return -1;
            }
            final ForgettingCurveParams fcParams = nextWord.getFcParams();
            // NOTE(review): if get(word1) can return a null boxed value, the assignment to a
            // primitive byte would throw here - confirm against UserHistoryDictionaryBigramList.
            final byte prevFc = mBigramList.getBigrams(word0).get(word1);
            final byte fc = fcParams.getFc();
            final boolean isValid = fcParams.isValid();
            // Keep the entry when its stored value is positive and unchanged; otherwise let
            // needsToSave decide. The byte is widened as an unsigned value.
            final boolean keepEntry = (prevFc > 0 && prevFc == fc)
                    || UserHistoryForgettingCurveUtils.needsToSave(
                            fc, isValid, mBigramList.size() <= MAX_HISTORY_BIGRAMS);
            if (keepEntry) {
                return fc & 0xFF;
            }
            // Delete this entry
            return -1;
        }
    }

    /** Delegates suggestion lookup to the backing expandable dictionary. */
    @Override
    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
            final String prevWord, final ProximityInfo proximityInfo,
            boolean blockOffensiveWords) {
        return mExpandableDictionary.getSuggestions(composer, prevWord, proximityInfo,
                blockOffensiveWords);
    }

    /** Delegates the validity check to the backing expandable dictionary. */
    @Override
    public boolean isValidWord(final String word) {
        return mExpandableDictionary.isValidWord(word);
    }
}