DynamicPersonalizationDictionaryWriter.java revision 87a72f50c23a4ef357ae623eabc2af16d02466ae
187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi/*
287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * Copyright (C) 2013 The Android Open Source Project
387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi *
487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * Licensed under the Apache License, Version 2.0 (the "License");
587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * you may not use this file except in compliance with the License.
687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * You may obtain a copy of the License at
787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi *
887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi *      http://www.apache.org/licenses/LICENSE-2.0
987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi *
1087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * Unless required by applicable law or agreed to in writing, software
1187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * distributed under the License is distributed on an "AS IS" BASIS,
1287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * See the License for the specific language governing permissions and
1487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi * limitations under the License.
1587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi */
1687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
1787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagipackage com.android.inputmethod.latin.personalization;
1887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
1987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport android.content.Context;
2087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
2187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.keyboard.ProximityInfo;
2287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.AbstractDictionaryWriter;
2387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.ExpandableDictionary;
2487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.WordComposer;
2587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.ExpandableDictionary.NextWord;
2687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
2787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.DictEncoder;
2887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.FormatSpec;
2987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.makedict.UnsupportedFormatException;
3087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.utils.UserHistoryDictIOUtils;
3187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.utils.UserHistoryDictIOUtils.BigramDictionaryInterface;
3287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils;
3387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport com.android.inputmethod.latin.utils.UserHistoryForgettingCurveUtils.ForgettingCurveParams;
3487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
3587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport java.io.IOException;
3687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagiimport java.util.ArrayList;
3787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
// Currently this class is used to implement the dynamic prediction dictionary.
// TODO: Move to native code.
4087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagipublic class DynamicPersonalizationDictionaryWriter extends AbstractDictionaryWriter {
4187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    private static final String TAG = DynamicPersonalizationDictionaryWriter.class.getSimpleName();
4287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    /** Maximum number of pairs. Pruning will start when databases goes above this number. */
4387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    public static final int MAX_HISTORY_BIGRAMS = 10000;
4487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
4587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    /** Any pair being typed or picked */
4687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    private static final int FREQUENCY_FOR_TYPED = 2;
4787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
4887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    private static final int BINARY_DICT_VERSION = 3;
4987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
5087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            new FormatSpec.FormatOptions(BINARY_DICT_VERSION, true /* supportsDynamicUpdate */);
5187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
5287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    private final UserHistoryDictionaryBigramList mBigramList =
5387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            new UserHistoryDictionaryBigramList();
5487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    private final ExpandableDictionary mExpandableDictionary;
5587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
5687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    public DynamicPersonalizationDictionaryWriter(final Context context, final String dictType) {
5787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        super(context, dictType);
5887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        mExpandableDictionary = new ExpandableDictionary(context, dictType);
5987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
6087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
6187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    @Override
6287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    public void clear() {
6387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        mBigramList.evictAll();
6487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        mExpandableDictionary.clearDictionary();
6587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
6687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
6787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    /**
6887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi     * Adds a word unigram to the fusion dictionary. Call updateBinaryDictionary when all changes
6987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi     * are done to update the binary dictionary.
7087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi     */
7187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    @Override
7287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
7387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            final boolean isNotAWord) {
7487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        mExpandableDictionary.addWord(word, shortcutTarget, frequency);
7587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        mBigramList.addBigram(null, word, (byte)frequency);
7687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
7787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
7887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    @Override
7987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    public void addBigramWords(final String word0, final String word1, final int frequency,
8087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            final boolean isValid, final long lastModifiedTime) {
8187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        if (lastModifiedTime > 0) {
8287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            mExpandableDictionary.setBigramAndGetFrequency(word0, word1,
8387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    new ForgettingCurveParams(frequency, System.currentTimeMillis(),
8487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                            lastModifiedTime));
8587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            mBigramList.addBigram(word0, word1, (byte)frequency);
8687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        } else {
8787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            mExpandableDictionary.setBigramAndGetFrequency(word0, word1,
8887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    new ForgettingCurveParams(isValid));
8987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            mBigramList.addBigram(word0, word1, (byte)frequency);
9087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        }
9187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
9287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
9387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    @Override
9487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    public void removeBigramWords(final String word0, final String word1) {
9587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        if (mBigramList.removeBigram(word0, word1)) {
9687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            mExpandableDictionary.removeBigram(word0, word1);
9787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        }
9887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
9987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
10087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    @Override
10187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    protected void writeDictionary(final DictEncoder dictEncoder)
10287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            throws IOException, UnsupportedFormatException {
10387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        UserHistoryDictIOUtils.writeDictionary(dictEncoder,
10487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                new FrequencyProvider(mBigramList, mExpandableDictionary), mBigramList,
10587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                        FORMAT_OPTIONS);
10687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
10787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
10887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    private static class FrequencyProvider implements BigramDictionaryInterface {
10987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        final private UserHistoryDictionaryBigramList mBigramList;
11087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        final private ExpandableDictionary mExpandableDictionary;
11187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
11287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        public FrequencyProvider(final UserHistoryDictionaryBigramList bigramList,
11387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                final ExpandableDictionary expandableDictionary) {
11487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            mBigramList = bigramList;
11587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            mExpandableDictionary = expandableDictionary;
11687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        }
11787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        @Override
11887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        public int getFrequency(final String word0, final String word1) {
11987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            final int freq;
12087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            if (word0 == null) { // unigram
12187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                freq = FREQUENCY_FOR_TYPED;
12287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            } else { // bigram
12387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                final NextWord nw = mExpandableDictionary.getBigramWord(word0, word1);
12487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                if (nw != null) {
12587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    final ForgettingCurveParams forgettingCurveParams = nw.getFcParams();
12687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    final byte prevFc = mBigramList.getBigrams(word0).get(word1);
12787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    final byte fc = forgettingCurveParams.getFc();
12887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    final boolean isValid = forgettingCurveParams.isValid();
12987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    if (prevFc > 0 && prevFc == fc) {
13087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                        freq = fc & 0xFF;
13187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    } else if (UserHistoryForgettingCurveUtils.
13287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                            needsToSave(fc, isValid, mBigramList.size() <= MAX_HISTORY_BIGRAMS)) {
13387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                        freq = fc & 0xFF;
13487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    } else {
13587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                        // Delete this entry
13687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                        freq = -1;
13787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    }
13887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                } else {
13987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    // Delete this entry
14087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                    freq = -1;
14187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                }
14287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            }
14387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            return freq;
14487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        }
14587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
14687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
14787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    @Override
14887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
14987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            final String prevWord, final ProximityInfo proximityInfo,
15087a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi            boolean blockOffensiveWords) {
15187a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        return mExpandableDictionary.getSuggestions(composer, prevWord, proximityInfo,
15287a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi                blockOffensiveWords);
15387a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
15487a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi
15587a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    @Override
15687a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    public boolean isValidWord(final String word) {
15787a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi        return mExpandableDictionary.isValidWord(word);
15887a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi    }
15987a72f50c23a4ef357ae623eabc2af16d02466aeKeisuke Kuroyanagi}
160