1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.utils;
18
19import android.util.Log;
20
21import com.android.inputmethod.annotations.UsedForTesting;
22import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
23import com.android.inputmethod.latin.makedict.DictDecoder;
24import com.android.inputmethod.latin.makedict.DictEncoder;
25import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
26import com.android.inputmethod.latin.makedict.FusionDictionary;
27import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
28import com.android.inputmethod.latin.makedict.PendingAttribute;
29import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
30import com.android.inputmethod.latin.personalization.UserHistoryDictionaryBigramList;
31
32import java.io.IOException;
33import java.util.ArrayList;
34import java.util.HashMap;
35import java.util.Map.Entry;
36import java.util.TreeMap;
37import java.util.concurrent.TimeUnit;
38
39/**
40 * Reads and writes Binary files for a UserHistoryDictionary.
41 *
42 * All the methods in this class are static.
43 */
44public final class UserHistoryDictIOUtils {
45    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
46    private static final boolean DEBUG = false;
47    private static final String USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
48    private static final String USES_FORGETTING_CURVE_VALUE = "1";
49    private static final String LAST_UPDATED_TIME_KEY = "date";
50
51    public interface OnAddWordListener {
52        /**
53         * Callback to be notified when a word is added to the dictionary.
54         * @param word The added word.
55         * @param shortcutTarget A shortcut target for this word, or null if none.
56         * @param frequency The frequency for this word.
57         * @param shortcutFreq The frequency of the shortcut (0~15, with 15 = whitelist).
58         *   Unspecified if shortcutTarget is null - do not rely on its value.
59         */
60        public void setUnigram(final String word, final String shortcutTarget, final int frequency,
61                final int shortcutFreq);
62        public void setBigram(final String word1, final String word2, final int frequency);
63    }
64
65    @UsedForTesting
66    public interface BigramDictionaryInterface {
67        public int getFrequency(final String word1, final String word2);
68    }
69
70    /**
71     * Writes dictionary to file.
72     */
73    public static void writeDictionary(final DictEncoder dictEncoder,
74            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
75            final FormatOptions formatOptions) {
76        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
77        fusionDict.addOptionAttribute(USES_FORGETTING_CURVE_KEY, USES_FORGETTING_CURVE_VALUE);
78        fusionDict.addOptionAttribute(LAST_UPDATED_TIME_KEY,
79                String.valueOf(TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis())));
80        try {
81            dictEncoder.writeDictionary(fusionDict, formatOptions);
82            Log.d(TAG, "end writing");
83        } catch (IOException e) {
84            Log.e(TAG, "IO exception while writing file", e);
85        } catch (UnsupportedFormatException e) {
86            Log.e(TAG, "Unsupported format", e);
87        }
88    }
89
90    /**
91     * Constructs a new FusionDictionary from BigramDictionaryInterface.
92     */
93    @UsedForTesting
94    static FusionDictionary constructFusionDictionary(
95            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
96        final FusionDictionary fusionDict = new FusionDictionary(new PtNodeArray(),
97                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
98                        false));
99        int profTotal = 0;
100        for (final String word1 : bigrams.keySet()) {
101            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
102            for (final String word2 : word1Bigrams.keySet()) {
103                final int freq = dict.getFrequency(word1, word2);
104                if (freq == -1) {
105                    // don't add this bigram.
106                    continue;
107                }
108                if (DEBUG) {
109                    if (word1 == null) {
110                        Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
111                    } else {
112                        Log.d(TAG, "add bigram: " + word1
113                                + "," + word2 + "," + Integer.toString(freq));
114                    }
115                    profTotal++;
116                }
117                if (word1 == null) { // unigram
118                    fusionDict.add(word2, freq, null, false /* isNotAWord */);
119                } else { // bigram
120                    if (FusionDictionary.findWordInTree(fusionDict.mRootNodeArray, word1) == null) {
121                        fusionDict.add(word1, 2, null, false /* isNotAWord */);
122                    }
123                    fusionDict.setBigram(word1, word2, freq);
124                }
125                bigrams.updateBigram(word1, word2, (byte)freq);
126            }
127        }
128        if (DEBUG) {
129            Log.d(TAG, "add " + profTotal + "words");
130        }
131        return fusionDict;
132    }
133
134    /**
135     * Reads dictionary from file.
136     */
137    public static void readDictionaryBinary(final DictDecoder dictDecoder,
138            final OnAddWordListener dict) {
139        final TreeMap<Integer, String> unigrams = CollectionUtils.newTreeMap();
140        final TreeMap<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
141        final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
142        try {
143            dictDecoder.readUnigramsAndBigramsBinary(unigrams, frequencies, bigrams);
144        } catch (IOException e) {
145            Log.e(TAG, "IO exception while reading file", e);
146        } catch (UnsupportedFormatException e) {
147            Log.e(TAG, "Unsupported format", e);
148        } catch (ArrayIndexOutOfBoundsException e) {
149            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file", e);
150        }
151        addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
152    }
153
154    /**
155     * Adds all unigrams and bigrams in maps to OnAddWordListener.
156     */
157    @UsedForTesting
158    static void addWordsFromWordMap(final TreeMap<Integer, String> unigrams,
159            final TreeMap<Integer, Integer> frequencies,
160            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams,
161            final OnAddWordListener to) {
162        for (Entry<Integer, String> entry : unigrams.entrySet()) {
163            final String word1 = entry.getValue();
164            final int unigramFrequency = frequencies.get(entry.getKey());
165            to.setUnigram(word1, null /* shortcutTarget */, unigramFrequency, 0 /* shortcutFreq */);
166            final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
167            if (attrList != null) {
168                for (final PendingAttribute attr : attrList) {
169                    final String word2 = unigrams.get(attr.mAddress);
170                    if (word1 == null || word2 == null) {
171                        Log.e(TAG, "Invalid bigram pair detected: " + word1 + ", " + word2);
172                        continue;
173                    }
174                    to.setBigram(word1, word2,
175                            BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
176                                    attr.mFrequency));
177                }
178            }
179        }
180
181    }
182}
183