1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.util.Log;
20
21import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
22import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
23import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
24import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
25import com.android.inputmethod.latin.makedict.FusionDictionary;
26import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
27import com.android.inputmethod.latin.makedict.PendingAttribute;
28import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
29
30import java.io.IOException;
31import java.io.OutputStream;
32import java.util.ArrayList;
33import java.util.HashMap;
34import java.util.Map;
35
36/**
37 * Reads and writes Binary files for a UserHistoryDictionary.
38 *
39 * All the methods in this class are static.
40 */
41public final class UserHistoryDictIOUtils {
42    private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName();
43    private static final boolean DEBUG = false;
44
45    public interface OnAddWordListener {
46        public void setUnigram(final String word, final String shortcutTarget, final int frequency);
47        public void setBigram(final String word1, final String word2, final int frequency);
48    }
49
50    public interface BigramDictionaryInterface {
51        public int getFrequency(final String word1, final String word2);
52    }
53
54    public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
55        private byte[] mBuffer;
56        private int mPosition;
57
58        public ByteArrayWrapper(final byte[] buffer) {
59            mBuffer = buffer;
60            mPosition = 0;
61        }
62
63        @Override
64        public int readUnsignedByte() {
65            return ((int)mBuffer[mPosition++]) & 0xFF;
66        }
67
68        @Override
69        public int readUnsignedShort() {
70            final int retval = readUnsignedByte();
71            return (retval << 8) + readUnsignedByte();
72        }
73
74        @Override
75        public int readUnsignedInt24() {
76            final int retval = readUnsignedShort();
77            return (retval << 8) + readUnsignedByte();
78        }
79
80        @Override
81        public int readInt() {
82            final int retval = readUnsignedShort();
83            return (retval << 16) + readUnsignedShort();
84        }
85
86        @Override
87        public int position() {
88            return mPosition;
89        }
90
91        @Override
92        public void position(int position) {
93            mPosition = position;
94        }
95
96        @Override
97        public void put(final byte b) {
98            mBuffer[mPosition++] = b;
99        }
100
101        @Override
102        public int limit() {
103            return mBuffer.length - 1;
104        }
105
106        @Override
107        public int capacity() {
108            return mBuffer.length;
109        }
110    }
111
112    /**
113     * Writes dictionary to file.
114     */
115    public static void writeDictionaryBinary(final OutputStream destination,
116            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
117            final FormatOptions formatOptions) {
118        final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
119        try {
120            BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions);
121            Log.d(TAG, "end writing");
122        } catch (IOException e) {
123            Log.e(TAG, "IO exception while writing file: " + e);
124        } catch (UnsupportedFormatException e) {
125            Log.e(TAG, "Unsupported fomat: " + e);
126        }
127    }
128
129    /**
130     * Constructs a new FusionDictionary from BigramDictionaryInterface.
131     */
132    /* packages for test */ static FusionDictionary constructFusionDictionary(
133            final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) {
134        final FusionDictionary fusionDict = new FusionDictionary(new Node(),
135                new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
136                        false));
137        int profTotal = 0;
138        for (final String word1 : bigrams.keySet()) {
139            final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1);
140            for (final String word2 : word1Bigrams.keySet()) {
141                final int freq = dict.getFrequency(word1, word2);
142                if (freq == -1) {
143                    // don't add this bigram.
144                    continue;
145                }
146                if (DEBUG) {
147                    if (word1 == null) {
148                        Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq));
149                    } else {
150                        Log.d(TAG, "add bigram: " + word1
151                                + "," + word2 + "," + Integer.toString(freq));
152                    }
153                    profTotal++;
154                }
155                if (word1 == null) { // unigram
156                    fusionDict.add(word2, freq, null, false /* isNotAWord */);
157                } else { // bigram
158                    if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) {
159                        fusionDict.add(word1, 2, null, false /* isNotAWord */);
160                    }
161                    fusionDict.setBigram(word1, word2, freq);
162                }
163                bigrams.updateBigram(word1, word2, (byte)freq);
164            }
165        }
166        if (DEBUG) {
167            Log.d(TAG, "add " + profTotal + "words");
168        }
169        return fusionDict;
170    }
171
172    /**
173     * Reads dictionary from file.
174     */
175    public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer,
176            final OnAddWordListener dict) {
177        final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
178        final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
179        final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
180        try {
181            BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies,
182                    bigrams);
183        } catch (IOException e) {
184            Log.e(TAG, "IO exception while reading file: " + e);
185        } catch (UnsupportedFormatException e) {
186            Log.e(TAG, "Unsupported format: " + e);
187        } catch (ArrayIndexOutOfBoundsException e) {
188            Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file: " + e);
189        }
190        addWordsFromWordMap(unigrams, frequencies, bigrams, dict);
191    }
192
193    /**
194     * Adds all unigrams and bigrams in maps to OnAddWordListener.
195     */
196    /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams,
197            final Map<Integer, Integer> frequencies,
198            final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) {
199        for (Map.Entry<Integer, String> entry : unigrams.entrySet()) {
200            final String word1 = entry.getValue();
201            final int unigramFrequency = frequencies.get(entry.getKey());
202            to.setUnigram(word1, null, unigramFrequency);
203            final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey());
204            if (attrList != null) {
205                for (final PendingAttribute attr : attrList) {
206                    to.setBigram(word1, unigrams.get(attr.mAddress),
207                            BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency,
208                                    attr.mFrequency));
209                }
210            }
211        }
212
213    }
214}