1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
20import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
21import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
22import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
23import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
24
25import java.io.File;
26import java.io.FileNotFoundException;
27import java.io.FileOutputStream;
28import java.io.IOException;
29import java.io.OutputStream;
30import java.util.ArrayList;
31import java.util.Iterator;
32
33/**
34 * An implementation of DictEncoder for version 3 binary dictionary.
35 */
36public class Ver3DictEncoder implements DictEncoder {
37
38    private final File mDictFile;
39    private OutputStream mOutStream;
40    private byte[] mBuffer;
41    private int mPosition;
42
43    public Ver3DictEncoder(final File dictFile) {
44        mDictFile = dictFile;
45        mOutStream = null;
46        mBuffer = null;
47    }
48
49    // This constructor is used only by BinaryDictOffdeviceUtilsTests.
50    // If you want to use this in the production code, you should consider keeping consistency of
51    // the interface of Ver3DictDecoder by using factory.
52    public Ver3DictEncoder(final OutputStream outStream) {
53        mDictFile = null;
54        mOutStream = outStream;
55    }
56
57    private void openStream() throws FileNotFoundException {
58        mOutStream = new FileOutputStream(mDictFile);
59    }
60
61    private void close() throws IOException {
62        if (mOutStream != null) {
63            mOutStream.close();
64            mOutStream = null;
65        }
66    }
67
68    @Override
69    public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
70            throws IOException, UnsupportedFormatException {
71        if (formatOptions.mVersion > FormatSpec.VERSION3) {
72            throw new UnsupportedFormatException(
73                    "The given format options has wrong version number : "
74                    + formatOptions.mVersion);
75        }
76
77        if (mOutStream == null) {
78            openStream();
79        }
80        BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions);
81
82        // Addresses are limited to 3 bytes, but since addresses can be relative to each node
83        // array, the structure itself is not limited to 16MB. However, if it is over 16MB deciding
84        // the order of the PtNode arrays becomes a quite complicated problem, because though the
85        // dictionary itself does not have a size limit, each node array must still be within 16MB
86        // of all its children and parents. As long as this is ensured, the dictionary file may
87        // grow to any size.
88
89        // Leave the choice of the optimal node order to the flattenTree function.
90        MakedictLog.i("Flattening the tree...");
91        ArrayList<PtNodeArray> flatNodes = BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);
92
93        MakedictLog.i("Computing addresses...");
94        BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions);
95        MakedictLog.i("Checking PtNode array...");
96        if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
97
98        // Create a buffer that matches the final dictionary size.
99        final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
100        final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
101        mBuffer = new byte[bufferSize];
102
103        MakedictLog.i("Writing file...");
104
105        for (PtNodeArray nodeArray : flatNodes) {
106            BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray, formatOptions);
107        }
108        if (MakedictLog.DBG) BinaryDictEncoderUtils.showStatistics(flatNodes);
109        mOutStream.write(mBuffer, 0, mPosition);
110
111        MakedictLog.i("Done");
112        close();
113    }
114
115    @Override
116    public void setPosition(final int position) {
117        if (mBuffer == null || position < 0 || position >= mBuffer.length) return;
118        mPosition = position;
119    }
120
121    @Override
122    public int getPosition() {
123        return mPosition;
124    }
125
126    @Override
127    public void writePtNodeCount(final int ptNodeCount) {
128        final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
129        if (countSize != 1 && countSize != 2) {
130            throw new RuntimeException("Strange size from getGroupCountSize : " + countSize);
131        }
132        final int encodedPtNodeCount = (countSize == 2) ?
133                (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
134        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, encodedPtNodeCount,
135                countSize);
136    }
137
138    private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) {
139        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
140        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
141                BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions),
142                FormatSpec.PTNODE_FLAGS_SIZE);
143    }
144
145    private void writeParentPosition(final int parentPosition, final PtNode ptNode,
146            final FormatOptions formatOptions) {
147        if (parentPosition == FormatSpec.NO_PARENT_ADDRESS) {
148            mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
149                    parentPosition, formatOptions);
150        } else {
151            mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
152                    parentPosition - ptNode.mCachedAddressAfterUpdate, formatOptions);
153        }
154    }
155
156    private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
157        mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
158        if (hasSeveralChars) {
159            mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
160        }
161    }
162
163    private void writeFrequency(final int frequency) {
164        if (frequency >= 0) {
165            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
166                    FormatSpec.PTNODE_FREQUENCY_SIZE);
167        }
168    }
169
170    private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) {
171        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
172        if (formatOptions.mSupportsDynamicUpdate) {
173            mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition,
174                    childrenPos);
175        } else {
176            mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
177                    childrenPos);
178        }
179    }
180
181    /**
182     * Write a shortcut attributes list to mBuffer.
183     *
184     * @param shortcuts the shortcut attributes list.
185     */
186    private void writeShortcuts(final ArrayList<WeightedString> shortcuts) {
187        if (null == shortcuts || shortcuts.isEmpty()) return;
188
189        final int indexOfShortcutByteSize = mPosition;
190        mPosition += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
191        final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
192        while (shortcutIterator.hasNext()) {
193            final WeightedString target = shortcutIterator.next();
194            final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
195                    shortcutIterator.hasNext(),
196                    target.mFrequency);
197            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
198                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
199            final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
200            mPosition += shortcutShift;
201        }
202        final int shortcutByteSize = mPosition - indexOfShortcutByteSize;
203        if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
204            throw new RuntimeException("Shortcut list too large");
205        }
206        BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize,
207                FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
208    }
209
210    /**
211     * Write a bigram attributes list to mBuffer.
212     *
213     * @param bigrams the bigram attributes list.
214     * @param dict the dictionary the node array is a part of (for relative offsets).
215     */
216    private void writeBigrams(final ArrayList<WeightedString> bigrams,
217            final FusionDictionary dict) {
218        if (bigrams == null) return;
219
220        final Iterator<WeightedString> bigramIterator = bigrams.iterator();
221        while (bigramIterator.hasNext()) {
222            final WeightedString bigram = bigramIterator.next();
223            final PtNode target =
224                    FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
225            final int addressOfBigram = target.mCachedAddressAfterUpdate;
226            final int unigramFrequencyForThisWord = target.mFrequency;
227            final int offset = addressOfBigram
228                    - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
229            final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
230                    offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
231            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
232                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
233            mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
234                    Math.abs(offset));
235        }
236    }
237
238    @Override
239    public void writeForwardLinkAddress(final int forwardLinkAddress) {
240        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, forwardLinkAddress,
241                FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
242    }
243
244    @Override
245    public void writePtNode(final PtNode ptNode, final int parentPosition,
246            final FormatOptions formatOptions, final FusionDictionary dict) {
247        writePtNodeFlags(ptNode, formatOptions);
248        writeParentPosition(parentPosition, ptNode, formatOptions);
249        writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
250        writeFrequency(ptNode.mFrequency);
251        writeChildrenPosition(ptNode, formatOptions);
252        writeShortcuts(ptNode.mShortcutTargets);
253        writeBigrams(ptNode.mBigrams, dict);
254    }
255}
256