1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import com.android.inputmethod.annotations.UsedForTesting;
20import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
21import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
22import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
23import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
24import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
25import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
26import com.android.inputmethod.latin.utils.CollectionUtils;
27
28import android.util.Log;
29
30import java.io.File;
31import java.io.FileNotFoundException;
32import java.io.IOException;
33import java.util.ArrayList;
34import java.util.Arrays;
35
36/**
37 * An implementation of binary dictionary decoder for version 4 binary dictionary.
38 */
39@UsedForTesting
40public class Ver4DictDecoder extends AbstractDictDecoder {
41    private static final String TAG = Ver4DictDecoder.class.getSimpleName();
42
43    private static final int FILETYPE_TRIE = 1;
44    private static final int FILETYPE_FREQUENCY = 2;
45    private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
46    private static final int FILETYPE_BIGRAM_FREQ = 4;
47    private static final int FILETYPE_SHORTCUT = 5;
48
49    private final File mDictDirectory;
50    private final DictionaryBufferFactory mBufferFactory;
51    protected DictBuffer mDictBuffer;
52    private DictBuffer mFrequencyBuffer;
53    private DictBuffer mTerminalAddressTableBuffer;
54    private DictBuffer mBigramBuffer;
55    private DictBuffer mShortcutBuffer;
56    private SparseTable mBigramAddressTable;
57    private SparseTable mShortcutAddressTable;
58
59    @UsedForTesting
60    /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
61        mDictDirectory = dictDirectory;
62        mDictBuffer = mFrequencyBuffer = null;
63
64        if ((factoryFlag & MASK_DICTBUFFER) == USE_READONLY_BYTEBUFFER) {
65            mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
66        } else if ((factoryFlag  & MASK_DICTBUFFER) == USE_BYTEARRAY) {
67            mBufferFactory = new DictionaryBufferFromByteArrayFactory();
68        } else if ((factoryFlag & MASK_DICTBUFFER) == USE_WRITABLE_BYTEBUFFER) {
69            mBufferFactory = new DictionaryBufferFromWritableByteBufferFactory();
70        } else {
71            mBufferFactory = new DictionaryBufferFromReadOnlyByteBufferFactory();
72        }
73    }
74
75    @UsedForTesting
76    /* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
77        mDictDirectory = dictDirectory;
78        mBufferFactory = factory;
79        mDictBuffer = mFrequencyBuffer = null;
80    }
81
82    private File getFile(final int fileType) {
83        if (fileType == FILETYPE_TRIE) {
84            return new File(mDictDirectory,
85                    mDictDirectory.getName() + FormatSpec.TRIE_FILE_EXTENSION);
86        } else if (fileType == FILETYPE_FREQUENCY) {
87            return new File(mDictDirectory,
88                    mDictDirectory.getName() + FormatSpec.FREQ_FILE_EXTENSION);
89        } else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) {
90            return new File(mDictDirectory,
91                    mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
92        } else if (fileType == FILETYPE_BIGRAM_FREQ) {
93            return new File(mDictDirectory,
94                    mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION
95                            + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
96        } else if (fileType == FILETYPE_SHORTCUT) {
97            return new File(mDictDirectory,
98                    mDictDirectory.getName() + FormatSpec.SHORTCUT_FILE_EXTENSION
99                            + FormatSpec.SHORTCUT_CONTENT_ID);
100        } else {
101            throw new RuntimeException("Unsupported kind of file : " + fileType);
102        }
103    }
104
105    @Override
106    public void openDictBuffer() throws FileNotFoundException, IOException {
107        mDictBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_TRIE));
108        mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
109        mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
110                getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
111        mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM_FREQ));
112        loadBigramAddressSparseTable();
113        mShortcutBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_SHORTCUT));
114        loadShortcutAddressSparseTable();
115    }
116
117    @Override
118    public boolean isDictBufferOpen() {
119        return mDictBuffer != null;
120    }
121
122    /* package */ DictBuffer getDictBuffer() {
123        return mDictBuffer;
124    }
125
126    @Override
127    public FileHeader readHeader() throws IOException, UnsupportedFormatException {
128        if (mDictBuffer == null) {
129            openDictBuffer();
130        }
131        final FileHeader header = super.readHeader(mDictBuffer);
132        final int version = header.mFormatOptions.mVersion;
133        if (version != 4) {
134            throw new UnsupportedFormatException("File header has a wrong version : " + version);
135        }
136        return header;
137    }
138
139    private void loadBigramAddressSparseTable() throws IOException {
140        final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
141                + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
142        final File freqsFile = new File(mDictDirectory, mDictDirectory.getName()
143                + FormatSpec.BIGRAM_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
144                + FormatSpec.BIGRAM_FREQ_CONTENT_ID);
145        mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, new File[] { freqsFile },
146                FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
147    }
148
149    // TODO: Let's have something like SparseTableContentsReader in this class.
150    private void loadShortcutAddressSparseTable() throws IOException {
151        final File lookupIndexFile = new File(mDictDirectory, mDictDirectory.getName()
152                + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.LOOKUP_TABLE_FILE_SUFFIX);
153        final File contentFile = new File(mDictDirectory, mDictDirectory.getName()
154                + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
155                + FormatSpec.SHORTCUT_CONTENT_ID);
156        final File timestampsFile = new File(mDictDirectory, mDictDirectory.getName()
157                + FormatSpec.SHORTCUT_FILE_EXTENSION + FormatSpec.CONTENT_TABLE_FILE_SUFFIX
158                + FormatSpec.SHORTCUT_CONTENT_ID);
159        mShortcutAddressTable = SparseTable.readFromFiles(lookupIndexFile,
160                new File[] { contentFile, timestampsFile },
161                FormatSpec.SHORTCUT_ADDRESS_TABLE_BLOCK_SIZE);
162    }
163
164    protected static class PtNodeReader extends AbstractDictDecoder.PtNodeReader {
165        protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
166            frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
167            return frequencyBuffer.readUnsignedByte();
168        }
169
170        protected static int readTerminalId(final DictBuffer dictBuffer) {
171            return dictBuffer.readInt();
172        }
173    }
174
175    private ArrayList<WeightedString> readShortcuts(final int terminalId) {
176        if (mShortcutAddressTable.get(0, terminalId) == SparseTable.NOT_EXIST) return null;
177
178        final ArrayList<WeightedString> ret = CollectionUtils.newArrayList();
179        final int posOfShortcuts = mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX,
180                terminalId);
181        mShortcutBuffer.position(posOfShortcuts);
182        while (true) {
183            final int flags = mShortcutBuffer.readUnsignedByte();
184            final String word = CharEncoding.readString(mShortcutBuffer);
185            ret.add(new WeightedString(word,
186                    flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
187            if (0 == (flags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
188        }
189        return ret;
190    }
191
192    // TODO: Make this buffer thread safe.
193    // TODO: Support words longer than FormatSpec.MAX_WORD_LENGTH.
194    private final int[] mCharacterBuffer = new int[FormatSpec.MAX_WORD_LENGTH];
195    @Override
196    public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) {
197        int addressPointer = ptNodePos;
198        final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
199        addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;
200
201        final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options);
202        if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
203            addressPointer += FormatSpec.PARENT_ADDRESS_SIZE;
204        }
205
206        final int characters[];
207        if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
208            int index = 0;
209            int character = CharEncoding.readChar(mDictBuffer);
210            addressPointer += CharEncoding.getCharSize(character);
211            while (FormatSpec.INVALID_CHARACTER != character
212                    && index < FormatSpec.MAX_WORD_LENGTH) {
213                mCharacterBuffer[index++] = character;
214                character = CharEncoding.readChar(mDictBuffer);
215                addressPointer += CharEncoding.getCharSize(character);
216            }
217            characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
218        } else {
219            final int character = CharEncoding.readChar(mDictBuffer);
220            addressPointer += CharEncoding.getCharSize(character);
221            characters = new int[] { character };
222        }
223        final int terminalId;
224        if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
225            terminalId = PtNodeReader.readTerminalId(mDictBuffer);
226            addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
227        } else {
228            terminalId = PtNode.NOT_A_TERMINAL;
229        }
230
231        final int frequency;
232        if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
233            frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId);
234        } else {
235            frequency = PtNode.NOT_A_TERMINAL;
236        }
237        int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
238        if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
239            childrenAddress += addressPointer;
240        }
241        addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
242        final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);
243
244        final ArrayList<PendingAttribute> bigrams;
245        if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
246            bigrams = new ArrayList<PendingAttribute>();
247            final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId);
248            mBigramBuffer.position(posOfBigrams);
249            while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
250                // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
251                // remaining bigram entries are ignored.
252                final int bigramFlags = mBigramBuffer.readUnsignedByte();
253                final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
254                mTerminalAddressTableBuffer.position(
255                        targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
256                final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
257                bigrams.add(new PendingAttribute(
258                        bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
259                        targetAddress));
260                if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
261            }
262            if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
263                throw new RuntimeException("Too many bigrams in a PtNode (" + bigrams.size()
264                        + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")");
265            }
266        } else {
267            bigrams = null;
268        }
269        return new PtNodeInfo(ptNodePos, addressPointer, flags, characters, frequency,
270                parentAddress, childrenAddress, shortcutTargets, bigrams);
271    }
272
273    private void deleteDictFiles() {
274        final File[] files = mDictDirectory.listFiles();
275        for (int i = 0; i < files.length; ++i) {
276            files[i].delete();
277        }
278    }
279
280    @Override
281    public FusionDictionary readDictionaryBinary(final FusionDictionary dict,
282            final boolean deleteDictIfBroken)
283            throws FileNotFoundException, IOException, UnsupportedFormatException {
284        if (mDictBuffer == null) {
285            openDictBuffer();
286        }
287        try {
288            return BinaryDictDecoderUtils.readDictionaryBinary(this, dict);
289        } catch (IOException e) {
290            Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", e);
291            if (deleteDictIfBroken) {
292                deleteDictFiles();
293            }
294            throw e;
295        } catch (UnsupportedFormatException e) {
296            Log.e(TAG, "The dictionary " + mDictDirectory.getName() + " is broken.", e);
297            if (deleteDictIfBroken) {
298                deleteDictFiles();
299            }
300            throw e;
301        }
302    }
303
304    @Override
305    public void setPosition(int newPos) {
306        mDictBuffer.position(newPos);
307    }
308
309    @Override
310    public int getPosition() {
311        return mDictBuffer.position();
312    }
313
314    @Override
315    public int readPtNodeCount() {
316        return BinaryDictDecoderUtils.readPtNodeCount(mDictBuffer);
317    }
318
319    @Override
320    public boolean readAndFollowForwardLink() {
321        final int nextAddress = mDictBuffer.readUnsignedInt24();
322        if (nextAddress >= 0 && nextAddress < mDictBuffer.limit()) {
323            mDictBuffer.position(nextAddress);
324            return true;
325        }
326        return false;
327    }
328
329    @Override
330    public boolean hasNextPtNodeArray() {
331        return mDictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS;
332    }
333
334    @Override
335    public void skipPtNode(final FormatOptions formatOptions) {
336        final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
337        PtNodeReader.readParentAddress(mDictBuffer, formatOptions);
338        BinaryDictIOUtils.skipString(mDictBuffer,
339                (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
340        if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) PtNodeReader.readTerminalId(mDictBuffer);
341        PtNodeReader.readChildrenAddress(mDictBuffer, flags, formatOptions);
342    }
343}
344