1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import com.android.inputmethod.annotations.UsedForTesting;
20import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
21import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
22import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
23import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
24import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
25
26import java.io.IOException;
27import java.util.ArrayList;
28import java.util.HashMap;
29import java.util.TreeMap;
30
31/**
32 * A base class of the binary dictionary decoder.
33 */
34public abstract class AbstractDictDecoder implements DictDecoder {
35    protected FileHeader readHeader(final DictBuffer dictBuffer)
36            throws IOException, UnsupportedFormatException {
37        if (dictBuffer == null) {
38            openDictBuffer();
39        }
40
41        final int version = HeaderReader.readVersion(dictBuffer);
42        if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
43                || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
44          throw new UnsupportedFormatException("Unsupported version : " + version);
45        }
46        // TODO: Remove this field.
47        final int optionsFlags = HeaderReader.readOptionFlags(dictBuffer);
48
49        final int headerSize = HeaderReader.readHeaderSize(dictBuffer);
50
51        if (headerSize < 0) {
52            throw new UnsupportedFormatException("header size can't be negative.");
53        }
54
55        final HashMap<String, String> attributes = HeaderReader.readAttributes(dictBuffer,
56                headerSize);
57
58        final FileHeader header = new FileHeader(headerSize,
59                new FusionDictionary.DictionaryOptions(attributes,
60                        0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
61                        0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
62                        new FormatOptions(version,
63                                0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE),
64                                0 != (optionsFlags & FormatSpec.CONTAINS_TIMESTAMP_FLAG)));
65        return header;
66    }
67
68    @Override @UsedForTesting
69    public int getTerminalPosition(final String word)
70            throws IOException, UnsupportedFormatException {
71        if (!isDictBufferOpen()) {
72            openDictBuffer();
73        }
74        return BinaryDictIOUtils.getTerminalPosition(this, word);
75    }
76
77    @Override @UsedForTesting
78    public void readUnigramsAndBigramsBinary(final TreeMap<Integer, String> words,
79            final TreeMap<Integer, Integer> frequencies,
80            final TreeMap<Integer, ArrayList<PendingAttribute>> bigrams)
81            throws IOException, UnsupportedFormatException {
82        if (!isDictBufferOpen()) {
83            openDictBuffer();
84        }
85        BinaryDictIOUtils.readUnigramsAndBigramsBinary(this, words, frequencies, bigrams);
86    }
87
88    /**
89     * A utility class for reading a file header.
90     */
91    protected static class HeaderReader {
92        protected static int readVersion(final DictBuffer dictBuffer)
93                throws IOException, UnsupportedFormatException {
94            return BinaryDictDecoderUtils.checkFormatVersion(dictBuffer);
95        }
96
97        protected static int readOptionFlags(final DictBuffer dictBuffer) {
98            return dictBuffer.readUnsignedShort();
99        }
100
101        protected static int readHeaderSize(final DictBuffer dictBuffer) {
102            return dictBuffer.readInt();
103        }
104
105        protected static HashMap<String, String> readAttributes(final DictBuffer dictBuffer,
106                final int headerSize) {
107            final HashMap<String, String> attributes = new HashMap<String, String>();
108            while (dictBuffer.position() < headerSize) {
109                // We can avoid an infinite loop here since dictBuffer.position() is always
110                // increased by calling CharEncoding.readString.
111                final String key = CharEncoding.readString(dictBuffer);
112                final String value = CharEncoding.readString(dictBuffer);
113                attributes.put(key, value);
114            }
115            dictBuffer.position(headerSize);
116            return attributes;
117        }
118    }
119
120    /**
121     * A utility class for reading a PtNode.
122     */
123    protected static class PtNodeReader {
124        protected static int readPtNodeOptionFlags(final DictBuffer dictBuffer) {
125            return dictBuffer.readUnsignedByte();
126        }
127
128        protected static int readParentAddress(final DictBuffer dictBuffer,
129                final FormatOptions formatOptions) {
130            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
131                return BinaryDictDecoderUtils.readSInt24(dictBuffer);
132            } else {
133                return FormatSpec.NO_PARENT_ADDRESS;
134            }
135        }
136
137        protected static int readChildrenAddress(final DictBuffer dictBuffer, final int optionFlags,
138                final FormatOptions formatOptions) {
139            if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
140                final int address = BinaryDictDecoderUtils.readSInt24(dictBuffer);
141                if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
142                return address;
143            } else {
144                switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
145                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
146                        return dictBuffer.readUnsignedByte();
147                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
148                        return dictBuffer.readUnsignedShort();
149                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
150                        return dictBuffer.readUnsignedInt24();
151                    case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
152                    default:
153                        return FormatSpec.NO_CHILDREN_ADDRESS;
154                }
155            }
156        }
157
158        // Reads shortcuts and returns the read length.
159        protected static int readShortcut(final DictBuffer dictBuffer,
160                final ArrayList<WeightedString> shortcutTargets) {
161            final int pointerBefore = dictBuffer.position();
162            dictBuffer.readUnsignedShort(); // skip the size
163            while (true) {
164                final int targetFlags = dictBuffer.readUnsignedByte();
165                final String word = CharEncoding.readString(dictBuffer);
166                shortcutTargets.add(new WeightedString(word,
167                        targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
168                if (0 == (targetFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
169            }
170            return dictBuffer.position() - pointerBefore;
171        }
172
173        protected static int readBigramAddresses(final DictBuffer dictBuffer,
174                final ArrayList<PendingAttribute> bigrams, final int baseAddress) {
175            int readLength = 0;
176            int bigramCount = 0;
177            while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
178                final int bigramFlags = dictBuffer.readUnsignedByte();
179                ++readLength;
180                final int sign = 0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE)
181                        ? 1 : -1;
182                int bigramAddress = baseAddress + readLength;
183                switch (bigramFlags & FormatSpec.MASK_BIGRAM_ATTR_ADDRESS_TYPE) {
184                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE:
185                        bigramAddress += sign * dictBuffer.readUnsignedByte();
186                        readLength += 1;
187                        break;
188                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES:
189                        bigramAddress += sign * dictBuffer.readUnsignedShort();
190                        readLength += 2;
191                        break;
192                    case FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES:
193                        bigramAddress += sign * dictBuffer.readUnsignedInt24();
194                        readLength += 3;
195                        break;
196                    default:
197                        throw new RuntimeException("Has bigrams with no address");
198                }
199                bigrams.add(new PendingAttribute(
200                        bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
201                        bigramAddress));
202                if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
203            }
204            return readLength;
205        }
206    }
207}
208