1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin.makedict;
18
19import com.android.inputmethod.annotations.UsedForTesting;
20import com.android.inputmethod.latin.Constants;
21import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
22import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
23import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
24import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
25import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
26import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
27import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
28
29import java.io.File;
30import java.io.FileInputStream;
31import java.io.FileNotFoundException;
32import java.io.IOException;
33import java.io.OutputStream;
34import java.util.ArrayList;
35import java.util.Iterator;
36import java.util.Map;
37import java.util.Stack;
38
39public final class BinaryDictIOUtils {
40    private static final boolean DBG = false;
41
42    private BinaryDictIOUtils() {
43        // This utility class is not publicly instantiable.
44    }
45
46    private static final class Position {
47        public static final int NOT_READ_PTNODE_COUNT = -1;
48
49        public int mAddress;
50        public int mNumOfPtNode;
51        public int mPosition;
52        public int mLength;
53
54        public Position(int address, int length) {
55            mAddress = address;
56            mLength = length;
57            mNumOfPtNode = NOT_READ_PTNODE_COUNT;
58        }
59    }
60
61    /**
62     * Retrieves all node arrays without recursive call.
63     */
64    private static void readUnigramsAndBigramsBinaryInner(final DictDecoder dictDecoder,
65            final int headerSize, final Map<Integer, String> words,
66            final Map<Integer, Integer> frequencies,
67            final Map<Integer, ArrayList<PendingAttribute>> bigrams,
68            final FormatOptions formatOptions) {
69        int[] pushedChars = new int[FormatSpec.MAX_WORD_LENGTH + 1];
70
71        Stack<Position> stack = new Stack<Position>();
72        int index = 0;
73
74        Position initPos = new Position(headerSize, 0);
75        stack.push(initPos);
76
77        while (!stack.empty()) {
78            Position p = stack.peek();
79
80            if (DBG) {
81                MakedictLog.d("read: address=" + p.mAddress + ", numOfPtNode=" +
82                        p.mNumOfPtNode + ", position=" + p.mPosition + ", length=" + p.mLength);
83            }
84
85            if (dictDecoder.getPosition() != p.mAddress) dictDecoder.setPosition(p.mAddress);
86            if (index != p.mLength) index = p.mLength;
87
88            if (p.mNumOfPtNode == Position.NOT_READ_PTNODE_COUNT) {
89                p.mNumOfPtNode = dictDecoder.readPtNodeCount();
90                p.mAddress += getPtNodeCountSize(p.mNumOfPtNode);
91                p.mPosition = 0;
92            }
93            if (p.mNumOfPtNode == 0) {
94                stack.pop();
95                continue;
96            }
97            PtNodeInfo info = dictDecoder.readPtNode(p.mAddress, formatOptions);
98            for (int i = 0; i < info.mCharacters.length; ++i) {
99                pushedChars[index++] = info.mCharacters[i];
100            }
101            p.mPosition++;
102
103            final boolean isMovedPtNode = isMovedPtNode(info.mFlags,
104                    formatOptions);
105            final boolean isDeletedPtNode = isDeletedPtNode(info.mFlags,
106                    formatOptions);
107            if (!isMovedPtNode && !isDeletedPtNode
108                    && info.mFrequency != FusionDictionary.PtNode.NOT_A_TERMINAL) {// found word
109                words.put(info.mOriginalAddress, new String(pushedChars, 0, index));
110                frequencies.put(info.mOriginalAddress, info.mFrequency);
111                if (info.mBigrams != null) bigrams.put(info.mOriginalAddress, info.mBigrams);
112            }
113
114            if (p.mPosition == p.mNumOfPtNode) {
115                if (formatOptions.mSupportsDynamicUpdate) {
116                    final boolean hasValidForwardLinkAddress =
117                            dictDecoder.readAndFollowForwardLink();
118                    if (hasValidForwardLinkAddress && dictDecoder.hasNextPtNodeArray()) {
119                        // The node array has a forward link.
120                        p.mNumOfPtNode = Position.NOT_READ_PTNODE_COUNT;
121                        p.mAddress = dictDecoder.getPosition();
122                    } else {
123                        stack.pop();
124                    }
125                } else {
126                    stack.pop();
127                }
128            } else {
129                // The Ptnode array has more PtNodes.
130                p.mAddress = dictDecoder.getPosition();
131            }
132
133            if (!isMovedPtNode && hasChildrenAddress(info.mChildrenAddress)) {
134                final Position childrenPos = new Position(info.mChildrenAddress, index);
135                stack.push(childrenPos);
136            }
137        }
138    }
139
140    /**
141     * Reads unigrams and bigrams from the binary file.
142     * Doesn't store a full memory representation of the dictionary.
143     *
144     * @param dictDecoder the dict decoder.
145     * @param words the map to store the address as a key and the word as a value.
146     * @param frequencies the map to store the address as a key and the frequency as a value.
147     * @param bigrams the map to store the address as a key and the list of address as a value.
148     * @throws IOException if the file can't be read.
149     * @throws UnsupportedFormatException if the format of the file is not recognized.
150     */
151    /* package */ static void readUnigramsAndBigramsBinary(final DictDecoder dictDecoder,
152            final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
153            final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
154            UnsupportedFormatException {
155        // Read header
156        final FileHeader header = dictDecoder.readHeader();
157        readUnigramsAndBigramsBinaryInner(dictDecoder, header.mHeaderSize, words,
158                frequencies, bigrams, header.mFormatOptions);
159    }
160
161    /**
162     * Gets the address of the last PtNode of the exact matching word in the dictionary.
163     * If no match is found, returns NOT_VALID_WORD.
164     *
165     * @param dictDecoder the dict decoder.
166     * @param word the word we search for.
167     * @return the address of the terminal node.
168     * @throws IOException if the file can't be read.
169     * @throws UnsupportedFormatException if the format of the file is not recognized.
170     */
171    @UsedForTesting
172    /* package */ static int getTerminalPosition(final DictDecoder dictDecoder,
173            final String word) throws IOException, UnsupportedFormatException {
174        if (word == null) return FormatSpec.NOT_VALID_WORD;
175        dictDecoder.setPosition(0);
176
177        final FileHeader header = dictDecoder.readHeader();
178        int wordPos = 0;
179        final int wordLen = word.codePointCount(0, word.length());
180        for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
181            if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
182
183            do {
184                final int ptNodeCount = dictDecoder.readPtNodeCount();
185                boolean foundNextPtNode = false;
186                for (int i = 0; i < ptNodeCount; ++i) {
187                    final int ptNodePos = dictDecoder.getPosition();
188                    final PtNodeInfo currentInfo = dictDecoder.readPtNode(ptNodePos,
189                            header.mFormatOptions);
190                    final boolean isMovedNode = isMovedPtNode(currentInfo.mFlags,
191                            header.mFormatOptions);
192                    final boolean isDeletedNode = isDeletedPtNode(currentInfo.mFlags,
193                            header.mFormatOptions);
194                    if (isMovedNode) continue;
195                    boolean same = true;
196                    for (int p = 0, j = word.offsetByCodePoints(0, wordPos);
197                            p < currentInfo.mCharacters.length;
198                            ++p, j = word.offsetByCodePoints(j, 1)) {
199                        if (wordPos + p >= wordLen
200                                || word.codePointAt(j) != currentInfo.mCharacters[p]) {
201                            same = false;
202                            break;
203                        }
204                    }
205
206                    if (same) {
207                        // found the PtNode matches the word.
208                        if (wordPos + currentInfo.mCharacters.length == wordLen) {
209                            if (currentInfo.mFrequency == PtNode.NOT_A_TERMINAL
210                                    || isDeletedNode) {
211                                return FormatSpec.NOT_VALID_WORD;
212                            } else {
213                                return ptNodePos;
214                            }
215                        }
216                        wordPos += currentInfo.mCharacters.length;
217                        if (currentInfo.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
218                            return FormatSpec.NOT_VALID_WORD;
219                        }
220                        foundNextPtNode = true;
221                        dictDecoder.setPosition(currentInfo.mChildrenAddress);
222                        break;
223                    }
224                }
225
226                // If we found the next PtNode, it is under the file pointer.
227                // But if not, we are at the end of this node array so we expect to have
228                // a forward link address that we need to consult and possibly resume
229                // search on the next node array in the linked list.
230                if (foundNextPtNode) break;
231                if (!header.mFormatOptions.mSupportsDynamicUpdate) {
232                    return FormatSpec.NOT_VALID_WORD;
233                }
234
235                final boolean hasValidForwardLinkAddress =
236                        dictDecoder.readAndFollowForwardLink();
237                if (!hasValidForwardLinkAddress || !dictDecoder.hasNextPtNodeArray()) {
238                    return FormatSpec.NOT_VALID_WORD;
239                }
240            } while(true);
241        }
242        return FormatSpec.NOT_VALID_WORD;
243    }
244
245    /**
246     * @return the size written, in bytes. Always 3 bytes.
247     */
248    static int writeSInt24ToBuffer(final DictBuffer dictBuffer,
249            final int value) {
250        final int absValue = Math.abs(value);
251        dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
252        dictBuffer.put((byte)((absValue >> 8) & 0xFF));
253        dictBuffer.put((byte)(absValue & 0xFF));
254        return 3;
255    }
256
257    /**
258     * @return the size written, in bytes. Always 3 bytes.
259     */
260    static int writeSInt24ToStream(final OutputStream destination, final int value)
261            throws IOException {
262        final int absValue = Math.abs(value);
263        destination.write((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
264        destination.write((byte)((absValue >> 8) & 0xFF));
265        destination.write((byte)(absValue & 0xFF));
266        return 3;
267    }
268
269    /**
270     * @return the size written, in bytes. 1, 2, or 3 bytes.
271     */
272    private static int writeVariableAddress(final OutputStream destination, final int value)
273            throws IOException {
274        switch (BinaryDictEncoderUtils.getByteSize(value)) {
275        case 1:
276            destination.write((byte)value);
277            break;
278        case 2:
279            destination.write((byte)(0xFF & (value >> 8)));
280            destination.write((byte)(0xFF & value));
281            break;
282        case 3:
283            destination.write((byte)(0xFF & (value >> 16)));
284            destination.write((byte)(0xFF & (value >> 8)));
285            destination.write((byte)(0xFF & value));
286            break;
287        }
288        return BinaryDictEncoderUtils.getByteSize(value);
289    }
290
291    static void skipString(final DictBuffer dictBuffer,
292            final boolean hasMultipleChars) {
293        if (hasMultipleChars) {
294            int character = CharEncoding.readChar(dictBuffer);
295            while (character != FormatSpec.INVALID_CHARACTER) {
296                character = CharEncoding.readChar(dictBuffer);
297            }
298        } else {
299            CharEncoding.readChar(dictBuffer);
300        }
301    }
302
303    /**
304     * Write a string to a stream.
305     *
306     * @param destination the stream to write.
307     * @param word the string to be written.
308     * @return the size written, in bytes.
309     * @throws IOException
310     */
311    private static int writeString(final OutputStream destination, final String word)
312            throws IOException {
313        int size = 0;
314        final int length = word.length();
315        for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
316            final int codePoint = word.codePointAt(i);
317            if (CharEncoding.getCharSize(codePoint) == 1) {
318                destination.write((byte)codePoint);
319                size++;
320            } else {
321                destination.write((byte)(0xFF & (codePoint >> 16)));
322                destination.write((byte)(0xFF & (codePoint >> 8)));
323                destination.write((byte)(0xFF & codePoint));
324                size += 3;
325            }
326        }
327        destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
328        size += FormatSpec.PTNODE_TERMINATOR_SIZE;
329        return size;
330    }
331
332    /**
333     * Write a PtNode to an output stream from a PtNodeInfo.
334     * A PtNode is an in-memory representation of a node in the patricia trie.
335     * A PtNode info is a container for low-level information about how the
336     * PtNode is stored in the binary format.
337     *
338     * @param destination the stream to write.
339     * @param info the PtNode info to be written.
340     * @return the size written, in bytes.
341     */
342    private static int writePtNode(final OutputStream destination, final PtNodeInfo info)
343            throws IOException {
344        int size = FormatSpec.PTNODE_FLAGS_SIZE;
345        destination.write((byte)info.mFlags);
346        final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ?
347                FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress;
348        size += writeSInt24ToStream(destination, parentOffset);
349
350        for (int i = 0; i < info.mCharacters.length; ++i) {
351            if (CharEncoding.getCharSize(info.mCharacters[i]) == 1) {
352                destination.write((byte)info.mCharacters[i]);
353                size++;
354            } else {
355                size += writeSInt24ToStream(destination, info.mCharacters[i]);
356            }
357        }
358        if (info.mCharacters.length > 1) {
359            destination.write((byte)FormatSpec.PTNODE_CHARACTERS_TERMINATOR);
360            size++;
361        }
362
363        if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
364            destination.write((byte)info.mFrequency);
365            size++;
366        }
367
368        if (DBG) {
369            MakedictLog.d("writePtNode origin=" + info.mOriginalAddress + ", size=" + size
370                    + ", child=" + info.mChildrenAddress + ", characters ="
371                    + new String(info.mCharacters, 0, info.mCharacters.length));
372        }
373        final int childrenOffset = info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ?
374                0 : info.mChildrenAddress - (info.mOriginalAddress + size);
375        writeSInt24ToStream(destination, childrenOffset);
376        size += FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
377
378        if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) {
379            final int shortcutListSize =
380                    BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets);
381            destination.write((byte)(shortcutListSize >> 8));
382            destination.write((byte)(shortcutListSize & 0xFF));
383            size += 2;
384            final Iterator<WeightedString> shortcutIterator = info.mShortcutTargets.iterator();
385            while (shortcutIterator.hasNext()) {
386                final WeightedString target = shortcutIterator.next();
387                destination.write((byte)BinaryDictEncoderUtils.makeShortcutFlags(
388                        shortcutIterator.hasNext(), target.mFrequency));
389                size++;
390                size += writeString(destination, target.mWord);
391            }
392        }
393
394        if (info.mBigrams != null) {
395            // TODO: Consolidate this code with the code that computes the size of the bigram list
396            //        in BinaryDictEncoderUtils#computeActualNodeArraySize
397            for (int i = 0; i < info.mBigrams.size(); ++i) {
398
399                final int bigramFrequency = info.mBigrams.get(i).mFrequency;
400                int bigramFlags = (i < info.mBigrams.size() - 1)
401                        ? FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT : 0;
402                size++;
403                final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress
404                        + size);
405                bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_BIGRAM_ATTR_OFFSET_NEGATIVE : 0;
406                switch (BinaryDictEncoderUtils.getByteSize(bigramOffset)) {
407                case 1:
408                    bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_ONEBYTE;
409                    break;
410                case 2:
411                    bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_TWOBYTES;
412                    break;
413                case 3:
414                    bigramFlags |= FormatSpec.FLAG_BIGRAM_ATTR_ADDRESS_TYPE_THREEBYTES;
415                    break;
416                }
417                bigramFlags |= bigramFrequency & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
418                destination.write((byte)bigramFlags);
419                size += writeVariableAddress(destination, Math.abs(bigramOffset));
420            }
421        }
422        return size;
423    }
424
425    /**
426     * Compute the size of the PtNode.
427     */
428    static int computePtNodeSize(final PtNodeInfo info, final FormatOptions formatOptions) {
429        int size = FormatSpec.PTNODE_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE
430                + BinaryDictEncoderUtils.getPtNodeCharactersSize(info.mCharacters)
431                + getChildrenAddressSize(info.mFlags, formatOptions);
432        if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
433            size += FormatSpec.PTNODE_FREQUENCY_SIZE;
434        }
435        if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) {
436            size += BinaryDictEncoderUtils.getShortcutListSize(info.mShortcutTargets);
437        }
438        if (info.mBigrams != null) {
439            for (final PendingAttribute attr : info.mBigrams) {
440                size += FormatSpec.PTNODE_FLAGS_SIZE;
441                size += BinaryDictEncoderUtils.getByteSize(attr.mAddress);
442            }
443        }
444        return size;
445    }
446
447    /**
448     * Write a node array to the stream.
449     *
450     * @param destination the stream to write.
451     * @param infos an array of PtNodeInfo to be written.
452     * @return the size written, in bytes.
453     * @throws IOException
454     */
455    static int writeNodes(final OutputStream destination, final PtNodeInfo[] infos)
456            throws IOException {
457        int size = getPtNodeCountSize(infos.length);
458        switch (getPtNodeCountSize(infos.length)) {
459            case 1:
460                destination.write((byte)infos.length);
461                break;
462            case 2:
463                final int encodedPtNodeCount =
464                        infos.length | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG;
465                destination.write((byte)(encodedPtNodeCount >> 8));
466                destination.write((byte)(encodedPtNodeCount & 0xFF));
467                break;
468            default:
469                throw new RuntimeException("Invalid node count size.");
470        }
471        for (final PtNodeInfo info : infos) size += writePtNode(destination, info);
472        writeSInt24ToStream(destination, FormatSpec.NO_FORWARD_LINK_ADDRESS);
473        return size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
474    }
475
476    private static final int HEADER_READING_BUFFER_SIZE = 16384;
477    /**
478     * Convenience method to read the header of a binary file.
479     *
480     * This is quite resource intensive - don't call when performance is critical.
481     *
482     * @param file The file to read.
483     * @param offset The offset in the file where to start reading the data.
484     * @param length The length of the data file.
485     */
486    private static FileHeader getDictionaryFileHeader(
487            final File file, final long offset, final long length)
488            throws FileNotFoundException, IOException, UnsupportedFormatException {
489        final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
490        final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file,
491                new DictDecoder.DictionaryBufferFactory() {
492                    @Override
493                    public DictBuffer getDictionaryBuffer(File file)
494                            throws FileNotFoundException, IOException {
495                        final FileInputStream inStream = new FileInputStream(file);
496                        try {
497                            inStream.skip(offset);
498                            inStream.read(buffer);
499                            return new ByteArrayDictBuffer(buffer);
500                        } finally {
501                            inStream.close();
502                        }
503                    }
504                }
505        );
506        return dictDecoder.readHeader();
507    }
508
509    public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
510            final long length) {
511        try {
512            final FileHeader header = getDictionaryFileHeader(file, offset, length);
513            return header;
514        } catch (UnsupportedFormatException e) {
515            return null;
516        } catch (IOException e) {
517            return null;
518        }
519    }
520
521    /**
522     * Helper method to hide the actual value of the no children address.
523     */
524    public static boolean hasChildrenAddress(final int address) {
525        return FormatSpec.NO_CHILDREN_ADDRESS != address;
526    }
527
528    /**
529     * Helper method to check whether the node is moved.
530     */
531    public static boolean isMovedPtNode(final int flags, final FormatOptions options) {
532        return options.mSupportsDynamicUpdate
533                && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_MOVED);
534    }
535
536    /**
537     * Helper method to check whether the dictionary can be updated dynamically.
538     */
539    public static boolean supportsDynamicUpdate(final FormatOptions options) {
540        return options.mVersion >= FormatSpec.FIRST_VERSION_WITH_DYNAMIC_UPDATE
541                && options.mSupportsDynamicUpdate;
542    }
543
544    /**
545     * Helper method to check whether the node is deleted.
546     */
547    public static boolean isDeletedPtNode(final int flags, final FormatOptions formatOptions) {
548        return formatOptions.mSupportsDynamicUpdate
549                && ((flags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) == FormatSpec.FLAG_IS_DELETED);
550    }
551
552    /**
553     * Compute the binary size of the node count
554     * @param count the node count
555     * @return the size of the node count, either 1 or 2 bytes.
556     */
557    public static int getPtNodeCountSize(final int count) {
558        if (FormatSpec.MAX_PTNODES_FOR_ONE_BYTE_PTNODE_COUNT >= count) {
559            return 1;
560        } else if (FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY >= count) {
561            return 2;
562        } else {
563            throw new RuntimeException("Can't have more than "
564                    + FormatSpec.MAX_PTNODES_IN_A_PT_NODE_ARRAY + " PtNode in a PtNodeArray (found "
565                    + count + ")");
566        }
567    }
568
569    static int getChildrenAddressSize(final int optionFlags,
570            final FormatOptions formatOptions) {
571        if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE;
572        switch (optionFlags & FormatSpec.MASK_CHILDREN_ADDRESS_TYPE) {
573            case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_ONEBYTE:
574                return 1;
575            case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_TWOBYTES:
576                return 2;
577            case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_THREEBYTES:
578                return 3;
579            case FormatSpec.FLAG_CHILDREN_ADDRESS_TYPE_NOADDRESS:
580            default:
581                return 0;
582        }
583    }
584
585    /**
586     * Calculate bigram frequency from compressed value
587     *
588     * @param unigramFrequency
589     * @param bigramFrequency compressed frequency
590     * @return approximate bigram frequency
591     */
592    public static int reconstructBigramFrequency(final int unigramFrequency,
593            final int bigramFrequency) {
594        final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
595                / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
596        final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
597        return (int)resultFreqFloat;
598    }
599}
600