/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
1614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada
1714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadapackage com.android.inputmethod.latin.makedict;
1814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada
1914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport com.android.inputmethod.annotations.UsedForTesting;
20ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagiimport com.android.inputmethod.latin.BinaryDictionary;
21ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagiimport com.android.inputmethod.latin.utils.FileUtils;
2214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada
2314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.File;
2414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.FileNotFoundException;
2514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.IOException;
2614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.util.ArrayList;
2714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada
2814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada/**
2914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * An implementation of binary dictionary decoder for version 4 binary dictionary.
3014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada */
3114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada@UsedForTesting
3214d31d464037c31e7f7d382a8a86f6acf4694b06Yuichiro Hanadapublic class Ver4DictDecoder extends AbstractDictDecoder {
33ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi    final File mDictDirectory;
3414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada
3514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    @UsedForTesting
3614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
37ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi        this(dictDirectory, null /* factory */);
3814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    }
3914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada
4014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    @UsedForTesting
4114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    /* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) {
4214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada        mDictDirectory = dictDirectory;
43d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi
4414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    }
4514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada
4614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    @Override
47b986f78ba826fa360304a69565f1880bdd7ce0c5Keisuke Kuroyanagi    public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException {
48d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi        // dictType is not being used in dicttool. Passing an empty string.
49d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi        final BinaryDictionary binaryDictionary= new BinaryDictionary(
50d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi              mDictDirectory.getAbsolutePath(), 0 /* offset */, 0 /* length */,
51d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi              true /* useFullEditDistance */, null /* locale */,
52d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi              "" /* dictType */, true /* isUpdatable */);
53d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi        final DictionaryHeader header = binaryDictionary.getHeader();
54d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi        binaryDictionary.close();
55afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi        if (header == null) {
56afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi            throw new IOException("Cannot read the dictionary header.");
57afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi        }
58d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi        return header;
5914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    }
6014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada
6114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada    @Override
628e3a1d0f89ac5a0c7d31effb8cbb447f93f70310Keisuke Kuroyanagi    public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken)
6314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada            throws FileNotFoundException, IOException, UnsupportedFormatException {
64d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi        // dictType is not being used in dicttool. Passing an empty string.
65d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi        final BinaryDictionary binaryDictionary = new BinaryDictionary(
66d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi              mDictDirectory.getAbsolutePath(), 0 /* offset */, 0 /* length */,
67d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi              true /* useFullEditDistance */, null /* locale */,
68d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi              "" /* dictType */, true /* isUpdatable */);
69ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi        final DictionaryHeader header = readHeader();
708e3a1d0f89ac5a0c7d31effb8cbb447f93f70310Keisuke Kuroyanagi        final FusionDictionary fusionDict =
71ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                new FusionDictionary(new FusionDictionary.PtNodeArray(), header.mDictionaryOptions);
72ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi        int token = 0;
73a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka        final ArrayList<WordProperty> wordProperties = new ArrayList<>();
74ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi        do {
75ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            final BinaryDictionary.GetNextWordPropertyResult result =
76d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi                    binaryDictionary.getNextWordProperty(token);
77ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            final WordProperty wordProperty = result.mWordProperty;
78ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            if (wordProperty == null) {
79d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi                binaryDictionary.close();
80ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                if (deleteDictIfBroken) {
81ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                    FileUtils.deleteRecursively(mDictDirectory);
82ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                }
83ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                return null;
8414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada            }
85ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            wordProperties.add(wordProperty);
86ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            token = result.mNextToken;
87ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi        } while (token != 0);
88ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi
890fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        // Insert unigrams into the fusion dictionary.
90ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi        for (final WordProperty wordProperty : wordProperties) {
91ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            if (wordProperty.mIsBlacklistEntry) {
92ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
93ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                        wordProperty.mIsNotAWord);
94ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            } else {
958ffc631826b108423f98e3ff4d987f067cbc4e0cKeisuke Kuroyanagi                fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo,
96ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                        wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
9714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada            }
9814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada        }
990fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi        // Insert bigrams into the fusion dictionary.
100ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi        for (final WordProperty wordProperty : wordProperties) {
101ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            if (wordProperty.mBigrams == null) {
102ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi                continue;
103ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            }
104ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            final String word0 = wordProperty.mWord;
105ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi            for (final WeightedString bigram : wordProperty.mBigrams) {
1068ffc631826b108423f98e3ff4d987f067cbc4e0cKeisuke Kuroyanagi                fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
1072fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            }
10814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada        }
109d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi        binaryDictionary.close();
110ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi        return fusionDict;
111bc4926235dfff4758ca435362fe7a880d11b4f3bYuichiro Hanada    }
11214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada}
113