/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
17a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanadapackage com.android.inputmethod.latin.makedict;
18a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
19a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanadaimport com.android.inputmethod.annotations.UsedForTesting;
20a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalardimport com.android.inputmethod.latin.BinaryDictionary;
21a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalardimport com.android.inputmethod.latin.Dictionary;
22e507d92aa3ee4ae43124c5452f20aa8ed0ecef4cKeisuke Kuroyanagiimport com.android.inputmethod.latin.PrevWordsInfo;
23a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanadaimport com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
24a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanadaimport com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
25e784148ae6872942434eaa55ca32b4c6442cc8e8Keisuke Kuroyanagiimport com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
26a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalardimport com.android.inputmethod.latin.utils.LocaleUtils;
27a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
28a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanadaimport java.io.File;
29a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanadaimport java.io.IOException;
30a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
31a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada/**
32a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada * An implementation of DictEncoder for version 4 binary dictionary.
33a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada */
34a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada@UsedForTesting
35a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanadapublic class Ver4DictEncoder implements DictEncoder {
36a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    private final File mDictPlacedDir;
37a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
38a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    @UsedForTesting
39a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    public Ver4DictEncoder(final File dictPlacedDir) {
40a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada        mDictPlacedDir = dictPlacedDir;
41a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    }
42a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
43a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard    // TODO: This builds a FusionDictionary first and iterates it to add words to the binary
44a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard    // dictionary. However, it is possible to just add words directly to the binary dictionary
45a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard    // instead.
46a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard    // In the long run, when we stop supporting version 2, FusionDictionary will become deprecated
47a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard    // and we can remove it. Then we'll be able to just call BinaryDictionary directly.
48a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    @Override
49a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard    public void writeDictionary(FusionDictionary dict, FormatOptions formatOptions)
50a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada            throws IOException, UnsupportedFormatException {
51a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada        if (formatOptions.mVersion != FormatSpec.VERSION4) {
52a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada            throw new UnsupportedFormatException("File header has a wrong version number : "
53a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada                    + formatOptions.mVersion);
54a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada        }
55a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada        if (!mDictPlacedDir.isDirectory()) {
56a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada            throw new UnsupportedFormatException("Given path is not a directory.");
57a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada        }
58e784148ae6872942434eaa55ca32b4c6442cc8e8Keisuke Kuroyanagi        if (!BinaryDictionaryUtils.createEmptyDictFile(mDictPlacedDir.getAbsolutePath(),
5943cf9076b2d053c554941e55f6073b8f586c510bJean Chalard                FormatSpec.VERSION4, LocaleUtils.constructLocaleFromString(
6043cf9076b2d053c554941e55f6073b8f586c510bJean Chalard                dict.mOptions.mAttributes.get(DictionaryHeader.DICTIONARY_LOCALE_KEY)),
6143cf9076b2d053c554941e55f6073b8f586c510bJean Chalard                dict.mOptions.mAttributes)) {
62b868375763de60d1a1ff6fa21b121cc1b61df842Jean Chalard            throw new IOException("Cannot create dictionary file : "
63b868375763de60d1a1ff6fa21b121cc1b61df842Jean Chalard                + mDictPlacedDir.getAbsolutePath());
64a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard        }
65a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard        final BinaryDictionary binaryDict = new BinaryDictionary(mDictPlacedDir.getAbsolutePath(),
66a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard                0l, mDictPlacedDir.length(), true /* useFullEditDistance */,
67a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard                LocaleUtils.constructLocaleFromString(dict.mOptions.mAttributes.get(
68b986f78ba826fa360304a69565f1880bdd7ce0c5Keisuke Kuroyanagi                        DictionaryHeader.DICTIONARY_LOCALE_KEY)),
69a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard                Dictionary.TYPE_USER /* Dictionary type. Does not matter for us */,
70a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard                true /* isUpdatable */);
71a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard        if (!binaryDict.isValidDictionary()) {
72a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard            // Somehow createEmptyDictFile returned true, but the file was not created correctly
73a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard            throw new IOException("Cannot create dictionary file");
74a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard        }
755f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi        for (final WordProperty wordProperty : dict) {
76a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard            // TODO: switch to addMultipleDictionaryEntries when they support shortcuts
775f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
78dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                if (!binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
79a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard                        null /* shortcutTarget */, 0 /* shortcutProbability */,
801adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi                        wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
81dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                        wordProperty.mIsBlacklistEntry, 0 /* timestamp */)) {
82dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord);
83dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                }
84a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard            } else {
855f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi                for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
86dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    if (!binaryDict.addUnigramEntry(wordProperty.mWord,
87dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                            wordProperty.getProbability(),
88df1d3e733e2b000c776e74b54d3c62f0d433b013Keisuke Kuroyanagi                            shortcutTarget.mWord, shortcutTarget.getProbability(),
891adca93381d261a6070be2721dbf8b8abafbfe01Keisuke Kuroyanagi                            wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
90dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                            wordProperty.mIsBlacklistEntry, 0 /* timestamp */)) {
91dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                        MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord
92dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                                + ", shortcutTarget: " + shortcutTarget.mWord);
93dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                        return;
94dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    }
952fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa                }
962fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa            }
97a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard            if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
98dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                if (!binaryDict.flushWithGC()) {
99dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    MakedictLog.e("Cannot flush dict with GC.");
100dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    return;
101dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                }
102a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada            }
103a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada        }
1045f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi        for (final WordProperty word0Property : dict) {
1055f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            if (null == word0Property.mBigrams) continue;
1065f5feeba13f6f1a907d90365d8037a361d0ff5daKeisuke Kuroyanagi            for (final WeightedString word1 : word0Property.mBigrams) {
107e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                final PrevWordsInfo prevWordsInfo =
108e708b1bc2e11285ad404133b8de21719ce08acb5Keisuke Kuroyanagi                        new PrevWordsInfo(new PrevWordsInfo.WordInfo(word0Property.mWord));
109dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                if (!binaryDict.addNgramEntry(prevWordsInfo, word1.mWord,
110dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                        word1.getProbability(), 0 /* timestamp */)) {
111dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    MakedictLog.e("Cannot add n-gram entry for "
112dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                            + prevWordsInfo + " -> " + word1.mWord);
113dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    return;
114dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                }
115419636091236b7e5f4818c99aef55043b25eef82Jean Chalard                if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
116dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    if (!binaryDict.flushWithGC()) {
117dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                        MakedictLog.e("Cannot flush dict with GC.");
118dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                        return;
119dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi                    }
120419636091236b7e5f4818c99aef55043b25eef82Jean Chalard                }
121a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard            }
122a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada        }
123dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi        if (!binaryDict.flushWithGC()) {
124dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi            MakedictLog.e("Cannot flush dict with GC.");
125dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi            return;
126dfca51726e9dc9a35f462dee39331823eafa07c9Keisuke Kuroyanagi        }
127a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard        binaryDict.close();
128a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    }
129a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
130a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    @Override
131a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    public void setPosition(int position) {
132a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    }
133a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
134a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    @Override
135a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    public int getPosition() {
136a245d15da5d295af21ead9a01583c64796a31ad7Jean Chalard        return 0;
137a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    }
138a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
139a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    @Override
140a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    public void writePtNodeCount(int ptNodeCount) {
14173b9d3b879c109a7b8487b609b0715ffe3090142Yuichiro Hanada    }
14273b9d3b879c109a7b8487b609b0715ffe3090142Yuichiro Hanada
143a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    @Override
144a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    public void writeForwardLinkAddress(int forwardLinkAddress) {
145a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    }
146a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada
147a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada    @Override
14895d16561e0e6c38dbd99c893f09c5dbe9d4a465dKeisuke Kuroyanagi    public void writePtNode(PtNode ptNode, FusionDictionary dict) {
149c32962b8f1f9b7255fef84486b53cfc874835bbdYuichiro Hanada    }
150a141d8ef7dcf8f942eb7bd4ca006f63da1744319Yuichiro Hanada}
151