/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.PrevWordsInfo;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.LocaleUtils;

import java.io.File;
import java.io.IOException;

/**
 * An implementation of DictEncoder for version 4 binary dictionary.
 *
 * <p>The dictionary is written into the directory given at construction time by creating an
 * empty dictionary there, opening it as an updatable {@link BinaryDictionary}, and adding every
 * entry of the supplied {@link FusionDictionary} to it. The low-level position / PtNode writing
 * methods of {@link DictEncoder} are no-ops in this implementation.
 */
@UsedForTesting
public class Ver4DictEncoder implements DictEncoder {
    /** Directory in which the dictionary is created. */
    private final File mDictPlacedDir;

    /**
     * Creates an encoder that writes into the given directory.
     *
     * @param dictPlacedDir the directory in which the dictionary is to be placed. It is checked
     *        to be a directory only when {@link #writeDictionary} is called.
     */
    @UsedForTesting
    public Ver4DictEncoder(final File dictPlacedDir) {
        mDictPlacedDir = dictPlacedDir;
    }

    // TODO: This builds a FusionDictionary first and iterates it to add words to the binary
    // dictionary. However, it is possible to just add words directly to the binary dictionary
    // instead.
    // In the long run, when we stop supporting version 2, FusionDictionary will become deprecated
    // and we can remove it. Then we'll be able to just call BinaryDictionary directly.
    /**
     * Writes the contents of {@code dict} as a version 4 dictionary.
     *
     * <p>All unigrams (with their shortcut targets, if any) are added first, then all bigrams,
     * and finally the dictionary is flushed with GC. If adding an entry or flushing fails, the
     * failure is logged through {@link MakedictLog} and this method returns without throwing.
     * The opened {@link BinaryDictionary} is closed on every exit path.
     *
     * @param dict the dictionary to write.
     * @param formatOptions the format options; {@code mVersion} must be
     *        {@link FormatSpec#VERSION4}.
     * @throws IOException if the empty dictionary cannot be created, or if the dictionary that
     *         was created is not reported as valid.
     * @throws UnsupportedFormatException if the requested version is not version 4, or if the
     *         path given at construction time is not a directory.
     */
    @Override
    public void writeDictionary(FusionDictionary dict, FormatOptions formatOptions)
            throws IOException, UnsupportedFormatException {
        if (formatOptions.mVersion != FormatSpec.VERSION4) {
            throw new UnsupportedFormatException("File header has a wrong version number : "
                    + formatOptions.mVersion);
        }
        if (!mDictPlacedDir.isDirectory()) {
            throw new UnsupportedFormatException("Given path is not a directory.");
        }
        final String dictDirPath = mDictPlacedDir.getAbsolutePath();
        if (!BinaryDictionaryUtils.createEmptyDictFile(dictDirPath,
                FormatSpec.VERSION4, LocaleUtils.constructLocaleFromString(
                        dict.mOptions.mAttributes.get(DictionaryHeader.DICTIONARY_LOCALE_KEY)),
                dict.mOptions.mAttributes)) {
            throw new IOException("Cannot create dictionary file : " + dictDirPath);
        }
        // NOTE(review): mDictPlacedDir is a directory here, and File#length() is documented as
        // unspecified for directories - confirm what BinaryDictionary expects for this argument.
        final BinaryDictionary binaryDict = new BinaryDictionary(dictDirPath,
                0L, mDictPlacedDir.length(), true /* useFullEditDistance */,
                LocaleUtils.constructLocaleFromString(dict.mOptions.mAttributes.get(
                        DictionaryHeader.DICTIONARY_LOCALE_KEY)),
                Dictionary.TYPE_USER /* Dictionary type. Does not matter for us */,
                true /* isUpdatable */);
        // From here on the dictionary is open: make sure it is closed however we leave.
        try {
            if (!binaryDict.isValidDictionary()) {
                // Somehow createEmptyDictFile returned true, but the file was not created
                // correctly
                throw new IOException("Cannot create dictionary file");
            }
            if (!addUnigrams(binaryDict, dict)) {
                return;
            }
            if (!addBigrams(binaryDict, dict)) {
                return;
            }
            if (!binaryDict.flushWithGC()) {
                MakedictLog.e("Cannot flush dict with GC.");
            }
        } finally {
            binaryDict.close();
        }
    }

    /**
     * Adds every word of {@code dict} to {@code binaryDict} as a unigram, once per shortcut
     * target when the word has shortcut targets, running GC after each word when needed.
     *
     * @return false if writing must be aborted (the failure has already been logged).
     */
    private static boolean addUnigrams(final BinaryDictionary binaryDict,
            final FusionDictionary dict) {
        for (final WordProperty wordProperty : dict) {
            // TODO: switch to addMultipleDictionaryEntries when they support shortcuts
            if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
                // NOTE(review): unlike every other failure in this class, a failure here is
                // only logged and writing continues - confirm whether that is intended.
                if (!binaryDict.addUnigramEntry(wordProperty.mWord, wordProperty.getProbability(),
                        null /* shortcutTarget */, 0 /* shortcutProbability */,
                        wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
                        wordProperty.mIsBlacklistEntry, 0 /* timestamp */)) {
                    MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord);
                }
            } else {
                for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
                    if (!binaryDict.addUnigramEntry(wordProperty.mWord,
                            wordProperty.getProbability(),
                            shortcutTarget.mWord, shortcutTarget.getProbability(),
                            wordProperty.mIsBeginningOfSentence, wordProperty.mIsNotAWord,
                            wordProperty.mIsBlacklistEntry, 0 /* timestamp */)) {
                        MakedictLog.e("Cannot add unigram entry for " + wordProperty.mWord
                                + ", shortcutTarget: " + shortcutTarget.mWord);
                        return false;
                    }
                }
            }
            if (!flushIfNeeded(binaryDict)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds every bigram of every word of {@code dict} to {@code binaryDict}, running GC after
     * each bigram when needed.
     *
     * @return false if writing must be aborted (the failure has already been logged).
     */
    private static boolean addBigrams(final BinaryDictionary binaryDict,
            final FusionDictionary dict) {
        for (final WordProperty word0Property : dict) {
            if (null == word0Property.mBigrams) {
                continue;
            }
            for (final WeightedString word1 : word0Property.mBigrams) {
                final PrevWordsInfo prevWordsInfo =
                        new PrevWordsInfo(new PrevWordsInfo.WordInfo(word0Property.mWord));
                if (!binaryDict.addNgramEntry(prevWordsInfo, word1.mWord,
                        word1.getProbability(), 0 /* timestamp */)) {
                    MakedictLog.e("Cannot add n-gram entry for "
                            + prevWordsInfo + " -> " + word1.mWord);
                    return false;
                }
                if (!flushIfNeeded(binaryDict)) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Flushes {@code binaryDict} with GC if it reports that GC needs to run.
     *
     * @return false if a flush was attempted and failed (the failure has already been logged).
     */
    private static boolean flushIfNeeded(final BinaryDictionary binaryDict) {
        if (binaryDict.needsToRunGC(true /* mindsBlockByGC */) && !binaryDict.flushWithGC()) {
            MakedictLog.e("Cannot flush dict with GC.");
            return false;
        }
        return true;
    }

    /** Does nothing in this implementation. */
    @Override
    public void setPosition(int position) {
    }

    /** Always returns 0 in this implementation. */
    @Override
    public int getPosition() {
        return 0;
    }

    /** Does nothing in this implementation. */
    @Override
    public void writePtNodeCount(int ptNodeCount) {
    }

    /** Does nothing in this implementation. */
    @Override
    public void writeForwardLinkAddress(int forwardLinkAddress) {
    }

    /** Does nothing in this implementation. */
    @Override
    public void writePtNode(PtNode ptNode, FusionDictionary dict) {
    }
}