114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada/* 214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * Copyright (C) 2013 The Android Open Source Project 314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * 414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * Licensed under the Apache License, Version 2.0 (the "License"); 514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * you may not use this file except in compliance with the License. 614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * You may obtain a copy of the License at 714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * 814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * http://www.apache.org/licenses/LICENSE-2.0 914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * 1014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * Unless required by applicable law or agreed to in writing, software 1114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * distributed under the License is distributed on an "AS IS" BASIS, 1214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * See the License for the specific language governing permissions and 1414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * limitations under the License. 1514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada */ 1614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 1714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadapackage com.android.inputmethod.latin.makedict; 1814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 1914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport com.android.inputmethod.annotations.UsedForTesting; 20ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagiimport com.android.inputmethod.latin.BinaryDictionary; 21ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagiimport com.android.inputmethod.latin.utils.FileUtils; 2214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 2314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.File; 2414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.FileNotFoundException; 2514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.IOException; 2614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.util.ArrayList; 2714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 2814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada/** 2914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * An implementation of binary dictionary decoder for version 4 binary dictionary. 3014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada */ 3114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada@UsedForTesting 3214d31d464037c31e7f7d382a8a86f6acf4694b06Yuichiro Hanadapublic class Ver4DictDecoder extends AbstractDictDecoder { 33ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final File mDictDirectory; 3414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 3514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada @UsedForTesting 3614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada /* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) { 37ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi this(dictDirectory, null /* factory */); 3814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 3914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 4014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada @UsedForTesting 4114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada /* package */ Ver4DictDecoder(final File dictDirectory, final DictionaryBufferFactory factory) { 4214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada mDictDirectory = dictDirectory; 43d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi 4414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 4514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 4614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada @Override 47b986f78ba826fa360304a69565f1880bdd7ce0c5Keisuke Kuroyanagi public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException { 48d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi // dictType is not being used in dicttool. Passing an empty string. 49d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi final BinaryDictionary binaryDictionary= new BinaryDictionary( 50d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi mDictDirectory.getAbsolutePath(), 0 /* offset */, 0 /* length */, 51d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi true /* useFullEditDistance */, null /* locale */, 52d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi "" /* dictType */, true /* isUpdatable */); 53d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi final DictionaryHeader header = binaryDictionary.getHeader(); 54d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi binaryDictionary.close(); 55afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi if (header == null) { 56afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi throw new IOException("Cannot read the dictionary header."); 57afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi } 58d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi return header; 5914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 6014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 6114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada @Override 628e3a1d0f89ac5a0c7d31effb8cbb447f93f70310Keisuke Kuroyanagi public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken) 6314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada throws FileNotFoundException, IOException, UnsupportedFormatException { 64d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi // dictType is not being used in dicttool. Passing an empty string. 65d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi final BinaryDictionary binaryDictionary = new BinaryDictionary( 66d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi mDictDirectory.getAbsolutePath(), 0 /* offset */, 0 /* length */, 67d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi true /* useFullEditDistance */, null /* locale */, 68d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi "" /* dictType */, true /* isUpdatable */); 69ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final DictionaryHeader header = readHeader(); 708e3a1d0f89ac5a0c7d31effb8cbb447f93f70310Keisuke Kuroyanagi final FusionDictionary fusionDict = 71ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi new FusionDictionary(new FusionDictionary.PtNodeArray(), header.mDictionaryOptions); 72ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi int token = 0; 73a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<WordProperty> wordProperties = new ArrayList<>(); 74ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi do { 75ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final BinaryDictionary.GetNextWordPropertyResult result = 76d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi binaryDictionary.getNextWordProperty(token); 77ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final WordProperty wordProperty = result.mWordProperty; 78ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi if (wordProperty == null) { 79d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi binaryDictionary.close(); 80ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi if (deleteDictIfBroken) { 81ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi FileUtils.deleteRecursively(mDictDirectory); 82ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi } 83ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi return null; 8414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 85ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi wordProperties.add(wordProperty); 86ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi token = result.mNextToken; 87ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi } while (token != 0); 88ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi 890fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi // Insert unigrams into the fusion dictionary. 90ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi for (final WordProperty wordProperty : wordProperties) { 91ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi if (wordProperty.mIsBlacklistEntry) { 92ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi fusionDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets, 93ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi wordProperty.mIsNotAWord); 94ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi } else { 958ffc631826b108423f98e3ff4d987f067cbc4e0cKeisuke Kuroyanagi fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo, 96ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi wordProperty.mShortcutTargets, wordProperty.mIsNotAWord); 9714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 9814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 990fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi // Insert bigrams into the fusion dictionary. 100ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi for (final WordProperty wordProperty : wordProperties) { 101ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi if (wordProperty.mBigrams == null) { 102ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi continue; 103ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi } 104ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final String word0 = wordProperty.mWord; 105ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi for (final WeightedString bigram : wordProperty.mBigrams) { 1068ffc631826b108423f98e3ff4d987f067cbc4e0cKeisuke Kuroyanagi fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); 1072fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa } 10814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 109d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi binaryDictionary.close(); 110ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi return fusionDict; 111bc4926235dfff4758ca435362fe7a880d11b4f3bYuichiro Hanada } 11214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada} 113