114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada/* 214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * Copyright (C) 2013 The Android Open Source Project 314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * 414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * Licensed under the Apache License, Version 2.0 (the "License"); 514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * you may not use this file except in compliance with the License. 614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * You may obtain a copy of the License at 714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * 814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * http://www.apache.org/licenses/LICENSE-2.0 914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * 1014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * Unless required by applicable law or agreed to in writing, software 1114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * distributed under the License is distributed on an "AS IS" BASIS, 1214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * See the License for the specific language governing permissions and 1414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * limitations under the License. 1514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada */ 1614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 1714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadapackage com.android.inputmethod.latin.makedict; 1814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 1914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport com.android.inputmethod.annotations.UsedForTesting; 20ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagiimport com.android.inputmethod.latin.BinaryDictionary; 215b91b551e5ffaf2c2e691dfbd434f21c82293986Jean Chalardimport com.android.inputmethod.latin.common.FileUtils; 2214087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 2314087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.File; 2414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.FileNotFoundException; 2514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.io.IOException; 2614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanadaimport java.util.ArrayList; 2714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 2814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada/** 2914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada * An implementation of binary dictionary decoder for version 4 binary dictionary. 3014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada */ 3114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada@UsedForTesting 3214d31d464037c31e7f7d382a8a86f6acf4694b06Yuichiro Hanadapublic class Ver4DictDecoder extends AbstractDictDecoder { 33ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final File mDictDirectory; 3414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 3514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada @UsedForTesting 36d3a4c5132422b189c8dbb94dbbe84a9b9761b0a8Tadashi G. Takaoka /* package */ Ver4DictDecoder(final File dictDirectory) { 3714087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada mDictDirectory = dictDirectory; 38d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi 3914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 4014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 4114087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada @Override 42b986f78ba826fa360304a69565f1880bdd7ce0c5Keisuke Kuroyanagi public DictionaryHeader readHeader() throws IOException, UnsupportedFormatException { 43d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi // dictType is not being used in dicttool. Passing an empty string. 44d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi final BinaryDictionary binaryDictionary= new BinaryDictionary( 45d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi mDictDirectory.getAbsolutePath(), 0 /* offset */, 0 /* length */, 46d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi true /* useFullEditDistance */, null /* locale */, 47d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi "" /* dictType */, true /* isUpdatable */); 48d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi final DictionaryHeader header = binaryDictionary.getHeader(); 49d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi binaryDictionary.close(); 50afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi if (header == null) { 51afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi throw new IOException("Cannot read the dictionary header."); 52afd9b62f00cd4557b32dae5bed6ed40320f86857Keisuke Kuroyanagi } 53d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi return header; 5414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 5514087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada 5614087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada @Override 578e3a1d0f89ac5a0c7d31effb8cbb447f93f70310Keisuke Kuroyanagi public FusionDictionary readDictionaryBinary(final boolean deleteDictIfBroken) 5814087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada throws FileNotFoundException, IOException, UnsupportedFormatException { 59d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi // dictType is not being used in dicttool. Passing an empty string. 60d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi final BinaryDictionary binaryDictionary = new BinaryDictionary( 61d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi mDictDirectory.getAbsolutePath(), 0 /* offset */, 0 /* length */, 62d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi true /* useFullEditDistance */, null /* locale */, 63d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi "" /* dictType */, true /* isUpdatable */); 64ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final DictionaryHeader header = readHeader(); 658e3a1d0f89ac5a0c7d31effb8cbb447f93f70310Keisuke Kuroyanagi final FusionDictionary fusionDict = 66ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi new FusionDictionary(new FusionDictionary.PtNodeArray(), header.mDictionaryOptions); 67ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi int token = 0; 68a91561aa58db1c43092c1caecc051a11fa5391c7Tadashi G. Takaoka final ArrayList<WordProperty> wordProperties = new ArrayList<>(); 69ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi do { 70ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final BinaryDictionary.GetNextWordPropertyResult result = 71d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi binaryDictionary.getNextWordProperty(token); 72ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final WordProperty wordProperty = result.mWordProperty; 73ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi if (wordProperty == null) { 74d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi binaryDictionary.close(); 75ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi if (deleteDictIfBroken) { 76ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi FileUtils.deleteRecursively(mDictDirectory); 77ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi } 78ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi return null; 7914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 80ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi wordProperties.add(wordProperty); 81ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi token = result.mNextToken; 82ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi } while (token != 0); 83ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi 840fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi // Insert unigrams into the fusion dictionary. 85ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi for (final WordProperty wordProperty : wordProperties) { 8605172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu fusionDict.add(wordProperty.mWord, wordProperty.mProbabilityInfo, 8712d80ebead6a1d7f704a5a3af3b6fe3313ceab05Dan Zivkovic wordProperty.mIsNotAWord, 8805172bf1a5693c2e108e91436b98ecd35d2dadadAdrian Velicu wordProperty.mIsPossiblyOffensive); 8914087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 900fc93fe4455f24809f6c9baf0d3b936519779cfbKeisuke Kuroyanagi // Insert bigrams into the fusion dictionary. 91c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi // TODO: Support ngrams. 92ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi for (final WordProperty wordProperty : wordProperties) { 93c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi if (!wordProperty.mHasNgrams) { 94ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi continue; 95ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi } 96ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi final String word0 = wordProperty.mWord; 97c6a6f6a9905ab98516d944ac85933d016e4147fbKeisuke Kuroyanagi for (final WeightedString bigram : wordProperty.getBigrams()) { 988ffc631826b108423f98e3ff4d987f067cbc4e0cKeisuke Kuroyanagi fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); 992fa3693c264a4c150ac307d9bb7f6f8f18cc4ffcKen Wakasa } 10014087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada } 101d24a99cff6da3a7121a507e77409261e4f6704dcKeisuke Kuroyanagi binaryDictionary.close(); 102ab6a93773ba3cbe93002bc37b6b61f874fc09144Keisuke Kuroyanagi return fusionDict; 103bc4926235dfff4758ca435362fe7a880d11b4f3bYuichiro Hanada } 10414087ba52c6b5b7acd25ee4a1ef1663ceb72bbf4Yuichiro Hanada} 105