/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * An implementation of DictEncoder for version 3 binary dictionary.
 *
 * The dictionary body is first assembled in an in-memory buffer ({@code mBuffer}), using
 * {@code mPosition} as the write cursor, and is then copied to the output stream in one go.
 */
public class Ver3DictEncoder implements DictEncoder {

    private final File mDictFile;
    private OutputStream mOutStream;
    private byte[] mBuffer;
    private int mPosition;

    /**
     * Creates an encoder that writes to a file. The file is only opened when
     * {@link #writeDictionary} is called.
     *
     * @param dictFile the file to write the dictionary to.
     */
    public Ver3DictEncoder(final File dictFile) {
        mDictFile = dictFile;
        mOutStream = null;
        mBuffer = null;
    }

    // This constructor is used only by BinaryDictOffdeviceUtilsTests.
    // If you want to use this in the production code, you should consider keeping consistency of
    // the interface of Ver3DictDecoder by using factory.
    /**
     * Creates an encoder that writes to an already opened stream. The stream is closed by
     * {@link #writeDictionary} once writing has been attempted.
     *
     * @param outStream the stream to write the dictionary to.
     */
    public Ver3DictEncoder(final OutputStream outStream) {
        mDictFile = null;
        mOutStream = outStream;
    }

    /**
     * Opens an output stream on the dictionary file.
     *
     * @throws FileNotFoundException if the file cannot be opened for writing.
     */
    private void openStream() throws FileNotFoundException {
        mOutStream = new FileOutputStream(mDictFile);
    }

    /**
     * Closes the output stream if there is one, and forgets about it.
     *
     * @throws IOException if closing the stream fails.
     */
    private void close() throws IOException {
        if (mOutStream != null) {
            mOutStream.close();
            mOutStream = null;
        }
    }

    /**
     * Writes the header and the body of a dictionary to the output, then closes the output.
     *
     * The output stream is closed whether writing succeeds or fails, so that a failure
     * part-way through does not leak the underlying file descriptor.
     *
     * @param dict the dictionary to write.
     * @param formatOptions the format options to write the dictionary with.
     * @throws IOException if the output cannot be opened, written to or closed.
     * @throws UnsupportedFormatException if the requested version is greater than version 3.
     */
    @Override
    public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
            throws IOException, UnsupportedFormatException {
        if (formatOptions.mVersion > FormatSpec.VERSION3) {
            throw new UnsupportedFormatException(
                    "The given format options has wrong version number : "
                    + formatOptions.mVersion);
        }

        if (mOutStream == null) {
            openStream();
        }
        try {
            BinaryDictEncoderUtils.writeDictionaryHeader(mOutStream, dict, formatOptions);

            // Addresses are limited to 3 bytes, but since addresses can be relative to each node
            // array, the structure itself is not limited to 16MB. However, if it is over 16MB
            // deciding the order of the PtNode arrays becomes a quite complicated problem,
            // because though the dictionary itself does not have a size limit, each node array
            // must still be within 16MB of all its children and parents. As long as this is
            // ensured, the dictionary file may grow to any size.

            // Leave the choice of the optimal node order to the flattenTree function.
            MakedictLog.i("Flattening the tree...");
            ArrayList<PtNodeArray> flatNodes =
                    BinaryDictEncoderUtils.flattenTree(dict.mRootNodeArray);

            MakedictLog.i("Computing addresses...");
            BinaryDictEncoderUtils.computeAddresses(dict, flatNodes, formatOptions);
            MakedictLog.i("Checking PtNode array...");
            if (MakedictLog.DBG) {
                BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
            }

            // Create a buffer that matches the final dictionary size.
            final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
            final int bufferSize =
                    lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
            mBuffer = new byte[bufferSize];
            // Start at the beginning of the fresh buffer, so that a position left over from a
            // previous call on this encoder is never reused.
            mPosition = 0;

            MakedictLog.i("Writing file...");

            for (PtNodeArray nodeArray : flatNodes) {
                BinaryDictEncoderUtils.writePlacedPtNodeArray(dict, this, nodeArray,
                        formatOptions);
            }
            if (MakedictLog.DBG) {
                BinaryDictEncoderUtils.showStatistics(flatNodes);
            }
            mOutStream.write(mBuffer, 0, mPosition);

            MakedictLog.i("Done");
        } finally {
            close();
        }
    }

    /**
     * Moves the write cursor. The request is silently ignored when there is no buffer yet or
     * when the position is outside of the buffer.
     *
     * @param position the new position in the buffer.
     */
    @Override
    public void setPosition(final int position) {
        if (mBuffer == null || position < 0 || position >= mBuffer.length) return;
        mPosition = position;
    }

    /**
     * @return the current position of the write cursor in the buffer.
     */
    @Override
    public int getPosition() {
        return mPosition;
    }

    /**
     * Writes the number of PtNodes of a node array at the current position, on one or two bytes.
     * When two bytes are used, the large-array flag is OR'ed into the written value.
     *
     * @param ptNodeCount the number of PtNodes to write.
     * @throws RuntimeException if the computed size of the count is neither 1 nor 2.
     */
    @Override
    public void writePtNodeCount(final int ptNodeCount) {
        final int countSize = BinaryDictIOUtils.getPtNodeCountSize(ptNodeCount);
        if (countSize != 1 && countSize != 2) {
            throw new RuntimeException("Strange size from getPtNodeCountSize : " + countSize);
        }
        final int encodedPtNodeCount = (countSize == 2) ?
                (ptNodeCount | FormatSpec.LARGE_PTNODE_ARRAY_SIZE_FIELD_SIZE_FLAG) : ptNodeCount;
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, encodedPtNodeCount,
                countSize);
    }

    /**
     * Writes the flags of a PtNode at the current position.
     *
     * @param ptNode the PtNode to write the flags of.
     * @param formatOptions the format options the dictionary is written with.
     */
    private void writePtNodeFlags(final PtNode ptNode, final FormatOptions formatOptions) {
        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition,
                BinaryDictEncoderUtils.makePtNodeFlags(ptNode, childrenPos, formatOptions),
                FormatSpec.PTNODE_FLAGS_SIZE);
    }

    /**
     * Writes the parent position of a PtNode at the current position.
     *
     * {@code FormatSpec.NO_PARENT_ADDRESS} is passed on as is; any other position is passed on
     * relative to the cached address of the PtNode.
     *
     * @param parentPosition the position of the parent, or FormatSpec.NO_PARENT_ADDRESS.
     * @param ptNode the PtNode whose parent position is written.
     * @param formatOptions the format options the dictionary is written with.
     */
    private void writeParentPosition(final int parentPosition, final PtNode ptNode,
            final FormatOptions formatOptions) {
        if (parentPosition == FormatSpec.NO_PARENT_ADDRESS) {
            mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
                    parentPosition, formatOptions);
        } else {
            mPosition = BinaryDictEncoderUtils.writeParentAddress(mBuffer, mPosition,
                    parentPosition - ptNode.mCachedAddressAfterUpdate, formatOptions);
        }
    }

    /**
     * Writes the characters of a PtNode at the current position, followed by the characters
     * terminator when the node is flagged as having several characters.
     *
     * @param codePoints the code points to write.
     * @param hasSeveralChars whether the terminator has to be written after the characters.
     */
    private void writeCharacters(final int[] codePoints, final boolean hasSeveralChars) {
        mPosition = CharEncoding.writeCharArray(codePoints, mBuffer, mPosition);
        if (hasSeveralChars) {
            mBuffer[mPosition++] = FormatSpec.PTNODE_CHARACTERS_TERMINATOR;
        }
    }

    /**
     * Writes a frequency at the current position. Nothing is written for a negative frequency.
     *
     * @param frequency the frequency to write.
     */
    private void writeFrequency(final int frequency) {
        if (frequency >= 0) {
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, frequency,
                    FormatSpec.PTNODE_FREQUENCY_SIZE);
        }
    }

    /**
     * Writes the children position of a PtNode at the current position, in the signed form when
     * the format supports dynamic updates and in the plain form otherwise.
     *
     * @param ptNode the PtNode to write the children position of.
     * @param formatOptions the format options the dictionary is written with.
     */
    private void writeChildrenPosition(final PtNode ptNode, final FormatOptions formatOptions) {
        final int childrenPos = BinaryDictEncoderUtils.getChildrenPosition(ptNode, formatOptions);
        // Unlike writeUIntToBuffer, the result of these helpers is added to the position.
        // NOTE(review): presumably they return the number of bytes written - confirm.
        if (formatOptions.mSupportsDynamicUpdate) {
            mPosition += BinaryDictEncoderUtils.writeSignedChildrenPosition(mBuffer, mPosition,
                    childrenPos);
        } else {
            mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                    childrenPos);
        }
    }

    /**
     * Write a shortcut attributes list to mBuffer.
     *
     * Nothing is written for a null or empty list. Otherwise room is left for the size field,
     * the shortcuts are written, and the size field is filled in afterwards. The size that is
     * written includes the size field itself.
     *
     * @param shortcuts the shortcut attributes list.
     * @throws RuntimeException if the list is larger than the format allows.
     */
    private void writeShortcuts(final ArrayList<WeightedString> shortcuts) {
        if (null == shortcuts || shortcuts.isEmpty()) return;

        final int indexOfShortcutByteSize = mPosition;
        mPosition += FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE;
        final Iterator<WeightedString> shortcutIterator = shortcuts.iterator();
        while (shortcutIterator.hasNext()) {
            final WeightedString target = shortcutIterator.next();
            // hasNext() is evaluated after next(), so it tells whether more shortcuts follow.
            final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
                    shortcutIterator.hasNext(),
                    target.mFrequency);
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
            mPosition += shortcutShift;
        }
        final int shortcutByteSize = mPosition - indexOfShortcutByteSize;
        if (shortcutByteSize > FormatSpec.MAX_SHORTCUT_LIST_SIZE_IN_A_PTNODE) {
            throw new RuntimeException("Shortcut list too large");
        }
        // Go back and fill in the size field; the write cursor itself is left where it is.
        BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, indexOfShortcutByteSize, shortcutByteSize,
                FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
    }

    /**
     * Write a bigram attributes list to mBuffer.
     *
     * Nothing is written for a null list. Each bigram is written as flags followed by the
     * absolute value of the offset to the target word.
     *
     * @param bigrams the bigram attributes list.
     * @param dict the dictionary the node array is a part of (for relative offsets).
     */
    private void writeBigrams(final ArrayList<WeightedString> bigrams,
            final FusionDictionary dict) {
        if (bigrams == null) return;

        final Iterator<WeightedString> bigramIterator = bigrams.iterator();
        while (bigramIterator.hasNext()) {
            final WeightedString bigram = bigramIterator.next();
            // NOTE(review): target is dereferenced without a null check; presumably every
            // bigram target is expected to be present in the dictionary - confirm.
            final PtNode target =
                    FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
            final int addressOfBigram = target.mCachedAddressAfterUpdate;
            final int unigramFrequencyForThisWord = target.mFrequency;
            // The offset is relative to the position right after the flags.
            final int offset = addressOfBigram
                    - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
                    offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
            mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
                    FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
            // Only the magnitude is written here. NOTE(review): the sign is presumably carried
            // by the flags computed from the signed offset above - confirm.
            mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
                    Math.abs(offset));
        }
    }

    /**
     * Writes a forward link address at the current position.
     *
     * @param forwardLinkAddress the forward link address to write.
     */
    @Override
    public void writeForwardLinkAddress(final int forwardLinkAddress) {
        mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, forwardLinkAddress,
                FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
    }

    /**
     * Writes a whole PtNode at the current position: flags, parent position, characters,
     * frequency, children position, shortcuts and bigrams, in that order.
     *
     * @param ptNode the PtNode to write.
     * @param parentPosition the position of the parent, or FormatSpec.NO_PARENT_ADDRESS.
     * @param formatOptions the format options the dictionary is written with.
     * @param dict the dictionary the PtNode is a part of (for bigram offsets).
     */
    @Override
    public void writePtNode(final PtNode ptNode, final int parentPosition,
            final FormatOptions formatOptions, final FusionDictionary dict) {
        writePtNodeFlags(ptNode, formatOptions);
        writeParentPosition(parentPosition, ptNode, formatOptions);
        writeCharacters(ptNode.mChars, ptNode.hasSeveralChars());
        writeFrequency(ptNode.mFrequency);
        writeChildrenPosition(ptNode, formatOptions);
        writeShortcuts(ptNode.mShortcutTargets);
        writeBigrams(ptNode.mBigrams, dict);
    }
}