/* * Copyright (C) 2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.android.inputmethod.latin; import android.util.Log; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.PendingAttribute; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.Map; /** * Reads and writes Binary files for a UserHistoryDictionary. * * All the methods in this class are static. */ public final class UserHistoryDictIOUtils { private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName(); private static final boolean DEBUG = false; public interface OnAddWordListener { public void setUnigram(final String word, final String shortcutTarget, final int frequency); public void setBigram(final String word1, final String word2, final int frequency); } public interface BigramDictionaryInterface { public int getFrequency(final String word1, final String word2); } public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface { private byte[] mBuffer; private int mPosition; public ByteArrayWrapper(final byte[] buffer) { mBuffer = buffer; mPosition = 0; } @Override public int readUnsignedByte() { return ((int)mBuffer[mPosition++]) & 0xFF; } @Override public int readUnsignedShort() { final int retval = readUnsignedByte(); return (retval << 8) + readUnsignedByte(); } @Override public int readUnsignedInt24() { final int retval = readUnsignedShort(); return (retval << 8) + readUnsignedByte(); } @Override public int readInt() { final int retval = readUnsignedShort(); return (retval << 16) + readUnsignedShort(); } @Override public int position() { return mPosition; } @Override public void position(int position) { mPosition = position; } @Override public void put(final byte b) { mBuffer[mPosition++] = b; } @Override public int limit() { return mBuffer.length - 1; } @Override public int capacity() { return mBuffer.length; } } /** * Writes dictionary to file. */ public static void writeDictionaryBinary(final OutputStream destination, final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams, final FormatOptions formatOptions) { final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams); try { BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions); Log.d(TAG, "end writing"); } catch (IOException e) { Log.e(TAG, "IO exception while writing file: " + e); } catch (UnsupportedFormatException e) { Log.e(TAG, "Unsupported fomat: " + e); } } /** * Constructs a new FusionDictionary from BigramDictionaryInterface. */ /* packages for test */ static FusionDictionary constructFusionDictionary( final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) { final FusionDictionary fusionDict = new FusionDictionary(new Node(), new FusionDictionary.DictionaryOptions(new HashMap(), false, false)); int profTotal = 0; for (final String word1 : bigrams.keySet()) { final HashMap word1Bigrams = bigrams.getBigrams(word1); for (final String word2 : word1Bigrams.keySet()) { final int freq = dict.getFrequency(word1, word2); if (freq == -1) { // don't add this bigram. continue; } if (DEBUG) { if (word1 == null) { Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq)); } else { Log.d(TAG, "add bigram: " + word1 + "," + word2 + "," + Integer.toString(freq)); } profTotal++; } if (word1 == null) { // unigram fusionDict.add(word2, freq, null, false /* isNotAWord */); } else { // bigram if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) { fusionDict.add(word1, 2, null, false /* isNotAWord */); } fusionDict.setBigram(word1, word2, freq); } bigrams.updateBigram(word1, word2, (byte)freq); } } if (DEBUG) { Log.d(TAG, "add " + profTotal + "words"); } return fusionDict; } /** * Reads dictionary from file. */ public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer, final OnAddWordListener dict) { final Map unigrams = CollectionUtils.newTreeMap(); final Map frequencies = CollectionUtils.newTreeMap(); final Map> bigrams = CollectionUtils.newTreeMap(); try { BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies, bigrams); } catch (IOException e) { Log.e(TAG, "IO exception while reading file: " + e); } catch (UnsupportedFormatException e) { Log.e(TAG, "Unsupported format: " + e); } catch (ArrayIndexOutOfBoundsException e) { Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file: " + e); } addWordsFromWordMap(unigrams, frequencies, bigrams, dict); } /** * Adds all unigrams and bigrams in maps to OnAddWordListener. */ /* package for test */ static void addWordsFromWordMap(final Map unigrams, final Map frequencies, final Map> bigrams, final OnAddWordListener to) { for (Map.Entry entry : unigrams.entrySet()) { final String word1 = entry.getValue(); final int unigramFrequency = frequencies.get(entry.getKey()); to.setUnigram(word1, null, unigramFrequency); final ArrayList attrList = bigrams.get(entry.getKey()); if (attrList != null) { for (final PendingAttribute attr : attrList) { to.setBigram(word1, unigrams.get(attr.mAddress), BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency, attr.mFrequency)); } } } } }