1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.util.Log; 20 21import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; 22import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; 23import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; 24import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 25import com.android.inputmethod.latin.makedict.FusionDictionary; 26import com.android.inputmethod.latin.makedict.FusionDictionary.Node; 27import com.android.inputmethod.latin.makedict.PendingAttribute; 28import com.android.inputmethod.latin.makedict.UnsupportedFormatException; 29 30import java.io.IOException; 31import java.io.OutputStream; 32import java.util.ArrayList; 33import java.util.HashMap; 34import java.util.Map; 35 36/** 37 * Reads and writes Binary files for a UserHistoryDictionary. 38 * 39 * All the methods in this class are static. 40 */ 41public final class UserHistoryDictIOUtils { 42 private static final String TAG = UserHistoryDictIOUtils.class.getSimpleName(); 43 private static final boolean DEBUG = false; 44 45 public interface OnAddWordListener { 46 public void setUnigram(final String word, final String shortcutTarget, final int frequency); 47 public void setBigram(final String word1, final String word2, final int frequency); 48 } 49 50 public interface BigramDictionaryInterface { 51 public int getFrequency(final String word1, final String word2); 52 } 53 54 public static final class ByteArrayWrapper implements FusionDictionaryBufferInterface { 55 private byte[] mBuffer; 56 private int mPosition; 57 58 public ByteArrayWrapper(final byte[] buffer) { 59 mBuffer = buffer; 60 mPosition = 0; 61 } 62 63 @Override 64 public int readUnsignedByte() { 65 return ((int)mBuffer[mPosition++]) & 0xFF; 66 } 67 68 @Override 69 public int readUnsignedShort() { 70 final int retval = readUnsignedByte(); 71 return (retval << 8) + readUnsignedByte(); 72 } 73 74 @Override 75 public int readUnsignedInt24() { 76 final int retval = readUnsignedShort(); 77 return (retval << 8) + readUnsignedByte(); 78 } 79 80 @Override 81 public int readInt() { 82 final int retval = readUnsignedShort(); 83 return (retval << 16) + readUnsignedShort(); 84 } 85 86 @Override 87 public int position() { 88 return mPosition; 89 } 90 91 @Override 92 public void position(int position) { 93 mPosition = position; 94 } 95 96 @Override 97 public void put(final byte b) { 98 mBuffer[mPosition++] = b; 99 } 100 101 @Override 102 public int limit() { 103 return mBuffer.length - 1; 104 } 105 106 @Override 107 public int capacity() { 108 return mBuffer.length; 109 } 110 } 111 112 /** 113 * Writes dictionary to file. 114 */ 115 public static void writeDictionaryBinary(final OutputStream destination, 116 final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams, 117 final FormatOptions formatOptions) { 118 final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams); 119 try { 120 BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions); 121 Log.d(TAG, "end writing"); 122 } catch (IOException e) { 123 Log.e(TAG, "IO exception while writing file: " + e); 124 } catch (UnsupportedFormatException e) { 125 Log.e(TAG, "Unsupported fomat: " + e); 126 } 127 } 128 129 /** 130 * Constructs a new FusionDictionary from BigramDictionaryInterface. 131 */ 132 /* packages for test */ static FusionDictionary constructFusionDictionary( 133 final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams) { 134 final FusionDictionary fusionDict = new FusionDictionary(new Node(), 135 new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false, 136 false)); 137 int profTotal = 0; 138 for (final String word1 : bigrams.keySet()) { 139 final HashMap<String, Byte> word1Bigrams = bigrams.getBigrams(word1); 140 for (final String word2 : word1Bigrams.keySet()) { 141 final int freq = dict.getFrequency(word1, word2); 142 if (freq == -1) { 143 // don't add this bigram. 144 continue; 145 } 146 if (DEBUG) { 147 if (word1 == null) { 148 Log.d(TAG, "add unigram: " + word2 + "," + Integer.toString(freq)); 149 } else { 150 Log.d(TAG, "add bigram: " + word1 151 + "," + word2 + "," + Integer.toString(freq)); 152 } 153 profTotal++; 154 } 155 if (word1 == null) { // unigram 156 fusionDict.add(word2, freq, null, false /* isNotAWord */); 157 } else { // bigram 158 if (FusionDictionary.findWordInTree(fusionDict.mRoot, word1) == null) { 159 fusionDict.add(word1, 2, null, false /* isNotAWord */); 160 } 161 fusionDict.setBigram(word1, word2, freq); 162 } 163 bigrams.updateBigram(word1, word2, (byte)freq); 164 } 165 } 166 if (DEBUG) { 167 Log.d(TAG, "add " + profTotal + "words"); 168 } 169 return fusionDict; 170 } 171 172 /** 173 * Reads dictionary from file. 174 */ 175 public static void readDictionaryBinary(final FusionDictionaryBufferInterface buffer, 176 final OnAddWordListener dict) { 177 final Map<Integer, String> unigrams = CollectionUtils.newTreeMap(); 178 final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap(); 179 final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap(); 180 try { 181 BinaryDictIOUtils.readUnigramsAndBigramsBinary(buffer, unigrams, frequencies, 182 bigrams); 183 } catch (IOException e) { 184 Log.e(TAG, "IO exception while reading file: " + e); 185 } catch (UnsupportedFormatException e) { 186 Log.e(TAG, "Unsupported format: " + e); 187 } catch (ArrayIndexOutOfBoundsException e) { 188 Log.e(TAG, "ArrayIndexOutOfBoundsException while reading file: " + e); 189 } 190 addWordsFromWordMap(unigrams, frequencies, bigrams, dict); 191 } 192 193 /** 194 * Adds all unigrams and bigrams in maps to OnAddWordListener. 195 */ 196 /* package for test */ static void addWordsFromWordMap(final Map<Integer, String> unigrams, 197 final Map<Integer, Integer> frequencies, 198 final Map<Integer, ArrayList<PendingAttribute>> bigrams, final OnAddWordListener to) { 199 for (Map.Entry<Integer, String> entry : unigrams.entrySet()) { 200 final String word1 = entry.getValue(); 201 final int unigramFrequency = frequencies.get(entry.getKey()); 202 to.setUnigram(word1, null, unigramFrequency); 203 final ArrayList<PendingAttribute> attrList = bigrams.get(entry.getKey()); 204 if (attrList != null) { 205 for (final PendingAttribute attr : attrList) { 206 to.setBigram(word1, unigrams.get(attr.mAddress), 207 BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency, 208 attr.mFrequency)); 209 } 210 } 211 } 212 213 } 214}