BinaryDictionary.java revision be0cf72253f15bff6abdeaa79f60a56f06ab7b86
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;

import com.android.inputmethod.keyboard.ProximityInfo;

import java.util.Arrays;

/**
 * Implements a static, compacted, binary dictionary of standard words.
 *
 * <p>All lookups are delegated to native code through the handle stored in
 * {@link #mNativeDict}; a handle of 0 means that no dictionary is open (either it was
 * never opened successfully or it has been closed).
 */
public class BinaryDictionary extends Dictionary {

    public static final String DICTIONARY_PACK_AUTHORITY =
            "com.android.inputmethod.latin.dictionarypack";

    /**
     * There is a difference between what java and native code can handle.
     * This value should only be used in BinaryDictionary.java
     * It is necessary to keep it at this value because some languages e.g. German have
     * really long words.
     */
    public static final int MAX_WORD_LENGTH = 48;
    public static final int MAX_WORDS = 18;

    private static final String TAG = "BinaryDictionary";
    private static final int MAX_PROXIMITY_CHARS_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE;
    private static final int MAX_BIGRAMS = 60;

    private static final int TYPED_LETTER_MULTIPLIER = 2;

    // Assigned once in the constructor.
    private final int mDicTypeId;
    // Native dictionary handle; 0 when no dictionary is open.
    private long mNativeDict;
    // Scratch buffers reused across calls. Output buffers hold one fixed-size slot of
    // MAX_WORD_LENGTH chars per result; a word shorter than its slot ends at the first 0 char.
    private final int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_PROXIMITY_CHARS_SIZE];
    private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
    private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
    private final int[] mScores = new int[MAX_WORDS];
    private final int[] mBigramScores = new int[MAX_BIGRAMS];

    public static final Flag FLAG_REQUIRES_GERMAN_UMLAUT_PROCESSING =
            new Flag(R.bool.config_require_umlaut_processing, 0x1);

    // FULL_EDIT_DISTANCE is a flag that forces the dictionary to use full words
    // when computing edit distance, instead of the default behavior of stopping
    // the evaluation at the size the user typed.
    public static final Flag FLAG_USE_FULL_EDIT_DISTANCE = new Flag(0x2);

    // Can create a new flag from extravalue :
    // public static final Flag FLAG_MYFLAG =
    //         new Flag("my_flag", 0x02);

    // ALL_CONFIG_FLAGS is a collection of flags that enable reading all flags from configuration.
    // This is but a mask - it does not mean the flags will be on, only that the configuration
    // will be read for this particular flag.
    public static final Flag[] ALL_CONFIG_FLAGS = {
        // Here should reside all flags that trigger some special processing
        // These *must* match the definition in UnigramDictionary enum in
        // unigram_dictionary.h so please update both at the same time.
        // Please note that flags created with a resource are of type CONFIG while flags
        // created with a string are of type EXTRAVALUE. These behave like masks, and the
        // actual value will be read from the configuration/extra value at run time for
        // the configuration at dictionary creation time.
        FLAG_REQUIRES_GERMAN_UMLAUT_PROCESSING,
    };

    // Flags value computed once at construction time and passed to every suggestion lookup.
    private final int mFlags;

    /**
     * Constructor for the binary dictionary. This is supposed to be called from the
     * dictionary factory.
     * All implementations should pass null into flagArray, except for testing purposes.
     * @param context the context to access the environment from.
     * @param filename the name of the file to read through native code.
     * @param offset the offset of the dictionary data within the file.
     * @param length the length of the binary data.
     * @param flagArray the flags to limit the dictionary to, or null for default.
     */
    public BinaryDictionary(final Context context,
            final String filename, final long offset, final long length, Flag[] flagArray) {
        // Note: at the moment a binary dictionary is always of the "main" type.
        // Initializing this here will help transitioning out of the scheme where
        // the Suggest class knows everything about every single dictionary.
        mDicTypeId = Suggest.DIC_MAIN;
        // TODO: Stop relying on the state of SubtypeSwitcher, get it as a parameter
        mFlags = Flag.initFlags(null == flagArray ? ALL_CONFIG_FLAGS : flagArray, context,
                SubtypeSwitcher.getInstance());
        loadDictionary(filename, offset, length);
    }

    static {
        Utils.loadNativeLibrary();
    }

    // Native entry points. Their implementations are not part of this file; presumably they
    // live in the library loaded by Utils.loadNativeLibrary() above.
    private native long openNative(String sourceDir, long dictOffset, long dictSize,
            int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength,
            int maxWords, int maxAlternatives);
    private native void closeNative(long dict);
    private native boolean isValidWordNative(long dict, char[] word, int wordLength);
    private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
            int[] yCoordinates, int[] inputCodes, int codesSize, int flags, char[] outputChars,
            int[] scores);
    private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength,
            int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
            int maxWordLength, int maxBigrams, int maxAlternatives);
    private static native double calcNormalizedScoreNative(
            char[] before, int beforeLength, char[] after, int afterLength, int score);
    private static native int editDistanceNative(
            char[] before, int beforeLength, char[] after, int afterLength);

    /**
     * Opens the dictionary through native code and stores the returned handle in
     * {@link #mNativeDict}.
     * @param path the name of the file to read through native code.
     * @param startOffset the offset of the dictionary data within the file.
     * @param length the length of the binary data.
     */
    private void loadDictionary(String path, long startOffset, long length) {
        mNativeDict = openNative(path, startOffset, length,
                TYPED_LETTER_MULTIPLIER, FULL_WORD_SCORE_MULTIPLIER,
                MAX_WORD_LENGTH, MAX_WORDS, MAX_PROXIMITY_CHARS_SIZE);
    }

    /**
     * Returns the length of the word stored in the slot of {@code buffer} that begins at
     * {@code start}: the number of chars before the first 0 char, capped at
     * {@link #MAX_WORD_LENGTH}.
     */
    private static int wordLengthAt(final char[] buffer, final int start) {
        int len = 0;
        while (len < MAX_WORD_LENGTH && buffer[start + len] != 0) {
            ++len;
        }
        return len;
    }

    /**
     * Looks up bigrams following {@code previousWord} and reports each one to
     * {@code callback} with data type {@code DataType.BIGRAM}.
     *
     * <p>Nothing is reported when no dictionary is open, when {@code previousWord} is null,
     * or when nothing has been typed yet. Reporting stops at the first result whose score is
     * below 1.
     *
     * @param codes the word being composed; only the codes of its first character are
     *        handed to the native lookup.
     * @param previousWord the word preceding the one being composed.
     * @param callback receiver for the bigrams found.
     */
    @Override
    public void getBigrams(final WordComposer codes, final CharSequence previousWord,
            final WordCallback callback) {
        if (mNativeDict == 0 || previousWord == null) return;

        final int codesSize = codes.size();
        if (codesSize <= 0) {
            // Do not return bigrams from BinaryDictionary when nothing was typed.
            // Only use user-history bigrams (or whatever other bigram dictionaries decide).
            return;
        }

        final char[] chars = previousWord.toString().toCharArray();
        Arrays.fill(mOutputChars_bigrams, (char) 0);
        Arrays.fill(mBigramScores, 0);

        // NOTE(review): getSuggestions() fills with WordComposer.NOT_A_CODE instead of the
        // literal -1 used here - confirm both are the same value and unify.
        Arrays.fill(mInputCodes, -1);
        final int[] alternatives = codes.getCodesAt(0);
        System.arraycopy(alternatives, 0, mInputCodes, 0,
                Math.min(alternatives.length, MAX_PROXIMITY_CHARS_SIZE));

        final int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes,
                codesSize, mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS,
                MAX_PROXIMITY_CHARS_SIZE);

        for (int j = 0; j < count; ++j) {
            if (mBigramScores[j] < 1) break;
            final int start = j * MAX_WORD_LENGTH;
            final int len = wordLengthAt(mOutputChars_bigrams, start);
            if (len > 0) {
                callback.addWord(mOutputChars_bigrams, start, len, mBigramScores[j],
                        mDicTypeId, DataType.BIGRAM);
            }
        }
    }

    /**
     * Looks up suggestions for the word being composed and reports each one to
     * {@code callback} with data type {@code DataType.UNIGRAM}. Reporting stops at the
     * first result whose score is below 1.
     *
     * @param codes the word being composed.
     * @param callback receiver for the suggestions found.
     * @param proximityInfo proximity information; may not be null.
     */
    // proximityInfo may not be null.
    @Override
    public void getWords(final WordComposer codes, final WordCallback callback,
            final ProximityInfo proximityInfo) {
        // A negative count (lookup refused) simply skips the loop.
        final int count = getSuggestions(codes, proximityInfo, mOutputChars, mScores);

        for (int j = 0; j < count; ++j) {
            if (mScores[j] < 1) break;
            final int start = j * MAX_WORD_LENGTH;
            final int len = wordLengthAt(mOutputChars, start);
            if (len > 0) {
                callback.addWord(mOutputChars, start, len, mScores[j], mDicTypeId,
                        DataType.UNIGRAM);
            }
        }
    }

    /**
     * @return whether a native dictionary is currently open.
     */
    /* package for test */ boolean isValidDictionary() {
        return mNativeDict != 0;
    }

    /**
     * Runs the native suggestion lookup for the word being composed.
     *
     * @param codes the word being composed.
     * @param proximityInfo proximity information; may not be null.
     * @param outputChars buffer that receives the suggested words; zero-filled before the
     *        lookup.
     * @param scores buffer that receives the score of each suggestion; zero-filled before
     *        the lookup.
     * @return the value returned by the native lookup, or -1 when no dictionary is open or
     *         the composed word has more than {@code MAX_WORD_LENGTH - 1} codes.
     */
    // proximityInfo may not be null.
    /* package for test */ int getSuggestions(final WordComposer codes,
            final ProximityInfo proximityInfo, char[] outputChars, int[] scores) {
        if (!isValidDictionary()) return -1;

        final int codesSize = codes.size();
        // Won't deal with really long words.
        if (codesSize > MAX_WORD_LENGTH - 1) return -1;

        // Each typed character gets MAX_PROXIMITY_CHARS_SIZE consecutive slots; slots that
        // are not overwritten below keep the NOT_A_CODE filler.
        Arrays.fill(mInputCodes, WordComposer.NOT_A_CODE);
        for (int i = 0; i < codesSize; i++) {
            final int[] alternatives = codes.getCodesAt(i);
            System.arraycopy(alternatives, 0, mInputCodes, i * MAX_PROXIMITY_CHARS_SIZE,
                    Math.min(alternatives.length, MAX_PROXIMITY_CHARS_SIZE));
        }
        Arrays.fill(outputChars, (char) 0);
        Arrays.fill(scores, 0);

        return getSuggestionsNative(
                mNativeDict, proximityInfo.getNativeProximityInfo(),
                codes.getXCoordinates(), codes.getYCoordinates(), mInputCodes, codesSize,
                mFlags, outputChars, scores);
    }

    /**
     * Delegates to the native normalized-score computation.
     *
     * @param before the first word handed to native code.
     * @param after the second word handed to native code.
     * @param score the score handed to native code.
     * @return the value computed by native code.
     */
    public static double calcNormalizedScore(String before, String after, int score) {
        return calcNormalizedScoreNative(before.toCharArray(), before.length(),
                after.toCharArray(), after.length(), score);
    }

    /**
     * Delegates to the native edit-distance computation.
     *
     * @param before the first word handed to native code.
     * @param after the second word handed to native code.
     * @return the value computed by native code.
     */
    public static int editDistance(String before, String after) {
        return editDistanceNative(
                before.toCharArray(), before.length(), after.toCharArray(), after.length());
    }

    /**
     * Checks a word against the native dictionary.
     *
     * @param word the word to check; may be null.
     * @return false when {@code word} is null or no dictionary is open, otherwise the
     *         result of the native lookup.
     */
    @Override
    public boolean isValidWord(CharSequence word) {
        // Like the other lookups, never hand a zero handle to native code.
        if (word == null || mNativeDict == 0) return false;
        final char[] chars = word.toString().toCharArray();
        return isValidWordNative(mNativeDict, chars, chars.length);
    }

    /**
     * Closes the native dictionary if one is open. Calling this more than once is harmless.
     */
    @Override
    public synchronized void close() {
        closeInternal();
    }

    // Releases the native dictionary and zeroes the handle so a second call is a no-op.
    private void closeInternal() {
        if (mNativeDict != 0) {
            closeNative(mNativeDict);
            mNativeDict = 0;
        }
    }

    /**
     * Releases the native dictionary if {@link #close()} was never called.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            closeInternal();
        } finally {
            super.finalize();
        }
    }
}