BinaryDictionary.java revision 24aee9100e92dc4c06cdb54487a4922420fa8660
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;
import android.content.res.AssetFileDescriptor;
import android.content.res.Resources;

import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.LocaleUtils.RunInLocale;

import java.util.Arrays;
import java.util.Locale;

/**
 * Implements a static, compacted, binary dictionary of standard words.
 *
 * <p>The dictionary data is opened and queried through native code; this class holds the
 * native handle and the Java-side buffers the native calls write their results into.
 */
public class BinaryDictionary extends Dictionary {

    public static final String DICTIONARY_PACK_AUTHORITY =
            "com.android.inputmethod.latin.dictionarypack";

    /**
     * There is a difference between what java and native code can handle.
     * This value should only be used in BinaryDictionary.java
     * It is necessary to keep it at this value because some languages e.g. German have
     * really long words.
     */
    public static final int MAX_WORD_LENGTH = 48;
    public static final int MAX_WORDS = 18;

    private static final String TAG = "BinaryDictionary";
    private static final int MAX_BIGRAMS = 60;

    private static final int TYPED_LETTER_MULTIPLIER = 2;

    private final int mDicTypeId;
    // Handle returned by openNative(); 0 means "no dictionary" (never loaded, or closed).
    private long mNativeDict;
    // Scratch buffers shared with native code. Each result word occupies a fixed
    // MAX_WORD_LENGTH-sized slot and is terminated by a 0 char when shorter than the slot.
    private final int[] mInputCodes = new int[MAX_WORD_LENGTH];
    private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
    private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
    private final int[] mScores = new int[MAX_WORDS];
    private final int[] mBigramScores = new int[MAX_BIGRAMS];

    public static final Flag FLAG_REQUIRES_GERMAN_UMLAUT_PROCESSING =
            new Flag(R.bool.config_require_umlaut_processing, 0x1);
    public static final Flag FLAG_REQUIRES_FRENCH_LIGATURES_PROCESSING =
            new Flag(R.bool.config_require_ligatures_processing, 0x4);

    // FULL_EDIT_DISTANCE is a flag that forces the dictionary to use full words
    // when computing edit distance, instead of the default behavior of stopping
    // the evaluation at the size the user typed.
    public static final int FLAG_USE_FULL_EDIT_DISTANCE = 0x2;

    // Can create a new flag from extravalue :
    // public static final Flag FLAG_MYFLAG =
    //         new Flag("my_flag", 0x02);

    // ALL_CONFIG_FLAGS is a collection of flags that enable reading all flags from configuration.
    // This is but a mask - it does not mean the flags will be on, only that the configuration
    // will be read for this particular flag.
    public static final Flag[] ALL_CONFIG_FLAGS = {
        // Here should reside all flags that trigger some special processing
        // These *must* match the definition in UnigramDictionary enum in
        // unigram_dictionary.h so please update both at the same time.
        // Please note that flags created with a resource are of type CONFIG while flags
        // created with a string are of type EXTRAVALUE. These behave like masks, and the
        // actual value will be read from the configuration/extra value at run time for
        // the configuration at dictionary creation time.
        FLAG_REQUIRES_GERMAN_UMLAUT_PROCESSING,
        FLAG_REQUIRES_FRENCH_LIGATURES_PROCESSING,
    };

    private final int mFlags;

    /**
     * Constructor for the binary dictionary. This is supposed to be called from the
     * dictionary factory.
     * @param context the context to access the environment from (currently unused by this
     *        constructor).
     * @param filename the name of the file to read through native code.
     * @param offset the offset of the dictionary data within the file.
     * @param length the length of the binary data.
     * @param useFullEditDistance whether to use the full edit distance in suggestions
     * @param locale the locale of the dictionary (currently unused by this constructor).
     */
    public BinaryDictionary(final Context context,
            final String filename, final long offset, final long length,
            final boolean useFullEditDistance, final Locale locale) {
        // Note: at the moment a binary dictionary is always of the "main" type.
        // Initializing this here will help transitioning out of the scheme where
        // the Suggest class knows everything about every single dictionary.
        mDicTypeId = Suggest.DIC_MAIN;
        if (useFullEditDistance) {
            mFlags = FLAG_USE_FULL_EDIT_DISTANCE;
        } else {
            mFlags = 0;
        }
        loadDictionary(filename, offset, length);
    }

    static {
        JniUtils.loadNativeLibrary();
    }

    private native long openNative(String sourceDir, long dictOffset, long dictSize,
            int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords);
    private native void closeNative(long dict);
    private native boolean isValidWordNative(long dict, char[] word, int wordLength);
    private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates,
            int[] yCoordinates, int[] inputCodes, int codesSize, int flags, char[] outputChars,
            int[] scores);
    private native int getBigramsNative(long dict, char[] prevWord, int prevWordLength,
            int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores,
            int maxWordLength, int maxBigrams);
    private static native double calcNormalizedScoreNative(
            char[] before, int beforeLength, char[] after, int afterLength, int score);
    private static native int editDistanceNative(
            char[] before, int beforeLength, char[] after, int afterLength);

    /**
     * Opens the dictionary through native code and stores the resulting handle in
     * {@link #mNativeDict}.
     * @param path the file containing the dictionary data.
     * @param startOffset the offset of the dictionary data within the file.
     * @param length the length of the dictionary data.
     */
    private void loadDictionary(String path, long startOffset, long length) {
        mNativeDict = openNative(path, startOffset, length,
                TYPED_LETTER_MULTIPLIER, FULL_WORD_SCORE_MULTIPLIER, MAX_WORD_LENGTH, MAX_WORDS);
    }

    /**
     * Looks up bigrams for {@code previousWord} and reports each one to {@code callback}.
     * Does nothing when no native dictionary is open or when {@code previousWord} is null.
     * @param codes the word being composed; only its size and first code are passed on.
     * @param previousWord the word preceding the one being composed.
     * @param callback receives each bigram as a slice of the shared output buffer.
     */
    @Override
    public void getBigrams(final WordComposer codes, final CharSequence previousWord,
            final WordCallback callback) {
        // A null previous word has no bigrams; isValidWord() tolerates null the same way.
        if (mNativeDict == 0 || previousWord == null) return;

        char[] chars = previousWord.toString().toCharArray();
        Arrays.fill(mOutputChars_bigrams, (char) 0);
        Arrays.fill(mBigramScores, 0);

        int codesSize = codes.size();
        Arrays.fill(mInputCodes, -1);
        if (codesSize > 0) {
            mInputCodes[0] = codes.getCodeAt(0);
        }

        int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
                mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS);
        // Never read past the capacity of the Java-side buffers.
        if (count > MAX_BIGRAMS) {
            count = MAX_BIGRAMS;
        }

        for (int j = 0; j < count; ++j) {
            if (codesSize > 0 && mBigramScores[j] < 1) break;
            final int start = j * MAX_WORD_LENGTH;
            int len = 0;
            // Each word is 0-terminated unless it fills its whole slot.
            while (len < MAX_WORD_LENGTH && mOutputChars_bigrams[start + len] != 0) {
                ++len;
            }
            if (len > 0) {
                callback.addWord(mOutputChars_bigrams, start, len, mBigramScores[j],
                        mDicTypeId, Dictionary.BIGRAM);
            }
        }
    }

    /**
     * Looks up unigram suggestions for {@code codes} and reports each one to {@code callback}.
     * @param codes the word being composed.
     * @param callback receives each suggestion as a slice of the shared output buffer.
     * @param proximityInfo may not be null.
     */
    @Override
    public void getWords(final WordComposer codes, final WordCallback callback,
            final ProximityInfo proximityInfo) {
        // Clamp like getBigrams() does: mScores/mOutputChars only hold MAX_WORDS entries, so
        // a larger count would index out of bounds. A negative count skips the loop entirely.
        final int count = Math.min(
                getSuggestions(codes, proximityInfo, mOutputChars, mScores), MAX_WORDS);

        for (int j = 0; j < count; ++j) {
            if (mScores[j] < 1) break;
            final int start = j * MAX_WORD_LENGTH;
            int len = 0;
            // Each word is 0-terminated unless it fills its whole slot.
            while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) {
                ++len;
            }
            if (len > 0) {
                callback.addWord(mOutputChars, start, len, mScores[j], mDicTypeId,
                        Dictionary.UNIGRAM);
            }
        }
    }

    /**
     * @return whether a native dictionary handle is currently held.
     */
    /* package for test */ boolean isValidDictionary() {
        return mNativeDict != 0;
    }

    /**
     * Fills {@code outputChars} and {@code scores} with suggestions for {@code codes}.
     * Both output arrays are cleared before the native call.
     * @param codes the word being composed.
     * @param proximityInfo may not be null.
     * @param outputChars receives the suggested words.
     * @param scores receives the score of each suggested word.
     * @return the value returned by the native lookup, or -1 when no dictionary is open or
     *         the composed word has more than {@code MAX_WORD_LENGTH - 1} codes.
     */
    /* package for test */ int getSuggestions(final WordComposer codes,
            final ProximityInfo proximityInfo, char[] outputChars, int[] scores) {
        if (!isValidDictionary()) return -1;

        final int codesSize = codes.size();
        // Won't deal with really long words.
        if (codesSize > MAX_WORD_LENGTH - 1) return -1;

        Arrays.fill(mInputCodes, WordComposer.NOT_A_CODE);
        for (int i = 0; i < codesSize; i++) {
            mInputCodes[i] = codes.getCodeAt(i);
        }
        Arrays.fill(outputChars, (char) 0);
        Arrays.fill(scores, 0);

        return getSuggestionsNative(
                mNativeDict, proximityInfo.getNativeProximityInfo(),
                codes.getXCoordinates(), codes.getYCoordinates(), mInputCodes, codesSize,
                mFlags, outputChars, scores);
    }

    /**
     * Delegates to the native normalized-score computation for the two strings.
     */
    public static double calcNormalizedScore(String before, String after, int score) {
        return calcNormalizedScoreNative(before.toCharArray(), before.length(),
                after.toCharArray(), after.length(), score);
    }

    /**
     * Delegates to the native edit-distance computation for the two strings.
     */
    public static int editDistance(String before, String after) {
        return editDistanceNative(
                before.toCharArray(), before.length(), after.toCharArray(), after.length());
    }

    /**
     * @param word the word to look up; may be null.
     * @return false when {@code word} is null or no native dictionary is open, otherwise the
     *         result of the native lookup.
     */
    @Override
    public boolean isValidWord(CharSequence word) {
        if (word == null) return false;
        // Like getBigrams()/getSuggestions(), never hand a 0 handle to native code: the
        // dictionary may have failed to load or may already have been closed.
        if (mNativeDict == 0) return false;
        char[] chars = word.toString().toCharArray();
        return isValidWordNative(mNativeDict, chars, chars.length);
    }

    @Override
    public synchronized void close() {
        closeInternal();
    }

    /**
     * Releases the native dictionary if one is open and resets the handle to 0, so calling
     * this more than once is harmless.
     */
    private void closeInternal() {
        if (mNativeDict != 0) {
            closeNative(mNativeDict);
            mNativeDict = 0;
        }
    }

    @Override
    protected void finalize() throws Throwable {
        try {
            closeInternal();
        } finally {
            super.finalize();
        }
    }
}