// BinaryDictionary.java revision bda7eaa63aace64f3d40eae3affaf281591ffa66
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20import android.text.TextUtils; 21 22import com.android.inputmethod.keyboard.ProximityInfo; 23import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; 24 25import java.util.ArrayList; 26import java.util.Arrays; 27import java.util.Locale; 28 29/** 30 * Implements a static, compacted, binary dictionary of standard words. 31 */ 32public class BinaryDictionary extends Dictionary { 33 34 public static final String DICTIONARY_PACK_AUTHORITY = 35 "com.android.inputmethod.latin.dictionarypack"; 36 37 /** 38 * There is a difference between what java and native code can handle. 39 * This value should only be used in BinaryDictionary.java 40 * It is necessary to keep it at this value because some languages e.g. German have 41 * really long words. 
42 */ 43 public static final int MAX_WORD_LENGTH = 48; 44 public static final int MAX_WORDS = 18; 45 public static final int MAX_SPACES = 16; 46 47 private static final String TAG = "BinaryDictionary"; 48 private static final int MAX_BIGRAMS = 60; 49 50 private static final int TYPED_LETTER_MULTIPLIER = 2; 51 52 private int mDicTypeId; 53 private long mNativeDict; 54 private final int[] mInputCodes = new int[MAX_WORD_LENGTH]; 55 private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS]; 56 private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS]; 57 private final int[] mSpaceIndices = new int[MAX_SPACES]; 58 private final int[] mScores = new int[MAX_WORDS]; 59 private final int[] mBigramScores = new int[MAX_BIGRAMS]; 60 61 private final boolean mUseFullEditDistance; 62 63 /** 64 * Constructor for the binary dictionary. This is supposed to be called from the 65 * dictionary factory. 66 * All implementations should pass null into flagArray, except for testing purposes. 67 * @param context the context to access the environment from. 68 * @param filename the name of the file to read through native code. 69 * @param offset the offset of the dictionary data within the file. 70 * @param length the length of the binary data. 
71 * @param useFullEditDistance whether to use the full edit distance in suggestions 72 * @param dicTypeId the dictionary type id of the dictionary 73 */ 74 public BinaryDictionary(final Context context, 75 final String filename, final long offset, final long length, 76 final boolean useFullEditDistance, final Locale locale, final int dicTypeId) { 77 mDicTypeId = dicTypeId; 78 mUseFullEditDistance = useFullEditDistance; 79 loadDictionary(filename, offset, length); 80 } 81 82 static { 83 JniUtils.loadNativeLibrary(); 84 } 85 86 private native long openNative(String sourceDir, long dictOffset, long dictSize, 87 int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords); 88 private native void closeNative(long dict); 89 private native int getFrequencyNative(long dict, int[] word, int wordLength); 90 private native boolean isValidBigramNative(long dict, int[] word1, int[] word2); 91 private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, 92 int[] yCoordinates, int[] times, int[] pointerIds, int[] inputCodes, int codesSize, 93 int commitPoint, boolean isGesture, int dicTypeId, 94 int[] prevWordCodePointArray, boolean useFullEditDistance, char[] outputChars, 95 int[] scores, int[] outputIndices); 96 private native int getBigramsNative(long dict, int[] prevWord, int prevWordLength, 97 int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores, 98 int maxWordLength, int maxBigrams); 99 private static native float calcNormalizedScoreNative( 100 char[] before, int beforeLength, char[] after, int afterLength, int score); 101 private static native int editDistanceNative( 102 char[] before, int beforeLength, char[] after, int afterLength); 103 104 private final void loadDictionary(String path, long startOffset, long length) { 105 mNativeDict = openNative(path, startOffset, length, 106 TYPED_LETTER_MULTIPLIER, FULL_WORD_SCORE_MULTIPLIER, MAX_WORD_LENGTH, MAX_WORDS); 107 } 108 109 @Override 110 public void 
getBigrams(final WordComposer codes, final CharSequence previousWord, 111 final WordCallback callback) { 112 if (mNativeDict == 0) return; 113 114 int[] codePoints = StringUtils.toCodePointArray(previousWord.toString()); 115 Arrays.fill(mOutputChars_bigrams, (char) 0); 116 Arrays.fill(mBigramScores, 0); 117 118 int codesSize = codes.size(); 119 Arrays.fill(mInputCodes, -1); 120 if (codesSize > 0) { 121 mInputCodes[0] = codes.getCodeAt(0); 122 } 123 124 int count = getBigramsNative(mNativeDict, codePoints, codePoints.length, mInputCodes, 125 codesSize, mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS); 126 if (count > MAX_BIGRAMS) { 127 count = MAX_BIGRAMS; 128 } 129 130 final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<SuggestedWordInfo>(); 131 for (int j = 0; j < count; ++j) { 132 if (codesSize > 0 && mBigramScores[j] < 1) break; 133 final int start = j * MAX_WORD_LENGTH; 134 int len = 0; 135 while (len < MAX_WORD_LENGTH && mOutputChars_bigrams[start + len] != 0) { 136 ++len; 137 } 138 if (len > 0) { 139 suggestions.add(new SuggestedWordInfo( 140 new String(mOutputChars_bigrams, start, len), 141 mBigramScores[j], SuggestedWordInfo.KIND_CORRECTION)); 142 } 143 } 144 Utils.addAllSuggestions(mDicTypeId, Dictionary.BIGRAM, suggestions, callback); 145 } 146 147 // proximityInfo and/or prevWordForBigrams may not be null. 
148 @Override 149 public void getWords(final WordComposer codes, final CharSequence prevWordForBigrams, 150 final WordCallback callback, final ProximityInfo proximityInfo) { 151 final int count = getSuggestions(codes, prevWordForBigrams, proximityInfo, mOutputChars, 152 mScores, mSpaceIndices); 153 154 final ArrayList<SuggestedWordInfo> suggestions = new ArrayList<SuggestedWordInfo>(); 155 for (int j = 0; j < count; ++j) { 156 if (mScores[j] < 1) break; 157 final int start = j * MAX_WORD_LENGTH; 158 int len = 0; 159 while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) { 160 ++len; 161 } 162 if (len > 0) { 163 // TODO: actually get the kind from native code 164 suggestions.add(new SuggestedWordInfo( 165 new String(mOutputChars, start, len), 166 mScores[j], SuggestedWordInfo.KIND_CORRECTION)); 167 } 168 } 169 Utils.addAllSuggestions(mDicTypeId, Dictionary.UNIGRAM, suggestions, callback); 170 } 171 172 /* package for test */ boolean isValidDictionary() { 173 return mNativeDict != 0; 174 } 175 176 // proximityInfo may not be null. 177 /* package for test */ int getSuggestions(final WordComposer codes, 178 final CharSequence prevWordForBigrams, final ProximityInfo proximityInfo, 179 char[] outputChars, int[] scores, int[] spaceIndices) { 180 if (!isValidDictionary()) return -1; 181 182 final int codesSize = codes.size(); 183 // Won't deal with really long words. 184 if (codesSize > MAX_WORD_LENGTH - 1) return -1; 185 186 Arrays.fill(mInputCodes, WordComposer.NOT_A_CODE); 187 for (int i = 0; i < codesSize; i++) { 188 mInputCodes[i] = codes.getCodeAt(i); 189 } 190 Arrays.fill(outputChars, (char) 0); 191 Arrays.fill(scores, 0); 192 193 // TODO: toLowerCase in the native code 194 final int[] prevWordCodePointArray = (null == prevWordForBigrams) 195 ? 
null : StringUtils.toCodePointArray(prevWordForBigrams.toString()); 196 197 int[] emptyArray = new int[codesSize]; 198 Arrays.fill(emptyArray, 0); 199 200 //final int commitPoint = codes.getCommitPoint(); 201 //codes.clearCommitPoint(); 202 203 return getSuggestionsNative(mNativeDict, proximityInfo.getNativeProximityInfo(), 204 codes.getXCoordinates(), codes.getYCoordinates(), emptyArray, emptyArray, mInputCodes, 205 codesSize, 0 /* unused */, false, mDicTypeId, 206 prevWordCodePointArray, mUseFullEditDistance, 207 outputChars, scores, spaceIndices); 208 } 209 210 public static float calcNormalizedScore(String before, String after, int score) { 211 return calcNormalizedScoreNative(before.toCharArray(), before.length(), 212 after.toCharArray(), after.length(), score); 213 } 214 215 public static int editDistance(String before, String after) { 216 return editDistanceNative( 217 before.toCharArray(), before.length(), after.toCharArray(), after.length()); 218 } 219 220 @Override 221 public boolean isValidWord(CharSequence word) { 222 return getFrequency(word) >= 0; 223 } 224 225 @Override 226 public int getFrequency(CharSequence word) { 227 if (word == null) return -1; 228 int[] chars = StringUtils.toCodePointArray(word.toString()); 229 return getFrequencyNative(mNativeDict, chars, chars.length); 230 } 231 232 // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni 233 // calls when checking for changes in an entire dictionary. 
234 public boolean isValidBigram(CharSequence word1, CharSequence word2) { 235 if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false; 236 int[] chars1 = StringUtils.toCodePointArray(word1.toString()); 237 int[] chars2 = StringUtils.toCodePointArray(word2.toString()); 238 return isValidBigramNative(mNativeDict, chars1, chars2); 239 } 240 241 @Override 242 public synchronized void close() { 243 closeInternal(); 244 } 245 246 private void closeInternal() { 247 if (mNativeDict != 0) { 248 closeNative(mNativeDict); 249 mNativeDict = 0; 250 } 251 } 252 253 @Override 254 protected void finalize() throws Throwable { 255 try { 256 closeInternal(); 257 } finally { 258 super.finalize(); 259 } 260 } 261} 262