BinaryDictionary.java revision ab72a97d7ce44230a0c824797d1675a5ca354a56
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import android.content.Context; 20 21import com.android.inputmethod.keyboard.ProximityInfo; 22 23import java.util.Arrays; 24 25/** 26 * Implements a static, compacted, binary dictionary of standard words. 27 */ 28public class BinaryDictionary extends Dictionary { 29 30 public static final String DICTIONARY_PACK_AUTHORITY = 31 "com.android.inputmethod.latin.dictionarypack"; 32 33 /** 34 * There is a difference between what java and native code can handle. 35 * This value should only be used in BinaryDictionary.java 36 * It is necessary to keep it at this value because some languages e.g. German have 37 * really long words. 38 */ 39 public static final int MAX_WORD_LENGTH = 48; 40 public static final int MAX_WORDS = 18; 41 42 private static final String TAG = "BinaryDictionary"; 43 private static final int MAX_PROXIMITY_CHARS_SIZE = ProximityInfo.MAX_PROXIMITY_CHARS_SIZE; 44 private static final int MAX_BIGRAMS = 60; 45 46 private static final int TYPED_LETTER_MULTIPLIER = 2; 47 48 private int mDicTypeId; 49 private int mNativeDict; 50 private final int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_PROXIMITY_CHARS_SIZE]; 51 private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS]; 52 private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS]; 53 private final int[] mScores = new int[MAX_WORDS]; 54 private final int[] mBigramScores = new int[MAX_BIGRAMS]; 55 56 public static final Flag FLAG_REQUIRES_GERMAN_UMLAUT_PROCESSING = 57 new Flag(R.bool.config_require_umlaut_processing, 0x1); 58 59 // FULL_EDIT_DISTANCE is a flag that forces the dictionary to use full words 60 // when computing edit distance, instead of the default behavior of stopping 61 // the evaluation at the size the user typed. 62 public static final Flag FLAG_USE_FULL_EDIT_DISTANCE = new Flag(0x2); 63 64 // Can create a new flag from extravalue : 65 // public static final Flag FLAG_MYFLAG = 66 // new Flag("my_flag", 0x02); 67 68 // ALL_CONFIG_FLAGS is a collection of flags that enable reading all flags from configuration. 69 // This is but a mask - it does not mean the flags will be on, only that the configuration 70 // will be read for this particular flag. 71 public static final Flag[] ALL_CONFIG_FLAGS = { 72 // Here should reside all flags that trigger some special processing 73 // These *must* match the definition in UnigramDictionary enum in 74 // unigram_dictionary.h so please update both at the same time. 75 // Please note that flags created with a resource are of type CONFIG while flags 76 // created with a string are of type EXTRAVALUE. These behave like masks, and the 77 // actual value will be read from the configuration/extra value at run time for 78 // the configuration at dictionary creation time. 79 FLAG_REQUIRES_GERMAN_UMLAUT_PROCESSING, 80 }; 81 82 private int mFlags = 0; 83 84 /** 85 * Constructor for the binary dictionary. This is supposed to be called from the 86 * dictionary factory. 87 * All implementations should pass null into flagArray, except for testing purposes. 88 * @param context the context to access the environment from. 89 * @param filename the name of the file to read through native code. 90 * @param offset the offset of the dictionary data within the file. 91 * @param length the length of the binary data. 92 * @param flagArray the flags to limit the dictionary to, or null for default. 93 */ 94 public BinaryDictionary(final Context context, 95 final String filename, final long offset, final long length, Flag[] flagArray) { 96 // Note: at the moment a binary dictionary is always of the "main" type. 97 // Initializing this here will help transitioning out of the scheme where 98 // the Suggest class knows everything about every single dictionary. 99 mDicTypeId = Suggest.DIC_MAIN; 100 // TODO: Stop relying on the state of SubtypeSwitcher, get it as a parameter 101 mFlags = Flag.initFlags(null == flagArray ? ALL_CONFIG_FLAGS : flagArray, context, 102 SubtypeSwitcher.getInstance()); 103 loadDictionary(filename, offset, length); 104 } 105 106 static { 107 Utils.loadNativeLibrary(); 108 } 109 110 private native int openNative(String sourceDir, long dictOffset, long dictSize, 111 int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, 112 int maxWords, int maxAlternatives); 113 private native void closeNative(int dict); 114 private native boolean isValidWordNative(int nativeData, char[] word, int wordLength); 115 private native int getSuggestionsNative(int dict, int proximityInfo, int[] xCoordinates, 116 int[] yCoordinates, int[] inputCodes, int codesSize, int flags, char[] outputChars, 117 int[] scores); 118 private native int getBigramsNative(int dict, char[] prevWord, int prevWordLength, 119 int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores, 120 int maxWordLength, int maxBigrams, int maxAlternatives); 121 122 private final void loadDictionary(String path, long startOffset, long length) { 123 mNativeDict = openNative(path, startOffset, length, 124 TYPED_LETTER_MULTIPLIER, FULL_WORD_SCORE_MULTIPLIER, 125 MAX_WORD_LENGTH, MAX_WORDS, MAX_PROXIMITY_CHARS_SIZE); 126 } 127 128 @Override 129 public void getBigrams(final WordComposer codes, final CharSequence previousWord, 130 final WordCallback callback) { 131 if (mNativeDict == 0) return; 132 133 char[] chars = previousWord.toString().toCharArray(); 134 Arrays.fill(mOutputChars_bigrams, (char) 0); 135 Arrays.fill(mBigramScores, 0); 136 137 int codesSize = codes.size(); 138 if (codesSize <= 0) { 139 // Do not return bigrams from BinaryDictionary when nothing was typed. 140 // Only use user-history bigrams (or whatever other bigram dictionaries decide). 141 return; 142 } 143 Arrays.fill(mInputCodes, -1); 144 int[] alternatives = codes.getCodesAt(0); 145 System.arraycopy(alternatives, 0, mInputCodes, 0, 146 Math.min(alternatives.length, MAX_PROXIMITY_CHARS_SIZE)); 147 148 int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize, 149 mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS, 150 MAX_PROXIMITY_CHARS_SIZE); 151 152 for (int j = 0; j < count; ++j) { 153 if (mBigramScores[j] < 1) break; 154 final int start = j * MAX_WORD_LENGTH; 155 int len = 0; 156 while (len < MAX_WORD_LENGTH && mOutputChars_bigrams[start + len] != 0) { 157 ++len; 158 } 159 if (len > 0) { 160 callback.addWord(mOutputChars_bigrams, start, len, mBigramScores[j], 161 mDicTypeId, DataType.BIGRAM); 162 } 163 } 164 } 165 166 // proximityInfo may not be null. 167 @Override 168 public void getWords(final WordComposer codes, final WordCallback callback, 169 final ProximityInfo proximityInfo) { 170 final int count = getSuggestions(codes, proximityInfo, mOutputChars, mScores); 171 172 for (int j = 0; j < count; ++j) { 173 if (mScores[j] < 1) break; 174 final int start = j * MAX_WORD_LENGTH; 175 int len = 0; 176 while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) { 177 ++len; 178 } 179 if (len > 0) { 180 callback.addWord(mOutputChars, start, len, mScores[j], mDicTypeId, 181 DataType.UNIGRAM); 182 } 183 } 184 } 185 186 /* package for test */ boolean isValidDictionary() { 187 return mNativeDict != 0; 188 } 189 190 // proximityInfo may not be null. 191 /* package for test */ int getSuggestions(final WordComposer codes, 192 final ProximityInfo proximityInfo, char[] outputChars, int[] scores) { 193 if (!isValidDictionary()) return -1; 194 195 final int codesSize = codes.size(); 196 // Won't deal with really long words. 197 if (codesSize > MAX_WORD_LENGTH - 1) return -1; 198 199 Arrays.fill(mInputCodes, WordComposer.NOT_A_CODE); 200 for (int i = 0; i < codesSize; i++) { 201 int[] alternatives = codes.getCodesAt(i); 202 System.arraycopy(alternatives, 0, mInputCodes, i * MAX_PROXIMITY_CHARS_SIZE, 203 Math.min(alternatives.length, MAX_PROXIMITY_CHARS_SIZE)); 204 } 205 Arrays.fill(outputChars, (char) 0); 206 Arrays.fill(scores, 0); 207 208 return getSuggestionsNative( 209 mNativeDict, proximityInfo.getNativeProximityInfo(), 210 codes.getXCoordinates(), codes.getYCoordinates(), mInputCodes, codesSize, 211 mFlags, outputChars, scores); 212 } 213 214 @Override 215 public boolean isValidWord(CharSequence word) { 216 if (word == null) return false; 217 char[] chars = word.toString().toCharArray(); 218 return isValidWordNative(mNativeDict, chars, chars.length); 219 } 220 221 @Override 222 public synchronized void close() { 223 closeInternal(); 224 } 225 226 private void closeInternal() { 227 if (mNativeDict != 0) { 228 closeNative(mNativeDict); 229 mNativeDict = 0; 230 } 231 } 232 233 @Override 234 protected void finalize() throws Throwable { 235 try { 236 closeInternal(); 237 } finally { 238 super.finalize(); 239 } 240 } 241} 242