BinaryDictionaryGetter.java revision a28a05e971cc242b338331a3b78276fa95188d19
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.FormatSpec;

import android.content.Context;
import android.content.SharedPreferences;
import android.content.pm.PackageManager.NameNotFoundException;
import android.content.res.AssetFileDescriptor;
import android.content.res.Resources;
import android.util.Log;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;

/**
 * Helper class to get the address of a mmap'able dictionary file.
 *
 * All members are static; the class cannot be instantiated.
 */
final class BinaryDictionaryGetter {

    /**
     * Used for Log actions from this class
     */
    private static final String TAG = BinaryDictionaryGetter.class.getSimpleName();

    /**
     * Used to return empty lists
     */
    private static final File[] EMPTY_FILE_ARRAY = new File[0];

    /**
     * Name of the common preferences name to know which word list are on and which are off.
     */
    private static final String COMMON_PREFERENCES_NAME = "LatinImeDictPrefs";

    // Name of the category for the main dictionary
    private static final String MAIN_DICTIONARY_CATEGORY = "main";
    public static final String ID_CATEGORY_SEPARATOR = ":";

    // The key considered to read the version attribute in a dictionary file.
    private static final String VERSION_KEY = "version";

    // Number of hexadecimal digits written after '%' by replaceFileNameDangerousCharacters.
    // Must stay in sync with the "%06x" conversion used there.
    private static final int ESCAPED_CODE_POINT_DIGITS = 6;

    // Returned by decodeEscapedCodePoint when the escape sequence is malformed.
    private static final int INVALID_CODE_POINT = -1;

    // Prevents this from being instantiated
    private BinaryDictionaryGetter() {}

    /**
     * Returns whether we may want to use this character as part of a file name.
     *
     * This basically only accepts ascii letters and numbers, and rejects everything else.
     *
     * @param codePoint the code point to test
     * @return true for ASCII digits, ASCII letters and underscore, false otherwise
     */
    private static boolean isFileNameCharacter(int codePoint) {
        if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
        if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
        if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
        return codePoint == '_'; // Underscore
    }

    /**
     * Escapes a string for any characters that may be suspicious for a file or directory name.
     *
     * Concretely this does a sort of URL-encoding except it will encode everything that's not
     * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
     * we cannot allow here)
     *
     * @param name the string to escape
     * @return the escaped string, where every rejected code point is written as '%' followed
     *         by six hexadecimal digits
     */
    // TODO: create a unit test for this method
    private static String replaceFileNameDangerousCharacters(final String name) {
        // This assumes '%' is fully available as a non-separator, normal
        // character in a file name. This is probably true for all file systems.
        final StringBuilder sb = new StringBuilder();
        final int nameLength = name.length();
        for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
            final int codePoint = name.codePointAt(i);
            if (isFileNameCharacter(codePoint)) {
                sb.appendCodePoint(codePoint);
            } else {
                // 6 digits - unicode is limited to 21 bits
                sb.append(String.format((Locale)null, "%%%1$06x", codePoint));
            }
        }
        return sb.toString();
    }

    /**
     * Reverse escaping done by replaceFileNameDangerousCharacters.
     *
     * A '%' that is not followed by a well-formed escape (too few characters left, a
     * non-hexadecimal character, or a value that is not a valid code point) is copied to the
     * output as a literal '%' instead of raising an exception. Names produced by
     * replaceFileNameDangerousCharacters never contain such a sequence, so this only matters
     * for files that were not written by this class.
     *
     * @param fname the escaped file name
     * @return the unescaped string
     */
    private static String getWordListIdFromFileName(final String fname) {
        final StringBuilder sb = new StringBuilder();
        final int fnameLength = fname.length();
        for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
            final int codePoint = fname.codePointAt(i);
            if ('%' != codePoint) {
                sb.appendCodePoint(codePoint);
                continue;
            }
            final int encodedCodePoint = decodeEscapedCodePoint(fname, i);
            if (INVALID_CODE_POINT == encodedCodePoint) {
                // Malformed escape: keep the '%' and carry on with the next character.
                sb.append('%');
                continue;
            }
            sb.appendCodePoint(encodedCodePoint);
            // Skip the digits; the loop increment then steps past the last one.
            i += ESCAPED_CODE_POINT_DIGITS;
        }
        return sb.toString();
    }

    /**
     * Decodes the hexadecimal digits that follow a '%' in an escaped file name.
     *
     * @param fname the escaped file name
     * @param percentIndex the index of the '%' character in fname
     * @return the decoded code point, or INVALID_CODE_POINT if the escape is malformed
     */
    private static int decodeEscapedCodePoint(final String fname, final int percentIndex) {
        final int start = percentIndex + 1;
        final int end = start + ESCAPED_CODE_POINT_DIGITS;
        if (end > fname.length()) return INVALID_CODE_POINT;
        int codePoint = 0;
        for (int i = start; i < end; ++i) {
            final int digit = Character.digit(fname.charAt(i), 16);
            if (digit < 0) return INVALID_CODE_POINT;
            codePoint = (codePoint << 4) | digit;
        }
        return Character.isValidCodePoint(codePoint) ? codePoint : INVALID_CODE_POINT;
    }

    /**
     * Helper method to get the top level cache directory.
     *
     * @param context the context whose files directory is used as the parent
     * @return the path of the "dicts" directory under the context's files directory
     */
    private static String getWordListCacheDirectory(final Context context) {
        return context.getFilesDir() + File.separator + "dicts";
    }

    /**
     * Find out the cache directory associated with a specific locale.
     *
     * The directory is created if it does not exist yet. A failure to create it is logged
     * and the path is returned anyway.
     *
     * @param locale the locale, as a string
     * @param context the context to use for getting the top level cache directory
     * @return the absolute path of the directory for this locale
     */
    private static String getCacheDirectoryForLocale(final String locale, final Context context) {
        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
        final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
                + relativeDirectoryName;
        final File directory = new File(absoluteDirectoryName);
        if (!directory.exists()) {
            if (!directory.mkdirs()) {
                Log.e(TAG, "Could not create the directory for locale" + locale);
            }
        }
        return absoluteDirectoryName;
    }

    /**
     * Generates a file name for the id and locale passed as an argument.
     *
     * In the current implementation the file name returned will always be unique for
     * any id/locale pair, but please do not expect that the id can be the same for
     * different dictionaries with different locales. An id should be unique for any
     * dictionary.
     * The file name is pretty much an URL-encoded version of the id inside a directory
     * named like the locale, except it will also escape characters that look dangerous
     * to some file systems.
     * @param id the id of the dictionary for which to get a file name
     * @param locale the locale for which to get the file name as a string
     * @param context the context to use for getting the directory
     * @return the name of the file to be created
     */
    public static String getCacheFileName(String id, String locale, Context context) {
        final String fileName = replaceFileNameDangerousCharacters(id);
        return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
    }

    /**
     * Returns a file address from a resource, or null if it cannot be opened.
     *
     * @param context the context used to resolve the resource and the package source dir
     * @param fallbackResId the id of the raw resource holding the dictionary
     * @return the address of the resource inside the package file, or null
     */
    private static AssetFileAddress loadFallbackResource(final Context context,
            final int fallbackResId) {
        final AssetFileDescriptor afd;
        try {
            afd = context.getResources().openRawResourceFd(fallbackResId);
        } catch (Resources.NotFoundException e) {
            Log.e(TAG, "Could not find the fallback resource. resId=" + fallbackResId);
            return null;
        }
        if (afd == null) {
            Log.e(TAG, "Found the resource but cannot read it. Is it compressed? resId="
                    + fallbackResId);
            return null;
        }
        try {
            // Only the offset and length are read from the descriptor; the descriptor itself
            // is not handed to the address, so it is closed before returning.
            return AssetFileAddress.makeFromFileNameAndOffset(
                    context.getApplicationInfo().sourceDir, afd.getStartOffset(), afd.getLength());
        } finally {
            try {
                afd.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }

    /**
     * Read-only view on the dictionary pack preferences that say which word lists are on.
     */
    private static final class DictPackSettings {
        // Null when the dictionary pack context could not be created.
        final SharedPreferences mDictPreferences;

        public DictPackSettings(final Context context) {
            Context dictPackContext = null;
            try {
                final String dictPackName =
                        context.getString(R.string.dictionary_pack_package_name);
                dictPackContext = context.createPackageContext(dictPackName, 0);
            } catch (NameNotFoundException e) {
                // The dictionary pack is not installed...
                // TODO: fallback on the built-in dict, see the TODO above
                Log.e(TAG, "Could not find a dictionary pack");
            }
            mDictPreferences = null == dictPackContext ? null
                    : dictPackContext.getSharedPreferences(COMMON_PREFERENCES_NAME,
                            Context.MODE_WORLD_READABLE | Context.MODE_MULTI_PROCESS);
        }

        /**
         * Returns whether the word list with the passed id should be used.
         *
         * @param dictId the id of the word list
         * @return the stored setting, or true when there is no setting for this id
         */
        public boolean isWordListActive(final String dictId) {
            if (null == mDictPreferences) {
                // If we don't have preferences it basically means we can't find the dictionary
                // pack - either it's not installed, or it's disabled, or there is some strange
                // bug. Either way, a word list with no settings should be on by default: default
                // dictionaries in LatinIME are on if there is no settings at all, and if for some
                // reason some dictionaries have been installed BUT the dictionary pack can't be
                // found anymore it's safer to actually supply installed dictionaries.
                return true;
            } else {
                // The default is true here for the same reasons as above. We got the dictionary
                // pack but if we don't have any settings for it it means the user has never been
                // to the settings yet. So by default, the main dictionaries should be on.
                return mDictPreferences.getBoolean(dictId, true);
            }
        }
    }

    /**
     * Helper method to get the list of cache directories, one for each distinct locale.
     *
     * @param context the context to use for getting the top level cache directory
     * @return the entries of the top level cache directory, or null if it cannot be listed
     */
    private static File[] getCachedDirectoryList(final Context context) {
        return new File(getWordListCacheDirectory(context)).listFiles();
    }

    /**
     * Returns the category for a given file name.
     *
     * This parses the file name, extracts the category, and returns it. See
     * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
     * @return The category as a string or null if it can't be found in the file name.
     */
    private static String getCategoryFromFileName(final String fileName) {
        final String id = getWordListIdFromFileName(fileName);
        final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
        if (2 != idArray.length) return null;
        return idArray[0];
    }

    /**
     * Utility class for the {@link #getCachedWordLists} method
     */
    private static final class FileAndMatchLevel {
        final File mFile;
        final int mMatchLevel;
        public FileAndMatchLevel(final File file, final int matchLevel) {
            mFile = file;
            mMatchLevel = matchLevel;
        }
    }

    /**
     * Returns the list of cached files for a specific locale, one for each category.
     *
     * This will return exactly one file for each word list category that matches
     * the passed locale. If several files match the locale for any given category,
     * this returns the file with the closest match to the locale. For example, if
     * the passed word list is en_US, and for a category we have an en and an en_US
     * word list available, we'll return only the en_US one.
     * Thus, the list will contain as many files as there are categories.
     *
     * @param locale the locale to find the dictionary files for, as a string.
     * @param context the context on which to open the files upon.
     * @return an array of binary dictionary files, which may be empty but may not be null.
     */
    private static File[] getCachedWordLists(final String locale,
            final Context context) {
        final File[] directoryList = getCachedDirectoryList(context);
        if (null == directoryList) return EMPTY_FILE_ARRAY;
        final HashMap<String, FileAndMatchLevel> cacheFiles = CollectionUtils.newHashMap();
        for (File directory : directoryList) {
            if (!directory.isDirectory()) continue;
            final String dirLocale = getWordListIdFromFileName(directory.getName());
            final int matchLevel = LocaleUtils.getMatchLevel(dirLocale, locale);
            if (LocaleUtils.isMatch(matchLevel)) {
                final File[] wordLists = directory.listFiles();
                if (null != wordLists) {
                    for (File wordList : wordLists) {
                        // The category is null when the file name has no category part; such
                        // files all share the null key of the map.
                        final String category = getCategoryFromFileName(wordList.getName());
                        final FileAndMatchLevel currentBestMatch = cacheFiles.get(category);
                        if (null == currentBestMatch || currentBestMatch.mMatchLevel < matchLevel) {
                            cacheFiles.put(category, new FileAndMatchLevel(wordList, matchLevel));
                        }
                    }
                }
            }
        }
        if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY;
        final File[] result = new File[cacheFiles.size()];
        int index = 0;
        for (final FileAndMatchLevel entry : cacheFiles.values()) {
            result[index++] = entry.mFile;
        }
        return result;
    }

    /**
     * Remove all files with the passed id, except the passed file.
     *
     * If a dictionary with a given ID has a metadata change that causes it to change
     * path, we need to remove the old version. The only way to do this is to check all
     * installed files for a matching ID in a different directory.
     *
     * @param context the context to use for getting the cache directories
     * @param id the id of the word list whose other copies must be removed
     * @param fileToKeep the file that must not be removed
     */
    public static void removeFilesWithIdExcept(final Context context, final String id,
            final File fileToKeep) {
        try {
            final File canonicalFileToKeep = fileToKeep.getCanonicalFile();
            final File[] directoryList = getCachedDirectoryList(context);
            if (null == directoryList) return;
            for (File directory : directoryList) {
                // There is one directory per locale. See #getCachedDirectoryList
                if (!directory.isDirectory()) continue;
                final File[] wordLists = directory.listFiles();
                if (null == wordLists) continue;
                for (File wordList : wordLists) {
                    final String fileId = getWordListIdFromFileName(wordList.getName());
                    if (!fileId.equals(id)) continue;
                    if (canonicalFileToKeep.equals(wordList.getCanonicalFile())) continue;
                    if (!wordList.delete()) {
                        Log.e(TAG, "Could not delete the old word list file " + wordList);
                    }
                }
            }
        } catch (java.io.IOException e) {
            Log.e(TAG, "IOException trying to cleanup files : " + e);
        }
    }


    /**
     * Returns the id associated with the main word list for a specified locale.
     *
     * Word lists stored in Android Keyboard's resources are referred to as the "main"
     * word lists. Since they can be updated like any other list, we need to assign a
     * unique ID to them. This ID is just the name of the language (locale-wise) they
     * are for, and this method returns this ID.
     */
    private static String getMainDictId(final Locale locale) {
        // This works because we don't include by default different dictionaries for
        // different countries. This actually needs to return the id that we would
        // like to use for word lists included in resources, and the following is okay.
        return MAIN_DICTIONARY_CATEGORY + ID_CATEGORY_SEPARATOR + locale.getLanguage();
    }

    /**
     * Returns whether the passed id is made of exactly two parts separated by
     * ID_CATEGORY_SEPARATOR, the first of which is the main dictionary category.
     */
    private static boolean isMainWordListId(final String id) {
        final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
        if (2 != idArray.length) return false;
        return MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
    }

    // ## HACK ## we prevent usage of a dictionary before version 18 for English only. The reason
    // for this is, since those do not include whitelist entries, the new code with an old version
    // of the dictionary would lose whitelist functionality.
    // Returns false whenever the file cannot be read or its header cannot be understood.
    private static boolean hackCanUseDictionaryFile(final Locale locale, final File f) {
        // Only for English - other languages didn't have a whitelist, hence this
        // ad-hock ## HACK ##
        if (!Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) return true;

        FileInputStream inStream = null;
        try {
            // Read the version of the file
            inStream = new FileInputStream(f);
            final BinaryDictInputOutput.ByteBufferWrapper buffer =
                    new BinaryDictInputOutput.ByteBufferWrapper(inStream.getChannel().map(
                            FileChannel.MapMode.READ_ONLY, 0, f.length()));
            final int magic = buffer.readInt();
            if (magic != FormatSpec.VERSION_2_MAGIC_NUMBER) {
                return false;
            }
            // The format version is not used here, but it has to be read to reach the
            // header size that follows it.
            buffer.readInt();
            final int headerSize = buffer.readInt();
            final HashMap<String, String> options = CollectionUtils.newHashMap();
            BinaryDictInputOutput.populateOptions(buffer, headerSize, options);

            final String version = options.get(VERSION_KEY);
            if (null == version) {
                // No version in the options : the format is unexpected
                return false;
            }
            // Version 18 is the first one to include the whitelist
            // Obviously this is a big ## HACK ##
            return Integer.parseInt(version) >= 18;
        } catch (java.io.IOException e) {
            // Also covers FileNotFoundException, which is an IOException.
            return false;
        } catch (NumberFormatException e) {
            return false;
        } catch (BufferUnderflowException e) {
            return false;
        } finally {
            if (inStream != null) {
                try {
                    inStream.close();
                } catch (IOException e) {
                    // do nothing
                }
            }
        }
    }

    /**
     * Returns a list of file addresses for a given locale, trying relevant methods in order.
     *
     * Tries to get binary dictionaries from various sources, in order:
     * - Uses a content provider to get a public dictionary set, as per the protocol described
     *   in BinaryDictionaryFileDumper.
     * If no usable main dictionary was found that way:
     * - Gets a file name from the built-in dictionary for this locale, if any.
     * If that fails too, nothing is added for the main dictionary.
     *
     * @param locale the locale to get dictionary files for
     * @param context the context used to access the cache, the settings and the resources
     * @return The list of addresses of valid dictionary files. The list may be empty.
     */
    public static ArrayList<AssetFileAddress> getDictionaryFiles(final Locale locale,
            final Context context) {

        final boolean hasDefaultWordList = DictionaryFactory.isDictionaryAvailable(context, locale);
        // cacheWordListsFromContentProvider returns the list of files it copied to local
        // storage, but we don't really care about what was copied NOW: what we want is the
        // list of everything we ever cached, so we ignore the return value.
        BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context,
                hasDefaultWordList);
        final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
        final String mainDictId = getMainDictId(locale);
        final DictPackSettings dictPackSettings = new DictPackSettings(context);

        boolean foundMainDict = false;
        final ArrayList<AssetFileAddress> fileList = CollectionUtils.newArrayList();
        // cachedWordLists may not be null, see doc for getCachedWordLists
        for (final File f : cachedWordLists) {
            final String wordListId = getWordListIdFromFileName(f.getName());
            final boolean canUse = f.canRead() && hackCanUseDictionaryFile(locale, f);
            // A usable main dictionary counts as found even if it is turned off below, so
            // that the built-in one is not added in its place.
            if (canUse && isMainWordListId(wordListId)) {
                foundMainDict = true;
            }
            if (!dictPackSettings.isWordListActive(wordListId)) continue;
            if (canUse) {
                fileList.add(AssetFileAddress.makeFromFileName(f.getPath()));
            } else {
                Log.e(TAG, "Found a cached dictionary file but cannot read or use it");
            }
        }

        if (!foundMainDict && dictPackSettings.isWordListActive(mainDictId)) {
            final int fallbackResId =
                    DictionaryFactory.getMainDictionaryResourceId(context.getResources(), locale);
            final AssetFileAddress fallbackAsset = loadFallbackResource(context, fallbackResId);
            if (null != fallbackAsset) {
                fileList.add(fallbackAsset);
            }
        }

        return fileList;
    }
}