// BinaryDictionaryGetter.java revision 66c90cd2ae49c49da8aeda5ab1d86bd9b76434c7
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin; 18 19import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; 20import com.android.inputmethod.latin.makedict.FormatSpec; 21 22import android.content.Context; 23import android.content.SharedPreferences; 24import android.content.pm.PackageManager.NameNotFoundException; 25import android.content.res.AssetFileDescriptor; 26import android.util.Log; 27 28import java.io.File; 29import java.io.FileInputStream; 30import java.io.IOException; 31import java.nio.BufferUnderflowException; 32import java.nio.channels.FileChannel; 33import java.util.ArrayList; 34import java.util.HashMap; 35import java.util.Locale; 36 37/** 38 * Helper class to get the address of a mmap'able dictionary file. 39 */ 40final class BinaryDictionaryGetter { 41 42 /** 43 * Used for Log actions from this class 44 */ 45 private static final String TAG = BinaryDictionaryGetter.class.getSimpleName(); 46 47 /** 48 * Used to return empty lists 49 */ 50 private static final File[] EMPTY_FILE_ARRAY = new File[0]; 51 52 /** 53 * Name of the common preferences name to know which word list are on and which are off. 
54 */ 55 private static final String COMMON_PREFERENCES_NAME = "LatinImeDictPrefs"; 56 57 // Name of the category for the main dictionary 58 private static final String MAIN_DICTIONARY_CATEGORY = "main"; 59 public static final String ID_CATEGORY_SEPARATOR = ":"; 60 61 // The key considered to read the version attribute in a dictionary file. 62 private static String VERSION_KEY = "version"; 63 64 // Prevents this from being instantiated 65 private BinaryDictionaryGetter() {} 66 67 /** 68 * Returns whether we may want to use this character as part of a file name. 69 * 70 * This basically only accepts ascii letters and numbers, and rejects everything else. 71 */ 72 private static boolean isFileNameCharacter(int codePoint) { 73 if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit 74 if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase 75 if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase 76 return codePoint == '_'; // Underscore 77 } 78 79 /** 80 * Escapes a string for any characters that may be suspicious for a file or directory name. 81 * 82 * Concretely this does a sort of URL-encoding except it will encode everything that's not 83 * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which 84 * we cannot allow here) 85 */ 86 // TODO: create a unit test for this method 87 private static String replaceFileNameDangerousCharacters(final String name) { 88 // This assumes '%' is fully available as a non-separator, normal 89 // character in a file name. This is probably true for all file systems. 
90 final StringBuilder sb = new StringBuilder(); 91 final int nameLength = name.length(); 92 for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) { 93 final int codePoint = name.codePointAt(i); 94 if (isFileNameCharacter(codePoint)) { 95 sb.appendCodePoint(codePoint); 96 } else { 97 // 6 digits - unicode is limited to 21 bits 98 sb.append(String.format((Locale)null, "%%%1$06x", codePoint)); 99 } 100 } 101 return sb.toString(); 102 } 103 104 /** 105 * Reverse escaping done by replaceFileNameDangerousCharacters. 106 */ 107 private static String getWordListIdFromFileName(final String fname) { 108 final StringBuilder sb = new StringBuilder(); 109 final int fnameLength = fname.length(); 110 for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) { 111 final int codePoint = fname.codePointAt(i); 112 if ('%' != codePoint) { 113 sb.appendCodePoint(codePoint); 114 } else { 115 final int encodedCodePoint = Integer.parseInt(fname.substring(i + 1, i + 7), 16); 116 i += 6; 117 sb.appendCodePoint(encodedCodePoint); 118 } 119 } 120 return sb.toString(); 121 } 122 123 /** 124 * Helper method to get the top level cache directory. 125 */ 126 private static String getWordListCacheDirectory(final Context context) { 127 return context.getFilesDir() + File.separator + "dicts"; 128 } 129 130 /** 131 * Find out the cache directory associated with a specific locale. 
132 */ 133 private static String getCacheDirectoryForLocale(final String locale, final Context context) { 134 final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale); 135 final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator 136 + relativeDirectoryName; 137 final File directory = new File(absoluteDirectoryName); 138 if (!directory.exists()) { 139 if (!directory.mkdirs()) { 140 Log.e(TAG, "Could not create the directory for locale" + locale); 141 } 142 } 143 return absoluteDirectoryName; 144 } 145 146 /** 147 * Generates a file name for the id and locale passed as an argument. 148 * 149 * In the current implementation the file name returned will always be unique for 150 * any id/locale pair, but please do not expect that the id can be the same for 151 * different dictionaries with different locales. An id should be unique for any 152 * dictionary. 153 * The file name is pretty much an URL-encoded version of the id inside a directory 154 * named like the locale, except it will also escape characters that look dangerous 155 * to some file systems. 156 * @param id the id of the dictionary for which to get a file name 157 * @param locale the locale for which to get the file name as a string 158 * @param context the context to use for getting the directory 159 * @return the name of the file to be created 160 */ 161 public static String getCacheFileName(String id, String locale, Context context) { 162 final String fileName = replaceFileNameDangerousCharacters(id); 163 return getCacheDirectoryForLocale(locale, context) + File.separator + fileName; 164 } 165 166 /** 167 * Generates a unique temporary file name in the app cache directory. 168 * 169 * This is unique as long as it doesn't get called twice in the same millisecond by the same 170 * thread, which should be more than enough for our purposes. 
171 */ 172 public static String getTempFileName(String id, Context context) { 173 final String fileName = replaceFileNameDangerousCharacters(id); 174 return context.getCacheDir() + File.separator + fileName + "." 175 + Thread.currentThread().getId() + "." + System.currentTimeMillis(); 176 } 177 178 /** 179 * Returns a file address from a resource, or null if it cannot be opened. 180 */ 181 private static AssetFileAddress loadFallbackResource(final Context context, 182 final int fallbackResId) { 183 final AssetFileDescriptor afd = context.getResources().openRawResourceFd(fallbackResId); 184 if (afd == null) { 185 Log.e(TAG, "Found the resource but cannot read it. Is it compressed? resId=" 186 + fallbackResId); 187 return null; 188 } 189 return AssetFileAddress.makeFromFileNameAndOffset( 190 context.getApplicationInfo().sourceDir, afd.getStartOffset(), afd.getLength()); 191 } 192 193 private static final class DictPackSettings { 194 final SharedPreferences mDictPreferences; 195 public DictPackSettings(final Context context) { 196 Context dictPackContext = null; 197 try { 198 final String dictPackName = 199 context.getString(R.string.dictionary_pack_package_name); 200 dictPackContext = context.createPackageContext(dictPackName, 0); 201 } catch (NameNotFoundException e) { 202 // The dictionary pack is not installed... 203 // TODO: fallback on the built-in dict, see the TODO above 204 Log.e(TAG, "Could not find a dictionary pack"); 205 } 206 mDictPreferences = null == dictPackContext ? null 207 : dictPackContext.getSharedPreferences(COMMON_PREFERENCES_NAME, 208 Context.MODE_WORLD_READABLE | Context.MODE_MULTI_PROCESS); 209 } 210 public boolean isWordListActive(final String dictId) { 211 if (null == mDictPreferences) { 212 // If we don't have preferences it basically means we can't find the dictionary 213 // pack - either it's not installed, or it's disabled, or there is some strange 214 // bug. 
Either way, a word list with no settings should be on by default: default 215 // dictionaries in LatinIME are on if there is no settings at all, and if for some 216 // reason some dictionaries have been installed BUT the dictionary pack can't be 217 // found anymore it's safer to actually supply installed dictionaries. 218 return true; 219 } else { 220 // The default is true here for the same reasons as above. We got the dictionary 221 // pack but if we don't have any settings for it it means the user has never been 222 // to the settings yet. So by default, the main dictionaries should be on. 223 return mDictPreferences.getBoolean(dictId, true); 224 } 225 } 226 } 227 228 /** 229 * Helper method to the list of cache directories, one for each distinct locale. 230 */ 231 private static File[] getCachedDirectoryList(final Context context) { 232 return new File(getWordListCacheDirectory(context)).listFiles(); 233 } 234 235 /** 236 * Returns the category for a given file name. 237 * 238 * This parses the file name, extracts the category, and returns it. See 239 * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}. 240 * @return The category as a string or null if it can't be found in the file name. 241 */ 242 private static String getCategoryFromFileName(final String fileName) { 243 final String id = getWordListIdFromFileName(fileName); 244 final String[] idArray = id.split(ID_CATEGORY_SEPARATOR); 245 if (2 != idArray.length) return null; 246 return idArray[0]; 247 } 248 249 /** 250 * Utility class for the {@link #getCachedWordLists} method 251 */ 252 private static final class FileAndMatchLevel { 253 final File mFile; 254 final int mMatchLevel; 255 public FileAndMatchLevel(final File file, final int matchLevel) { 256 mFile = file; 257 mMatchLevel = matchLevel; 258 } 259 } 260 261 /** 262 * Returns the list of cached files for a specific locale, one for each category. 
263 * 264 * This will return exactly one file for each word list category that matches 265 * the passed locale. If several files match the locale for any given category, 266 * this returns the file with the closest match to the locale. For example, if 267 * the passed word list is en_US, and for a category we have an en and an en_US 268 * word list available, we'll return only the en_US one. 269 * Thus, the list will contain as many files as there are categories. 270 * 271 * @param locale the locale to find the dictionary files for, as a string. 272 * @param context the context on which to open the files upon. 273 * @return an array of binary dictionary files, which may be empty but may not be null. 274 */ 275 private static File[] getCachedWordLists(final String locale, 276 final Context context) { 277 final File[] directoryList = getCachedDirectoryList(context); 278 if (null == directoryList) return EMPTY_FILE_ARRAY; 279 final HashMap<String, FileAndMatchLevel> cacheFiles = CollectionUtils.newHashMap(); 280 for (File directory : directoryList) { 281 if (!directory.isDirectory()) continue; 282 final String dirLocale = getWordListIdFromFileName(directory.getName()); 283 final int matchLevel = LocaleUtils.getMatchLevel(dirLocale, locale); 284 if (LocaleUtils.isMatch(matchLevel)) { 285 final File[] wordLists = directory.listFiles(); 286 if (null != wordLists) { 287 for (File wordList : wordLists) { 288 final String category = getCategoryFromFileName(wordList.getName()); 289 final FileAndMatchLevel currentBestMatch = cacheFiles.get(category); 290 if (null == currentBestMatch || currentBestMatch.mMatchLevel < matchLevel) { 291 cacheFiles.put(category, new FileAndMatchLevel(wordList, matchLevel)); 292 } 293 } 294 } 295 } 296 } 297 if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY; 298 final File[] result = new File[cacheFiles.size()]; 299 int index = 0; 300 for (final FileAndMatchLevel entry : cacheFiles.values()) { 301 result[index++] = entry.mFile; 302 } 303 return 
result; 304 } 305 306 /** 307 * Remove all files with the passed id, except the passed file. 308 * 309 * If a dictionary with a given ID has a metadata change that causes it to change 310 * path, we need to remove the old version. The only way to do this is to check all 311 * installed files for a matching ID in a different directory. 312 */ 313 public static void removeFilesWithIdExcept(final Context context, final String id, 314 final File fileToKeep) { 315 try { 316 final File canonicalFileToKeep = fileToKeep.getCanonicalFile(); 317 final File[] directoryList = getCachedDirectoryList(context); 318 if (null == directoryList) return; 319 for (File directory : directoryList) { 320 // There is one directory per locale. See #getCachedDirectoryList 321 if (!directory.isDirectory()) continue; 322 final File[] wordLists = directory.listFiles(); 323 if (null == wordLists) continue; 324 for (File wordList : wordLists) { 325 final String fileId = getWordListIdFromFileName(wordList.getName()); 326 if (fileId.equals(id)) { 327 if (!canonicalFileToKeep.equals(wordList.getCanonicalFile())) { 328 wordList.delete(); 329 } 330 } 331 } 332 } 333 } catch (java.io.IOException e) { 334 Log.e(TAG, "IOException trying to cleanup files : " + e); 335 } 336 } 337 338 339 /** 340 * Returns the id associated with the main word list for a specified locale. 341 * 342 * Word lists stored in Android Keyboard's resources are referred to as the "main" 343 * word lists. Since they can be updated like any other list, we need to assign a 344 * unique ID to them. This ID is just the name of the language (locale-wise) they 345 * are for, and this method returns this ID. 346 */ 347 private static String getMainDictId(final Locale locale) { 348 // This works because we don't include by default different dictionaries for 349 // different countries. This actually needs to return the id that we would 350 // like to use for word lists included in resources, and the following is okay. 
351 return MAIN_DICTIONARY_CATEGORY + ID_CATEGORY_SEPARATOR + locale.getLanguage().toString(); 352 } 353 354 private static boolean isMainWordListId(final String id) { 355 final String[] idArray = id.split(ID_CATEGORY_SEPARATOR); 356 if (2 != idArray.length) return false; 357 return MAIN_DICTIONARY_CATEGORY.equals(idArray[0]); 358 } 359 360 // ## HACK ## we prevent usage of a dictionary before version 18 for English only. The reason 361 // for this is, since those do not include whitelist entries, the new code with an old version 362 // of the dictionary would lose whitelist functionality. 363 private static boolean hackCanUseDictionaryFile(final Locale locale, final File f) { 364 // Only for English - other languages didn't have a whitelist, hence this 365 // ad-hoc ## HACK ## 366 if (!Locale.ENGLISH.getLanguage().equals(locale.getLanguage())) return true; 367 368 FileInputStream inStream = null; 369 try { 370 // Read the version of the file 371 inStream = new FileInputStream(f); 372 final BinaryDictInputOutput.ByteBufferWrapper buffer = 373 new BinaryDictInputOutput.ByteBufferWrapper(inStream.getChannel().map( 374 FileChannel.MapMode.READ_ONLY, 0, f.length())); 375 final int magic = buffer.readInt(); 376 if (magic != FormatSpec.VERSION_2_MAGIC_NUMBER) { 377 return false; 378 } 379 final int formatVersion = buffer.readInt(); 380 final int headerSize = buffer.readInt(); 381 final HashMap<String, String> options = CollectionUtils.newHashMap(); 382 BinaryDictInputOutput.populateOptions(buffer, headerSize, options); 383 384 final String version = options.get(VERSION_KEY); 385 if (null == version) { 386 // No version in the options : the format is unexpected 387 return false; 388 } 389 // Version 18 is the first one to include the whitelist 390 // Obviously this is a big ## HACK ## 391 return Integer.parseInt(version) >= 18; 392 } catch (java.io.FileNotFoundException e) { 393 return false; 394 } catch (java.io.IOException e) { 395 return false; 396 } catch 
(NumberFormatException e) { 397 return false; 398 } catch (BufferUnderflowException e) { 399 return false; 400 } finally { 401 if (inStream != null) { 402 try { 403 inStream.close(); 404 } catch (IOException e) { 405 // do nothing 406 } 407 } 408 } 409 } 410 411 /** 412 * Returns a list of file addresses for a given locale, trying relevant methods in order. 413 * 414 * Tries to get binary dictionaries from various sources, in order: 415 * - Uses a content provider to get a public dictionary set, as per the protocol described 416 * in BinaryDictionaryFileDumper. 417 * If that fails: 418 * - Gets a file name from the built-in dictionary for this locale, if any. 419 * If that fails: 420 * - Returns null. 421 * @return The list of addresses of valid dictionary files, or null. 422 */ 423 public static ArrayList<AssetFileAddress> getDictionaryFiles(final Locale locale, 424 final Context context) { 425 426 final boolean hasDefaultWordList = DictionaryFactory.isDictionaryAvailable(context, locale); 427 // cacheWordListsFromContentProvider returns the list of files it copied to local 428 // storage, but we don't really care about what was copied NOW: what we want is the 429 // list of everything we ever cached, so we ignore the return value. 
430 BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context, 431 hasDefaultWordList); 432 final File[] cachedWordLists = getCachedWordLists(locale.toString(), context); 433 final String mainDictId = getMainDictId(locale); 434 final DictPackSettings dictPackSettings = new DictPackSettings(context); 435 436 boolean foundMainDict = false; 437 final ArrayList<AssetFileAddress> fileList = CollectionUtils.newArrayList(); 438 // cachedWordLists may not be null, see doc for getCachedDictionaryList 439 for (final File f : cachedWordLists) { 440 final String wordListId = getWordListIdFromFileName(f.getName()); 441 final boolean canUse = f.canRead() && hackCanUseDictionaryFile(locale, f); 442 if (canUse && isMainWordListId(wordListId)) { 443 foundMainDict = true; 444 } 445 if (!dictPackSettings.isWordListActive(wordListId)) continue; 446 if (canUse) { 447 fileList.add(AssetFileAddress.makeFromFileName(f.getPath())); 448 } else { 449 Log.e(TAG, "Found a cached dictionary file but cannot read or use it"); 450 } 451 } 452 453 if (!foundMainDict && dictPackSettings.isWordListActive(mainDictId)) { 454 final int fallbackResId = 455 DictionaryFactory.getMainDictionaryResourceId(context.getResources(), locale); 456 final AssetFileAddress fallbackAsset = loadFallbackResource(context, fallbackResId); 457 if (null != fallbackAsset) { 458 fileList.add(fallbackAsset); 459 } 460 } 461 462 return fileList; 463 } 464} 465