BinaryDictionaryGetter.java revision 28966734619251f78812f6a53f5efacbf5f77c49
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard
17cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalardpackage com.android.inputmethod.latin;
18cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard
19cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalardimport android.content.Context;
20cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalardimport android.content.res.AssetFileDescriptor;
21e150ef98569d61078e0f8c67ded8364a9c3d4a20Jean Chalardimport android.content.res.Resources;
22cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalardimport android.util.Log;
23cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard
2428966734619251f78812f6a53f5efacbf5f77c49Jean Chalardimport java.io.File;
25cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalardimport java.io.FileNotFoundException;
26cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalardimport java.io.IOException;
27d8f52a4f18d22aa150846b01017410ce70bbad6fJean Chalardimport java.util.Arrays;
28d8f52a4f18d22aa150846b01017410ce70bbad6fJean Chalardimport java.util.List;
29cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalardimport java.util.Locale;
30cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard
31cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard/**
32cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard * Helper class to get the address of a mmap'able dictionary file.
33cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard */
34cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalardclass BinaryDictionaryGetter {
35cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard
36cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard    /**
37cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     * Used for Log actions from this class
38cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     */
39cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard    private static final String TAG = BinaryDictionaryGetter.class.getSimpleName();
40cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard
41cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard    // Prevents this from being instantiated
42cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard    private BinaryDictionaryGetter() {}
43cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard
44cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard    /**
4528966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * Escapes a string for any characters that may be suspicious for a file or directory name.
4628966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     *
4728966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * Concretely this does a sort of URL-encoding except it will encode everything that's not
4828966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
4928966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * we cannot allow here)
5028966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     */
5128966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    // TODO: create a unit test for this method
5228966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    private static String replaceFileNameDangerousCharacters(String name) {
5328966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        // This assumes '%' is fully available as a non-separator, normal
5428966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        // character in a file name. This is probably true for all file systems.
5528966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        final StringBuilder sb = new StringBuilder();
5628966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        for (int i = 0; i < name.length(); ++i) {
5728966734619251f78812f6a53f5efacbf5f77c49Jean Chalard            final int codePoint = name.codePointAt(i);
5828966734619251f78812f6a53f5efacbf5f77c49Jean Chalard            if (Character.isLetterOrDigit(codePoint) || '_' == codePoint) {
5928966734619251f78812f6a53f5efacbf5f77c49Jean Chalard                sb.appendCodePoint(codePoint);
6028966734619251f78812f6a53f5efacbf5f77c49Jean Chalard            } else {
6128966734619251f78812f6a53f5efacbf5f77c49Jean Chalard                sb.append('%');
6228966734619251f78812f6a53f5efacbf5f77c49Jean Chalard                sb.append(Integer.toHexString(codePoint));
6328966734619251f78812f6a53f5efacbf5f77c49Jean Chalard            }
6428966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        }
6528966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        return sb.toString();
6628966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    }
6728966734619251f78812f6a53f5efacbf5f77c49Jean Chalard
6828966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    /**
6928966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * Find out the cache directory associated with a specific locale.
7028966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     */
7128966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    private static String getCacheDirectoryForLocale(Locale locale, Context context) {
7228966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toString());
7328966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        final String absoluteDirectoryName = context.getFilesDir() + File.separator
7428966734619251f78812f6a53f5efacbf5f77c49Jean Chalard                + relativeDirectoryName;
7528966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        final File directory = new File(absoluteDirectoryName);
7628966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        if (!directory.exists()) {
7728966734619251f78812f6a53f5efacbf5f77c49Jean Chalard            if (!directory.mkdirs()) {
7828966734619251f78812f6a53f5efacbf5f77c49Jean Chalard                Log.e(TAG, "Could not create the directory for locale" + locale);
7928966734619251f78812f6a53f5efacbf5f77c49Jean Chalard            }
8028966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        }
8128966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        return absoluteDirectoryName;
8228966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    }
8328966734619251f78812f6a53f5efacbf5f77c49Jean Chalard
8428966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    /**
8528966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * Generates a file name for the id and locale passed as an argument.
8628966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     *
8728966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * In the current implementation the file name returned will always be unique for
8828966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * any id/locale pair, but please do not expect that the id can be the same for
8928966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * different dictionaries with different locales. An id should be unique for any
9028966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * dictionary.
9128966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * The file name is pretty much an URL-encoded version of the id inside a directory
9228966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * named like the locale, except it will also escape characters that look dangerous
9328966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * to some file systems.
9428966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * @param id the id of the dictionary for which to get a file name
9528966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * @param locale the locale for which to get the file name
9628966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * @param context the context to use for getting the directory
9728966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     * @return the name of the file to be created
9828966734619251f78812f6a53f5efacbf5f77c49Jean Chalard     */
9928966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    public static String getCacheFileName(String id, Locale locale, Context context) {
10028966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        final String fileName = replaceFileNameDangerousCharacters(id);
10128966734619251f78812f6a53f5efacbf5f77c49Jean Chalard        return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
10228966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    }
10328966734619251f78812f6a53f5efacbf5f77c49Jean Chalard
10428966734619251f78812f6a53f5efacbf5f77c49Jean Chalard    /**
105cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     * Returns a file address from a resource, or null if it cannot be opened.
106cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     */
107e150ef98569d61078e0f8c67ded8364a9c3d4a20Jean Chalard    private static AssetFileAddress loadFallbackResource(final Context context,
108e150ef98569d61078e0f8c67ded8364a9c3d4a20Jean Chalard            final int fallbackResId, final Locale locale) {
109e150ef98569d61078e0f8c67ded8364a9c3d4a20Jean Chalard        final Resources res = context.getResources();
110e150ef98569d61078e0f8c67ded8364a9c3d4a20Jean Chalard        final Locale savedLocale = Utils.setSystemLocale(res, locale);
111e150ef98569d61078e0f8c67ded8364a9c3d4a20Jean Chalard        final AssetFileDescriptor afd = res.openRawResourceFd(fallbackResId);
112e150ef98569d61078e0f8c67ded8364a9c3d4a20Jean Chalard        Utils.setSystemLocale(res, savedLocale);
113e150ef98569d61078e0f8c67ded8364a9c3d4a20Jean Chalard
114cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard        if (afd == null) {
115cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard            Log.e(TAG, "Found the resource but cannot read it. Is it compressed? resId="
116cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard                    + fallbackResId);
117cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard            return null;
118cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard        }
119cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard        return AssetFileAddress.makeFromFileNameAndOffset(
120cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard                context.getApplicationInfo().sourceDir, afd.getStartOffset(), afd.getLength());
121cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard    }
122cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard
123cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard    /**
124d8f52a4f18d22aa150846b01017410ce70bbad6fJean Chalard     * Returns a list of file addresses for a given locale, trying relevant methods in order.
125cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     *
126d8f52a4f18d22aa150846b01017410ce70bbad6fJean Chalard     * Tries to get binary dictionaries from various sources, in order:
127d8f52a4f18d22aa150846b01017410ce70bbad6fJean Chalard     * - Uses a content provider to get a public dictionary set, as per the protocol described
128cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     *   in BinaryDictionaryFileDumper.
129cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     * If that fails:
130cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     * - Gets a file name from the fallback resource passed as an argument.
131cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     * If that fails:
132cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     * - Returns null.
133cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     * @return The address of a valid file, or null.
134cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard     */
135d8f52a4f18d22aa150846b01017410ce70bbad6fJean Chalard    public static List<AssetFileAddress> getDictionaryFiles(Locale locale, Context context,
136cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard            int fallbackResId) {
137fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard        try {
13828966734619251f78812f6a53f5efacbf5f77c49Jean Chalard            List<AssetFileAddress> cachedDictionaryList =
13928966734619251f78812f6a53f5efacbf5f77c49Jean Chalard                    BinaryDictionaryFileDumper.cacheDictionariesFromContentProvider(locale,
14028966734619251f78812f6a53f5efacbf5f77c49Jean Chalard                            context);
14128966734619251f78812f6a53f5efacbf5f77c49Jean Chalard            if (null != cachedDictionaryList) {
14228966734619251f78812f6a53f5efacbf5f77c49Jean Chalard                return cachedDictionaryList;
143cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard            }
144fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard            // If the list is null, fall through and return the fallback
145fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard        } catch (FileNotFoundException e) {
146fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard            Log.e(TAG, "Unable to create dictionary file from provider for locale "
147fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard                    + locale.toString() + ": falling back to internal dictionary");
148fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard        } catch (IOException e) {
149fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard            Log.e(TAG, "Unable to read source data for locale "
150fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard                    + locale.toString() + ": falling back to internal dictionary");
151cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard        }
152fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard        final AssetFileAddress fallbackAsset = loadFallbackResource(context, fallbackResId,
153fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard                locale);
154fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard        if (null == fallbackAsset) return null;
155fae8d60ee926e9f340392789119cf81655ad46e9Jean Chalard        return Arrays.asList(fallbackAsset);
156cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard    }
157cba93f50c3d46ada773ec49435689dc3e2094385Jean Chalard}
158