BinaryDictionaryFileDumper.java revision cb13d11c0af95b0621c012be9c01e04a463e40c5
1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.inputmethod.latin;
18
19import android.content.ContentProviderClient;
20import android.content.ContentResolver;
21import android.content.ContentValues;
22import android.content.Context;
23import android.content.res.AssetFileDescriptor;
24import android.database.Cursor;
25import android.net.Uri;
26import android.os.RemoteException;
27import android.text.TextUtils;
28import android.util.Log;
29
30import com.android.inputmethod.dictionarypack.DictionaryPackConstants;
31import com.android.inputmethod.latin.utils.CollectionUtils;
32import com.android.inputmethod.latin.utils.DictionaryInfoUtils;
33import com.android.inputmethod.latin.utils.DictionaryInfoUtils.DictionaryInfo;
34import com.android.inputmethod.latin.utils.MetadataFileUriGetter;
35
36import java.io.BufferedInputStream;
37import java.io.BufferedOutputStream;
38import java.io.Closeable;
39import java.io.File;
40import java.io.FileNotFoundException;
41import java.io.FileOutputStream;
42import java.io.IOException;
43import java.io.InputStream;
44import java.util.Arrays;
45import java.util.ArrayList;
46import java.util.Collections;
47import java.util.List;
48import java.util.Locale;
49
50/**
51 * Group class for static methods to help with creation and getting of the binary dictionary
52 * file from the dictionary provider
53 */
54public final class BinaryDictionaryFileDumper {
55    private static final String TAG = BinaryDictionaryFileDumper.class.getSimpleName();
56    private static final boolean DEBUG = false;
57
58    /**
59     * The size of the temporary buffer to copy files.
60     */
61    private static final int FILE_READ_BUFFER_SIZE = 8192;
62    // TODO: make the following data common with the native code
63    private static final byte[] MAGIC_NUMBER_VERSION_1 =
64            new byte[] { (byte)0x78, (byte)0xB1, (byte)0x00, (byte)0x00 };
65    private static final byte[] MAGIC_NUMBER_VERSION_2 =
66            new byte[] { (byte)0x9B, (byte)0xC1, (byte)0x3A, (byte)0xFE };
67
68    private static final String DICTIONARY_PROJECTION[] = { "id" };
69
70    private static final String QUERY_PARAMETER_MAY_PROMPT_USER = "mayPrompt";
71    private static final String QUERY_PARAMETER_TRUE = "true";
72    private static final String QUERY_PARAMETER_DELETE_RESULT = "result";
73    private static final String QUERY_PARAMETER_SUCCESS = "success";
74    private static final String QUERY_PARAMETER_FAILURE = "failure";
75
76    // Using protocol version 2 to communicate with the dictionary pack
77    private static final String QUERY_PARAMETER_PROTOCOL = "protocol";
78    private static final String QUERY_PARAMETER_PROTOCOL_VALUE = "2";
79
80    // The path fragment to append after the client ID for dictionary info requests.
81    private static final String QUERY_PATH_DICT_INFO = "dict";
82    // The path fragment to append after the client ID for dictionary datafile requests.
83    private static final String QUERY_PATH_DATAFILE = "datafile";
84    // The path fragment to append after the client ID for updating the metadata URI.
85    private static final String QUERY_PATH_METADATA = "metadata";
86    private static final String INSERT_METADATA_CLIENT_ID_COLUMN = "clientid";
87    private static final String INSERT_METADATA_METADATA_URI_COLUMN = "uri";
88    private static final String INSERT_METADATA_METADATA_ADDITIONAL_ID_COLUMN = "additionalid";
89
90    // Prevents this class to be accidentally instantiated.
91    private BinaryDictionaryFileDumper() {
92    }
93
94    /**
95     * Returns a URI builder pointing to the dictionary pack.
96     *
97     * This creates a URI builder able to build a URI pointing to the dictionary
98     * pack content provider for a specific dictionary id.
99     */
100    private static Uri.Builder getProviderUriBuilder(final String path) {
101        return new Uri.Builder().scheme(ContentResolver.SCHEME_CONTENT)
102                .authority(DictionaryPackConstants.AUTHORITY).appendPath(path);
103    }
104
105    /**
106     * Gets the content URI builder for a specified type.
107     *
108     * Supported types include QUERY_PATH_DICT_INFO, which takes the locale as
109     * the extraPath argument, and QUERY_PATH_DATAFILE, which needs a wordlist ID
110     * as the extraPath argument.
111     *
112     * @param clientId the clientId to use
113     * @param contentProviderClient the instance of content provider client
114     * @param queryPathType the path element encoding the type
115     * @param extraPath optional extra argument for this type (typically word list id)
116     * @return a builder that can build the URI for the best supported protocol version
117     * @throws RemoteException if the client can't be contacted
118     */
119    private static Uri.Builder getContentUriBuilderForType(final String clientId,
120            final ContentProviderClient contentProviderClient, final String queryPathType,
121            final String extraPath) throws RemoteException {
122        // Check whether protocol v2 is supported by building a v2 URI and calling getType()
123        // on it. If this returns null, v2 is not supported.
124        final Uri.Builder uriV2Builder = getProviderUriBuilder(clientId);
125        uriV2Builder.appendPath(queryPathType);
126        uriV2Builder.appendPath(extraPath);
127        uriV2Builder.appendQueryParameter(QUERY_PARAMETER_PROTOCOL,
128                QUERY_PARAMETER_PROTOCOL_VALUE);
129        if (null != contentProviderClient.getType(uriV2Builder.build())) return uriV2Builder;
130        // Protocol v2 is not supported, so create and return the protocol v1 uri.
131        return getProviderUriBuilder(extraPath);
132    }
133
134    /**
135     * Queries a content provider for the list of word lists for a specific locale
136     * available to copy into Latin IME.
137     */
138    private static List<WordListInfo> getWordListWordListInfos(final Locale locale,
139            final Context context, final boolean hasDefaultWordList) {
140        final String clientId = context.getString(R.string.dictionary_pack_client_id);
141        final ContentProviderClient client = context.getContentResolver().
142                acquireContentProviderClient(getProviderUriBuilder("").build());
143        if (null == client) return Collections.<WordListInfo>emptyList();
144
145        try {
146            final Uri.Builder builder = getContentUriBuilderForType(clientId, client,
147                    QUERY_PATH_DICT_INFO, locale.toString());
148            if (!hasDefaultWordList) {
149                builder.appendQueryParameter(QUERY_PARAMETER_MAY_PROMPT_USER,
150                        QUERY_PARAMETER_TRUE);
151            }
152            final Uri queryUri = builder.build();
153            final boolean isProtocolV2 = (QUERY_PARAMETER_PROTOCOL_VALUE.equals(
154                    queryUri.getQueryParameter(QUERY_PARAMETER_PROTOCOL)));
155
156            Cursor c = client.query(queryUri, DICTIONARY_PROJECTION, null, null, null);
157            if (isProtocolV2 && null == c) {
158                reinitializeClientRecordInDictionaryContentProvider(context, client, clientId);
159                c = client.query(queryUri, DICTIONARY_PROJECTION, null, null, null);
160            }
161            if (null == c) return Collections.<WordListInfo>emptyList();
162            if (c.getCount() <= 0 || !c.moveToFirst()) {
163                c.close();
164                return Collections.<WordListInfo>emptyList();
165            }
166            final ArrayList<WordListInfo> list = CollectionUtils.newArrayList();
167            do {
168                final String wordListId = c.getString(0);
169                final String wordListLocale = c.getString(1);
170                if (TextUtils.isEmpty(wordListId)) continue;
171                list.add(new WordListInfo(wordListId, wordListLocale));
172            } while (c.moveToNext());
173            c.close();
174            return list;
175        } catch (RemoteException e) {
176            // The documentation is unclear as to in which cases this may happen, but it probably
177            // happens when the content provider got suddenly killed because it crashed or because
178            // the user disabled it through Settings.
179            Log.e(TAG, "RemoteException: communication with the dictionary pack cut", e);
180            return Collections.<WordListInfo>emptyList();
181        } catch (Exception e) {
182            // A crash here is dangerous because crashing here would brick any encrypted device -
183            // we need the keyboard to be up and working to enter the password, so we don't want
184            // to die no matter what. So let's be as safe as possible.
185            Log.e(TAG, "Unexpected exception communicating with the dictionary pack", e);
186            return Collections.<WordListInfo>emptyList();
187        } finally {
188            client.release();
189        }
190    }
191
192
193    /**
194     * Helper method to encapsulate exception handling.
195     */
196    private static AssetFileDescriptor openAssetFileDescriptor(
197            final ContentProviderClient providerClient, final Uri uri) {
198        try {
199            return providerClient.openAssetFile(uri, "r");
200        } catch (FileNotFoundException e) {
201            // I don't want to log the word list URI here for security concerns. The exception
202            // contains the name of the file, so let's not pass it to Log.e here.
203            Log.e(TAG, "Could not find a word list from the dictionary provider."
204                    /* intentionally don't pass the exception (see comment above) */);
205            return null;
206        } catch (RemoteException e) {
207            Log.e(TAG, "Can't communicate with the dictionary pack", e);
208            return null;
209        }
210    }
211
212    /**
213     * Caches a word list the id of which is passed as an argument. This will write the file
214     * to the cache file name designated by its id and locale, overwriting it if already present
215     * and creating it (and its containing directory) if necessary.
216     */
217    private static void cacheWordList(final String wordlistId, final String locale,
218            final ContentProviderClient providerClient, final Context context) {
219        final int COMPRESSED_CRYPTED_COMPRESSED = 0;
220        final int CRYPTED_COMPRESSED = 1;
221        final int COMPRESSED_CRYPTED = 2;
222        final int COMPRESSED_ONLY = 3;
223        final int CRYPTED_ONLY = 4;
224        final int NONE = 5;
225        final int MODE_MIN = COMPRESSED_CRYPTED_COMPRESSED;
226        final int MODE_MAX = NONE;
227
228        final String clientId = context.getString(R.string.dictionary_pack_client_id);
229        final Uri.Builder wordListUriBuilder;
230        try {
231            wordListUriBuilder = getContentUriBuilderForType(clientId,
232                    providerClient, QUERY_PATH_DATAFILE, wordlistId /* extraPath */);
233        } catch (RemoteException e) {
234            Log.e(TAG, "Can't communicate with the dictionary pack", e);
235            return;
236        }
237        final String finalFileName =
238                DictionaryInfoUtils.getCacheFileName(wordlistId, locale, context);
239        String tempFileName;
240        try {
241            tempFileName = BinaryDictionaryGetter.getTempFileName(wordlistId, context);
242        } catch (IOException e) {
243            Log.e(TAG, "Can't open the temporary file", e);
244            return;
245        }
246
247        for (int mode = MODE_MIN; mode <= MODE_MAX; ++mode) {
248            final InputStream originalSourceStream;
249            InputStream inputStream = null;
250            InputStream uncompressedStream = null;
251            InputStream decryptedStream = null;
252            BufferedInputStream bufferedInputStream = null;
253            File outputFile = null;
254            BufferedOutputStream bufferedOutputStream = null;
255            AssetFileDescriptor afd = null;
256            final Uri wordListUri = wordListUriBuilder.build();
257            try {
258                // Open input.
259                afd = openAssetFileDescriptor(providerClient, wordListUri);
260                // If we can't open it at all, don't even try a number of times.
261                if (null == afd) return;
262                originalSourceStream = afd.createInputStream();
263                // Open output.
264                outputFile = new File(tempFileName);
265                // Just to be sure, delete the file. This may fail silently, and return false: this
266                // is the right thing to do, as we just want to continue anyway.
267                outputFile.delete();
268                // Get the appropriate decryption method for this try
269                switch (mode) {
270                    case COMPRESSED_CRYPTED_COMPRESSED:
271                        uncompressedStream =
272                                FileTransforms.getUncompressedStream(originalSourceStream);
273                        decryptedStream = FileTransforms.getDecryptedStream(uncompressedStream);
274                        inputStream = FileTransforms.getUncompressedStream(decryptedStream);
275                        break;
276                    case CRYPTED_COMPRESSED:
277                        decryptedStream = FileTransforms.getDecryptedStream(originalSourceStream);
278                        inputStream = FileTransforms.getUncompressedStream(decryptedStream);
279                        break;
280                    case COMPRESSED_CRYPTED:
281                        uncompressedStream =
282                                FileTransforms.getUncompressedStream(originalSourceStream);
283                        inputStream = FileTransforms.getDecryptedStream(uncompressedStream);
284                        break;
285                    case COMPRESSED_ONLY:
286                        inputStream = FileTransforms.getUncompressedStream(originalSourceStream);
287                        break;
288                    case CRYPTED_ONLY:
289                        inputStream = FileTransforms.getDecryptedStream(originalSourceStream);
290                        break;
291                    case NONE:
292                        inputStream = originalSourceStream;
293                        break;
294                }
295                bufferedInputStream = new BufferedInputStream(inputStream);
296                bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(outputFile));
297                checkMagicAndCopyFileTo(bufferedInputStream, bufferedOutputStream);
298                bufferedOutputStream.flush();
299                bufferedOutputStream.close();
300                final File finalFile = new File(finalFileName);
301                finalFile.delete();
302                if (!outputFile.renameTo(finalFile)) {
303                    throw new IOException("Can't move the file to its final name");
304                }
305                wordListUriBuilder.appendQueryParameter(QUERY_PARAMETER_DELETE_RESULT,
306                        QUERY_PARAMETER_SUCCESS);
307                if (0 >= providerClient.delete(wordListUriBuilder.build(), null, null)) {
308                    Log.e(TAG, "Could not have the dictionary pack delete a word list");
309                }
310                BinaryDictionaryGetter.removeFilesWithIdExcept(context, wordlistId, finalFile);
311                Log.e(TAG, "Successfully copied file for wordlist ID " + wordlistId);
312                // Success! Close files (through the finally{} clause) and return.
313                return;
314            } catch (Exception e) {
315                if (DEBUG) {
316                    Log.i(TAG, "Can't open word list in mode " + mode, e);
317                }
318                if (null != outputFile) {
319                    // This may or may not fail. The file may not have been created if the
320                    // exception was thrown before it could be. Hence, both failure and
321                    // success are expected outcomes, so we don't check the return value.
322                    outputFile.delete();
323                }
324                // Try the next method.
325            } finally {
326                // Ignore exceptions while closing files.
327                closeAssetFileDescriptorAndReportAnyException(afd);
328                closeCloseableAndReportAnyException(inputStream);
329                closeCloseableAndReportAnyException(uncompressedStream);
330                closeCloseableAndReportAnyException(decryptedStream);
331                closeCloseableAndReportAnyException(bufferedInputStream);
332                closeCloseableAndReportAnyException(bufferedOutputStream);
333            }
334        }
335
336        // We could not copy the file at all. This is very unexpected.
337        // I'd rather not print the word list ID to the log out of security concerns
338        Log.e(TAG, "Could not copy a word list. Will not be able to use it.");
339        // If we can't copy it we should warn the dictionary provider so that it can mark it
340        // as invalid.
341        wordListUriBuilder.appendQueryParameter(QUERY_PARAMETER_DELETE_RESULT,
342                QUERY_PARAMETER_FAILURE);
343        try {
344            if (0 >= providerClient.delete(wordListUriBuilder.build(), null, null)) {
345                Log.e(TAG, "In addition, we were unable to delete it.");
346            }
347        } catch (RemoteException e) {
348            Log.e(TAG, "In addition, communication with the dictionary provider was cut", e);
349        }
350    }
351
352    // Ideally the two following methods should be merged, but AssetFileDescriptor does not
353    // implement Closeable although it does implement #close(), and Java does not have
354    // structural typing.
355    private static void closeAssetFileDescriptorAndReportAnyException(
356            final AssetFileDescriptor file) {
357        try {
358            if (null != file) file.close();
359        } catch (Exception e) {
360            Log.e(TAG, "Exception while closing a file", e);
361        }
362    }
363
364    private static void closeCloseableAndReportAnyException(final Closeable file) {
365        try {
366            if (null != file) file.close();
367        } catch (Exception e) {
368            Log.e(TAG, "Exception while closing a file", e);
369        }
370    }
371
372    /**
373     * Queries a content provider for word list data for some locale and cache the returned files
374     *
375     * This will query a content provider for word list data for a given locale, and copy the
376     * files locally so that they can be mmap'ed. This may overwrite previously cached word lists
377     * with newer versions if a newer version is made available by the content provider.
378     * @throw FileNotFoundException if the provider returns non-existent data.
379     * @throw IOException if the provider-returned data could not be read.
380     */
381    public static void cacheWordListsFromContentProvider(final Locale locale,
382            final Context context, final boolean hasDefaultWordList) {
383        final ContentProviderClient providerClient;
384        try {
385            providerClient = context.getContentResolver().
386                acquireContentProviderClient(getProviderUriBuilder("").build());
387        } catch (final SecurityException e) {
388            Log.e(TAG, "No permission to communicate with the dictionary provider", e);
389            return;
390        }
391        if (null == providerClient) {
392            Log.e(TAG, "Can't establish communication with the dictionary provider");
393            return;
394        }
395        try {
396            final List<WordListInfo> idList = getWordListWordListInfos(locale, context,
397                    hasDefaultWordList);
398            for (WordListInfo id : idList) {
399                cacheWordList(id.mId, id.mLocale, providerClient, context);
400            }
401        } finally {
402            providerClient.release();
403        }
404    }
405
406    /**
407     * Copies the data in an input stream to a target file if the magic number matches.
408     *
409     * If the magic number does not match the expected value, this method throws an
410     * IOException. Other usual conditions for IOException or FileNotFoundException
411     * also apply.
412     *
413     * @param input the stream to be copied.
414     * @param output an output stream to copy the data to.
415     */
416    public static void checkMagicAndCopyFileTo(final BufferedInputStream input,
417            final BufferedOutputStream output) throws FileNotFoundException, IOException {
418        // Check the magic number
419        final int length = MAGIC_NUMBER_VERSION_2.length;
420        final byte[] magicNumberBuffer = new byte[length];
421        final int readMagicNumberSize = input.read(magicNumberBuffer, 0, length);
422        if (readMagicNumberSize < length) {
423            throw new IOException("Less bytes to read than the magic number length");
424        }
425        if (!Arrays.equals(MAGIC_NUMBER_VERSION_2, magicNumberBuffer)) {
426            if (!Arrays.equals(MAGIC_NUMBER_VERSION_1, magicNumberBuffer)) {
427                throw new IOException("Wrong magic number for downloaded file");
428            }
429        }
430        output.write(magicNumberBuffer);
431
432        // Actually copy the file
433        final byte[] buffer = new byte[FILE_READ_BUFFER_SIZE];
434        for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer))
435            output.write(buffer, 0, readBytes);
436        input.close();
437    }
438
439    private static void reinitializeClientRecordInDictionaryContentProvider(final Context context,
440            final ContentProviderClient client, final String clientId) throws RemoteException {
441        final String metadataFileUri = MetadataFileUriGetter.getMetadataUri(context);
442        final String metadataAdditionalId = MetadataFileUriGetter.getMetadataAdditionalId(context);
443        // Tell the content provider to reset all information about this client id
444        final Uri metadataContentUri = getProviderUriBuilder(clientId)
445                .appendPath(QUERY_PATH_METADATA)
446                .appendQueryParameter(QUERY_PARAMETER_PROTOCOL, QUERY_PARAMETER_PROTOCOL_VALUE)
447                .build();
448        client.delete(metadataContentUri, null, null);
449        // Update the metadata URI
450        final ContentValues metadataValues = new ContentValues();
451        metadataValues.put(INSERT_METADATA_CLIENT_ID_COLUMN, clientId);
452        metadataValues.put(INSERT_METADATA_METADATA_URI_COLUMN, metadataFileUri);
453        metadataValues.put(INSERT_METADATA_METADATA_ADDITIONAL_ID_COLUMN, metadataAdditionalId);
454        client.insert(metadataContentUri, metadataValues);
455
456        // Update the dictionary list.
457        final Uri dictionaryContentUriBase = getProviderUriBuilder(clientId)
458                .appendPath(QUERY_PATH_DICT_INFO)
459                .appendQueryParameter(QUERY_PARAMETER_PROTOCOL, QUERY_PARAMETER_PROTOCOL_VALUE)
460                .build();
461        final ArrayList<DictionaryInfo> dictionaryList =
462                DictionaryInfoUtils.getCurrentDictionaryFileNameAndVersionInfo(context);
463        final int length = dictionaryList.size();
464        for (int i = 0; i < length; ++i) {
465            final DictionaryInfo info = dictionaryList.get(i);
466            client.insert(Uri.withAppendedPath(dictionaryContentUriBase, info.mId),
467                    info.toContentValues());
468        }
469    }
470
471    /**
472     * Initialize a client record with the dictionary content provider.
473     *
474     * This merely acquires the content provider and calls
475     * #reinitializeClientRecordInDictionaryContentProvider.
476     *
477     * @param context the context for resources and providers.
478     * @param clientId the client ID to use.
479     */
480    public static void initializeClientRecordHelper(final Context context,
481            final String clientId) {
482        try {
483            final ContentProviderClient client = context.getContentResolver().
484                    acquireContentProviderClient(getProviderUriBuilder("").build());
485            if (null == client) return;
486            reinitializeClientRecordInDictionaryContentProvider(context, client, clientId);
487        } catch (RemoteException e) {
488            Log.e(TAG, "Cannot contact the dictionary content provider", e);
489        }
490    }
491}
492