BinaryDictOffdeviceUtils.java revision 0044df6cf2f4ef00d78e530220565b8272187446
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
1677fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard
1777fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalardpackage com.android.inputmethod.latin.dicttool;
1877fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard
19b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalardimport com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
20b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard
21b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalardimport java.io.File;
22b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalardimport java.io.BufferedInputStream;
23b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalardimport java.io.BufferedOutputStream;
24b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalardimport java.io.FileInputStream;
25b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalardimport java.io.FileOutputStream;
2677fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalardimport java.io.IOException;
2777fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalardimport java.io.InputStream;
2877fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalardimport java.io.OutputStream;
29b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalardimport java.util.ArrayList;
3077fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard
3177fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard/**
32b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard * Class grouping utilities for offline dictionary making.
33b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard *
34b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard * Those should not be used on-device, essentially because they are quite
35b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard * liberal about I/O and performance.
36b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard */
37b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalardpublic final class BinaryDictOffdeviceUtils {
38b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    // Prefix and suffix are arbitrary, the values do not really matter
39b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    private final static String PREFIX = "dicttool";
40b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    private final static String SUFFIX = ".tmp";
41b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard
42b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    public final static String COMPRESSION = "compression";
430044df6cf2f4ef00d78e530220565b8272187446Jean Chalard    public final static String ENCRYPTION = "encryption";
44b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard
45b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    public static class DecoderChainSpec {
46b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        ArrayList<String> mDecoderSpec = new ArrayList<String>();
47b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        File mFile;
48b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        public DecoderChainSpec addStep(final String stepDescription) {
49b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            mDecoderSpec.add(stepDescription);
50b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            return this;
51b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        }
52b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    }
53b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard
5477fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard    public static void copy(final InputStream input, final OutputStream output) throws IOException {
5577fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard        final byte[] buffer = new byte[1000];
5677fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard        final BufferedInputStream in = new BufferedInputStream(input);
5777fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard        final BufferedOutputStream out = new BufferedOutputStream(output);
5877fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard        for (int readBytes = in.read(buffer); readBytes >= 0; readBytes = in.read(buffer))
5977fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard            output.write(buffer, 0, readBytes);
6077fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard        in.close();
6177fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard        out.close();
6277fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard    }
63b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard
64b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    /**
65b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     * Returns a decrypted/uncompressed binary dictionary.
66b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     *
67b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     * This will decrypt/uncompress any number of times as necessary until it finds the binary
68b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     * dictionary signature, and copy the decoded file to a temporary place.
69b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     * If this is not a binary dictionary, the method returns null.
70b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     */
71b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) {
72b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        return getRawBinaryDictionaryOrNullInternal(new DecoderChainSpec(), src);
73b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    }
74b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard
75b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal(
76b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            final DecoderChainSpec spec, final File src) {
77b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        // TODO: arrange for the intermediary files to be deleted
78b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        if (BinaryDictInputOutput.isBinaryDictionary(src)) {
79b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            spec.mFile = src;
80b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            return spec;
81b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        }
82b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        // It's not a raw dictionary - try to see if it's compressed.
83b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        final File uncompressedFile = tryGetUncompressedFile(src);
84b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        if (null != uncompressedFile) {
85b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            final DecoderChainSpec newSpec =
86b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard                    getRawBinaryDictionaryOrNullInternal(spec, uncompressedFile);
87b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            if (null == newSpec) return null;
88b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            return newSpec.addStep(COMPRESSION);
89b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        }
900044df6cf2f4ef00d78e530220565b8272187446Jean Chalard        // It's not a compressed either - try to see if it's crypted.
910044df6cf2f4ef00d78e530220565b8272187446Jean Chalard        final File decryptedFile = tryGetDecryptedFile(src);
920044df6cf2f4ef00d78e530220565b8272187446Jean Chalard        if (null != decryptedFile) {
930044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            final DecoderChainSpec newSpec =
940044df6cf2f4ef00d78e530220565b8272187446Jean Chalard                    getRawBinaryDictionaryOrNullInternal(spec, decryptedFile);
950044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            if (null == newSpec) return null;
960044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            return newSpec.addStep(ENCRYPTION);
970044df6cf2f4ef00d78e530220565b8272187446Jean Chalard        }
98b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        return null;
99b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    }
100b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard
101b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    /* Try to uncompress the file passed as an argument.
102b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     *
103b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
104b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     * is returned.
105b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard     */
106b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    private static File tryGetUncompressedFile(final File src) {
107b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        try {
108b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            final File dst = File.createTempFile(PREFIX, SUFFIX);
109b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            final FileOutputStream dstStream = new FileOutputStream(dst);
110b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))),
111b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard                    new BufferedOutputStream(dstStream)); // #copy() closes the streams
112b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            return dst;
113b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        } catch (IOException e) {
114b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            // Could not uncompress the file: presumably the file is simply not a compressed file
115b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard            return null;
116b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard        }
117b3c98901c5fc1460b54cdf27d74405f27c88e74bJean Chalard    }
1180044df6cf2f4ef00d78e530220565b8272187446Jean Chalard
1190044df6cf2f4ef00d78e530220565b8272187446Jean Chalard    /* Try to decrypt the file passed as an argument.
1200044df6cf2f4ef00d78e530220565b8272187446Jean Chalard     *
1210044df6cf2f4ef00d78e530220565b8272187446Jean Chalard     * If the file can be decrypted, the decrypted version is returned. Otherwise, null
1220044df6cf2f4ef00d78e530220565b8272187446Jean Chalard     * is returned.
1230044df6cf2f4ef00d78e530220565b8272187446Jean Chalard     */
1240044df6cf2f4ef00d78e530220565b8272187446Jean Chalard    private static File tryGetDecryptedFile(final File src) {
1250044df6cf2f4ef00d78e530220565b8272187446Jean Chalard        try {
1260044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            final File dst = File.createTempFile(PREFIX, SUFFIX);
1270044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            final FileOutputStream dstStream = new FileOutputStream(dst);
1280044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            copy(Crypt.getDecryptedStream(new BufferedInputStream(new FileInputStream(src))),
1290044df6cf2f4ef00d78e530220565b8272187446Jean Chalard                    dstStream); // #copy() closes the streams
1300044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            return dst;
1310044df6cf2f4ef00d78e530220565b8272187446Jean Chalard        } catch (IOException e) {
1320044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            // Could not uncompress the file: presumably the file is simply not a compressed file
1330044df6cf2f4ef00d78e530220565b8272187446Jean Chalard            return null;
1340044df6cf2f4ef00d78e530220565b8272187446Jean Chalard        }
1350044df6cf2f4ef00d78e530220565b8272187446Jean Chalard    }
13677fe603a3d82f5fc28816520bac479ff48bf15e5Jean Chalard}
137