1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.dicttool;
18
19import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
20import com.android.inputmethod.latin.makedict.DictDecoder;
21import com.android.inputmethod.latin.makedict.FormatSpec;
22import com.android.inputmethod.latin.makedict.FusionDictionary;
23import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
24
25import org.xml.sax.SAXException;
26
27import java.io.File;
28import java.io.BufferedInputStream;
29import java.io.BufferedOutputStream;
30import java.io.FileInputStream;
31import java.io.FileOutputStream;
32import java.io.IOException;
33import java.io.InputStream;
34import java.io.OutputStream;
35import java.util.ArrayList;
36
37import javax.xml.parsers.ParserConfigurationException;
38
39/**
40 * Class grouping utilities for offline dictionary making.
41 *
42 * Those should not be used on-device, essentially because they are quite
43 * liberal about I/O and performance.
44 */
45public final class BinaryDictOffdeviceUtils {
46    // Prefix and suffix are arbitrary, the values do not really matter
47    private final static String PREFIX = "dicttool";
48    private final static String SUFFIX = ".tmp";
49
50    public final static String COMPRESSION = "compressed";
51    public final static String ENCRYPTION = "encrypted";
52
53    private final static int MAX_DECODE_DEPTH = 8;
54
55    public static class DecoderChainSpec {
56        ArrayList<String> mDecoderSpec = new ArrayList<String>();
57        File mFile;
58        public DecoderChainSpec addStep(final String stepDescription) {
59            mDecoderSpec.add(stepDescription);
60            return this;
61        }
62        public String describeChain() {
63            final StringBuilder s = new StringBuilder("raw");
64            for (final String step : mDecoderSpec) {
65                s.append(" > ");
66                s.append(step);
67            }
68            return s.toString();
69        }
70    }
71
72    public static void copy(final InputStream input, final OutputStream output) throws IOException {
73        final byte[] buffer = new byte[1000];
74        final BufferedInputStream in = new BufferedInputStream(input);
75        final BufferedOutputStream out = new BufferedOutputStream(output);
76        for (int readBytes = in.read(buffer); readBytes >= 0; readBytes = in.read(buffer))
77            output.write(buffer, 0, readBytes);
78        in.close();
79        out.close();
80    }
81
82    /**
83     * Returns a decrypted/uncompressed dictionary.
84     *
85     * This will decrypt/uncompress any number of times as necessary until it finds the
86     * dictionary signature, and copy the decoded file to a temporary place.
87     * If this is not a dictionary, the method returns null.
88     */
89    public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
90        return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
91    }
92
93    private static DecoderChainSpec getRawDictionaryOrNullInternal(
94            final DecoderChainSpec spec, final File src, final int depth) {
95        // Unfortunately the decoding scheme we use can consider any data to be encrypted
96        // and will product some output, meaning it's not possible to reliably detect encrypted
97        // data. Thus, some non-dictionary files (especially small) ones may successfully decrypt
98        // over and over, ending in a stack overflow. Hence we limit the depth at which we try
99        // decoding the file.
100        if (depth > MAX_DECODE_DEPTH) return null;
101        if (BinaryDictDecoderUtils.isBinaryDictionary(src)
102                || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
103            spec.mFile = src;
104            return spec;
105        }
106        // It's not a raw dictionary - try to see if it's compressed.
107        final File uncompressedFile = tryGetUncompressedFile(src);
108        if (null != uncompressedFile) {
109            final DecoderChainSpec newSpec =
110                    getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
111            if (null == newSpec) return null;
112            return newSpec.addStep(COMPRESSION);
113        }
114        // It's not a compressed either - try to see if it's crypted.
115        final File decryptedFile = tryGetDecryptedFile(src);
116        if (null != decryptedFile) {
117            final DecoderChainSpec newSpec =
118                    getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
119            if (null == newSpec) return null;
120            return newSpec.addStep(ENCRYPTION);
121        }
122        return null;
123    }
124
125    /* Try to uncompress the file passed as an argument.
126     *
127     * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
128     * is returned.
129     */
130    private static File tryGetUncompressedFile(final File src) {
131        try {
132            final File dst = File.createTempFile(PREFIX, SUFFIX);
133            dst.deleteOnExit();
134            final FileOutputStream dstStream = new FileOutputStream(dst);
135            copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))),
136                    new BufferedOutputStream(dstStream)); // #copy() closes the streams
137            return dst;
138        } catch (IOException e) {
139            // Could not uncompress the file: presumably the file is simply not a compressed file
140            return null;
141        }
142    }
143
144    /* Try to decrypt the file passed as an argument.
145     *
146     * If the file can be decrypted, the decrypted version is returned. Otherwise, null
147     * is returned.
148     */
149    private static File tryGetDecryptedFile(final File src) {
150        try {
151            final File dst = File.createTempFile(PREFIX, SUFFIX);
152            dst.deleteOnExit();
153            final FileOutputStream dstStream = new FileOutputStream(dst);
154            copy(Crypt.getDecryptedStream(new BufferedInputStream(new FileInputStream(src))),
155                    dstStream); // #copy() closes the streams
156            return dst;
157        } catch (IOException e) {
158            // Could not decrypt the file: presumably the file is simply not a crypted file
159            return null;
160        }
161    }
162
163    static void crash(final String filename, final Exception e) {
164        throw new RuntimeException("Can't read file " + filename, e);
165    }
166
167    static FusionDictionary getDictionary(final String filename, final boolean report) {
168        final File file = new File(filename);
169        if (report) {
170            System.out.println("Dictionary : " + file.getAbsolutePath());
171            System.out.println("Size : " + file.length() + " bytes");
172        }
173        try {
174            if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) {
175                if (report) System.out.println("Format : XML unigram list");
176                return XmlDictInputOutput.readDictionaryXml(
177                        new BufferedInputStream(new FileInputStream(file)),
178                        null /* shortcuts */, null /* bigrams */);
179            } else {
180                final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
181                if (null == decodedSpec) {
182                    crash(filename, new RuntimeException(
183                            filename + " does not seem to be a dictionary file"));
184                } else if (CombinedInputOutput.isCombinedDictionary(
185                        decodedSpec.mFile.getAbsolutePath())){
186                    if (report) {
187                        System.out.println("Format : Combined format");
188                        System.out.println("Packaging : " + decodedSpec.describeChain());
189                        System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
190                    }
191                    return CombinedInputOutput.readDictionaryCombined(
192                            new BufferedInputStream(new FileInputStream(decodedSpec.mFile)));
193                } else {
194                    final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile,
195                            DictDecoder.USE_BYTEARRAY);
196                    if (report) {
197                        System.out.println("Format : Binary dictionary format");
198                        System.out.println("Packaging : " + decodedSpec.describeChain());
199                        System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
200                    }
201                    return dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */);
202                }
203            }
204        } catch (IOException e) {
205            crash(filename, e);
206        } catch (SAXException e) {
207            crash(filename, e);
208        } catch (ParserConfigurationException e) {
209            crash(filename, e);
210        } catch (UnsupportedFormatException e) {
211            crash(filename, e);
212        }
213        return null;
214    }
215}
216