1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 * use this file except in compliance with the License. You may obtain a copy of
6 * the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 * License for the specific language governing permissions and limitations under
14 * the License.
15 */
16
17package com.android.inputmethod.latin.dicttool;
18
19import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
20import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
21import com.android.inputmethod.latin.makedict.DictDecoder;
22import com.android.inputmethod.latin.makedict.DictionaryHeader;
23import com.android.inputmethod.latin.makedict.FormatSpec;
24import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
25import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
26import com.android.inputmethod.latin.makedict.FusionDictionary;
27import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
28
29import java.io.BufferedInputStream;
30import java.io.BufferedOutputStream;
31import java.io.BufferedReader;
32import java.io.File;
33import java.io.FileInputStream;
34import java.io.FileNotFoundException;
35import java.io.FileOutputStream;
36import java.io.IOException;
37import java.io.InputStream;
38import java.io.InputStreamReader;
39import java.io.OutputStream;
40import java.util.HashMap;
41
42import javax.annotation.Nonnull;
43import javax.annotation.Nullable;
44
45/**
46 * Class grouping utilities for offline dictionary making.
47 *
48 * Those should not be used on-device, essentially because they are quite
49 * liberal about I/O and performance.
50 */
51public final class BinaryDictOffdeviceUtils {
52    // Prefix and suffix are arbitrary, the values do not really matter
53    private final static String PREFIX = "dicttool";
54    private final static String SUFFIX = ".tmp";
55    private final static int COPY_BUFFER_SIZE = 8192;
56
57    public static class DecoderChainSpec<T> {
58        public final static int COMPRESSION = 1;
59        public final static int ENCRYPTION = 2;
60
61        private final static int[][] VALID_DECODER_CHAINS = {
62            { }, { COMPRESSION }, { ENCRYPTION, COMPRESSION }
63        };
64
65        private final int mDecoderSpecIndex;
66        public T mResult;
67
68        public DecoderChainSpec() {
69            mDecoderSpecIndex = 0;
70            mResult = null;
71        }
72
73        private DecoderChainSpec(final DecoderChainSpec<T> src) {
74            mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
75            mResult = src.mResult;
76        }
77
78        private String getStepDescription(final int step) {
79            switch (step) {
80            case COMPRESSION:
81                return "compression";
82            case ENCRYPTION:
83                return "encryption";
84            default:
85                return "unknown";
86            }
87        }
88
89        public String describeChain() {
90            final StringBuilder s = new StringBuilder("raw");
91            for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
92                s.append(" > ");
93                s.append(getStepDescription(step));
94            }
95            return s.toString();
96        }
97
98        /**
99         * Returns the next sequential spec. If exhausted, return null.
100         */
101        public DecoderChainSpec next() {
102            if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) {
103                return null;
104            }
105            return new DecoderChainSpec(this);
106        }
107
108        public InputStream getStream(final File src) throws FileNotFoundException, IOException {
109            InputStream input = new BufferedInputStream(new FileInputStream(src));
110            for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
111                switch (step) {
112                case COMPRESSION:
113                    input = Compress.getUncompressedStream(input);
114                    break;
115                case ENCRYPTION:
116                    input = Crypt.getDecryptedStream(input);
117                    break;
118                }
119            }
120            return input;
121        }
122    }
123
124    public interface InputProcessor<T> {
125        @Nonnull
126        public T process(@Nonnull final InputStream input)
127                throws IOException, UnsupportedFormatException;
128    }
129
130    public static class CopyProcessor implements InputProcessor<File> {
131        @Override @Nonnull
132        public File process(@Nonnull final InputStream input) throws IOException,
133                UnsupportedFormatException {
134            final File dst = File.createTempFile(PREFIX, SUFFIX);
135            dst.deleteOnExit();
136            try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))) {
137                copy(input, output);
138                output.flush();
139                output.close();
140                if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
141                        || CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) {
142                    return dst;
143                }
144            }
145            throw new UnsupportedFormatException("Input stream not at the expected format");
146        }
147    }
148
149    public static class HeaderReaderProcessor implements InputProcessor<DictionaryHeader> {
150        // Arbitrarily limit the header length to 32k. Sounds like it would never be larger
151        // than this. Revisit this if needed later.
152        private final int MAX_HEADER_LENGTH = 32 * 1024;
153        @Override @Nonnull
154        public DictionaryHeader process(final InputStream input) throws IOException,
155                UnsupportedFormatException {
156            // Do everything as curtly and ad-hoc as possible for performance.
157            final byte[] tmpBuffer = new byte[12];
158            if (tmpBuffer.length != input.read(tmpBuffer)) {
159                throw new UnsupportedFormatException("File too short, not a dictionary");
160            }
161            // Ad-hoc check for the magic number. See FormatSpec.java as well as
162            // byte_array_utils.h and BinaryDictEncoderUtils#writeDictionaryHeader().
163            final int MAGIC_NUMBER_START_OFFSET = 0;
164            final int VERSION_START_OFFSET = 4;
165            final int HEADER_SIZE_OFFSET = 8;
166            final int magicNumber = ((tmpBuffer[MAGIC_NUMBER_START_OFFSET] & 0xFF) << 24)
167                    + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 1] & 0xFF) << 16)
168                    + ((tmpBuffer[MAGIC_NUMBER_START_OFFSET + 2] & 0xFF) << 8)
169                    + (tmpBuffer[MAGIC_NUMBER_START_OFFSET + 3] & 0xFF);
170            if (magicNumber != FormatSpec.MAGIC_NUMBER) {
171                throw new UnsupportedFormatException("Wrong magic number");
172            }
173            final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
174                    + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
175            if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
176                    && version != FormatSpec.VERSION202) {
177                throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
178            }
179            final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24)
180                    + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16)
181                    + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8)
182                    + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
183            if (totalHeaderSize > MAX_HEADER_LENGTH) {
184                throw new UnsupportedFormatException("Header too large");
185            }
186            final byte[] headerBuffer = new byte[totalHeaderSize - tmpBuffer.length];
187            readStreamExhaustively(input, headerBuffer);
188            final HashMap<String, String> attributes =
189                    BinaryDictDecoderUtils.decodeHeaderAttributes(headerBuffer);
190            return new DictionaryHeader(totalHeaderSize, new DictionaryOptions(attributes),
191                    new FormatOptions(version, false /* hasTimestamp */));
192        }
193    }
194
195    private static void readStreamExhaustively(final InputStream inputStream,
196            final byte[] outBuffer) throws IOException, UnsupportedFormatException {
197        int readBytes = 0;
198        int readBytesLastCycle = -1;
199        while (readBytes != outBuffer.length) {
200            readBytesLastCycle = inputStream.read(outBuffer, readBytes,
201                    outBuffer.length - readBytes);
202            if (readBytesLastCycle == -1)
203                throw new UnsupportedFormatException("File shorter than specified in the header"
204                        + " (expected " + outBuffer.length + ", read " + readBytes + ")");
205            readBytes += readBytesLastCycle;
206        }
207    }
208
209    public static void copy(final InputStream input, final OutputStream output) throws IOException {
210        final byte[] buffer = new byte[COPY_BUFFER_SIZE];
211        for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
212            output.write(buffer, 0, readBytes);
213        }
214    }
215
216    /**
217     * Process a dictionary, decrypting/uncompressing it on the fly as necessary.
218     *
219     * This will execute the given processor repeatedly with the possible alternatives
220     * for dictionary format until the processor does not throw an exception.
221     * If the processor succeeds for none of the possible formats, the method returns null.
222     */
223    @Nullable
224    public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src,
225            @Nonnull final InputProcessor<T> processor) {
226        @Nonnull DecoderChainSpec spec = new DecoderChainSpec();
227        while (null != spec) {
228            try {
229                final InputStream input = spec.getStream(src);
230                spec.mResult = processor.process(input);
231                try {
232                    input.close();
233                } catch (IOException e) {
234                    // CipherInputStream doesn't like being closed without having read the
235                    // entire stream, for some reason. But we don't want to because it's a waste
236                    // of resources. We really, really don't care about this.
237                    // However on close() CipherInputStream does throw this exception, wrapped
238                    // in an IOException so we need to catch it.
239                    if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
240                        throw e;
241                    }
242                }
243                return spec;
244            } catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
245                // If the format is not the right one for this file, the processor will throw one
246                // of these exceptions. In our case, that means we should try the next spec,
247                // since it may still be at another format we haven't tried yet.
248                // TODO: stop using exceptions for this non-exceptional case.
249            }
250            spec = spec.next();
251        }
252        return null;
253    }
254
255    /**
256     * Get a decoder chain spec with a raw dictionary file. This makes a new file on the
257     * disk ready for any treatment the client wants.
258     */
259    @Nullable
260    public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) {
261        return decodeDictionaryForProcess(src, new CopyProcessor());
262    }
263
264    static FusionDictionary getDictionary(final String filename, final boolean report) {
265        final File file = new File(filename);
266        if (report) {
267            System.out.println("Dictionary : " + file.getAbsolutePath());
268            System.out.println("Size : " + file.length() + " bytes");
269        }
270        try {
271            final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file);
272            if (null == decodedSpec) {
273                throw new RuntimeException("Does not seem to be a dictionary file " + filename);
274            }
275            if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) {
276                if (report) {
277                    System.out.println("Format : Combined format");
278                    System.out.println("Packaging : " + decodedSpec.describeChain());
279                    System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
280                }
281                try (final BufferedReader reader = new BufferedReader(
282                        new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) {
283                    return CombinedInputOutput.readDictionaryCombined(reader);
284                }
285            }
286            final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
287                    decodedSpec.mResult, 0, decodedSpec.mResult.length(),
288                    DictDecoder.USE_BYTEARRAY);
289            if (report) {
290                System.out.println("Format : Binary dictionary format");
291                System.out.println("Packaging : " + decodedSpec.describeChain());
292                System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
293            }
294            return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
295        } catch (final IOException | UnsupportedFormatException e) {
296            throw new RuntimeException("Can't read file " + filename, e);
297        }
298    }
299}
300