1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17package com.android.inputmethod.latin.dicttool; 18 19import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; 20import com.android.inputmethod.latin.makedict.DictDecoder; 21import com.android.inputmethod.latin.makedict.FormatSpec; 22import com.android.inputmethod.latin.makedict.FusionDictionary; 23import com.android.inputmethod.latin.makedict.UnsupportedFormatException; 24 25import org.xml.sax.SAXException; 26 27import java.io.File; 28import java.io.BufferedInputStream; 29import java.io.BufferedOutputStream; 30import java.io.FileInputStream; 31import java.io.FileOutputStream; 32import java.io.IOException; 33import java.io.InputStream; 34import java.io.OutputStream; 35import java.util.ArrayList; 36 37import javax.xml.parsers.ParserConfigurationException; 38 39/** 40 * Class grouping utilities for offline dictionary making. 41 * 42 * Those should not be used on-device, essentially because they are quite 43 * liberal about I/O and performance. 44 */ 45public final class BinaryDictOffdeviceUtils { 46 // Prefix and suffix are arbitrary, the values do not really matter 47 private final static String PREFIX = "dicttool"; 48 private final static String SUFFIX = ".tmp"; 49 50 public final static String COMPRESSION = "compressed"; 51 public final static String ENCRYPTION = "encrypted"; 52 53 private final static int MAX_DECODE_DEPTH = 8; 54 55 public static class DecoderChainSpec { 56 ArrayList<String> mDecoderSpec = new ArrayList<String>(); 57 File mFile; 58 public DecoderChainSpec addStep(final String stepDescription) { 59 mDecoderSpec.add(stepDescription); 60 return this; 61 } 62 public String describeChain() { 63 final StringBuilder s = new StringBuilder("raw"); 64 for (final String step : mDecoderSpec) { 65 s.append(" > "); 66 s.append(step); 67 } 68 return s.toString(); 69 } 70 } 71 72 public static void copy(final InputStream input, final OutputStream output) throws IOException { 73 final byte[] buffer = new byte[1000]; 74 final BufferedInputStream in = new BufferedInputStream(input); 75 final BufferedOutputStream out = new BufferedOutputStream(output); 76 for (int readBytes = in.read(buffer); readBytes >= 0; readBytes = in.read(buffer)) 77 output.write(buffer, 0, readBytes); 78 in.close(); 79 out.close(); 80 } 81 82 /** 83 * Returns a decrypted/uncompressed dictionary. 84 * 85 * This will decrypt/uncompress any number of times as necessary until it finds the 86 * dictionary signature, and copy the decoded file to a temporary place. 87 * If this is not a dictionary, the method returns null. 88 */ 89 public static DecoderChainSpec getRawDictionaryOrNull(final File src) { 90 return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0); 91 } 92 93 private static DecoderChainSpec getRawDictionaryOrNullInternal( 94 final DecoderChainSpec spec, final File src, final int depth) { 95 // Unfortunately the decoding scheme we use can consider any data to be encrypted 96 // and will product some output, meaning it's not possible to reliably detect encrypted 97 // data. Thus, some non-dictionary files (especially small) ones may successfully decrypt 98 // over and over, ending in a stack overflow. Hence we limit the depth at which we try 99 // decoding the file. 100 if (depth > MAX_DECODE_DEPTH) return null; 101 if (BinaryDictDecoderUtils.isBinaryDictionary(src) 102 || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) { 103 spec.mFile = src; 104 return spec; 105 } 106 // It's not a raw dictionary - try to see if it's compressed. 107 final File uncompressedFile = tryGetUncompressedFile(src); 108 if (null != uncompressedFile) { 109 final DecoderChainSpec newSpec = 110 getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1); 111 if (null == newSpec) return null; 112 return newSpec.addStep(COMPRESSION); 113 } 114 // It's not a compressed either - try to see if it's crypted. 115 final File decryptedFile = tryGetDecryptedFile(src); 116 if (null != decryptedFile) { 117 final DecoderChainSpec newSpec = 118 getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1); 119 if (null == newSpec) return null; 120 return newSpec.addStep(ENCRYPTION); 121 } 122 return null; 123 } 124 125 /* Try to uncompress the file passed as an argument. 126 * 127 * If the file can be uncompressed, the uncompressed version is returned. Otherwise, null 128 * is returned. 129 */ 130 private static File tryGetUncompressedFile(final File src) { 131 try { 132 final File dst = File.createTempFile(PREFIX, SUFFIX); 133 dst.deleteOnExit(); 134 final FileOutputStream dstStream = new FileOutputStream(dst); 135 copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))), 136 new BufferedOutputStream(dstStream)); // #copy() closes the streams 137 return dst; 138 } catch (IOException e) { 139 // Could not uncompress the file: presumably the file is simply not a compressed file 140 return null; 141 } 142 } 143 144 /* Try to decrypt the file passed as an argument. 145 * 146 * If the file can be decrypted, the decrypted version is returned. Otherwise, null 147 * is returned. 148 */ 149 private static File tryGetDecryptedFile(final File src) { 150 try { 151 final File dst = File.createTempFile(PREFIX, SUFFIX); 152 dst.deleteOnExit(); 153 final FileOutputStream dstStream = new FileOutputStream(dst); 154 copy(Crypt.getDecryptedStream(new BufferedInputStream(new FileInputStream(src))), 155 dstStream); // #copy() closes the streams 156 return dst; 157 } catch (IOException e) { 158 // Could not decrypt the file: presumably the file is simply not a crypted file 159 return null; 160 } 161 } 162 163 static void crash(final String filename, final Exception e) { 164 throw new RuntimeException("Can't read file " + filename, e); 165 } 166 167 static FusionDictionary getDictionary(final String filename, final boolean report) { 168 final File file = new File(filename); 169 if (report) { 170 System.out.println("Dictionary : " + file.getAbsolutePath()); 171 System.out.println("Size : " + file.length() + " bytes"); 172 } 173 try { 174 if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) { 175 if (report) System.out.println("Format : XML unigram list"); 176 return XmlDictInputOutput.readDictionaryXml( 177 new BufferedInputStream(new FileInputStream(file)), 178 null /* shortcuts */, null /* bigrams */); 179 } else { 180 final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file); 181 if (null == decodedSpec) { 182 crash(filename, new RuntimeException( 183 filename + " does not seem to be a dictionary file")); 184 } else if (CombinedInputOutput.isCombinedDictionary( 185 decodedSpec.mFile.getAbsolutePath())){ 186 if (report) { 187 System.out.println("Format : Combined format"); 188 System.out.println("Packaging : " + decodedSpec.describeChain()); 189 System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); 190 } 191 return CombinedInputOutput.readDictionaryCombined( 192 new BufferedInputStream(new FileInputStream(decodedSpec.mFile))); 193 } else { 194 final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile, 195 DictDecoder.USE_BYTEARRAY); 196 if (report) { 197 System.out.println("Format : Binary dictionary format"); 198 System.out.println("Packaging : " + decodedSpec.describeChain()); 199 System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); 200 } 201 return dictDecoder.readDictionaryBinary(null, false /* deleteDictIfBroken */); 202 } 203 } 204 } catch (IOException e) { 205 crash(filename, e); 206 } catch (SAXException e) { 207 crash(filename, e); 208 } catch (ParserConfigurationException e) { 209 crash(filename, e); 210 } catch (UnsupportedFormatException e) { 211 crash(filename, e); 212 } 213 return null; 214 } 215} 216