/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Arrays;
import java.util.LinkedList;

import javax.xml.parsers.ParserConfigurationException;

import org.xml.sax.SAXException;

/**
 * Main class/method for DictionaryMaker.
 *
 * Parses the command line, reads one source dictionary (binary, or unigram XML with an
 * optional bigram XML file) and writes it back out as binary and/or XML.
 */
public class DictionaryMaker {

    /**
     * Parsed command line arguments.
     *
     * Instances are immutable. The constructor validates that exactly one input and at least
     * one output were given, and throws a RuntimeException otherwise.
     */
    static class Arguments {
        private static final String OPTION_VERSION_2 = "-2";
        private static final String OPTION_INPUT_SOURCE = "-s";
        private static final String OPTION_INPUT_BIGRAM_XML = "-b";
        private static final String OPTION_OUTPUT_BINARY = "-d";
        private static final String OPTION_OUTPUT_XML = "-x";
        private static final String OPTION_HELP = "-h";

        /** Name of the binary input file, or null if the input is XML. */
        public final String mInputBinary;
        /** Name of the unigram XML input file, or null if the input is binary. */
        public final String mInputUnigramXml;
        /** Name of the bigram XML input file, or null if none was given. */
        public final String mInputBigramXml;
        /** Name of the binary output file, or null if no binary output was requested. */
        public final String mOutputBinary;
        /** Name of the XML output file, or null if no XML output was requested. */
        public final String mOutputXml;

        /**
         * Validates the combination of inputs and outputs held by this object.
         *
         * @throws RuntimeException if the combination is not usable.
         */
        private void checkIntegrity() {
            checkHasExactlyOneInput();
            checkHasAtLeastOneOutput();
        }

        /**
         * Checks that exactly one of the binary / unigram XML inputs is set, and that a
         * bigram file is not combined with a binary input.
         *
         * @throws RuntimeException if the inputs are missing or conflicting.
         */
        private void checkHasExactlyOneInput() {
            if (null == mInputUnigramXml && null == mInputBinary) {
                throw new RuntimeException("No input file specified");
            } else if (null != mInputUnigramXml && null != mInputBinary) {
                throw new RuntimeException("Both input XML and binary specified");
            } else if (null != mInputBinary && null != mInputBigramXml) {
                throw new RuntimeException("Cannot specify a binary input and a separate bigram "
                        + "file");
            }
        }

        /**
         * Checks that at least one output (binary or XML) is set.
         *
         * @throws RuntimeException if no output was specified.
         */
        private void checkHasAtLeastOneOutput() {
            if (null == mOutputBinary && null == mOutputXml) {
                throw new RuntimeException("No output specified");
            }
        }

        /**
         * Logs the usage text through MakedictLog.
         */
        private void displayHelp() {
            MakedictLog.i("Usage: makedict "
                    + "[-s <unigrams.xml> [-b <bigrams.xml>] | -s <binary input>] "
                    + " [-d <binary output>] [-x <xml output>] [-2]\n"
                    + "\n"
                    + " Converts a source dictionary file to one or several outputs.\n"
                    + " Source can be an XML file, with an optional XML bigrams file, or a\n"
                    + " binary dictionary file.\n"
                    + " Both binary and XML outputs are supported. Both can be output at\n"
                    + " the same time but outputting several files of the same type is not\n"
                    + " supported.");
        }

        /**
         * Parses the raw command line.
         *
         * Options start with '-'. "-2" and "-h" take no argument; every other option consumes
         * the following argument as a file name. Arguments that are not options are taken
         * positionally: the first one is the input, the second one the binary output.
         *
         * Note that displaying help (empty command line or "-h") does not stop parsing; the
         * integrity check still runs afterwards.
         *
         * @param argsArray the command line arguments, as passed to main.
         * @throws RuntimeException if an argument is empty, an option is unknown or lacks its
         *         argument, too many positional arguments are given, or the resulting
         *         combination of inputs and outputs is invalid.
         */
        public Arguments(String[] argsArray) {
            final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
            if (args.isEmpty()) {
                displayHelp();
            }
            String inputBinary = null;
            String inputUnigramXml = null;
            String inputBigramXml = null;
            String outputBinary = null;
            String outputXml = null;

            while (!args.isEmpty()) {
                final String arg = args.removeFirst();
                if (arg.length() == 0) {
                    // An empty string is neither an option nor a usable file name. Fail with
                    // a clear message instead of an index error from charAt(0).
                    throw new RuntimeException("Empty argument");
                }
                if (arg.charAt(0) == '-') {
                    if (OPTION_VERSION_2.equals(arg)) {
                        // Do nothing, this is the default
                    } else if (OPTION_HELP.equals(arg)) {
                        displayHelp();
                    } else {
                        // All these options need an argument
                        if (args.isEmpty()) {
                            throw new RuntimeException("Option " + arg + " requires an argument");
                        }
                        final String filename = args.removeFirst();
                        if (OPTION_INPUT_SOURCE.equals(arg)) {
                            if (BinaryDictInputOutput.isBinaryDictionary(filename)) {
                                inputBinary = filename;
                            } else {
                                inputUnigramXml = filename;
                            }
                        } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
                            inputBigramXml = filename;
                        } else if (OPTION_OUTPUT_BINARY.equals(arg)) {
                            outputBinary = filename;
                        } else if (OPTION_OUTPUT_XML.equals(arg)) {
                            outputXml = filename;
                        } else {
                            // Do not silently swallow a mistyped option and its argument.
                            throw new RuntimeException("Unknown option: " + arg);
                        }
                    }
                } else {
                    if (null == inputBinary && null == inputUnigramXml) {
                        if (BinaryDictInputOutput.isBinaryDictionary(arg)) {
                            inputBinary = arg;
                        } else {
                            inputUnigramXml = arg;
                        }
                    } else if (null == outputBinary) {
                        outputBinary = arg;
                    } else {
                        throw new RuntimeException("Several output binary files specified");
                    }
                }
            }

            mInputBinary = inputBinary;
            mInputUnigramXml = inputUnigramXml;
            mInputBigramXml = inputBigramXml;
            mOutputBinary = outputBinary;
            mOutputXml = outputXml;
            checkIntegrity();
        }
    }

    /**
     * Entry point: parses the arguments, reads the input dictionary and writes the outputs.
     *
     * @param args the command line arguments.
     */
    public static void main(String[] args)
            throws FileNotFoundException, ParserConfigurationException, SAXException, IOException,
            UnsupportedFormatException {
        final Arguments parsedArgs = new Arguments(args);
        final FusionDictionary dictionary = readInputFromParsedArgs(parsedArgs);
        writeOutputToParsedArgs(parsedArgs, dictionary);
    }

    /**
     * Invoke the right input method according to args.
     *
     * @param args the parsed command line arguments.
     * @return the read dictionary.
     */
    private static FusionDictionary readInputFromParsedArgs(final Arguments args)
            throws IOException, UnsupportedFormatException, ParserConfigurationException,
            SAXException, FileNotFoundException {
        if (null != args.mInputBinary) {
            return readBinaryFile(args.mInputBinary);
        } else if (null != args.mInputUnigramXml) {
            return readXmlFile(args.mInputUnigramXml, args.mInputBigramXml);
        } else {
            throw new RuntimeException("No input file specified");
        }
    }

    /**
     * Read a dictionary from the name of a binary file.
     *
     * The file is closed before this method returns, whether reading succeeded or not.
     *
     * @param binaryFilename the name of the file in the binary dictionary format.
     * @return the read dictionary.
     * @throws FileNotFoundException if the file can't be found
     * @throws IOException if the input file can't be read
     * @throws UnsupportedFormatException if the binary file is not in the expected format
     */
    private static FusionDictionary readBinaryFile(final String binaryFilename)
            throws FileNotFoundException, IOException, UnsupportedFormatException {
        final RandomAccessFile inputFile = new RandomAccessFile(binaryFilename, "r");
        try {
            // NOTE(review): assumes the returned dictionary is fully loaded in memory and does
            // not keep reading from inputFile after this call returns - confirm.
            return BinaryDictInputOutput.readDictionaryBinary(inputFile, null);
        } finally {
            inputFile.close();
        }
    }

    /**
     * Read a dictionary from a unigram XML file, and optionally a bigram XML file.
     *
     * Both streams are closed before this method returns, including when opening the bigram
     * file or parsing fails.
     *
     * @param unigramXmlFilename the name of the unigram XML file. May not be null.
     * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
     * @return the read dictionary.
     * @throws FileNotFoundException if one of the files can't be found
     * @throws SAXException if one or more of the XML files is not well-formed
     * @throws IOException if one the input files can't be read
     * @throws ParserConfigurationException if the system can't create a SAX parser
     */
    private static FusionDictionary readXmlFile(final String unigramXmlFilename,
            final String bigramXmlFilename) throws FileNotFoundException, SAXException,
            IOException, ParserConfigurationException {
        final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename));
        try {
            final FileInputStream bigrams = null == bigramXmlFilename ? null :
                    new FileInputStream(new File(bigramXmlFilename));
            try {
                return XmlDictInputOutput.readDictionaryXml(unigrams, bigrams);
            } finally {
                if (null != bigrams) {
                    bigrams.close();
                }
            }
        } finally {
            unigrams.close();
        }
    }

    /**
     * Invoke the right output method according to args.
     *
     * This will write the passed dictionary to the file(s) passed in the command line arguments.
     * @param args the parsed arguments.
     * @param dict the file to output.
     * @throws FileNotFoundException if one of the output files can't be created.
     * @throws IOException if one of the output files can't be written to.
     */
    private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
            throws FileNotFoundException, IOException {
        if (null != args.mOutputBinary) {
            writeBinaryDictionary(args.mOutputBinary, dict);
        }
        if (null != args.mOutputXml) {
            writeXmlDictionary(args.mOutputXml, dict);
        }
    }

    /**
     * Write the dictionary in binary format to the specified filename.
     *
     * The output stream is closed before this method returns, even if writing fails.
     *
     * @param outputFilename the name of the file to write to.
     * @param dict the dictionary to write.
     * @throws FileNotFoundException if the output file can't be created.
     * @throws IOException if the output file can't be written to.
     */
    private static void writeBinaryDictionary(final String outputFilename,
            final FusionDictionary dict) throws FileNotFoundException, IOException {
        final FileOutputStream output = new FileOutputStream(outputFilename);
        try {
            BinaryDictInputOutput.writeDictionaryBinary(output, dict);
        } finally {
            // Closing an already closed stream has no effect, so this is safe even if the
            // writer closed it itself.
            output.close();
        }
    }

    /**
     * Write the dictionary in XML format to the specified filename.
     *
     * The writer is closed before this method returns, even if writing fails.
     *
     * @param outputFilename the name of the file to write to.
     * @param dict the dictionary to write.
     * @throws FileNotFoundException if the output file can't be created.
     * @throws IOException if the output file can't be written to.
     */
    private static void writeXmlDictionary(final String outputFilename,
            final FusionDictionary dict) throws FileNotFoundException, IOException {
        // NOTE(review): FileWriter encodes with the platform default charset; confirm this
        // matches the encoding readers of the XML output expect.
        final FileWriter output = new FileWriter(outputFilename);
        try {
            XmlDictInputOutput.writeDictionaryXml(output, dict);
        } finally {
            // Closing an already closed writer has no effect, so this is safe even if the
            // callee closed it itself.
            output.close();
        }
    }
}