/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16
17package com.android.inputmethod.latin;
18
19import java.io.File;
20import java.io.FileInputStream;
21import java.io.FileNotFoundException;
22import java.io.FileOutputStream;
23import java.io.FileWriter;
24import java.io.IOException;
25import java.io.RandomAccessFile;
26import java.util.Arrays;
27import java.util.LinkedList;
28
29import javax.xml.parsers.ParserConfigurationException;
30
31import org.xml.sax.SAXException;
32
33/**
34 * Main class/method for DictionaryMaker.
35 */
36public class DictionaryMaker {
37
38    static class Arguments {
39        private final static String OPTION_VERSION_2 = "-2";
40        private final static String OPTION_INPUT_SOURCE = "-s";
41        private final static String OPTION_INPUT_BIGRAM_XML = "-b";
42        private final static String OPTION_OUTPUT_BINARY = "-d";
43        private final static String OPTION_OUTPUT_XML = "-x";
44        private final static String OPTION_HELP = "-h";
45        public final String mInputBinary;
46        public final String mInputUnigramXml;
47        public final String mInputBigramXml;
48        public final String mOutputBinary;
49        public final String mOutputXml;
50
51        private void checkIntegrity() {
52            checkHasExactlyOneInput();
53            checkHasAtLeastOneOutput();
54        }
55
56        private void checkHasExactlyOneInput() {
57            if (null == mInputUnigramXml && null == mInputBinary) {
58                throw new RuntimeException("No input file specified");
59            } else if (null != mInputUnigramXml && null != mInputBinary) {
60                throw new RuntimeException("Both input XML and binary specified");
61            } else if (null != mInputBinary && null != mInputBigramXml) {
62                throw new RuntimeException("Cannot specify a binary input and a separate bigram "
63                        + "file");
64            }
65        }
66
67        private void checkHasAtLeastOneOutput() {
68            if (null == mOutputBinary && null == mOutputXml) {
69                throw new RuntimeException("No output specified");
70            }
71        }
72
73        private void displayHelp() {
74            MakedictLog.i("Usage: makedict "
75                    + "[-s <unigrams.xml> [-b <bigrams.xml>] | -s <binary input>] "
76                    + " [-d <binary output>] [-x <xml output>] [-2]\n"
77                    + "\n"
78                    + "  Converts a source dictionary file to one or several outputs.\n"
79                    + "  Source can be an XML file, with an optional XML bigrams file, or a\n"
80                    + "  binary dictionary file.\n"
81                    + "  Both binary and XML outputs are supported. Both can be output at\n"
82                    + "  the same time but outputting several files of the same type is not\n"
83                    + "  supported.");
84        }
85
86        public Arguments(String[] argsArray) {
87            final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
88            if (args.isEmpty()) {
89                displayHelp();
90            }
91            String inputBinary = null;
92            String inputUnigramXml = null;
93            String inputBigramXml = null;
94            String outputBinary = null;
95            String outputXml = null;
96
97            while (!args.isEmpty()) {
98                final String arg = args.get(0);
99                args.remove(0);
100                if (arg.charAt(0) == '-') {
101                    if (OPTION_VERSION_2.equals(arg)) {
102                        // Do nothing, this is the default
103                    } else if (OPTION_HELP.equals(arg)) {
104                        displayHelp();
105                    } else {
106                        // All these options need an argument
107                        if (args.isEmpty()) {
108                            throw new RuntimeException("Option " + arg + " requires an argument");
109                        }
110                        String filename = args.get(0);
111                        args.remove(0);
112                        if (OPTION_INPUT_SOURCE.equals(arg)) {
113                            if (BinaryDictInputOutput.isBinaryDictionary(filename)) {
114                                inputBinary = filename;
115                            } else {
116                                inputUnigramXml = filename;
117                            }
118                        } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
119                            inputBigramXml = filename;
120                        } else if (OPTION_OUTPUT_BINARY.equals(arg)) {
121                            outputBinary = filename;
122                        } else if (OPTION_OUTPUT_XML.equals(arg)) {
123                            outputXml = filename;
124                        }
125                    }
126                } else {
127                    if (null == inputBinary && null == inputUnigramXml) {
128                        if (BinaryDictInputOutput.isBinaryDictionary(arg)) {
129                            inputBinary = arg;
130                        } else {
131                            inputUnigramXml = arg;
132                        }
133                    } else if (null == outputBinary) {
134                        outputBinary = arg;
135                    } else {
136                        throw new RuntimeException("Several output binary files specified");
137                    }
138                }
139            }
140
141            mInputBinary = inputBinary;
142            mInputUnigramXml = inputUnigramXml;
143            mInputBigramXml = inputBigramXml;
144            mOutputBinary = outputBinary;
145            mOutputXml = outputXml;
146            checkIntegrity();
147        }
148    }
149
150    public static void main(String[] args)
151            throws FileNotFoundException, ParserConfigurationException, SAXException, IOException,
152            UnsupportedFormatException {
153        final Arguments parsedArgs = new Arguments(args);
154        FusionDictionary dictionary = readInputFromParsedArgs(parsedArgs);
155        writeOutputToParsedArgs(parsedArgs, dictionary);
156    }
157
158    /**
159     * Invoke the right input method according to args.
160     *
161     * @param args the parsed command line arguments.
162     * @return the read dictionary.
163     */
164    private static FusionDictionary readInputFromParsedArgs(final Arguments args)
165            throws IOException, UnsupportedFormatException, ParserConfigurationException,
166            SAXException, FileNotFoundException {
167        if (null != args.mInputBinary) {
168            return readBinaryFile(args.mInputBinary);
169        } else if (null != args.mInputUnigramXml) {
170            return readXmlFile(args.mInputUnigramXml, args.mInputBigramXml);
171        } else {
172            throw new RuntimeException("No input file specified");
173        }
174    }
175
176    /**
177     * Read a dictionary from the name of a binary file.
178     *
179     * @param binaryFilename the name of the file in the binary dictionary format.
180     * @return the read dictionary.
181     * @throws FileNotFoundException if the file can't be found
182     * @throws IOException if the input file can't be read
183     * @throws UnsupportedFormatException if the binary file is not in the expected format
184     */
185    private static FusionDictionary readBinaryFile(final String binaryFilename)
186            throws FileNotFoundException, IOException, UnsupportedFormatException {
187        final RandomAccessFile inputFile = new RandomAccessFile(binaryFilename, "r");
188        return BinaryDictInputOutput.readDictionaryBinary(inputFile, null);
189    }
190
191    /**
192     * Read a dictionary from a unigram XML file, and optionally a bigram XML file.
193     *
194     * @param unigramXmlFilename the name of the unigram XML file. May not be null.
195     * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams.
196     * @return the read dictionary.
197     * @throws FileNotFoundException if one of the files can't be found
198     * @throws SAXException if one or more of the XML files is not well-formed
199     * @throws IOException if one the input files can't be read
200     * @throws ParserConfigurationException if the system can't create a SAX parser
201     */
202    private static FusionDictionary readXmlFile(final String unigramXmlFilename,
203            final String bigramXmlFilename) throws FileNotFoundException, SAXException,
204            IOException, ParserConfigurationException {
205        final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename));
206        final FileInputStream bigrams = null == bigramXmlFilename ? null :
207                new FileInputStream(new File(bigramXmlFilename));
208        return XmlDictInputOutput.readDictionaryXml(unigrams, bigrams);
209    }
210
211    /**
212     * Invoke the right output method according to args.
213     *
214     * This will write the passed dictionary to the file(s) passed in the command line arguments.
215     * @param args the parsed arguments.
216     * @param dict the file to output.
217     * @throws FileNotFoundException if one of the output files can't be created.
218     * @throws IOException if one of the output files can't be written to.
219     */
220    private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
221            throws FileNotFoundException, IOException {
222        if (null != args.mOutputBinary) {
223            writeBinaryDictionary(args.mOutputBinary, dict);
224        }
225        if (null != args.mOutputXml) {
226            writeXmlDictionary(args.mOutputXml, dict);
227        }
228    }
229
230    /**
231     * Write the dictionary in binary format to the specified filename.
232     *
233     * @param outputFilename the name of the file to write to.
234     * @param dict the dictionary to write.
235     * @throws FileNotFoundException if the output file can't be created.
236     * @throws IOException if the output file can't be written to.
237     */
238    private static void writeBinaryDictionary(final String outputFilename,
239            final FusionDictionary dict) throws FileNotFoundException, IOException {
240        final File outputFile = new File(outputFilename);
241        BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict);
242    }
243
244    /**
245     * Write the dictionary in XML format to the specified filename.
246     *
247     * @param outputFilename the name of the file to write to.
248     * @param dict the dictionary to write.
249     * @throws FileNotFoundException if the output file can't be created.
250     * @throws IOException if the output file can't be written to.
251     */
252    private static void writeXmlDictionary(final String outputFilename,
253            final FusionDictionary dict) throws FileNotFoundException, IOException {
254        XmlDictInputOutput.writeDictionaryXml(new FileWriter(outputFilename), dict);
255    }
256}
257