// fstcompile.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
//
// \file
// Creates binary FSTs from simple text format used by AT&T
// (see http://www.research.att.com/projects/mohri/fsm/doc4/fsm.5.html).
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/script/compile.h> 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(acceptor, false, "Input in acceptor format"); 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(arc_type, "standard", "Output arc type"); 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(fst_type, "vector", "Output FST type"); 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(isymbols, "", "Input label symbol table"); 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(osymbols, "", "Output label symbol table"); 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(ssymbols, "", "State label symbol table"); 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(keep_isymbols, false, "Store input label symbol table with FST"); 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(keep_osymbols, false, "Store output label symbol table with FST"); 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(keep_state_numbering, false, "Do not renumber input states"); 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(allow_negative_labels, false, 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "Allow negative labels (not recommended; may cause conflicts)"); 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) { 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson namespace s = fst::script; 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using fst::istream; 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using fst::ifstream; 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using fst::SymbolTable; 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string 
usage = "Creates binary FSTs from simple text format.\n\n Usage: "; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += argv[0]; 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " [text.fst [binary.fst]]\n"; 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson std::set_new_handler(FailedNewHandler); 48dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin SET_FLAGS(usage.c_str(), &argc, &argv, true); 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (argc > 3) { 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShowUsage(); 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 1; 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const char *source = "standard input"; 55dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin istream *istrm = &cin; 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (argc > 1 && strcmp(argv[1], "-") != 0) { 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson source = argv[1]; 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson istrm = new fst::ifstream(argv[1]); 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!*istrm) { 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << argv[0] << ": Open failed, file = " << argv[1]; 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 1; 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *isyms = 0, *osyms = 0, *ssyms = 0; 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 66dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin fst::SymbolTableTextOptions opts; 67dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin opts.allow_negative = FLAGS_allow_negative_labels; 68dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin 
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_isymbols.empty()) { 70dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin isyms = SymbolTable::ReadText(FLAGS_isymbols, opts); 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!isyms) exit(1); 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_osymbols.empty()) { 75dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin osyms = SymbolTable::ReadText(FLAGS_osymbols, opts); 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!osyms) exit(1); 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_ssymbols.empty()) { 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssyms = SymbolTable::ReadText(FLAGS_ssymbols); 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!ssyms) exit(1); 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string dest = argc > 2 ? 
argv[2] : ""; 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s::CompileFst(*istrm, source, dest, FLAGS_fst_type, FLAGS_arc_type, 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson isyms, osyms, ssyms, 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_acceptor, FLAGS_keep_isymbols, FLAGS_keep_osymbols, 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_keep_state_numbering, FLAGS_allow_negative_labels); 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 91dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (istrm != &cin) 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete istrm; 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 96