1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// farcompilestrings.cc 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Modified: jpr@google.com (Jake Ratkiewicz) to use new arc-type dispatching 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Compiles a set of stings as FSTs and stores them in a finite-state 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// archive. 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/far/farscript.h> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/far/main.h> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fstream> 28dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <sstream> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(key_prefix, "", "Prefix to append to keys"); 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(key_suffix, "", "Suffix to append to keys"); 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_int32(generate_keys, 0, 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "Generate N digit numeric keys (def: use file basenames)"); 34dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander GutkinDEFINE_string(far_type, "default", 355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin "FAR file format type: one of: \"default\", \"fst\", " 36dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin "\"stlist\", \"sttable\""); 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(allow_negative_labels, false, 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "Allow negative labels (not recommended; may cause conflicts)"); 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(arc_type, "standard", "Output arc type"); 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(entry_type, "line", "Entry type: one of : " 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "\"file\" (one FST per file), \"line\" (one FST per line)"); 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(fst_type, "vector", "Output FST type"); 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(token_type, "symbol", "Token type: one of : " 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "\"symbol\", \"byte\", \"utf8\""); 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(symbols, "", "Label symbol table"); 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(unknown_symbol, "", ""); 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(file_list_input, false, 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "Each input files contains a list of files to be processed"); 49dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander GutkinDEFINE_bool(keep_symbols, false, 50dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin "Store symbol table in Far file"); 51dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander GutkinDEFINE_bool(initial_symbols, true, 52dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin "When keep_symbols==true, stores symbol table only for the first" 53dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin " Fst in archive."); 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) { 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson namespace s = fst::script; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string usage = "Compiles a set of strings as FSTs and stores them in"; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " a finite-state archive.\n\n Usage:"; 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += argv[0]; 61dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin usage += " [in1.txt [[in2.txt ...] out.far]]\n"; 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson std::set_new_handler(FailedNewHandler); 64dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin SET_FLAGS(usage.c_str(), &argc, &argv, true); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 66dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin vector<string> in_fnames; 67dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin for (unsigned i = 1; i < argc - 1; ++i) 68dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin in_fnames.push_back(strcmp(argv[i], "") != 0 ? argv[i] : ""); 69dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (in_fnames.empty()) 70dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin in_fnames.push_back(argc == 2 && strcmp(argv[1], "-") != 0 ? argv[1] : ""); 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 72dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin string out_fname = 73dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin argc > 2 && strcmp(argv[argc - 1], "-") != 0 ? argv[argc - 1] : ""; 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst::FarEntryType fet = fst::StringToFarEntryType(FLAGS_entry_type); 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst::FarTokenType ftt = fst::StringToFarTokenType(FLAGS_token_type); 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst::FarType far_type = fst::FarTypeFromString(FLAGS_far_type); 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s::FarCompileStrings(in_fnames, out_fname, FLAGS_arc_type, FLAGS_fst_type, 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson far_type, FLAGS_generate_keys, fet, ftt, 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_symbols, FLAGS_unknown_symbol, 82dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin FLAGS_keep_symbols, FLAGS_initial_symbols, 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_allow_negative_labels, 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_file_list_input, FLAGS_key_prefix, 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_key_suffix); 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 89