// fstsymbols.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: allauzen@google.com (Cyril Allauzen)
// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
//
// \file
// Performs operations (set, clear, relabel) on the symbol tables
// attached to the input Fst.
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/script/fst-class.h> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/script/script-impl.h> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/script/verify.h> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(isymbols, "", "Input label symbol table"); 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(osymbols, "", "Output label symbol table"); 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(clear_isymbols, false, "Clear input symbol table"); 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(clear_osymbols, false, "Clear output symbol table"); 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_ipairs, "", "Input relabel pairs (numeric)"); 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_opairs, "", "Output relabel pairs (numeric)"); 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(save_isymbols, "", "Save fst file's input symbol table to file"); 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(save_osymbols, "", "Save fst file's output symbol table to file"); 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(allow_negative_labels, false, 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "Allow negative labels (not recommended; may cause conflicts)"); 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(verify, false, "Verify fst properities before saving"); 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) { 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson namespace s = 
fst::script; 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using fst::SymbolTable; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string usage = "Performs operations (set, clear, relabel) on the symbol" 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson " tables attached to an FST.\n\n Usage: "; 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += argv[0]; 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " [in.fst [out.fst]]\n"; 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson std::set_new_handler(FailedNewHandler); 51dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin SET_FLAGS(usage.c_str(), &argc, &argv, true); 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (argc > 3) { 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShowUsage(); 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 1; 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string in_fname = argc > 1 && strcmp(argv[1], "-") != 0 ? argv[1] : ""; 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string out_fname = argc > 2 ? 
argv[2] : ""; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s::MutableFstClass *fst = s::MutableFstClass::Read(in_fname, true); 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst) return 1; 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_save_isymbols.empty()) { 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *isyms = fst->InputSymbols(); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (isyms) { 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson isyms->WriteText(FLAGS_save_isymbols); 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "save isymbols requested but there are no input symbols."; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_save_osymbols.empty()) { 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *osyms = fst->OutputSymbols(); 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (osyms) { 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson osyms->WriteText(FLAGS_save_osymbols); 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "save osymbols requested but there are no output symbols."; 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 81dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin fst::SymbolTableTextOptions opts; 82dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin opts.allow_negative = FLAGS_allow_negative_labels; 83dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin 
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (FLAGS_clear_isymbols) 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetInputSymbols(0); 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else if (!FLAGS_isymbols.empty()) 87dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin fst->SetInputSymbols(SymbolTable::ReadText(FLAGS_isymbols, opts)); 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (FLAGS_clear_osymbols) 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetOutputSymbols(0); 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else if (!FLAGS_osymbols.empty()) 92dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin fst->SetOutputSymbols(SymbolTable::ReadText(FLAGS_osymbols, opts)); 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_relabel_ipairs.empty()) { 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef int64 Label; 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<pair<Label, Label> > ipairs; 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst::ReadLabelPairs(FLAGS_relabel_ipairs, &ipairs, 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_allow_negative_labels); 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SymbolTable *isyms = RelabelSymbolTable(fst->InputSymbols(), ipairs); 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetInputSymbols(isyms); 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete isyms; 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_relabel_opairs.empty()) { 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef int64 Label; 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<pair<Label, Label> > opairs; 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 
fst::ReadLabelPairs(FLAGS_relabel_opairs, &opairs, 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_allow_negative_labels); 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SymbolTable *osyms = RelabelSymbolTable(fst->OutputSymbols(), opairs); 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetOutputSymbols(osyms); 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete osyms; 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (FLAGS_verify && !s::Verify(*fst)) 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 1; 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->Write(out_fname); 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 119