1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// fstrelabel.cc 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: johans@google.com (Johan Schalkwyk) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Relabel input or output space of Fst 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair; 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/script/relabel.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/script/weight-class.h> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h> 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(isymbols, "", "Input label symbol table"); 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(osymbols, "", "Output label symbol table"); 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_isymbols, "", "Input symbol set to relabel to"); 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_osymbols, "", "Ouput symbol set to relabel to"); 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_ipairs, "", "Input relabel pairs (numeric)"); 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_opairs, "", "Output relabel pairs (numeric)"); 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(allow_negative_labels, false, 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "Allow negative labels (not recommended; may cause conflicts)"); 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) { 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson namespace s = fst::script; 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using fst::SymbolTable; 46dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin using fst::SymbolTableTextOptions; 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using fst::script::FstClass; 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using fst::script::MutableFstClass; 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string usage = "Relabels the input and/or the output labels of the FST.\n\n" 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson " Usage: "; 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += argv[0]; 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " [in.fst [out.fst]]\n"; 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " Using SymbolTables flags:\n"; 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " -relabel_isymbols isyms.txt\n"; 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " -relabel_osymbols osyms.txt\n"; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " Using numeric labels flags:\n"; 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " -relabel_ipairs ipairs.txt\n"; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson usage += " -relabel_opairs opairs.txts\n"; 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson std::set_new_handler(FailedNewHandler); 62dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin SET_FLAGS(usage.c_str(), &argc, &argv, true); 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (argc > 3) { 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShowUsage(); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 1; 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : ""; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string out_name = argc > 2 ? argv[2] : ""; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFstClass *fst = MutableFstClass::Read(in_name, true); 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst) return 1; 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Relabel with symbol tables 75dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin SymbolTableTextOptions opts; 76dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin opts.allow_negative = FLAGS_allow_negative_labels; 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_relabel_isymbols.empty() || !FLAGS_relabel_osymbols.empty()) { 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool attach_new_isymbols = (fst->InputSymbols() != 0); 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* old_isymbols = FLAGS_isymbols.empty() 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ? fst->InputSymbols() 81dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin : SymbolTable::ReadText(FLAGS_isymbols, opts); 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* relabel_isymbols = FLAGS_relabel_isymbols.empty() 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ? NULL 84dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin : SymbolTable::ReadText(FLAGS_relabel_isymbols, opts); 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool attach_new_osymbols = (fst->OutputSymbols() != 0); 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* old_osymbols = FLAGS_osymbols.empty() 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ? fst->OutputSymbols() 89dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin : SymbolTable::ReadText(FLAGS_osymbols, opts); 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* relabel_osymbols = FLAGS_relabel_osymbols.empty() 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ? NULL 92dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin : SymbolTable::ReadText(FLAGS_relabel_osymbols, opts); 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s::Relabel(fst, 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson old_isymbols, relabel_isymbols, attach_new_isymbols, 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson old_osymbols, relabel_osymbols, attach_new_osymbols); 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // read in relabel pairs and parse 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef int64 Label; 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<pair<Label, Label> > ipairs; 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<pair<Label, Label> > opairs; 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_relabel_ipairs.empty()) { 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if(!fst::ReadLabelPairs(FLAGS_relabel_ipairs, &ipairs, 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_allow_negative_labels)) 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 1; 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!FLAGS_relabel_opairs.empty()) { 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst::ReadLabelPairs(FLAGS_relabel_opairs, &opairs, 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FLAGS_allow_negative_labels)) 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 1; 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s::Relabel(fst, ipairs, opairs); 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->Write(out_name); 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 119