// fstsymbols.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: allauzen@google.com (Cyril Allauzen)
// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
//
// \file
// Performs operations (set, clear, relabel) on the symbol tables
// attached to the input Fst.
//
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
#include <cstring>
#include <new>

#include <fst/script/fst-class.h>
#include <fst/script/script-impl.h>
#include <fst/script/verify.h>
#include <fst/util.h>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(isymbols, "", "Input label symbol table");
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(osymbols, "", "Output label symbol table");
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(clear_isymbols, false, "Clear input symbol table");
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(clear_osymbols, false, "Clear output symbol table");
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_ipairs, "", "Input relabel pairs (numeric)");
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_opairs, "", "Output relabel pairs (numeric)");
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(save_isymbols, "", "Save fst file's input symbol table to file");
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(save_osymbols, "", "Save fst file's output symbol table to file");
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(allow_negative_labels, false,
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            "Allow negative labels (not recommended; may cause conflicts)");
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(verify, false, "Verify fst properities before saving");
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) {
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  namespace s = fst::script;
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::SymbolTable;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string usage = "Performs operations (set, clear, relabel) on the symbol"
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      " tables attached to an FST.\n\n  Usage: ";
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += argv[0];
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += " [in.fst [out.fst]]\n";
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  std::set_new_handler(FailedNewHandler);
51dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SET_FLAGS(usage.c_str(), &argc, &argv, true);
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (argc > 3) {
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ShowUsage();
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string in_fname = argc > 1 && strcmp(argv[1], "-") != 0 ? argv[1] : "";
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string out_fname = argc > 2 ? argv[2] : "";
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::MutableFstClass *fst = s::MutableFstClass::Read(in_fname, true);
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!fst) return 1;
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!FLAGS_save_isymbols.empty()) {
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const SymbolTable *isyms = fst->InputSymbols();
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (isyms) {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      isyms->WriteText(FLAGS_save_isymbols);
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "save isymbols requested but there are no input symbols.";
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!FLAGS_save_osymbols.empty()) {
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const SymbolTable *osyms = fst->OutputSymbols();
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (osyms) {
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      osyms->WriteText(FLAGS_save_osymbols);
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "save osymbols requested but there are no output symbols.";
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
81dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  fst::SymbolTableTextOptions opts;
82dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  opts.allow_negative = FLAGS_allow_negative_labels;
83dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (FLAGS_clear_isymbols)
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst->SetInputSymbols(0);
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  else if (!FLAGS_isymbols.empty())
87dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    fst->SetInputSymbols(SymbolTable::ReadText(FLAGS_isymbols, opts));
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (FLAGS_clear_osymbols)
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst->SetOutputSymbols(0);
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  else if (!FLAGS_osymbols.empty())
92dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    fst->SetOutputSymbols(SymbolTable::ReadText(FLAGS_osymbols, opts));
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!FLAGS_relabel_ipairs.empty()) {
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typedef int64 Label;
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<pair<Label, Label> > ipairs;
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst::ReadLabelPairs(FLAGS_relabel_ipairs, &ipairs,
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                            FLAGS_allow_negative_labels);
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SymbolTable *isyms = RelabelSymbolTable(fst->InputSymbols(), ipairs);
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst->SetInputSymbols(isyms);
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete isyms;
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!FLAGS_relabel_opairs.empty()) {
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typedef int64 Label;
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<pair<Label, Label> > opairs;
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst::ReadLabelPairs(FLAGS_relabel_opairs, &opairs,
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                            FLAGS_allow_negative_labels);
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SymbolTable *osyms = RelabelSymbolTable(fst->OutputSymbols(), opairs);
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst->SetOutputSymbols(osyms);
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete osyms;
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (FLAGS_verify && !s::Verify(*fst))
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst->Write(out_fname);
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return 0;
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
119