// fstrelabel.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: johans@google.com (Johan Schalkwyk)
// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
//
// \file
// Relabel input or output space of Fst
//

#include <cstring>
#include <new>
#include <string>
#include <utility>
#include <vector>
using std::pair; using std::make_pair;
using std::vector;

#include <fst/script/relabel.h>
#include <fst/script/weight-class.h>
#include <fst/util.h>

33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(isymbols, "", "Input label symbol table");
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(osymbols, "", "Output label symbol table");
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_isymbols, "", "Input symbol set to relabel to");
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_osymbols, "", "Ouput symbol set to relabel to");
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_ipairs, "", "Input relabel pairs (numeric)");
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(relabel_opairs, "", "Output relabel pairs (numeric)");
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(allow_negative_labels, false,
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            "Allow negative labels (not recommended; may cause conflicts)");
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) {
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  namespace s = fst::script;
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::SymbolTable;
46dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  using fst::SymbolTableTextOptions;
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::script::FstClass;
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::script::MutableFstClass;
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string usage = "Relabels the input and/or the output labels of the FST.\n\n"
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      "  Usage: ";
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += argv[0];
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += " [in.fst [out.fst]]\n";
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += " Using SymbolTables flags:\n";
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += "  -relabel_isymbols isyms.txt\n";
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += "  -relabel_osymbols osyms.txt\n";
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += " Using numeric labels flags:\n";
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += "  -relabel_ipairs   ipairs.txt\n";
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += "  -relabel_opairs   opairs.txts\n";
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  std::set_new_handler(FailedNewHandler);
62dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SET_FLAGS(usage.c_str(), &argc, &argv, true);
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (argc > 3) {
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ShowUsage();
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string out_name = argc > 2 ? argv[2] : "";
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  MutableFstClass *fst = MutableFstClass::Read(in_name, true);
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!fst) return 1;
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Relabel with symbol tables
75dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SymbolTableTextOptions opts;
76dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  opts.allow_negative = FLAGS_allow_negative_labels;
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!FLAGS_relabel_isymbols.empty() || !FLAGS_relabel_osymbols.empty()) {
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool attach_new_isymbols = (fst->InputSymbols() != 0);
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const SymbolTable* old_isymbols = FLAGS_isymbols.empty()
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ? fst->InputSymbols()
81dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        : SymbolTable::ReadText(FLAGS_isymbols, opts);
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const SymbolTable* relabel_isymbols = FLAGS_relabel_isymbols.empty()
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ? NULL
84dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        : SymbolTable::ReadText(FLAGS_relabel_isymbols, opts);
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool attach_new_osymbols = (fst->OutputSymbols() != 0);
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const SymbolTable* old_osymbols = FLAGS_osymbols.empty()
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ? fst->OutputSymbols()
89dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        : SymbolTable::ReadText(FLAGS_osymbols, opts);
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const SymbolTable* relabel_osymbols = FLAGS_relabel_osymbols.empty()
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ? NULL
92dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        : SymbolTable::ReadText(FLAGS_relabel_osymbols, opts);
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    s::Relabel(fst,
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               old_isymbols, relabel_isymbols, attach_new_isymbols,
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               old_osymbols, relabel_osymbols, attach_new_osymbols);
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // read in relabel pairs and parse
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typedef int64 Label;
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<pair<Label, Label> > ipairs;
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<pair<Label, Label> > opairs;
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!FLAGS_relabel_ipairs.empty()) {
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if(!fst::ReadLabelPairs(FLAGS_relabel_ipairs, &ipairs,
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                  FLAGS_allow_negative_labels))
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 1;
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!FLAGS_relabel_opairs.empty()) {
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!fst::ReadLabelPairs(FLAGS_relabel_opairs, &opairs,
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                   FLAGS_allow_negative_labels))
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 1;
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    s::Relabel(fst, ipairs, opairs);
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst->Write(out_name);
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return 0;
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
119