// fstcompile.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
//
// \file
// Creates binary FSTs from simple text format used by AT&T
// (see http://www.research.att.com/projects/mohri/fsm/doc4/fsm.5.html).
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/script/compile.h>
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(acceptor, false, "Input in acceptor format");
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(arc_type, "standard", "Output arc type");
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(fst_type, "vector", "Output FST type");
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(isymbols, "", "Input label symbol table");
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(osymbols, "", "Output label symbol table");
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(ssymbols, "", "State label symbol table");
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(keep_isymbols, false, "Store input label symbol table with FST");
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(keep_osymbols, false, "Store output label symbol table with FST");
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(keep_state_numbering, false, "Do not renumber input states");
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(allow_negative_labels, false,
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            "Allow negative labels (not recommended; may cause conflicts)");
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) {
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  namespace s = fst::script;
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::istream;
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::ifstream;
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::SymbolTable;
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string usage = "Creates binary FSTs from simple text format.\n\n  Usage: ";
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += argv[0];
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += " [text.fst [binary.fst]]\n";
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  std::set_new_handler(FailedNewHandler);
48dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SET_FLAGS(usage.c_str(), &argc, &argv, true);
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (argc > 3) {
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ShowUsage();
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const char *source = "standard input";
55dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  istream *istrm = &cin;
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (argc > 1 && strcmp(argv[1], "-") != 0) {
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    source = argv[1];
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    istrm = new fst::ifstream(argv[1]);
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!*istrm) {
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << argv[0] << ": Open failed, file = " << argv[1];
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 1;
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable *isyms = 0, *osyms = 0, *ssyms = 0;
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
66dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  fst::SymbolTableTextOptions opts;
67dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  opts.allow_negative = FLAGS_allow_negative_labels;
68dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!FLAGS_isymbols.empty()) {
70dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    isyms = SymbolTable::ReadText(FLAGS_isymbols, opts);
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!isyms) exit(1);
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!FLAGS_osymbols.empty()) {
75dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    osyms = SymbolTable::ReadText(FLAGS_osymbols, opts);
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!osyms) exit(1);
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!FLAGS_ssymbols.empty()) {
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ssyms = SymbolTable::ReadText(FLAGS_ssymbols);
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!ssyms) exit(1);
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string dest = argc > 2 ? argv[2] : "";
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::CompileFst(*istrm, source, dest, FLAGS_fst_type, FLAGS_arc_type,
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                isyms, osyms, ssyms,
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                FLAGS_acceptor, FLAGS_keep_isymbols, FLAGS_keep_osymbols,
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                FLAGS_keep_state_numbering, FLAGS_allow_negative_labels);
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
91dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (istrm != &cin)
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete istrm;
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return 0;
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
96