1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// farcompilestrings.cc
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Modified: jpr@google.com (Jake Ratkiewicz) to use new arc-type dispatching
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
// Compiles a set of strings as FSTs and stores them in a finite-state
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// archive.
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/far/farscript.h>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/far/main.h>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fstream>
28dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <sstream>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(key_prefix, "", "Prefix to append to keys");
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(key_suffix, "", "Suffix to append to keys");
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_int32(generate_keys, 0,
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             "Generate N digit numeric keys (def: use file basenames)");
34dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander GutkinDEFINE_string(far_type, "default",
355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin              "FAR file format type: one of: \"default\", \"fst\", "
36dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin              "\"stlist\", \"sttable\"");
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(allow_negative_labels, false,
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            "Allow negative labels (not recommended; may cause conflicts)");
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(arc_type, "standard", "Output arc type");
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(entry_type, "line", "Entry type: one of : "
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              "\"file\" (one FST per file), \"line\" (one FST per line)");
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(fst_type, "vector", "Output FST type");
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(token_type, "symbol", "Token type: one of : "
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              "\"symbol\", \"byte\", \"utf8\"");
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(symbols, "", "Label symbol table");
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(unknown_symbol, "", "");
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(file_list_input, false,
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            "Each input files contains a list of files to be processed");
49dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander GutkinDEFINE_bool(keep_symbols, false,
50dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            "Store symbol table in Far file");
51dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander GutkinDEFINE_bool(initial_symbols, true,
52dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            "When keep_symbols==true, stores symbol table only for the first"
53dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            " Fst in archive.");
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint  main(int argc, char **argv) {
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  namespace s = fst::script;
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string usage = "Compiles a set of strings as FSTs and stores them in";
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += " a finite-state archive.\n\n Usage:";
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += argv[0];
61dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  usage += " [in1.txt [[in2.txt ...] out.far]]\n";
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  std::set_new_handler(FailedNewHandler);
64dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SET_FLAGS(usage.c_str(), &argc, &argv, true);
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
66dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  vector<string> in_fnames;
67dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  for (unsigned i = 1; i < argc - 1; ++i)
68dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    in_fnames.push_back(strcmp(argv[i], "") != 0 ? argv[i] : "");
69dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (in_fnames.empty())
70dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    in_fnames.push_back(argc == 2 && strcmp(argv[1], "-") != 0 ? argv[1] : "");
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
72dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  string out_fname =
73dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      argc > 2 && strcmp(argv[argc - 1], "-") != 0 ? argv[argc - 1] : "";
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst::FarEntryType fet = fst::StringToFarEntryType(FLAGS_entry_type);
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst::FarTokenType ftt = fst::StringToFarTokenType(FLAGS_token_type);
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst::FarType far_type = fst::FarTypeFromString(FLAGS_far_type);
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::FarCompileStrings(in_fnames, out_fname, FLAGS_arc_type, FLAGS_fst_type,
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       far_type, FLAGS_generate_keys, fet, ftt,
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       FLAGS_symbols, FLAGS_unknown_symbol,
82dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                       FLAGS_keep_symbols, FLAGS_initial_symbols,
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       FLAGS_allow_negative_labels,
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       FLAGS_file_list_input, FLAGS_key_prefix,
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       FLAGS_key_suffix);
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return 0;
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
89