1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// fstrandgen.cc
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Generates random paths through an FST.
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/script/randgen.h>
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_int32(max_length, INT_MAX, "Maximum path length");
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_int64(npath, 1, "Number of paths to generate");
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_int32(seed, time(0), "Random seed");
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(select, "uniform", "Selection type: one of: "
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              " \"uniform\", \"log_prob\" (when appropriate),"
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson	      " \"fast_log_prob\" (when appropriate)");
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(weighted, false,
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            "Output tree weighted by path count vs. unweighted paths");
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(remove_total_weight, false,
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            "Remove total weight when output weighted");
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) {
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  namespace s = fst::script;
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::script::FstClass;
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using fst::script::VectorFstClass;
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string usage = "Generates random paths through an FST.\n\n  Usage: ";
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += argv[0];
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += " [in.fst [out.fst]]\n";
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  std::set_new_handler(FailedNewHandler);
45dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SET_FLAGS(usage.c_str(), &argc, &argv, true);
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (argc > 3) {
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ShowUsage();
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  VLOG(1) << argv[0] << ": Seed = " << FLAGS_seed;
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string in_name = (argc > 1 && strcmp(argv[1], "-") != 0) ? argv[1] : "";
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string out_name = argc > 2 ? argv[2] : "";
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FstClass *ifst = FstClass::Read(in_name);
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!ifst) return 1;
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  VectorFstClass ofst(ifst->ArcType());
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::RandArcSelection ras;
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (FLAGS_select == "uniform") {
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ras = s::UNIFORM_ARC_SELECTOR;
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (FLAGS_select == "log_prob") {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ras = s::LOG_PROB_ARC_SELECTOR;
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (FLAGS_select == "fast_log_prob") {
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ras = s::FAST_LOG_PROB_ARC_SELECTOR;
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << argv[0] << ": Unknown selection type \""
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << FLAGS_select << "\"\n";
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::RandGen(*ifst, &ofst, FLAGS_seed,
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             fst::RandGenOptions<s::RandArcSelection>(
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 ras, FLAGS_max_length, FLAGS_npath,
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 FLAGS_weighted, FLAGS_remove_total_weight));
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst.Write(out_name);
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return 0;
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
83