// pdtshortestpath.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
// Modified: jpr@google.com (Jake Ratkiewicz) to use FstClass
//
// \file
// Return the shortest path in a PDT.
//

#include <cstring>
#include <new>
#include <string>
#include <utility>
#include <vector>

#include <fst/extensions/pdt/pdtscript.h>
#include <fst/util.h>

27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(keep_parentheses, false, "Keep PDT parentheses in result.");
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(queue_type, "fifo", "Queue type: one of: "
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              "\"fifo\", \"lifo\", \"state\"");
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(path_gc, true, "Garbage collect shortest path data");
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs.");
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint main(int argc, char **argv) {
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  namespace s = fst::script;
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string usage = "Shortest path in a PDT.\n\n  Usage: ";
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += argv[0];
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  usage += " in.pdt [out.fst]\n";
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  std::set_new_handler(FailedNewHandler);
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SetFlags(usage.c_str(), &argc, &argv, true);
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (argc > 3) {
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ShowUsage();
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string in_name = (argc > 1 && (strcmp(argv[1], "-") != 0)) ? argv[1] : "";
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string out_name = argc > 2 ? argv[2] : "";
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::FstClass *ifst = s::FstClass::Read(in_name);
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!ifst) return 1;
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (FLAGS_pdt_parentheses.empty()) {
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << argv[0] << ": No PDT parenthesis label pairs provided";
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<pair<int64, int64> > parens, rparens;
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst::ReadLabelPairs(FLAGS_pdt_parentheses, &parens, false);
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::VectorFstClass ofst(ifst->ArcType());
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst::QueueType qt;
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (FLAGS_queue_type == "fifo") {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    qt = fst::FIFO_QUEUE;
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (FLAGS_queue_type == "lifo") {
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    qt = fst::LIFO_QUEUE;
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (FLAGS_queue_type == "state") {
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    qt = fst::STATE_ORDER_QUEUE;
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Unknown or unsupported queue type: " << FLAGS_queue_type;
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 1;
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::PdtShortestPathOptions opts(qt, FLAGS_keep_parentheses, FLAGS_path_gc);
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  s::PdtShortestPath(*ifst, parens, &ofst, opts);
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst.Write(out_name);
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return 0;
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
82