// main.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
// Modified: jpr@google.com (Jake Ratkiewicz) to not use new arc-dispatch
//
// \file
// Definitions and functions for invoking and using Far main
// functions that support multiple and extensible arc types.
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fstream>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/far/main.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Return the 'FarType' value corresponding to a far type name.
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonFarType FarTypeFromString(const string &str) {
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FarType type = FAR_DEFAULT;
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (str == "stlist")
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type = FAR_STLIST;
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  else if (str == "sttable")
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type = FAR_STTABLE;
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  else if (str == "default")
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type = FAR_DEFAULT;
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return type;
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Return the textual name  corresponding to a 'FarType;.
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstring FarTypeToString(FarType type) {
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  switch (type) {
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    case FAR_STLIST:
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return "stlist";
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    case FAR_STTABLE:
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return "sttable";
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    case FAR_DEFAULT:
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return "default";
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    default:
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return "<unknown>";
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonFarEntryType StringToFarEntryType(const string &s) {
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (s == "line") {
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FET_LINE;
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (s == "file") {
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FET_FILE;
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "Unknown FAR entry type: " << s;
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FET_LINE;  // compiler requires return
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonFarTokenType StringToFarTokenType(const string &s) {
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (s == "symbol") {
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FTT_SYMBOL;
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (s == "byte") {
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FTT_BYTE;
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (s == "utf8") {
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FTT_UTF8;
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "Unknown FAR entry type: " << s;
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FTT_SYMBOL;  // compiler requires return
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstring LoadArcTypeFromFar(const string &far_fname) {
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FarHeader hdr;
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (far_fname.empty()) {
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Reading FAR from standard in not supported";
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return "";
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!hdr.Read(far_fname)) {
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Error reading FAR: " << far_fname;
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return "";
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string atype = hdr.ArcType();
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (atype == "unknown") {
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Empty FST archive: " << far_fname;
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return "";
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return atype;
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstring LoadArcTypeFromFst(const string &fst_fname) {
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FstHeader hdr;
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ifstream in(fst_fname.c_str(), ifstream::in | ifstream::binary);
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!hdr.Read(in, fst_fname)) {
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Error reading FST: " << fst_fname;
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return "";
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return hdr.ArcType();
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
119