1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// expanded-fst.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Generic FST augmented with state count - interface class definition.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_EXPANDED_FST_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_EXPANDED_FST_H__
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <sys/types.h>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A generic FST plus state count.
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ExpandedFst : public Fst<A> {
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual StateId NumStates() const = 0;  // State count
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Get a copy of this ExpandedFst. See Fst<>::Copy() for further doc.
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ExpandedFst<A> *Copy(bool safe = false) const = 0;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Read an ExpandedFst from an input stream; return NULL on error.
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static ExpandedFst<A> *Read(istream &strm, const FstReadOptions &opts) {
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FstReadOptions ropts(opts);
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FstHeader hdr;
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ropts.header)
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr = *opts.header;
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else {
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!hdr.Read(strm, opts.source))
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 0;
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ropts.header = &hdr;
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!(hdr.Properties() & kExpanded)) {
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "ExpandedFst::Read: Not an ExpandedFst: " << ropts.source;
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FstRegister<A> *registr = FstRegister<A>::GetRegister();
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const typename FstRegister<A>::Reader reader =
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      registr->GetReader(hdr.FstType());
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!reader) {
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "ExpandedFst::Read: Unknown FST type \"" << hdr.FstType()
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << "\" (arc type = \"" << A::Type()
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << "\"): " << ropts.source;
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Fst<A> *fst = reader(strm, ropts);
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!fst) return 0;
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return static_cast<ExpandedFst<A> *>(fst);
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Read an ExpandedFst from a file; return NULL on error.
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Empty filename reads from standard input.
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static ExpandedFst<A> *Read(const string &filename) {
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!filename.empty()) {
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!strm) {
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename;
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 0;
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return Read(strm, FstReadOptions(filename));
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
85dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return Read(cin, FstReadOptions("standard input"));
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace internal {
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//  ExpandedFst<A> case - abstract methods.
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename A::Weight Final(const ExpandedFst<A> &fst, typename A::StateId s) {
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return fst.Final(s);
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumArcs(const ExpandedFst<A> &fst, typename A::StateId s) {
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return fst.NumArcs(s);
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumInputEpsilons(const ExpandedFst<A> &fst, typename A::StateId s) {
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return fst.NumInputEpsilons(s);
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumOutputEpsilons(const ExpandedFst<A> &fst, typename A::StateId s) {
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return fst.NumOutputEpsilons(s);
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace internal
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A useful alias when using StdArc.
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ExpandedFst<StdArc> StdExpandedFst;
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This is a helper class template useful for attaching an ExpandedFst
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// interface to its implementation, handling reference counting. It
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// delegates to ImplToFst the handling of the Fst interface methods.
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate < class I, class F = ExpandedFst<typename I::Arc> >
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ImplToExpandedFst : public ImplToFst<I, F> {
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename I::Arc Arc;
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ImplToFst<I, F>::GetImpl;
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual StateId NumStates() const { return GetImpl()->NumStates(); }
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected:
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToExpandedFst() : ImplToFst<I, F>() {}
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToExpandedFst(I *impl) : ImplToFst<I, F>(impl) {}
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst)
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<I, F>(fst) {}
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst, bool safe)
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<I, F>(fst, safe) {}
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Read FST implementation from a file; return NULL on error.
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Empty filename reads from standard input.
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static I *Read(const string &filename) {
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!filename.empty()) {
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!strm) {
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename;
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 0;
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return I::Read(strm, FstReadOptions(filename));
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
157dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return I::Read(cin, FstReadOptions("standard input"));
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Disallow
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToExpandedFst<I, F> &operator=(const ImplToExpandedFst<I, F> &fst);
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToExpandedFst<I, F> &operator=(const Fst<Arc> &fst) {
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "ImplToExpandedFst: Assignment operator disallowed";
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GetImpl()->SetProperties(kError, kError);
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return *this;
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Function to return the number of states in an FST, counting them
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// if necessary.
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc>
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename Arc::StateId CountStates(const Fst<Arc> &fst) {
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (fst.Properties(kExpanded, false)) {
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const ExpandedFst<Arc> *efst = static_cast<const ExpandedFst<Arc> *>(&fst);
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return efst->NumStates();
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename Arc::StateId nstates = 0;
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (StateIterator< Fst<Arc> > siter(fst); !siter.Done(); siter.Next())
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++nstates;
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return nstates;
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_EXPANDED_FST_H__
190