1// far.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Finite-State Transducer (FST) archive classes.
20//
21
22#ifndef FST_EXTENSIONS_FAR_FAR_H__
23#define FST_EXTENSIONS_FAR_FAR_H__
24
25#include <fst/extensions/far/stlist.h>
26#include <fst/extensions/far/sttable.h>
27#include <fst/fst.h>
28#include <fst/vector-fst.h>
29
30namespace fst {
31
// Kind of entry used when building an archive.
// NOTE(review): presumably FET_LINE = one entry per input line and
// FET_FILE = one entry per input file -- confirm against the users of this enum.
enum FarEntryType {
  FET_LINE,
  FET_FILE
};
// Kind of token used when building an archive.
// NOTE(review): presumably selects symbol-table, raw-byte, or UTF-8
// tokenization of the input text -- confirm against the users of this enum.
enum FarTokenType {
  FTT_SYMBOL,
  FTT_BYTE,
  FTT_UTF8
};
34
35// FST archive header class
36class FarHeader {
37 public:
38  const string &FarType() const { return fartype_; }
39  const string &ArcType() const { return arctype_; }
40
41  bool Read(const string &filename) {
42    FstHeader fsthdr;
43    if (filename.empty()) {  // Header reading unsupported on stdin.
44      return false;
45    } else if (IsSTTable(filename)) {  // Check if STTable
46      ReadSTTableHeader(filename, &fsthdr);
47      fartype_ = "sttable";
48      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
49      return true;
50    } else if (IsSTList(filename)) {  // Check if STList
51      ReadSTListHeader(filename, &fsthdr);
52      fartype_ = "sttable";
53      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
54      return true;
55    }
56    return false;
57  }
58
59 private:
60  string fartype_;
61  string arctype_;
62};
63
// Archive container formats. FAR_DEFAULT lets the factory choose a format.
enum FarType {
  FAR_DEFAULT = 0,
  FAR_STTABLE = 1,
  FAR_STLIST = 2,
  FAR_SSTABLE = 3
};
66
67// This class creates an archive of FSTs.
68template <class A>
69class FarWriter {
70 public:
71  typedef A Arc;
72
73  // Creates a new (empty) FST archive; returns NULL on error.
74  static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
75
76  // Adds an FST to the end of an archive. Keys must be non-empty and
77  // in lexicographic order. FSTs must have a suitable write method.
78  virtual void Add(const string &key, const Fst<A> &fst) = 0;
79
80  virtual FarType Type() const = 0;
81
82  virtual bool Error() const = 0;
83
84  virtual ~FarWriter() {}
85
86 protected:
87  FarWriter() {}
88
89 private:
90  DISALLOW_COPY_AND_ASSIGN(FarWriter);
91};
92
93
94// This class iterates through an existing archive of FSTs.
95template <class A>
96class FarReader {
97 public:
98 typedef A Arc;
99
100  // Opens an existing FST archive in a single file; returns NULL on error.
101  // Sets current position to the beginning of the achive.
102  static FarReader *Open(const string &filename);
103
104  // Opens an existing FST archive in multiple files; returns NULL on error.
105  // Sets current position to the beginning of the achive.
106  static FarReader *Open(const vector<string> &filenames);
107
108  // Resets current posision to beginning of archive.
109  virtual void Reset() = 0;
110
111  // Sets current position to first entry >= key.  Returns true if a match.
112  virtual bool Find(const string &key) = 0;
113
114  // Current position at end of archive?
115  virtual bool Done() const = 0;
116
117  // Move current position to next FST.
118  virtual void Next() = 0;
119
120  // Returns key at the current position. This reference is invalidated if
121  // the current position in the archive is changed.
122  virtual const string &GetKey() const = 0;
123
124  // Returns FST at the current position. This reference is invalidated if
125  // the current position in the archive is changed.
126  virtual const Fst<A> &GetFst() const = 0;
127
128  virtual FarType Type() const = 0;
129
130  virtual bool Error() const = 0;
131
132  virtual ~FarReader() {}
133
134 protected:
135  FarReader() {}
136
137 private:
138  DISALLOW_COPY_AND_ASSIGN(FarReader);
139};
140
141
142template <class A>
143class FstWriter {
144 public:
145  void operator()(ostream &strm, const Fst<A> &fst) const {
146    fst.Write(strm, FstWriteOptions());
147  }
148};
149
150
151template <class A>
152class STTableFarWriter : public FarWriter<A> {
153 public:
154  typedef A Arc;
155
156  static STTableFarWriter *Create(const string filename) {
157    STTableWriter<Fst<A>, FstWriter<A> > *writer =
158        STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
159    return new STTableFarWriter(writer);
160  }
161
162  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
163
164  FarType Type() const { return FAR_STTABLE; }
165
166  bool Error() const { return writer_->Error(); }
167
168  ~STTableFarWriter() { delete writer_; }
169
170 private:
171  explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
172      : writer_(writer) {}
173
174 private:
175  STTableWriter<Fst<A>, FstWriter<A> > *writer_;
176
177  DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
178};
179
180
181template <class A>
182class STListFarWriter : public FarWriter<A> {
183 public:
184  typedef A Arc;
185
186  static STListFarWriter *Create(const string filename) {
187    STListWriter<Fst<A>, FstWriter<A> > *writer =
188        STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
189    return new STListFarWriter(writer);
190  }
191
192  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
193
194  FarType Type() const { return FAR_STLIST; }
195
196  bool Error() const { return writer_->Error(); }
197
198  ~STListFarWriter() { delete writer_; }
199
200 private:
201  explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
202      : writer_(writer) {}
203
204 private:
205  STListWriter<Fst<A>, FstWriter<A> > *writer_;
206
207  DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
208};
209
210
211template <class A>
212FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
213  switch(type) {
214    case FAR_DEFAULT:
215      if (filename.empty())
216        return STListFarWriter<A>::Create(filename);
217    case FAR_STTABLE:
218      return STTableFarWriter<A>::Create(filename);
219      break;
220    case FAR_STLIST:
221      return STListFarWriter<A>::Create(filename);
222      break;
223    default:
224      LOG(ERROR) << "FarWriter::Create: unknown far type";
225      return 0;
226  }
227}
228
229
230template <class A>
231class FstReader {
232 public:
233  Fst<A> *operator()(istream &strm) const {
234    return Fst<A>::Read(strm, FstReadOptions());
235  }
236};
237
238
239template <class A>
240class STTableFarReader : public FarReader<A> {
241 public:
242  typedef A Arc;
243
244  static STTableFarReader *Open(const string &filename) {
245    STTableReader<Fst<A>, FstReader<A> > *reader =
246        STTableReader<Fst<A>, FstReader<A> >::Open(filename);
247    // TODO: error check
248    return new STTableFarReader(reader);
249  }
250
251  static STTableFarReader *Open(const vector<string> &filenames) {
252    STTableReader<Fst<A>, FstReader<A> > *reader =
253        STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
254    // TODO: error check
255    return new STTableFarReader(reader);
256  }
257
258  void Reset() { reader_->Reset(); }
259
260  bool Find(const string &key) { return reader_->Find(key); }
261
262  bool Done() const { return reader_->Done(); }
263
264  void Next() { return reader_->Next(); }
265
266  const string &GetKey() const { return reader_->GetKey(); }
267
268  const Fst<A> &GetFst() const { return reader_->GetEntry(); }
269
270  FarType Type() const { return FAR_STTABLE; }
271
272  bool Error() const { return reader_->Error(); }
273
274  ~STTableFarReader() { delete reader_; }
275
276 private:
277  explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
278      : reader_(reader) {}
279
280 private:
281  STTableReader<Fst<A>, FstReader<A> > *reader_;
282
283  DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
284};
285
286
287template <class A>
288class STListFarReader : public FarReader<A> {
289 public:
290  typedef A Arc;
291
292  static STListFarReader *Open(const string &filename) {
293    STListReader<Fst<A>, FstReader<A> > *reader =
294        STListReader<Fst<A>, FstReader<A> >::Open(filename);
295    // TODO: error check
296    return new STListFarReader(reader);
297  }
298
299  static STListFarReader *Open(const vector<string> &filenames) {
300    STListReader<Fst<A>, FstReader<A> > *reader =
301        STListReader<Fst<A>, FstReader<A> >::Open(filenames);
302    // TODO: error check
303    return new STListFarReader(reader);
304  }
305
306  void Reset() { reader_->Reset(); }
307
308  bool Find(const string &key) { return reader_->Find(key); }
309
310  bool Done() const { return reader_->Done(); }
311
312  void Next() { return reader_->Next(); }
313
314  const string &GetKey() const { return reader_->GetKey(); }
315
316  const Fst<A> &GetFst() const { return reader_->GetEntry(); }
317
318  FarType Type() const { return FAR_STLIST; }
319
320  bool Error() const { return reader_->Error(); }
321
322  ~STListFarReader() { delete reader_; }
323
324 private:
325  explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
326      : reader_(reader) {}
327
328 private:
329  STListReader<Fst<A>, FstReader<A> > *reader_;
330
331  DISALLOW_COPY_AND_ASSIGN(STListFarReader);
332};
333
334
335template <class A>
336FarReader<A> *FarReader<A>::Open(const string &filename) {
337  if (filename.empty())
338    return STListFarReader<A>::Open(filename);
339  else if (IsSTTable(filename))
340    return STTableFarReader<A>::Open(filename);
341  else if (IsSTList(filename))
342    return STListFarReader<A>::Open(filename);
343  return 0;
344}
345
346
347template <class A>
348FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
349  if (!filenames.empty() && filenames[0].empty())
350    return STListFarReader<A>::Open(filenames);
351  else if (!filenames.empty() && IsSTTable(filenames[0]))
352    return STTableFarReader<A>::Open(filenames);
353  else if (!filenames.empty() && IsSTList(filenames[0]))
354    return STListFarReader<A>::Open(filenames);
355  return 0;
356}
357
358}  // namespace fst
359
360#endif  // FST_EXTENSIONS_FAR_FAR_H__
361