1// far.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Finite-State Transducer (FST) archive classes.
20//
21
22#ifndef FST_EXTENSIONS_FAR_FAR_H__
23#define FST_EXTENSIONS_FAR_FAR_H__
24
25#include <fst/extensions/far/stlist.h>
26#include <fst/extensions/far/sttable.h>
27#include <fst/fst.h>
28#include <fst/vector-fst.h>
29
30namespace fst {
31
// Selects how archive entries are delimited in a text source.
// NOTE(review): presumably one entry per line vs. one entry per file --
// confirm against the code that consumes this enum.
enum FarEntryType {
  FET_LINE,
  FET_FILE
};

// Selects how entry text is split into tokens.
// NOTE(review): presumably symbol-table lookup, raw bytes, or UTF-8 code
// points -- confirm against the code that consumes this enum.
enum FarTokenType {
  FTT_SYMBOL,
  FTT_BYTE,
  FTT_UTF8
};
34
35inline bool IsFst(const string &filename) {
36  ifstream strm(filename.c_str());
37  if (!strm)
38    return false;
39  return IsFstHeader(strm, filename);
40}
41
42// FST archive header class
43class FarHeader {
44 public:
45  const string &FarType() const { return fartype_; }
46  const string &ArcType() const { return arctype_; }
47
48  bool Read(const string &filename) {
49    FstHeader fsthdr;
50    if (filename.empty()) {
51      // Header reading unsupported on stdin. Assumes STList and StdArc.
52      fartype_ = "stlist";
53      arctype_ = "standard";
54      return true;
55    } else if (IsSTTable(filename)) {  // Check if STTable
56      ReadSTTableHeader(filename, &fsthdr);
57      fartype_ = "sttable";
58      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
59      return true;
60    } else if (IsSTList(filename)) {  // Check if STList
61      ReadSTListHeader(filename, &fsthdr);
62      fartype_ = "sttable";
63      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
64      return true;
65    } else if (IsFst(filename)) {  // Check if Fst
66      ifstream istrm(filename.c_str());
67      fsthdr.Read(istrm, filename);
68      fartype_ = "fst";
69      arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
70      return true;
71    }
72    return false;
73  }
74
75 private:
76  string fartype_;
77  string arctype_;
78};
79
// Archive container formats understood by FarWriter and FarReader.
// FAR_DEFAULT lets FarWriter::Create() pick the format itself.
enum FarType {
  FAR_DEFAULT = 0,
  FAR_STTABLE = 1,
  FAR_STLIST = 2,
  FAR_FST = 3
};
86
87// This class creates an archive of FSTs.
88template <class A>
89class FarWriter {
90 public:
91  typedef A Arc;
92
93  // Creates a new (empty) FST archive; returns NULL on error.
94  static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
95
96  // Adds an FST to the end of an archive. Keys must be non-empty and
97  // in lexicographic order. FSTs must have a suitable write method.
98  virtual void Add(const string &key, const Fst<A> &fst) = 0;
99
100  virtual FarType Type() const = 0;
101
102  virtual bool Error() const = 0;
103
104  virtual ~FarWriter() {}
105
106 protected:
107  FarWriter() {}
108
109 private:
110  DISALLOW_COPY_AND_ASSIGN(FarWriter);
111};
112
113
114// This class iterates through an existing archive of FSTs.
115template <class A>
116class FarReader {
117 public:
118 typedef A Arc;
119
120  // Opens an existing FST archive in a single file; returns NULL on error.
121  // Sets current position to the beginning of the achive.
122  static FarReader *Open(const string &filename);
123
124  // Opens an existing FST archive in multiple files; returns NULL on error.
125  // Sets current position to the beginning of the achive.
126  static FarReader *Open(const vector<string> &filenames);
127
128  // Resets current posision to beginning of archive.
129  virtual void Reset() = 0;
130
131  // Sets current position to first entry >= key.  Returns true if a match.
132  virtual bool Find(const string &key) = 0;
133
134  // Current position at end of archive?
135  virtual bool Done() const = 0;
136
137  // Move current position to next FST.
138  virtual void Next() = 0;
139
140  // Returns key at the current position. This reference is invalidated if
141  // the current position in the archive is changed.
142  virtual const string &GetKey() const = 0;
143
144  // Returns FST at the current position. This reference is invalidated if
145  // the current position in the archive is changed.
146  virtual const Fst<A> &GetFst() const = 0;
147
148  virtual FarType Type() const = 0;
149
150  virtual bool Error() const = 0;
151
152  virtual ~FarReader() {}
153
154 protected:
155  FarReader() {}
156
157 private:
158  DISALLOW_COPY_AND_ASSIGN(FarReader);
159};
160
161
162template <class A>
163class FstWriter {
164 public:
165  void operator()(ostream &strm, const Fst<A> &fst) const {
166    fst.Write(strm, FstWriteOptions());
167  }
168};
169
170
171template <class A>
172class STTableFarWriter : public FarWriter<A> {
173 public:
174  typedef A Arc;
175
176  static STTableFarWriter *Create(const string &filename) {
177    STTableWriter<Fst<A>, FstWriter<A> > *writer =
178        STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
179    return new STTableFarWriter(writer);
180  }
181
182  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
183
184  FarType Type() const { return FAR_STTABLE; }
185
186  bool Error() const { return writer_->Error(); }
187
188  ~STTableFarWriter() { delete writer_; }
189
190 private:
191  explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
192      : writer_(writer) {}
193
194 private:
195  STTableWriter<Fst<A>, FstWriter<A> > *writer_;
196
197  DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
198};
199
200
201template <class A>
202class STListFarWriter : public FarWriter<A> {
203 public:
204  typedef A Arc;
205
206  static STListFarWriter *Create(const string &filename) {
207    STListWriter<Fst<A>, FstWriter<A> > *writer =
208        STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
209    return new STListFarWriter(writer);
210  }
211
212  void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
213
214  FarType Type() const { return FAR_STLIST; }
215
216  bool Error() const { return writer_->Error(); }
217
218  ~STListFarWriter() { delete writer_; }
219
220 private:
221  explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
222      : writer_(writer) {}
223
224 private:
225  STListWriter<Fst<A>, FstWriter<A> > *writer_;
226
227  DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
228};
229
230
231template <class A>
232class FstFarWriter : public FarWriter<A> {
233 public:
234  typedef A Arc;
235
236  explicit FstFarWriter(const string &filename)
237      : filename_(filename), error_(false), written_(false) {}
238
239  static FstFarWriter *Create(const string &filename) {
240    return new FstFarWriter(filename);
241  }
242
243  void Add(const string &key, const Fst<A> &fst) {
244    if (written_) {
245      LOG(WARNING) << "FstFarWriter::Add: only one Fst supported,"
246                 << " subsequent entries discarded.";
247    } else {
248      error_ = !fst.Write(filename_);
249      written_ = true;
250    }
251  }
252
253  FarType Type() const { return FAR_FST; }
254
255  bool Error() const { return error_; }
256
257  ~FstFarWriter() {}
258
259 private:
260  string filename_;
261  bool error_;
262  bool written_;
263
264  DISALLOW_COPY_AND_ASSIGN(FstFarWriter);
265};
266
267
268template <class A>
269FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
270  switch(type) {
271    case FAR_DEFAULT:
272      if (filename.empty())
273        return STListFarWriter<A>::Create(filename);
274    case FAR_STTABLE:
275      return STTableFarWriter<A>::Create(filename);
276    case FAR_STLIST:
277      return STListFarWriter<A>::Create(filename);
278    case FAR_FST:
279      return FstFarWriter<A>::Create(filename);
280    default:
281      LOG(ERROR) << "FarWriter::Create: unknown far type";
282      return 0;
283  }
284}
285
286
287template <class A>
288class FstReader {
289 public:
290  Fst<A> *operator()(istream &strm) const {
291    return Fst<A>::Read(strm, FstReadOptions());
292  }
293};
294
295
296template <class A>
297class STTableFarReader : public FarReader<A> {
298 public:
299  typedef A Arc;
300
301  static STTableFarReader *Open(const string &filename) {
302    STTableReader<Fst<A>, FstReader<A> > *reader =
303        STTableReader<Fst<A>, FstReader<A> >::Open(filename);
304    // TODO: error check
305    return new STTableFarReader(reader);
306  }
307
308  static STTableFarReader *Open(const vector<string> &filenames) {
309    STTableReader<Fst<A>, FstReader<A> > *reader =
310        STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
311    // TODO: error check
312    return new STTableFarReader(reader);
313  }
314
315  void Reset() { reader_->Reset(); }
316
317  bool Find(const string &key) { return reader_->Find(key); }
318
319  bool Done() const { return reader_->Done(); }
320
321  void Next() { return reader_->Next(); }
322
323  const string &GetKey() const { return reader_->GetKey(); }
324
325  const Fst<A> &GetFst() const { return reader_->GetEntry(); }
326
327  FarType Type() const { return FAR_STTABLE; }
328
329  bool Error() const { return reader_->Error(); }
330
331  ~STTableFarReader() { delete reader_; }
332
333 private:
334  explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
335      : reader_(reader) {}
336
337 private:
338  STTableReader<Fst<A>, FstReader<A> > *reader_;
339
340  DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
341};
342
343
344template <class A>
345class STListFarReader : public FarReader<A> {
346 public:
347  typedef A Arc;
348
349  static STListFarReader *Open(const string &filename) {
350    STListReader<Fst<A>, FstReader<A> > *reader =
351        STListReader<Fst<A>, FstReader<A> >::Open(filename);
352    // TODO: error check
353    return new STListFarReader(reader);
354  }
355
356  static STListFarReader *Open(const vector<string> &filenames) {
357    STListReader<Fst<A>, FstReader<A> > *reader =
358        STListReader<Fst<A>, FstReader<A> >::Open(filenames);
359    // TODO: error check
360    return new STListFarReader(reader);
361  }
362
363  void Reset() { reader_->Reset(); }
364
365  bool Find(const string &key) { return reader_->Find(key); }
366
367  bool Done() const { return reader_->Done(); }
368
369  void Next() { return reader_->Next(); }
370
371  const string &GetKey() const { return reader_->GetKey(); }
372
373  const Fst<A> &GetFst() const { return reader_->GetEntry(); }
374
375  FarType Type() const { return FAR_STLIST; }
376
377  bool Error() const { return reader_->Error(); }
378
379  ~STListFarReader() { delete reader_; }
380
381 private:
382  explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
383      : reader_(reader) {}
384
385 private:
386  STListReader<Fst<A>, FstReader<A> > *reader_;
387
388  DISALLOW_COPY_AND_ASSIGN(STListFarReader);
389};
390
391template <class A>
392class FstFarReader : public FarReader<A> {
393 public:
394  typedef A Arc;
395
396  static FstFarReader *Open(const string &filename) {
397    vector<string> filenames;
398    filenames.push_back(filename);
399    return new FstFarReader<A>(filenames);
400  }
401
402  static FstFarReader *Open(const vector<string> &filenames) {
403    return new FstFarReader<A>(filenames);
404  }
405
406  FstFarReader(const vector<string> &filenames)
407      : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) {
408    sort(keys_.begin(), keys_.end());
409    streams_.resize(keys_.size(), 0);
410    for (size_t i = 0; i < keys_.size(); ++i) {
411      if (keys_[i].empty()) {
412        if (!has_stdin_) {
413          streams_[i] = &cin;
414          //sources_[i] = "stdin";
415          has_stdin_ = true;
416        } else {
417          FSTERROR() << "FstFarReader::FstFarReader: stdin should only "
418                     << "appear once in the input file list.";
419          error_ = true;
420          return;
421        }
422      } else {
423        streams_[i] = new ifstream(
424            keys_[i].c_str(), ifstream::in | ifstream::binary);
425      }
426    }
427    if (pos_ >= keys_.size()) return;
428    ReadFst();
429  }
430
431  void Reset() {
432    if (has_stdin_) {
433      FSTERROR() << "FstFarReader::Reset: operation not supported on stdin";
434      error_ = true;
435      return;
436    }
437    pos_ = 0;
438    ReadFst();
439  }
440
441  bool Find(const string &key) {
442    if (has_stdin_) {
443      FSTERROR() << "FstFarReader::Find: operation not supported on stdin";
444      error_ = true;
445      return false;
446    }
447    pos_ = 0;//TODO
448    ReadFst();
449    return true;
450  }
451
452  bool Done() const { return error_ || pos_ >= keys_.size(); }
453
454  void Next() {
455    ++pos_;
456    ReadFst();
457  }
458
459  const string &GetKey() const {
460    return keys_[pos_];
461  }
462
463  const Fst<A> &GetFst() const {
464    return *fst_;
465  }
466
467  FarType Type() const { return FAR_FST; }
468
469  bool Error() const { return error_; }
470
471  ~FstFarReader() {
472    if (fst_) delete fst_;
473    for (size_t i = 0; i < keys_.size(); ++i)
474      delete streams_[i];
475  }
476
477 private:
478  void ReadFst() {
479    if (fst_) delete fst_;
480    if (pos_ >= keys_.size()) return;
481    streams_[pos_]->seekg(0);
482    fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions());
483    if (!fst_) {
484      FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_];
485      error_ = true;
486    }
487  }
488
489 private:
490  vector<string> keys_;
491  vector<istream*> streams_;
492  bool has_stdin_;
493  size_t pos_;
494  mutable Fst<A> *fst_;
495  mutable bool error_;
496
497  DISALLOW_COPY_AND_ASSIGN(FstFarReader);
498};
499
500template <class A>
501FarReader<A> *FarReader<A>::Open(const string &filename) {
502  if (filename.empty())
503    return STListFarReader<A>::Open(filename);
504  else if (IsSTTable(filename))
505    return STTableFarReader<A>::Open(filename);
506  else if (IsSTList(filename))
507    return STListFarReader<A>::Open(filename);
508  else if (IsFst(filename))
509    return FstFarReader<A>::Open(filename);
510  return 0;
511}
512
513
514template <class A>
515FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
516  if (!filenames.empty() && filenames[0].empty())
517    return STListFarReader<A>::Open(filenames);
518  else if (!filenames.empty() && IsSTTable(filenames[0]))
519    return STTableFarReader<A>::Open(filenames);
520  else if (!filenames.empty() && IsSTList(filenames[0]))
521    return STListFarReader<A>::Open(filenames);
522  else if (!filenames.empty() && IsFst(filenames[0]))
523    return FstFarReader<A>::Open(filenames);
524  return 0;
525}
526
527}  // namespace fst
528
529#endif  // FST_EXTENSIONS_FAR_FAR_H__
530