fst.h revision 5b6dc79427b8f7eeb6a7ff68034ab8548ce670ea
1d19ac0c75a019273e03922e2252ed262578a43d1Bill Wendling// fst.h
2602890dd8ef53c6e8d60a2752b97940f7a58de1aBill Wendling
3602890dd8ef53c6e8d60a2752b97940f7a58de1aBill Wendling// Licensed under the Apache License, Version 2.0 (the "License");
4602890dd8ef53c6e8d60a2752b97940f7a58de1aBill Wendling// you may not use this file except in compliance with the License.
546c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach// You may obtain a copy of the License at
6602890dd8ef53c6e8d60a2752b97940f7a58de1aBill Wendling//
7602890dd8ef53c6e8d60a2752b97940f7a58de1aBill Wendling//     http://www.apache.org/licenses/LICENSE-2.0
846c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach//
9af2b573614c7d853879ff24eb9a86d1c36acc198Bill Wendling// Unless required by applicable law or agreed to in writing, software
10af2b573614c7d853879ff24eb9a86d1c36acc198Bill Wendling// distributed under the License is distributed on an "AS IS" BASIS,
1146c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12849f2e381e4e83dc4f60e4a1fe6e6bb47bde8248Bill Wendling// See the License for the specific language governing permissions and
13849f2e381e4e83dc4f60e4a1fe6e6bb47bde8248Bill Wendling// limitations under the License.
1446c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach//
1546c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach// Copyright 2005-2010 Google, Inc.
1618901d63bf0deb117bd7a1ad69b25faa422ce378Owen Anderson// Author: riley@google.com (Michael Riley)
17d19ac0c75a019273e03922e2252ed262578a43d1Bill Wendling//
18d19ac0c75a019273e03922e2252ed262578a43d1Bill Wendling// \file
19d19ac0c75a019273e03922e2252ed262578a43d1Bill Wendling// Finite-State Transducer (FST) - abstract base class definition,
20d19ac0c75a019273e03922e2252ed262578a43d1Bill Wendling// state and arc iterator interface, and suggested base implementation.
2146c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach//
2246c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach
2346c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach#ifndef FST_LIB_FST_H__
245cbbf68e35a053c904548564da13d4a8596f988bBill Wendling#define FST_LIB_FST_H__
255cbbf68e35a053c904548564da13d4a8596f988bBill Wendling
265cbbf68e35a053c904548564da13d4a8596f988bBill Wendling#include <stddef.h>
2746c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach#include <sys/types.h>
2846c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach#include <cmath>
292f17bf2a4406d89b5e127306cbd0fc862e0a6bd5Bill Wendling#include <string>
302f17bf2a4406d89b5e127306cbd0fc862e0a6bd5Bill Wendling
3146c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach#include <fst/compat.h>
322f17bf2a4406d89b5e127306cbd0fc862e0a6bd5Bill Wendling#include <fst/types.h>
332f17bf2a4406d89b5e127306cbd0fc862e0a6bd5Bill Wendling
342f17bf2a4406d89b5e127306cbd0fc862e0a6bd5Bill Wendling#include <fst/arc.h>
3546c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach#include <fst/properties.h>
3646c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach#include <fst/register.h>
37ef4a68badbde372faac9ca47efb9001def57a43dBill Wendling#include <iostream>
38ef4a68badbde372faac9ca47efb9001def57a43dBill Wendling#include <fstream>
3946c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach#include <sstream>
40fdcee77887372dbf6589d47cc33094965b679f24Bruno Cardoso Lopes#include <fst/symbol-table.h>
4146c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach#include <fst/util.h>
4246c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbach
43fa5bd27fbe5188ca708ac0dda4f32d90505da9f5Bruno Cardoso Lopes
441b10d5be40313b4e246e85cf375dfa3452ab306bBruno Cardoso LopesDECLARE_bool(fst_align);
454ee72398a15cd7b8e217bb3d34a4e9e0e72caca1Amaury de la Vieuville
461b10d5be40313b4e246e85cf375dfa3452ab306bBruno Cardoso Lopesnamespace fst {
47a2b6e4151b75248f9dbf8067186cba673520f8f4Bruno Cardoso Lopes
4846c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbachbool IsFstHeader(istream &, const string &);
49be64b394317feb8d7bcb732bdfb35e0b286efd4cBruno Cardoso Lopes
50be64b394317feb8d7bcb732bdfb35e0b286efd4cBruno Cardoso Lopesclass FstHeader;
5146c38aff89c36a95bc9e61c6133056a5de9e5e59Jim Grosbachtemplate <class A> class StateIteratorData;
52template <class A> class ArcIteratorData;
53template <class A> class MatcherBase;
54
55struct FstReadOptions {
56  // FileReadMode(s) are advisory, there are many conditions than prevent a
57  // file from being mapped, READ mode will be selected in these cases with
58  // a warning indicating why it was chosen.
59  enum FileReadMode { READ, MAP };
60
61  string source;                // Where you're reading from
62  const FstHeader *header;      // Pointer to Fst header. If non-zero, use
63                                // this info (don't read a stream header)
64  const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
65                                // this info (read and skip stream isymbols)
66  const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
67                                // this info (read and skip stream osymbols)
68  FileReadMode mode;            // Read or map files (advisory, if possible)
69
70  explicit FstReadOptions(const string& src = "<unspecified>",
71                          const FstHeader *hdr = 0,
72                          const SymbolTable* isym = 0,
73                          const SymbolTable* osym = 0);
74
75  explicit FstReadOptions(const string& src,
76                          const SymbolTable* isym,
77                          const SymbolTable* osym = 0);
78
79  // Helper function to convert strings FileReadModes into their enum value.
80  static FileReadMode ReadMode(const string &mode);
81};
82
83struct FstWriteOptions {
84  string source;                 // Where you're writing to
85  bool write_header;             // Write the header?
86  bool write_isymbols;           // Write input symbols?
87  bool write_osymbols;           // Write output symbols?
88  bool align;                    // Write data aligned where appropriate;
89                                 // this may fail on pipes
90
91  explicit FstWriteOptions(const string& src = "<unspecifed>",
92                           bool hdr = true, bool isym = true,
93                           bool osym = true, bool alig = FLAGS_fst_align)
94      : source(src), write_header(hdr),
95        write_isymbols(isym), write_osymbols(osym), align(alig) {}
96};
97
98//
99// Fst HEADER CLASS
100//
101// This is the recommended Fst file header representation.
102//
103class FstHeader {
104 public:
105  enum {
106    HAS_ISYMBOLS = 0x1,          // Has input symbol table
107    HAS_OSYMBOLS = 0x2,          // Has output symbol table
108    IS_ALIGNED   = 0x4,          // Memory-aligned (where appropriate)
109  } Flags;
110
111  FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
112                numstates_(0), numarcs_(0) {}
113  const string &FstType() const { return fsttype_; }
114  const string &ArcType() const { return arctype_; }
115  int32 Version() const { return version_; }
116  int32 GetFlags() const { return flags_; }
117  uint64 Properties() const { return properties_; }
118  int64 Start() const { return start_; }
119  int64 NumStates() const { return numstates_; }
120  int64 NumArcs() const { return numarcs_; }
121
122  void SetFstType(const string& type) { fsttype_ = type; }
123  void SetArcType(const string& type) { arctype_ = type; }
124  void SetVersion(int32 version) { version_ = version; }
125  void SetFlags(int32 flags) { flags_ = flags; }
126  void SetProperties(uint64 properties) { properties_ = properties; }
127  void SetStart(int64 start) { start_ = start; }
128  void SetNumStates(int64 numstates) { numstates_ = numstates; }
129  void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
130
131  bool Read(istream &strm, const string &source, bool rewind = false);
132  bool Write(ostream &strm, const string &source) const;
133
134 private:
135
136  string fsttype_;                   // E.g. "vector"
137  string arctype_;                   // E.g. "standard"
138  int32 version_;                    // Type version #
139  int32 flags_;                      // File format bits
140  uint64 properties_;                // FST property bits
141  int64 start_;                      // Start state
142  int64 numstates_;                  // # of states
143  int64 numarcs_;                    // # of arcs
144};
145
146
147// Specifies matcher action.
148enum MatchType { MATCH_INPUT,      // Match input label.
149                 MATCH_OUTPUT,     // Match output label.
150                 MATCH_BOTH,       // Match input or output label.
151                 MATCH_NONE,       // Match nothing.
152                 MATCH_UNKNOWN };  // Match type unknown.
153
154//
155// Fst INTERFACE CLASS DEFINITION
156//
157
158// A generic FST, templated on the arc definition, with
159// common-demoninator methods (use StateIterator and ArcIterator to
160// iterate over its states and arcs).
161template <class A>
162class Fst {
163 public:
164  typedef A Arc;
165  typedef typename A::Weight Weight;
166  typedef typename A::StateId StateId;
167
168  virtual ~Fst() {}
169
170  virtual StateId Start() const = 0;          // Initial state
171
172  virtual Weight Final(StateId) const = 0;    // State's final weight
173
174  virtual size_t NumArcs(StateId) const = 0;  // State's arc count
175
176  virtual size_t NumInputEpsilons(StateId)
177      const = 0;                              // State's input epsilon count
178
179  virtual size_t NumOutputEpsilons(StateId)
180      const = 0;                              // State's output epsilon count
181
182  // If test=false, return stored properties bits for mask (some poss. unknown)
183  // If test=true, return property bits for mask (computing o.w. unknown)
184  virtual uint64 Properties(uint64 mask, bool test)
185      const = 0;  // Property bits
186
187  virtual const string& Type() const = 0;    // Fst type name
188
189  // Get a copy of this Fst. The copying behaves as follows:
190  //
191  // (1) The copying is constant time if safe = false or if safe = true
192  // and is on an otherwise unaccessed Fst.
193  //
194  // (2) If safe = true, the copy is thread-safe in that the original
195  // and copy can be safely accessed (but not necessarily mutated) by
196  // separate threads. For some Fst types, 'Copy(true)' should only be
197  // called on an Fst that has not otherwise been accessed. Its behavior
198  // is undefined otherwise.
199  //
200  // (3) If a MutableFst is copied and then mutated, then the original is
201  // unmodified and vice versa (often by a copy-on-write on the initial
202  // mutation, which may not be constant time).
203  virtual Fst<A> *Copy(bool safe = false) const = 0;
204
205  // Read an Fst from an input stream; returns NULL on error
206  static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
207    FstReadOptions ropts(opts);
208    FstHeader hdr;
209    if (ropts.header)
210      hdr = *opts.header;
211    else {
212      if (!hdr.Read(strm, opts.source))
213        return 0;
214      ropts.header = &hdr;
215    }
216    FstRegister<A> *registr = FstRegister<A>::GetRegister();
217    const typename FstRegister<A>::Reader reader =
218      registr->GetReader(hdr.FstType());
219    if (!reader) {
220      LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
221                 << "\" (arc type = \"" << A::Type()
222                 << "\"): " << ropts.source;
223      return 0;
224    }
225    return reader(strm, ropts);
226  };
227
228  // Read an Fst from a file; return NULL on error
229  // Empty filename reads from standard input
230  static Fst<A> *Read(const string &filename) {
231    if (!filename.empty()) {
232      ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
233      if (!strm) {
234        LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
235        return 0;
236      }
237      return Read(strm, FstReadOptions(filename));
238    } else {
239      return Read(cin, FstReadOptions("standard input"));
240    }
241  }
242
243  // Write an Fst to an output stream; return false on error
244  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
245    LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
246               << " Fst type";
247    return false;
248  }
249
250  // Write an Fst to a file; return false on error
251  // Empty filename writes to standard output
252  virtual bool Write(const string &filename) const {
253    LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
254               << " Fst type";
255    return false;
256  }
257
258  // Return input label symbol table; return NULL if not specified
259  virtual const SymbolTable* InputSymbols() const = 0;
260
261  // Return output label symbol table; return NULL if not specified
262  virtual const SymbolTable* OutputSymbols() const = 0;
263
264  // For generic state iterator construction; not normally called
265  // directly by users.
266  virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
267
268  // For generic arc iterator construction; not normally called
269  // directly by users.
270  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
271
272  // For generic matcher construction; not normally called
273  // directly by users.
274  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
275
276 protected:
277  bool WriteFile(const string &filename) const {
278    if (!filename.empty()) {
279      ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
280      if (!strm) {
281        LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
282        return false;
283      }
284      return Write(strm, FstWriteOptions(filename));
285    } else {
286      return Write(cout, FstWriteOptions("standard output"));
287    }
288  }
289};
290
291
292//
293// STATE and ARC ITERATOR DEFINITIONS
294//
295
296// State iterator interface templated on the Arc definition; used
297// for StateIterator specializations returned by the InitStateIterator
298// Fst method.
299template <class A>
300class StateIteratorBase {
301 public:
302  typedef A Arc;
303  typedef typename A::StateId StateId;
304
305  virtual ~StateIteratorBase() {}
306
307  bool Done() const { return Done_(); }       // End of iterator?
308  StateId Value() const { return Value_(); }  // Current state (when !Done)
309  void Next() { Next_(); }      // Advance to next state (when !Done)
310  void Reset() { Reset_(); }    // Return to initial condition
311
312 private:
313  // This allows base class virtual access to non-virtual derived-
314  // class members of the same name. It makes the derived class more
315  // efficient to use but unsafe to further derive.
316  virtual bool Done_() const = 0;
317  virtual StateId Value_() const = 0;
318  virtual void Next_() = 0;
319  virtual void Reset_() = 0;
320};
321
322
323// StateIterator initialization data
324
325template <class A> struct StateIteratorData {
326  StateIteratorBase<A> *base;   // Specialized iterator if non-zero
327  typename A::StateId nstates;  // O.w. total # of states
328};
329
330
331// Generic state iterator, templated on the FST definition
332// - a wrapper around pointer to specific one.
333// Here is a typical use: \code
334//   for (StateIterator<StdFst> siter(fst);
335//        !siter.Done();
336//        siter.Next()) {
337//     StateId s = siter.Value();
338//     ...
339//   } \endcode
340template <class F>
341class StateIterator {
342 public:
343  typedef F FST;
344  typedef typename F::Arc Arc;
345  typedef typename Arc::StateId StateId;
346
347  explicit StateIterator(const F &fst) : s_(0) {
348    fst.InitStateIterator(&data_);
349  }
350
351  ~StateIterator() { if (data_.base) delete data_.base; }
352
353  bool Done() const {
354    return data_.base ? data_.base->Done() : s_ >= data_.nstates;
355  }
356
357  StateId Value() const { return data_.base ? data_.base->Value() : s_; }
358
359  void Next() {
360    if (data_.base)
361      data_.base->Next();
362    else
363      ++s_;
364  }
365
366  void Reset() {
367    if (data_.base)
368      data_.base->Reset();
369    else
370      s_ = 0;
371  }
372
373 private:
374  StateIteratorData<Arc> data_;
375  StateId s_;
376
377  DISALLOW_COPY_AND_ASSIGN(StateIterator);
378};
379
380
381// Flags to control the behavior on an arc iterator:
382static const uint32 kArcILabelValue    = 0x0001;  // Value() gives valid ilabel
383static const uint32 kArcOLabelValue    = 0x0002;  //  "       "     "    olabel
384static const uint32 kArcWeightValue    = 0x0004;  //  "       "     "    weight
385static const uint32 kArcNextStateValue = 0x0008;  //  "       "     " nextstate
386static const uint32 kArcNoCache   = 0x0010;       // No need to cache arcs
387
388static const uint32 kArcValueFlags =
389                  kArcILabelValue | kArcOLabelValue |
390                  kArcWeightValue | kArcNextStateValue;
391
392static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
393
394
395// Arc iterator interface, templated on the Arc definition; used
396// for Arc iterator specializations that are returned by the InitArcIterator
397// Fst method.
398template <class A>
399class ArcIteratorBase {
400 public:
401  typedef A Arc;
402  typedef typename A::StateId StateId;
403
404  virtual ~ArcIteratorBase() {}
405
406  bool Done() const { return Done_(); }            // End of iterator?
407  const A& Value() const { return Value_(); }      // Current arc (when !Done)
408  void Next() { Next_(); }           // Advance to next arc (when !Done)
409  size_t Position() const { return Position_(); }  // Return current position
410  void Reset() { Reset_(); }         // Return to initial condition
411  void Seek(size_t a) { Seek_(a); }  // Random arc access by position
412  uint32 Flags() const { return Flags_(); }  // Return current behavorial flags
413  void SetFlags(uint32 flags, uint32 mask) {  // Set behavorial flags
414    SetFlags_(flags, mask);
415  }
416
417 private:
418  // This allows base class virtual access to non-virtual derived-
419  // class members of the same name. It makes the derived class more
420  // efficient to use but unsafe to further derive.
421  virtual bool Done_() const = 0;
422  virtual const A& Value_() const = 0;
423  virtual void Next_() = 0;
424  virtual size_t Position_() const = 0;
425  virtual void Reset_() = 0;
426  virtual void Seek_(size_t a) = 0;
427  virtual uint32 Flags_() const = 0;
428  virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
429};
430
431
432// ArcIterator initialization data
433template <class A> struct ArcIteratorData {
434  ArcIteratorBase<A> *base;  // Specialized iterator if non-zero
435  const A *arcs;             // O.w. arcs pointer
436  size_t narcs;              // ... and arc count
437  int *ref_count;            // ... and reference count if non-zero
438};
439
440
441// Generic arc iterator, templated on the FST definition
442// - a wrapper around pointer to specific one.
443// Here is a typical use: \code
444//   for (ArcIterator<StdFst> aiter(fst, s));
445//        !aiter.Done();
446//         aiter.Next()) {
447//     StdArc &arc = aiter.Value();
448//     ...
449//   } \endcode
450template <class F>
451class ArcIterator {
452   public:
453  typedef F FST;
454  typedef typename F::Arc Arc;
455  typedef typename Arc::StateId StateId;
456
457  ArcIterator(const F &fst, StateId s) : i_(0) {
458    fst.InitArcIterator(s, &data_);
459  }
460
461  explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
462    if (data_.ref_count)
463      ++(*data_.ref_count);
464  }
465
466  ~ArcIterator() {
467    if (data_.base)
468      delete data_.base;
469    else if (data_.ref_count)
470      --(*data_.ref_count);
471  }
472
473  bool Done() const {
474    return data_.base ?  data_.base->Done() : i_ >= data_.narcs;
475  }
476
477  const Arc& Value() const {
478    return data_.base ? data_.base->Value() : data_.arcs[i_];
479  }
480
481  void Next() {
482    if (data_.base)
483      data_.base->Next();
484    else
485      ++i_;
486  }
487
488  void Reset() {
489    if (data_.base)
490      data_.base->Reset();
491    else
492      i_ = 0;
493  }
494
495  void Seek(size_t a) {
496    if (data_.base)
497      data_.base->Seek(a);
498    else
499      i_ = a;
500  }
501
502  size_t Position() const {
503    return data_.base ? data_.base->Position() : i_;
504  }
505
506  uint32 Flags() const {
507    if (data_.base)
508      return data_.base->Flags();
509    else
510      return kArcValueFlags;
511  }
512
513  void SetFlags(uint32 flags, uint32 mask) {
514    if (data_.base)
515      data_.base->SetFlags(flags, mask);
516  }
517
518 private:
519  ArcIteratorData<Arc> data_;
520  size_t i_;
521  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
522};
523
524//
525// MATCHER DEFINITIONS
526//
527
528template <class A>
529MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
530  return 0;  // Use the default matcher
531}
532
533
534//
535// FST ACCESSORS - Useful functions in high-performance cases.
536//
537
538namespace internal {
539
540// General case - requires non-abstract, 'final' methods. Use for inlining.
541template <class F> inline
542typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) {
543  return fst.F::Final(s);
544}
545
546template <class F> inline
547ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
548  return fst.F::NumArcs(s);
549}
550
551template <class F> inline
552ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
553  return fst.F::NumInputEpsilons(s);
554}
555
556template <class F> inline
557ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
558  return fst.F::NumOutputEpsilons(s);
559}
560
561
562//  Fst<A> case - abstract methods.
563template <class A> inline
564typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
565  return fst.Final(s);
566}
567
568template <class A> inline
569ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
570  return fst.NumArcs(s);
571}
572
573template <class A> inline
574ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
575  return fst.NumInputEpsilons(s);
576}
577
578template <class A> inline
579ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
580  return fst.NumOutputEpsilons(s);
581}
582
583}  // namespace internal
584
585// A useful alias when using StdArc.
586typedef Fst<StdArc> StdFst;
587
588
589//
590//  CONSTANT DEFINITIONS
591//
592
593const int kNoStateId   =  -1;  // Not a valid state ID
594const int kNoLabel     =  -1;  // Not a valid label
595
596//
597// Fst IMPLEMENTATION BASE
598//
599// This is the recommended Fst implementation base class. It will
600// handle reference counts, property bits, type information and symbols.
601//
602
603template <class A> class FstImpl {
604 public:
605  typedef typename A::Weight Weight;
606  typedef typename A::StateId StateId;
607
608  FstImpl()
609      : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
610
611  FstImpl(const FstImpl<A> &impl)
612      : properties_(impl.properties_), type_(impl.type_),
613        isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
614        osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
615
616  virtual ~FstImpl() {
617    delete isymbols_;
618    delete osymbols_;
619  }
620
621  const string& Type() const { return type_; }
622
623  void SetType(const string &type) { type_ = type; }
624
625  virtual uint64 Properties() const { return properties_; }
626
627  virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
628
629  void SetProperties(uint64 props) {
630    properties_ &= kError;          // kError can't be cleared
631    properties_ |= props;
632  }
633
634  void SetProperties(uint64 props, uint64 mask) {
635    properties_ &= ~mask | kError;  // kError can't be cleared
636    properties_ |= props & mask;
637  }
638
639  // Allows (only) setting error bit on const FST impls
640  void SetProperties(uint64 props, uint64 mask) const {
641    if (mask != kError)
642      FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
643    properties_ |= kError;
644  }
645
646  const SymbolTable* InputSymbols() const { return isymbols_; }
647
648  const SymbolTable* OutputSymbols() const { return osymbols_; }
649
650  SymbolTable* InputSymbols() { return isymbols_; }
651
652  SymbolTable* OutputSymbols() { return osymbols_; }
653
654  void SetInputSymbols(const SymbolTable* isyms) {
655    if (isymbols_) delete isymbols_;
656    isymbols_ = isyms ? isyms->Copy() : 0;
657  }
658
659  void SetOutputSymbols(const SymbolTable* osyms) {
660    if (osymbols_) delete osymbols_;
661    osymbols_ = osyms ? osyms->Copy() : 0;
662  }
663
664  int RefCount() const {
665    return ref_count_.count();
666  }
667
668  int IncrRefCount() {
669    return ref_count_.Incr();
670  }
671
672  int DecrRefCount() {
673    return ref_count_.Decr();
674  }
675
676  // Read-in header and symbols from input stream, initialize Fst, and
677  // return the header.  If opts.header is non-null, skip read-in and
678  // use the option value.  If opts.[io]symbols is non-null, read-in
679  // (if present), but use the option value.
680  bool ReadHeader(istream &strm, const FstReadOptions& opts,
681                  int min_version, FstHeader *hdr);
682
683  // Write-out header and symbols from output stream.
684  // If a opts.header is false, skip writing header.
685  // If opts.[io]symbols is false, skip writing those symbols.
686  // This method is needed for Impl's that implement Write methods.
687  void WriteHeader(ostream &strm, const FstWriteOptions& opts,
688                   int version, FstHeader *hdr) const {
689    if (opts.write_header) {
690      hdr->SetFstType(type_);
691      hdr->SetArcType(A::Type());
692      hdr->SetVersion(version);
693      hdr->SetProperties(properties_);
694      int32 file_flags = 0;
695      if (isymbols_ && opts.write_isymbols)
696        file_flags |= FstHeader::HAS_ISYMBOLS;
697      if (osymbols_ && opts.write_osymbols)
698        file_flags |= FstHeader::HAS_OSYMBOLS;
699      if (opts.align)
700        file_flags |= FstHeader::IS_ALIGNED;
701      hdr->SetFlags(file_flags);
702      hdr->Write(strm, opts.source);
703    }
704    if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
705    if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
706  }
707
708  // Write-out header and symbols to output stream.
709  // If a opts.header is false, skip writing header.
710  // If opts.[io]symbols is false, skip writing those symbols.
711  // type is the Fst type being written.
712  // This method is used in the cross-type serialization methods Fst::WriteFst.
713  static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
714                             const FstWriteOptions& opts, int version,
715                             const string &type, uint64 properties,
716                             FstHeader *hdr) {
717    if (opts.write_header) {
718      hdr->SetFstType(type);
719      hdr->SetArcType(A::Type());
720      hdr->SetVersion(version);
721      hdr->SetProperties(properties);
722      int32 file_flags = 0;
723      if (fst.InputSymbols() && opts.write_isymbols)
724        file_flags |= FstHeader::HAS_ISYMBOLS;
725      if (fst.OutputSymbols() && opts.write_osymbols)
726        file_flags |= FstHeader::HAS_OSYMBOLS;
727      if (opts.align)
728        file_flags |= FstHeader::IS_ALIGNED;
729      hdr->SetFlags(file_flags);
730      hdr->Write(strm, opts.source);
731    }
732    if (fst.InputSymbols() && opts.write_isymbols) {
733      fst.InputSymbols()->Write(strm);
734    }
735    if (fst.OutputSymbols() && opts.write_osymbols) {
736      fst.OutputSymbols()->Write(strm);
737    }
738  }
739
740  // In serialization routines where the header cannot be written until after
741  // the machine has been serialized, this routine can be called to seek to
742  // the beginning of the file an rewrite the header with updated fields.
743  // It repositions the file pointer back at the end of the file.
744  // returns true on success, false on failure.
745  static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
746                              const FstWriteOptions& opts, int version,
747                              const string &type, uint64 properties,
748                              FstHeader *hdr, size_t header_offset) {
749    strm.seekp(header_offset);
750    if (!strm) {
751      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
752      return false;
753    }
754    WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
755    if (!strm) {
756      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
757      return false;
758    }
759    strm.seekp(0, ios_base::end);
760    if (!strm) {
761      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
762      return false;
763    }
764    return true;
765  }
766
767 protected:
768  mutable uint64 properties_;           // Property bits
769
770 private:
771  string type_;                 // Unique name of Fst class
772  SymbolTable *isymbols_;       // Ilabel symbol table
773  SymbolTable *osymbols_;       // Olabel symbol table
774  RefCounter ref_count_;        // Reference count
775
776  void operator=(const FstImpl<A> &impl);  // disallow
777};
778
779template <class A> inline
780bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
781                            int min_version, FstHeader *hdr) {
782  if (opts.header)
783    *hdr = *opts.header;
784  else if (!hdr->Read(strm, opts.source))
785    return false;
786
787  if (FLAGS_v >= 2) {
788    LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
789              << ", fst_type: " << hdr->FstType()
790              << ", arc_type: " << A::Type()
791              << ", version: " << hdr->Version()
792              << ", flags: " << hdr->GetFlags();
793  }
794
795  if (hdr->FstType() != type_) {
796    LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
797               << "\": " << opts.source;
798    return false;
799  }
800  if (hdr->ArcType() != A::Type()) {
801    LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
802               << "\": " << opts.source;
803    return false;
804  }
805  if (hdr->Version() < min_version) {
806    LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
807               << " Fst version: " << opts.source;
808    return false;
809  }
810  properties_ = hdr->Properties();
811  if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS)
812    isymbols_ = SymbolTable::Read(strm, opts.source);
813  if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS)
814    osymbols_ =SymbolTable::Read(strm, opts.source);
815
816  if (opts.isymbols) {
817    delete isymbols_;
818    isymbols_ = opts.isymbols->Copy();
819  }
820  if (opts.osymbols) {
821    delete osymbols_;
822    osymbols_ = opts.osymbols->Copy();
823  }
824  return true;
825}
826
827
828template<class Arc>
829uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
830
831
832// This is a helper class template useful for attaching an Fst interface to
833// its implementation, handling reference counting.
834template < class I, class F = Fst<typename I::Arc> >
835class ImplToFst : public F {
836 public:
837  typedef typename I::Arc Arc;
838  typedef typename Arc::Weight Weight;
839  typedef typename Arc::StateId StateId;
840
841  virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_;  }
842
843  virtual StateId Start() const { return impl_->Start(); }
844
845  virtual Weight Final(StateId s) const { return impl_->Final(s); }
846
847  virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
848
849  virtual size_t NumInputEpsilons(StateId s) const {
850    return impl_->NumInputEpsilons(s);
851  }
852
853  virtual size_t NumOutputEpsilons(StateId s) const {
854    return impl_->NumOutputEpsilons(s);
855  }
856
857  virtual uint64 Properties(uint64 mask, bool test) const {
858    if (test) {
859      uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
860      impl_->SetProperties(testprops, knownprops);
861      return testprops & mask;
862    } else {
863      return impl_->Properties(mask);
864    }
865  }
866
867  virtual const string& Type() const { return impl_->Type(); }
868
869  virtual const SymbolTable* InputSymbols() const {
870    return impl_->InputSymbols();
871  }
872
873  virtual const SymbolTable* OutputSymbols() const {
874    return impl_->OutputSymbols();
875  }
876
877 protected:
878  ImplToFst() : impl_(0) {}
879
880  ImplToFst(I *impl) : impl_(impl) {}
881
882  ImplToFst(const ImplToFst<I, F> &fst) {
883    impl_ = fst.impl_;
884    impl_->IncrRefCount();
885  }
886
887  // This constructor presumes there is a copy constructor for the
888  // implementation.
889  ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
890    if (safe) {
891      impl_ = new I(*(fst.impl_));
892    } else {
893      impl_ = fst.impl_;
894      impl_->IncrRefCount();
895    }
896  }
897
898  I *GetImpl() const { return impl_; }
899
900  // Change Fst implementation pointer. If 'own_impl' is true,
901  // ownership of the input implementation is given to this
902  // object; otherwise, the input implementation's reference count
903  // should be incremented.
904  void SetImpl(I *impl, bool own_impl = true) {
905    if (!own_impl)
906      impl->IncrRefCount();
907    if (impl_ && !impl_->DecrRefCount()) delete impl_;
908    impl_ = impl;
909  }
910
911 private:
912  // Disallow
913  ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
914
915  ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
916    FSTERROR() << "ImplToFst: Assignment operator disallowed";
917    GetImpl()->SetProperties(kError, kError);
918    return *this;
919  }
920
921  I *impl_;
922};
923
924
925// Converts FSTs by casting their implementations, where this makes
926// sense (which excludes implementations with weight-dependent virtual
927// methods). Must be a friend of the Fst classes involved (currently
928// the concrete Fsts: VectorFst, ConstFst, CompactFst).
929template<class F, class G> void Cast(const F &ifst, G *ofst) {
930  ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false);
931}
932
933// Fst Serialization
934template <class A>
935void FstToString(const Fst<A> &fst, string *result) {
936  ostringstream ostrm;
937  fst.Write(ostrm, FstWriteOptions("FstToString"));
938  *result = ostrm.str();
939}
940
941template <class A>
942Fst<A> *StringToFst(const string &s) {
943  istringstream istrm(s);
944  return Fst<A>::Read(istrm, FstReadOptions("StringToFst"));
945}
946
947}  // namespace fst
948
949#endif  // FST_LIB_FST_H__
950