1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// fst.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Finite-State Transducer (FST) - abstract base class definition,
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state and arc iterator interface, and suggested base implementation.
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_FST_H__
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_FST_H__
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <stddef.h>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <sys/types.h>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <cmath>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compat.h>
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/types.h>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/arc.h>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/properties.h>
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/register.h>
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream>
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fstream>
39dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <sstream>
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/symbol-table.h>
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h>
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDECLARE_bool(fst_align);
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IsFstHeader(istream &, const string &);
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass FstHeader;
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class StateIteratorData;
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class ArcIteratorData;
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class MatcherBase;
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct FstReadOptions {
565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // FileReadMode(s) are advisory, there are many conditions than prevent a
575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // file from being mapped, READ mode will be selected in these cases with
585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // a warning indicating why it was chosen.
595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  enum FileReadMode { READ, MAP };
605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string source;                // Where you're reading from
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const FstHeader *header;      // Pointer to Fst header. If non-zero, use
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                // this info (don't read a stream header)
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                // this info (read and skip stream isymbols)
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                // this info (read and skip stream osymbols)
685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  FileReadMode mode;            // Read or map files (advisory, if possible)
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  explicit FstReadOptions(const string& src = "<unspecified>",
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                          const FstHeader *hdr = 0,
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                          const SymbolTable* isym = 0,
735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                          const SymbolTable* osym = 0);
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit FstReadOptions(const string& src,
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                          const SymbolTable* isym,
775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                          const SymbolTable* osym = 0);
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Helper function to convert strings FileReadModes into their enum value.
805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  static FileReadMode ReadMode(const string &mode);
815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin};
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct FstWriteOptions {
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string source;                 // Where you're writing to
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool write_header;             // Write the header?
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool write_isymbols;           // Write input symbols?
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool write_osymbols;           // Write output symbols?
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool align;                    // Write data aligned where appropriate;
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                 // this may fail on pipes
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit FstWriteOptions(const string& src = "<unspecifed>",
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                           bool hdr = true, bool isym = true,
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                           bool osym = true, bool alig = FLAGS_fst_align)
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : source(src), write_header(hdr),
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        write_isymbols(isym), write_osymbols(osym), align(alig) {}
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst HEADER CLASS
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This is the recommended Fst file header representation.
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass FstHeader {
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  enum {
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    HAS_ISYMBOLS = 0x1,          // Has input symbol table
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    HAS_OSYMBOLS = 0x2,          // Has output symbol table
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    IS_ALIGNED   = 0x4,          // Memory-aligned (where appropriate)
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } Flags;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                numstates_(0), numarcs_(0) {}
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const string &FstType() const { return fsttype_; }
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const string &ArcType() const { return arctype_; }
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int32 Version() const { return version_; }
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int32 GetFlags() const { return flags_; }
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 Properties() const { return properties_; }
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 Start() const { return start_; }
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumStates() const { return numstates_; }
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumArcs() const { return numarcs_; }
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetFstType(const string& type) { fsttype_ = type; }
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetArcType(const string& type) { arctype_ = type; }
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetVersion(int32 version) { version_ = version; }
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetFlags(int32 flags) { flags_ = flags; }
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetProperties(uint64 properties) { properties_ = properties; }
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetStart(int64 start) { start_ = start; }
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetNumStates(int64 numstates) { numstates_ = numstates; }
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Read(istream &strm, const string &source, bool rewind = false);
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Write(ostream &strm, const string &source) const;
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string fsttype_;                   // E.g. "vector"
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string arctype_;                   // E.g. "standard"
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int32 version_;                    // Type version #
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int32 flags_;                      // File format bits
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 properties_;                // FST property bits
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 start_;                      // Start state
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 numstates_;                  // # of states
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 numarcs_;                    // # of arcs
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Specifies matcher action.
enum MatchType {
  MATCH_INPUT,    // Match input label.
  MATCH_OUTPUT,   // Match output label.
  MATCH_BOTH,     // Match input or output label.
  MATCH_NONE,     // Match nothing.
  MATCH_UNKNOWN   // Match type unknown.
};
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst INTERFACE CLASS DEFINITION
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A generic FST, templated on the arc definition, with
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// common-demoninator methods (use StateIterator and ArcIterator to
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// iterate over its states and arcs).
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass Fst {
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ~Fst() {}
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual StateId Start() const = 0;          // Initial state
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual Weight Final(StateId) const = 0;    // State's final weight
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual size_t NumArcs(StateId) const = 0;  // State's arc count
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual size_t NumInputEpsilons(StateId)
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const = 0;                              // State's input epsilon count
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual size_t NumOutputEpsilons(StateId)
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const = 0;                              // State's output epsilon count
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If test=false, return stored properties bits for mask (some poss. unknown)
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If test=true, return property bits for mask (computing o.w. unknown)
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual uint64 Properties(uint64 mask, bool test)
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const = 0;  // Property bits
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const string& Type() const = 0;    // Fst type name
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Get a copy of this Fst. The copying behaves as follows:
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // (1) The copying is constant time if safe = false or if safe = true
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // and is on an otherwise unaccessed Fst.
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // (2) If safe = true, the copy is thread-safe in that the original
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // and copy can be safely accessed (but not necessarily mutated) by
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // separate threads. For some Fst types, 'Copy(true)' should only be
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // called on an Fst that has not otherwise been accessed. Its behavior
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // is undefined otherwise.
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // (3) If a MutableFst is copied and then mutated, then the original is
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // unmodified and vice versa (often by a copy-on-write on the initial
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // mutation, which may not be constant time).
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual Fst<A> *Copy(bool safe = false) const = 0;
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Read an Fst from an input stream; returns NULL on error
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FstReadOptions ropts(opts);
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FstHeader hdr;
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ropts.header)
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr = *opts.header;
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else {
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!hdr.Read(strm, opts.source))
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 0;
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ropts.header = &hdr;
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FstRegister<A> *registr = FstRegister<A>::GetRegister();
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const typename FstRegister<A>::Reader reader =
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      registr->GetReader(hdr.FstType());
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!reader) {
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << "\" (arc type = \"" << A::Type()
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << "\"): " << ropts.source;
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return reader(strm, ropts);
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Read an Fst from a file; return NULL on error
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Empty filename reads from standard input
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static Fst<A> *Read(const string &filename) {
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!filename.empty()) {
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!strm) {
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 0;
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return Read(strm, FstReadOptions(filename));
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
239dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return Read(cin, FstReadOptions("standard input"));
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Write an Fst to an output stream; return false on error
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << " Fst type";
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Write an Fst to a file; return false on error
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Empty filename writes to standard output
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual bool Write(const string &filename) const {
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << " Fst type";
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Return input label symbol table; return NULL if not specified
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const SymbolTable* InputSymbols() const = 0;
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Return output label symbol table; return NULL if not specified
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const SymbolTable* OutputSymbols() const = 0;
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // For generic state iterator construction; not normally called
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // directly by users.
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void InitStateIterator(StateIteratorData<A> *) const = 0;
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // For generic arc iterator construction; not normally called
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // directly by users.
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // For generic matcher construction; not normally called
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // directly by users.
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected:
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool WriteFile(const string &filename) const {
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!filename.empty()) {
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!strm) {
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return Write(strm, FstWriteOptions(filename));
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
286dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return Write(cout, FstWriteOptions("standard output"));
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// STATE and ARC ITERATOR DEFINITIONS
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Abstract state iterator interface, templated on the Arc definition.
// The StateIterator specializations handed back by the Fst method
// InitStateIterator derive from it.
template <class A>
class StateIteratorBase {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;

  virtual ~StateIteratorBase() {}

  // End of iterator?
  bool Done() const { return this->Done_(); }

  // Current state (when !Done).
  StateId Value() const { return this->Value_(); }

  // Advance to next state (when !Done).
  void Next() { this->Next_(); }

  // Return to initial condition.
  void Reset() { this->Reset_(); }

 private:
  // The public methods above forward to these virtuals. This allows base
  // class virtual access to non-virtual derived-class members of the same
  // name. It makes the derived class more efficient to use but unsafe to
  // further derive.
  virtual bool Done_() const = 0;
  virtual StateId Value_() const = 0;
  virtual void Next_() = 0;
  virtual void Reset_() = 0;
};
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// StateIterator initialization data
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> struct StateIteratorData {
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateIteratorBase<A> *base;   // Specialized iterator if non-zero
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename A::StateId nstates;  // O.w. total # of states
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Generic state iterator, templated on the FST definition
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - a wrapper around pointer to specific one.
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Here is a typical use: \code
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   for (StateIterator<StdFst> siter(fst);
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//        !siter.Done();
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//        siter.Next()) {
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     StateId s = siter.Value();
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     ...
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   } \endcode
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F>
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator {
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef F FST;
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename F::Arc Arc;
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit StateIterator(const F &fst) : s_(0) {
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst.InitStateIterator(&data_);
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~StateIterator() { if (data_.base) delete data_.base; }
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Done() const {
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return data_.base ? data_.base->Done() : s_ >= data_.nstates;
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId Value() const { return data_.base ? data_.base->Value() : s_; }
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Next() {
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.base)
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_.base->Next();
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++s_;
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Reset() {
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.base)
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_.base->Reset();
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      s_ = 0;
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateIteratorData<Arc> data_;
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId s_;
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(StateIterator);
378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Flags to control the behavior on an arc iterator:
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcILabelValue    = 0x0001;  // Value() gives valid ilabel
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcOLabelValue    = 0x0002;  //  "       "     "    olabel
384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcWeightValue    = 0x0004;  //  "       "     "    weight
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcNextStateValue = 0x0008;  //  "       "     " nextstate
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcNoCache   = 0x0010;       // No need to cache arcs
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcValueFlags =
389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  kArcILabelValue | kArcOLabelValue |
390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  kArcWeightValue | kArcNextStateValue;
391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcFlags = kArcValueFlags | kArcNoCache;
393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Arc iterator interface, templated on the Arc definition; used
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// for Arc iterator specializations that are returned by the InitArcIterator
397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst method.
398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIteratorBase {
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ~ArcIteratorBase() {}
405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Done() const { return Done_(); }            // End of iterator?
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const A& Value() const { return Value_(); }      // Current arc (when !Done)
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Next() { Next_(); }           // Advance to next arc (when !Done)
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t Position() const { return Position_(); }  // Return current position
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Reset() { Reset_(); }         // Return to initial condition
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Seek(size_t a) { Seek_(a); }  // Random arc access by position
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint32 Flags() const { return Flags_(); }  // Return current behavorial flags
413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetFlags(uint32 flags, uint32 mask) {  // Set behavorial flags
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetFlags_(flags, mask);
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // This allows base class virtual access to non-virtual derived-
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // class members of the same name. It makes the derived class more
420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // efficient to use but unsafe to further derive.
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual bool Done_() const = 0;
422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const A& Value_() const = 0;
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void Next_() = 0;
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual size_t Position_() const = 0;
425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void Reset_() = 0;
426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void Seek_(size_t a) = 0;
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual uint32 Flags_() const = 0;
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ArcIterator initialization data
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> struct ArcIteratorData {
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcIteratorBase<A> *base;  // Specialized iterator if non-zero
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const A *arcs;             // O.w. arcs pointer
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t narcs;              // ... and arc count
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int *ref_count;            // ... and reference count if non-zero
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Generic arc iterator, templated on the FST definition
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - a wrapper around pointer to specific one.
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Here is a typical use: \code
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   for (ArcIterator<StdFst> aiter(fst, s));
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//        !aiter.Done();
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//         aiter.Next()) {
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     StdArc &arc = aiter.Value();
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     ...
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   } \endcode
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F>
451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator {
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef F FST;
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename F::Arc Arc;
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcIterator(const F &fst, StateId s) : i_(0) {
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst.InitArcIterator(s, &data_);
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) {
462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.ref_count)
463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++(*data_.ref_count);
464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~ArcIterator() {
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.base)
468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete data_.base;
469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else if (data_.ref_count)
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      --(*data_.ref_count);
471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Done() const {
474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return data_.base ?  data_.base->Done() : i_ >= data_.narcs;
475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Arc& Value() const {
478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return data_.base ? data_.base->Value() : data_.arcs[i_];
479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Next() {
482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.base)
483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_.base->Next();
484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++i_;
486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Reset() {
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.base)
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_.base->Reset();
491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      i_ = 0;
493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Seek(size_t a) {
496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.base)
497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_.base->Seek(a);
498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      i_ = a;
500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t Position() const {
503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return data_.base ? data_.base->Position() : i_;
504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint32 Flags() const {
507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.base)
508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return data_.base->Flags();
509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return kArcValueFlags;
511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetFlags(uint32 flags, uint32 mask) {
514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_.base)
515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_.base->SetFlags(flags, mask);
516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcIteratorData<Arc> data_;
520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t i_;
521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// MATCHER DEFINITIONS
526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonMatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const {
530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return 0;  // Use the default matcher
531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// FST ACCESSORS - Useful functions in high-performance cases.
536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace internal {
539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// General case - requires non-abstract, 'final' methods. Use for inlining.
// Final weight of state s, obtained through a class-qualified call to
// F::Final so that no virtual dispatch takes place.
template <class F>
inline typename F::Arc::Weight Final(const F &fst,
                                     typename F::Arc::StateId s) {
  typename F::Arc::Weight final_weight = fst.F::Final(s);
  return final_weight;
}
545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Number of arcs leaving state s, obtained through a class-qualified call
// to F::NumArcs.
template <class F>
inline ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) {
  const ssize_t narcs = fst.F::NumArcs(s);
  return narcs;
}
550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Number of input epsilons at state s, obtained through a class-qualified
// call to F::NumInputEpsilons.
template <class F>
inline ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) {
  const ssize_t niepsilons = fst.F::NumInputEpsilons(s);
  return niepsilons;
}
555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Number of output epsilons at state s, obtained through a class-qualified
// call to F::NumOutputEpsilons.
template <class F>
inline ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) {
  const ssize_t noepsilons = fst.F::NumOutputEpsilons(s);
  return noepsilons;
}
560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//  Fst<A> case - abstract methods.
563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename A::Weight Final(const Fst<A> &fst, typename A::StateId s) {
565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return fst.Final(s);
566f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
567f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
568f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
569f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) {
570f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return fst.NumArcs(s);
571f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
572f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
573f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
574f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) {
575f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return fst.NumInputEpsilons(s);
576f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
577f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
578f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
579f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) {
580f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return fst.NumOutputEpsilons(s);
581f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
582f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
583f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace internal
584f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
585f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A useful alias when using StdArc.
586f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef Fst<StdArc> StdFst;
587f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
588f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
589f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
590f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//  CONSTANT DEFINITIONS
591f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
592f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Sentinel: not a valid state ID.
const int kNoStateId = -1;
// Sentinel: not a valid label.
const int kNoLabel = -1;
595f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
596f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
597f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst IMPLEMENTATION BASE
598f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
599f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This is the recommended Fst implementation base class. It will
600f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// handle reference counts, property bits, type information and symbols.
601f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
602f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
603f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class FstImpl {
604f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
605f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
606f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
607f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
608f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FstImpl()
609f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {}
610f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
611f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FstImpl(const FstImpl<A> &impl)
612f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : properties_(impl.properties_), type_(impl.type_),
613f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0),
614f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        osymbols_(impl.osymbols_ ? impl.osymbols_->Copy() : 0) {}
615f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
616f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ~FstImpl() {
617f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete isymbols_;
618f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete osymbols_;
619f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
620f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
621f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const string& Type() const { return type_; }
622f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
623f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetType(const string &type) { type_ = type; }
624f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
625f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual uint64 Properties() const { return properties_; }
626f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
627f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual uint64 Properties(uint64 mask) const { return properties_ & mask; }
628f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
629f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetProperties(uint64 props) {
630f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    properties_ &= kError;          // kError can't be cleared
631f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    properties_ |= props;
632f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
633f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
634f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetProperties(uint64 props, uint64 mask) {
635f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    properties_ &= ~mask | kError;  // kError can't be cleared
636f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    properties_ |= props & mask;
637f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
638f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
639f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Allows (only) setting error bit on const FST impls
640f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetProperties(uint64 props, uint64 mask) const {
641f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (mask != kError)
642f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "FstImpl::SetProperties() const: can only set kError";
643f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    properties_ |= kError;
644f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
645f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
646f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable* InputSymbols() const { return isymbols_; }
647f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
648f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable* OutputSymbols() const { return osymbols_; }
649f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
650f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SymbolTable* InputSymbols() { return isymbols_; }
651f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
652f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SymbolTable* OutputSymbols() { return osymbols_; }
653f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
654f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetInputSymbols(const SymbolTable* isyms) {
655f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (isymbols_) delete isymbols_;
656f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    isymbols_ = isyms ? isyms->Copy() : 0;
657f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
658f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
659f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetOutputSymbols(const SymbolTable* osyms) {
660f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (osymbols_) delete osymbols_;
661f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    osymbols_ = osyms ? osyms->Copy() : 0;
662f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
663f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
664f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int RefCount() const {
665f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return ref_count_.count();
666f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
667f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
668f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int IncrRefCount() {
669f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return ref_count_.Incr();
670f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
671f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
672f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int DecrRefCount() {
673f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return ref_count_.Decr();
674f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
675f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
676f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Read-in header and symbols from input stream, initialize Fst, and
677f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // return the header.  If opts.header is non-null, skip read-in and
678f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // use the option value.  If opts.[io]symbols is non-null, read-in
679f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // (if present), but use the option value.
680f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ReadHeader(istream &strm, const FstReadOptions& opts,
681f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  int min_version, FstHeader *hdr);
682f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
683f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Write-out header and symbols from output stream.
684f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If a opts.header is false, skip writing header.
685f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If opts.[io]symbols is false, skip writing those symbols.
686f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // This method is needed for Impl's that implement Write methods.
687f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void WriteHeader(ostream &strm, const FstWriteOptions& opts,
688f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   int version, FstHeader *hdr) const {
689f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (opts.write_header) {
690f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetFstType(type_);
691f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetArcType(A::Type());
692f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetVersion(version);
693f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetProperties(properties_);
694f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      int32 file_flags = 0;
695f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (isymbols_ && opts.write_isymbols)
696f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        file_flags |= FstHeader::HAS_ISYMBOLS;
697f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (osymbols_ && opts.write_osymbols)
698f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        file_flags |= FstHeader::HAS_OSYMBOLS;
699f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (opts.align)
700f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        file_flags |= FstHeader::IS_ALIGNED;
701f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetFlags(file_flags);
702f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->Write(strm, opts.source);
703f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
704f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm);
705f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm);
706f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
707f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
708f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Write-out header and symbols to output stream.
709f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If a opts.header is false, skip writing header.
710f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If opts.[io]symbols is false, skip writing those symbols.
711f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // type is the Fst type being written.
712f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // This method is used in the cross-type serialization methods Fst::WriteFst.
713f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static void WriteFstHeader(const Fst<A> &fst, ostream &strm,
714f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                             const FstWriteOptions& opts, int version,
715dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                             const string &type, uint64 properties,
716dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                             FstHeader *hdr) {
717f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (opts.write_header) {
718f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetFstType(type);
719f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetArcType(A::Type());
720f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetVersion(version);
721dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      hdr->SetProperties(properties);
722f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      int32 file_flags = 0;
723f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (fst.InputSymbols() && opts.write_isymbols)
724f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        file_flags |= FstHeader::HAS_ISYMBOLS;
725f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (fst.OutputSymbols() && opts.write_osymbols)
726f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        file_flags |= FstHeader::HAS_OSYMBOLS;
727f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (opts.align)
728f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        file_flags |= FstHeader::IS_ALIGNED;
729f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->SetFlags(file_flags);
730f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr->Write(strm, opts.source);
731f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
732f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst.InputSymbols() && opts.write_isymbols) {
733f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst.InputSymbols()->Write(strm);
734f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
735f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst.OutputSymbols() && opts.write_osymbols) {
736f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst.OutputSymbols()->Write(strm);
737f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
738f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
739f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
740f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // In serialization routines where the header cannot be written until after
741f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // the machine has been serialized, this routine can be called to seek to
742f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // the beginning of the file an rewrite the header with updated fields.
743f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // It repositions the file pointer back at the end of the file.
744f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // returns true on success, false on failure.
745f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm,
746f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                              const FstWriteOptions& opts, int version,
747dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                              const string &type, uint64 properties,
748dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                              FstHeader *hdr, size_t header_offset) {
749f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    strm.seekp(header_offset);
750f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!strm) {
751f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
752f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
753f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
754dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    WriteFstHeader(fst, strm, opts, version, type, properties, hdr);
755f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!strm) {
756f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
757f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
758f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
759f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    strm.seekp(0, ios_base::end);
760f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!strm) {
761f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source;
762f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
763f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
764f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
765f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
766f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
 protected:
  // Declared mutable, so it may be modified from const member functions.
  // ReadHeader() overwrites it with the property bits stored in the header.
  mutable uint64 properties_;           // Property bits

 private:
  // ReadHeader() rejects any header whose FST type differs from this value.
  string type_;                 // Unique name of Fst class
  // ReadHeader() deletes these tables before replacing them with copies of
  // the tables supplied in the read options, i.e. it treats them as owned by
  // this object.
  SymbolTable *isymbols_;       // Ilabel symbol table
  SymbolTable *osymbols_;       // Olabel symbol table
  RefCounter ref_count_;        // Reference count

  // Declared private to disallow assignment.
  void operator=(const FstImpl<A> &impl);  // disallow
777f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
778f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
779f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
780f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts,
781f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                            int min_version, FstHeader *hdr) {
782f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.header)
783f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *hdr = *opts.header;
784f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  else if (!hdr->Read(strm, opts.source))
785f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
786f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
787f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (FLAGS_v >= 2) {
788f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source
789f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              << ", fst_type: " << hdr->FstType()
790f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              << ", arc_type: " << A::Type()
791f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              << ", version: " << hdr->Version()
792f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              << ", flags: " << hdr->GetFlags();
793f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
794f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
795f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (hdr->FstType() != type_) {
796f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_
797f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << "\": " << opts.source;
798f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
799f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
800f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (hdr->ArcType() != A::Type()) {
801f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type()
802f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << "\": " << opts.source;
803f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
804f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
805f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (hdr->Version() < min_version) {
806f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_
807f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << " Fst version: " << opts.source;
808f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
809f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
810f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  properties_ = hdr->Properties();
811f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS)
812f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    isymbols_ = SymbolTable::Read(strm, opts.source);
813f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS)
814f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    osymbols_ =SymbolTable::Read(strm, opts.source);
815f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
816f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.isymbols) {
817f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete isymbols_;
818f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    isymbols_ = opts.isymbols->Copy();
819f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
820f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.osymbols) {
821f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete osymbols_;
822f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    osymbols_ = opts.osymbols->Copy();
823f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
824f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return true;
825f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
826f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
827f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Forward declaration only; the definition is not in this part of the file.
// ImplToFst::Properties() calls it with a property mask, stores the returned
// bits together with the *known value via SetProperties(), and returns the
// result masked by 'mask'.
template<class Arc>
uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known);
830f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
831f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
832f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This is a helper class template useful for attaching an Fst interface to
833f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// its implementation, handling reference counting.
834f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate < class I, class F = Fst<typename I::Arc> >
835f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ImplToFst : public F {
836f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
837f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename I::Arc Arc;
838f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
839f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
840f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
841f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_;  }
842f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
843f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual StateId Start() const { return impl_->Start(); }
844f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
845f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual Weight Final(StateId s) const { return impl_->Final(s); }
846f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
847f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
848f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
849f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual size_t NumInputEpsilons(StateId s) const {
850f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return impl_->NumInputEpsilons(s);
851f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
852f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
853f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual size_t NumOutputEpsilons(StateId s) const {
854f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return impl_->NumOutputEpsilons(s);
855f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
856f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
857f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual uint64 Properties(uint64 mask, bool test) const {
858f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (test) {
859f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops);
860f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      impl_->SetProperties(testprops, knownprops);
861f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return testprops & mask;
862f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
863f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return impl_->Properties(mask);
864f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
865f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
866f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
867f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const string& Type() const { return impl_->Type(); }
868f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
869f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const SymbolTable* InputSymbols() const {
870f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return impl_->InputSymbols();
871f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
872f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
873f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const SymbolTable* OutputSymbols() const {
874f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return impl_->OutputSymbols();
875f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
876f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
877f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected:
878f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToFst() : impl_(0) {}
879f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
880f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToFst(I *impl) : impl_(impl) {}
881f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
882f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToFst(const ImplToFst<I, F> &fst) {
883f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    impl_ = fst.impl_;
884f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    impl_->IncrRefCount();
885f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
886f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
887f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // This constructor presumes there is a copy constructor for the
888f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // implementation.
889f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToFst(const ImplToFst<I, F> &fst, bool safe) {
890f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (safe) {
891f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      impl_ = new I(*(fst.impl_));
892f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
893f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      impl_ = fst.impl_;
894f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      impl_->IncrRefCount();
895f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
896f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
897f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
898f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I *GetImpl() const { return impl_; }
899f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
900f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Change Fst implementation pointer. If 'own_impl' is true,
901f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // ownership of the input implementation is given to this
902f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // object; otherwise, the input implementation's reference count
903f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // should be incremented.
904f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetImpl(I *impl, bool own_impl = true) {
905f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!own_impl)
906f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      impl->IncrRefCount();
907f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (impl_ && !impl_->DecrRefCount()) delete impl_;
908f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    impl_ = impl;
909f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
910f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
911f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
912f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Disallow
913f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst);
914f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
915f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ImplToFst<I, F> &operator=(const Fst<Arc> &fst) {
916f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "ImplToFst: Assignment operator disallowed";
917f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GetImpl()->SetProperties(kError, kError);
918f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return *this;
919f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
920f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
921f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I *impl_;
922f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
923f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
924f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Converts FSTs by casting their implementations, where this makes
// sense (which excludes implementations with weight-dependent virtual
// methods). Must be a friend of the Fst classes involved (currently
// the concrete Fsts: VectorFst, ConstFst, CompactFst).
//
// The implementation held by 'ifst' is reinterpreted as G's implementation
// type and handed to 'ofst' through SetImpl().
template<class F, class G> void Cast(const F &ifst, G *ofst) {
  typedef typename G::Impl TargetImpl;
  TargetImpl *shared_impl = reinterpret_cast<TargetImpl *>(ifst.GetImpl());
  // 'false' is passed as SetImpl's second argument, as in the original call.
  ofst->SetImpl(shared_impl, false);
}
932f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
933f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst Serialization
934f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
935f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid FstToString(const Fst<A> &fst, string *result) {
936f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ostringstream ostrm;
937f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst.Write(ostrm, FstWriteOptions("FstToString"));
938f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  *result = ostrm.str();
939f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
940f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
941f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
942f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonFst<A> *StringToFst(const string &s) {
943f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  istringstream istrm(s);
944f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return Fst<A>::Read(istrm, FstReadOptions("StringToFst"));
945f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
946f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
947f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
948f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
949f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_FST_H__
950