1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// fst.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Finite-State Transducer (FST) - abstract base class definition, 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state and arc iterator interface, and suggested base implementation. 
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_FST_H__ 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_FST_H__ 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <stddef.h> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <sys/types.h> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <cmath> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compat.h> 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/types.h> 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/arc.h> 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/properties.h> 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/register.h> 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream> 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fstream> 39dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <sstream> 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/symbol-table.h> 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h> 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDECLARE_bool(fst_align); 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IsFstHeader(istream &, const string &); 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass FstHeader; 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class StateIteratorData; 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class ArcIteratorData; 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class MatcherBase; 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct FstReadOptions { 565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // FileReadMode(s) are advisory, there are many conditions than prevent a 575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // file from being mapped, READ mode will be selected in these cases with 585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // a warning indicating why it was chosen. 595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin enum FileReadMode { READ, MAP }; 605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string source; // Where you're reading from 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FstHeader *header; // Pointer to Fst header. If non-zero, use 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // this info (don't read a stream header) 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* isymbols; // Pointer to input symbols. If non-zero, use 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // this info (read and skip stream isymbols) 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* osymbols; // Pointer to output symbols. 
If non-zero, use 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // this info (read and skip stream osymbols) 685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin FileReadMode mode; // Read or map files (advisory, if possible) 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin explicit FstReadOptions(const string& src = "<unspecified>", 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FstHeader *hdr = 0, 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* isym = 0, 735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const SymbolTable* osym = 0); 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit FstReadOptions(const string& src, 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* isym, 775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const SymbolTable* osym = 0); 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Helper function to convert strings FileReadModes into their enum value. 805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin static FileReadMode ReadMode(const string &mode); 815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}; 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct FstWriteOptions { 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string source; // Where you're writing to 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool write_header; // Write the header? 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool write_isymbols; // Write input symbols? 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool write_osymbols; // Write output symbols? 
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool align; // Write data aligned where appropriate; 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // this may fail on pipes 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit FstWriteOptions(const string& src = "<unspecifed>", 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool hdr = true, bool isym = true, 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool osym = true, bool alig = FLAGS_fst_align) 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : source(src), write_header(hdr), 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson write_isymbols(isym), write_osymbols(osym), align(alig) {} 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst HEADER CLASS 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This is the recommended Fst file header representation. 
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass FstHeader { 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson enum { 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson HAS_ISYMBOLS = 0x1, // Has input symbol table 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson HAS_OSYMBOLS = 0x2, // Has output symbol table 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IS_ALIGNED = 0x4, // Memory-aligned (where appropriate) 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } Flags; 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FstHeader() : version_(0), flags_(0), properties_(0), start_(-1), 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson numstates_(0), numarcs_(0) {} 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string &FstType() const { return fsttype_; } 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string &ArcType() const { return arctype_; } 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int32 Version() const { return version_; } 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int32 GetFlags() const { return flags_; } 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 Properties() const { return properties_; } 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 Start() const { return start_; } 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumStates() const { return numstates_; } 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumArcs() const { return numarcs_; } 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetFstType(const string& type) { fsttype_ = type; } 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetArcType(const string& type) { arctype_ = type; } 
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetVersion(int32 version) { version_ = version; } 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetFlags(int32 flags) { flags_ = flags; } 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetProperties(uint64 properties) { properties_ = properties; } 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetStart(int64 start) { start_ = start; } 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetNumStates(int64 numstates) { numstates_ = numstates; } 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; } 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Read(istream &strm, const string &source, bool rewind = false); 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Write(ostream &strm, const string &source) const; 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string fsttype_; // E.g. "vector" 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string arctype_; // E.g. 
"standard" 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int32 version_; // Type version # 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int32 flags_; // File format bits 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 properties_; // FST property bits 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 start_; // Start state 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 numstates_; // # of states 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 numarcs_; // # of arcs 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specifies matcher action. 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonenum MatchType { MATCH_INPUT, // Match input label. 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MATCH_OUTPUT, // Match output label. 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MATCH_BOTH, // Match input or output label. 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MATCH_NONE, // Match nothing. 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MATCH_UNKNOWN }; // Match type unknown. 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst INTERFACE CLASS DEFINITION 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A generic FST, templated on the arc definition, with 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// common-demoninator methods (use StateIterator and ArcIterator to 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// iterate over its states and arcs). 
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass Fst { 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ~Fst() {} 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual StateId Start() const = 0; // Initial state 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual Weight Final(StateId) const = 0; // State's final weight 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual size_t NumArcs(StateId) const = 0; // State's arc count 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual size_t NumInputEpsilons(StateId) 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const = 0; // State's input epsilon count 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual size_t NumOutputEpsilons(StateId) 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const = 0; // State's output epsilon count 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If test=false, return stored properties bits for mask (some poss. unknown) 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If test=true, return property bits for mask (computing o.w. 
unknown) 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual uint64 Properties(uint64 mask, bool test) 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const = 0; // Property bits 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const string& Type() const = 0; // Fst type name 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Get a copy of this Fst. The copying behaves as follows: 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // (1) The copying is constant time if safe = false or if safe = true 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // and is on an otherwise unaccessed Fst. 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // (2) If safe = true, the copy is thread-safe in that the original 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // and copy can be safely accessed (but not necessarily mutated) by 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // separate threads. For some Fst types, 'Copy(true)' should only be 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // called on an Fst that has not otherwise been accessed. Its behavior 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // is undefined otherwise. 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // (3) If a MutableFst is copied and then mutated, then the original is 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // unmodified and vice versa (often by a copy-on-write on the initial 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // mutation, which may not be constant time). 
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual Fst<A> *Copy(bool safe = false) const = 0; 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Read an Fst from an input stream; returns NULL on error 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static Fst<A> *Read(istream &strm, const FstReadOptions &opts) { 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FstReadOptions ropts(opts); 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FstHeader hdr; 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ropts.header) 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr = *opts.header; 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else { 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!hdr.Read(strm, opts.source)) 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ropts.header = &hdr; 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FstRegister<A> *registr = FstRegister<A>::GetRegister(); 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const typename FstRegister<A>::Reader reader = 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson registr->GetReader(hdr.FstType()); 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!reader) { 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType() 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "\" (arc type = \"" << A::Type() 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "\"): " << ropts.source; 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return reader(strm, ropts); 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian 
Hodson 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Read an Fst from a file; return NULL on error 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Empty filename reads from standard input 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static Fst<A> *Read(const string &filename) { 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!filename.empty()) { 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!strm) { 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "Fst::Read: Can't open file: " << filename; 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return Read(strm, FstReadOptions(filename)); 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 239dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return Read(cin, FstReadOptions("standard input")); 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Write an Fst to an output stream; return false on error 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "Fst::Write: No write stream method for " << Type() 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " Fst type"; 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Write an Fst to a file; return false on error 
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Empty filename writes to standard output 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual bool Write(const string &filename) const { 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "Fst::Write: No write filename method for " << Type() 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " Fst type"; 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Return input label symbol table; return NULL if not specified 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const SymbolTable* InputSymbols() const = 0; 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Return output label symbol table; return NULL if not specified 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const SymbolTable* OutputSymbols() const = 0; 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // For generic state iterator construction; not normally called 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // directly by users. 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void InitStateIterator(StateIteratorData<A> *) const = 0; 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // For generic arc iterator construction; not normally called 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // directly by users. 
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0; 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // For generic matcher construction; not normally called 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // directly by users. 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual MatcherBase<A> *InitMatcher(MatchType match_type) const; 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected: 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool WriteFile(const string &filename) const { 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!filename.empty()) { 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofstream strm(filename.c_str(), ofstream::out | ofstream::binary); 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!strm) { 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "Fst::Write: Can't open file: " << filename; 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return Write(strm, FstWriteOptions(filename)); 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 286dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return Write(cout, FstWriteOptions("standard output")); 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// STATE and ARC ITERATOR DEFINITIONS 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 

// State iterator interface templated on the Arc definition; used
// for StateIterator specializations returned by the InitStateIterator
// Fst method.
template <class A>
class StateIteratorBase {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;

  virtual ~StateIteratorBase() {}

  // End of iterator?
  bool Done() const {
    return Done_();
  }

  // Current state (when !Done)
  StateId Value() const {
    return Value_();
  }

  // Advance to next state (when !Done)
  void Next() {
    Next_();
  }

  // Return to initial condition
  void Reset() {
    Reset_();
  }

 private:
  // This allows base class virtual access to non-virtual derived-
  // class members of the same name. It makes the derived class more
  // efficient to use but unsafe to further derive.
  virtual bool Done_() const = 0;
  virtual StateId Value_() const = 0;
  virtual void Next_() = 0;
  virtual void Reset_() = 0;
};


// StateIterator initialization data

template <class A>
struct StateIteratorData {
  StateIteratorBase<A> *base;   // Specialized iterator if non-zero
  typename A::StateId nstates;  // O.w. total # of states
};


// Generic state iterator, templated on the FST definition
// - a wrapper around pointer to specific one.
// Here is a typical use: \code
//   for (StateIterator<StdFst> siter(fst);
//        !siter.Done();
//        siter.Next()) {
//     StateId s = siter.Value();
//     ...
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// } \endcode 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F> 341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator { 342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef F FST; 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename F::Arc Arc; 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit StateIterator(const F &fst) : s_(0) { 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst.InitStateIterator(&data_); 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~StateIterator() { if (data_.base) delete data_.base; } 352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Done() const { 354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return data_.base ? data_.base->Done() : s_ >= data_.nstates; 355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId Value() const { return data_.base ? 
data_.base->Value() : s_; } 358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Next() { 360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.base) 361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_.base->Next(); 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++s_; 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Reset() { 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.base) 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_.base->Reset(); 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_ = 0; 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateIteratorData<Arc> data_; 375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s_; 376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(StateIterator); 378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Flags to control the behavior on an arc iterator: 382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcILabelValue = 0x0001; // Value() gives valid ilabel 383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcOLabelValue = 0x0002; // " " " olabel 384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcWeightValue = 0x0004; // " " " weight 
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcNextStateValue = 0x0008; // " " " nextstate 386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcNoCache = 0x0010; // No need to cache arcs 387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcValueFlags = 389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kArcILabelValue | kArcOLabelValue | 390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kArcWeightValue | kArcNextStateValue; 391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstatic const uint32 kArcFlags = kArcValueFlags | kArcNoCache; 393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Arc iterator interface, templated on the Arc definition; used 396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// for Arc iterator specializations that are returned by the InitArcIterator 397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst method. 398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIteratorBase { 400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ~ArcIteratorBase() {} 405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Done() const { return Done_(); } // End of iterator? 
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A& Value() const { return Value_(); } // Current arc (when !Done) 408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Next() { Next_(); } // Advance to next arc (when !Done) 409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t Position() const { return Position_(); } // Return current position 410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Reset() { Reset_(); } // Return to initial condition 411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Seek(size_t a) { Seek_(a); } // Random arc access by position 412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 Flags() const { return Flags_(); } // Return current behavorial flags 413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetFlags(uint32 flags, uint32 mask) { // Set behavorial flags 414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFlags_(flags, mask); 415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // This allows base class virtual access to non-virtual derived- 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // class members of the same name. It makes the derived class more 420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // efficient to use but unsafe to further derive. 
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual bool Done_() const = 0; 422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const A& Value_() const = 0; 423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void Next_() = 0; 424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual size_t Position_() const = 0; 425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void Reset_() = 0; 426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void Seek_(size_t a) = 0; 427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual uint32 Flags_() const = 0; 428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void SetFlags_(uint32 flags, uint32 mask) = 0; 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ArcIterator initialization data 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> struct ArcIteratorData { 434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIteratorBase<A> *base; // Specialized iterator if non-zero 435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A *arcs; // O.w. arcs pointer 436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t narcs; // ... and arc count 437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int *ref_count; // ... and reference count if non-zero 438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Generic arc iterator, templated on the FST definition 442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - a wrapper around pointer to specific one. 
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Here is a typical use: \code 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// for (ArcIterator<StdFst> aiter(fst, s)); 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// !aiter.Done(); 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// aiter.Next()) { 447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// StdArc &arc = aiter.Value(); 448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ... 449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// } \endcode 450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F> 451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator { 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef F FST; 454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename F::Arc Arc; 455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator(const F &fst, StateId s) : i_(0) { 458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst.InitArcIterator(s, &data_); 459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) { 462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.ref_count) 463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++(*data_.ref_count); 464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~ArcIterator() { 467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.base) 468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete data_.base; 469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian 
Hodson else if (data_.ref_count) 470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson --(*data_.ref_count); 471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Done() const { 474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return data_.base ? data_.base->Done() : i_ >= data_.narcs; 475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Arc& Value() const { 478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return data_.base ? data_.base->Value() : data_.arcs[i_]; 479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Next() { 482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.base) 483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_.base->Next(); 484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++i_; 486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Reset() { 489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.base) 490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_.base->Reset(); 491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson i_ = 0; 493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Seek(size_t a) { 496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.base) 497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_.base->Seek(a); 498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson i_ = a; 500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t Position() const { 503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return data_.base ? data_.base->Position() : i_; 504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 Flags() const { 507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.base) 508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return data_.base->Flags(); 509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return kArcValueFlags; 511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetFlags(uint32 flags, uint32 mask) { 514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data_.base) 515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_.base->SetFlags(flags, mask); 516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIteratorData<Arc> data_; 520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t i_; 521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(ArcIterator); 522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// MATCHER DEFINITIONS 526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 
528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonMatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const { 530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; // Use the default matcher 531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// FST ACCESSORS - Useful functions in high-performance cases. 536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace internal { 539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// General case - requires non-abstract, 'final' methods. Use for inlining. 541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F> inline 542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) { 543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst.F::Final(s); 544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F> inline 547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumArcs(const F &fst, typename F::Arc::StateId s) { 548f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst.F::NumArcs(s); 549f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 551f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F> inline 552f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) { 
553f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst.F::NumInputEpsilons(s); 554f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 556f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F> inline 557f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) { 558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst.F::NumOutputEpsilons(s); 559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst<A> case - abstract methods. 563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename A::Weight Final(const Fst<A> &fst, typename A::StateId s) { 565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst.Final(s); 566f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 567f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 568f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 569f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) { 570f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst.NumArcs(s); 571f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 572f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 573f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 574f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) { 575f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst.NumInputEpsilons(s); 576f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 577f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 578f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 
579f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) { 580f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst.NumOutputEpsilons(s); 581f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 582f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 583f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace internal 584f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 585f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A useful alias when using StdArc. 586f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef Fst<StdArc> StdFst; 587f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 588f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 589f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 590f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// CONSTANT DEFINITIONS 591f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 592f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 593f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst int kNoStateId = -1; // Not a valid state ID 594f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst int kNoLabel = -1; // Not a valid label 595f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 596f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 597f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst IMPLEMENTATION BASE 598f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 599f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This is the recommended Fst implementation base class. It will 600f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// handle reference counts, property bits, type information and symbols. 
601f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 602f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 603f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class FstImpl { 604f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 605f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 606f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 607f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 608f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FstImpl() 609f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {} 610f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 611f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FstImpl(const FstImpl<A> &impl) 612f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : properties_(impl.properties_), type_(impl.type_), 613f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0), 614f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson osymbols_(impl.osymbols_ ? 
impl.osymbols_->Copy() : 0) {} 615f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 616f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ~FstImpl() { 617f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete isymbols_; 618f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete osymbols_; 619f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 620f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 621f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string& Type() const { return type_; } 622f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 623f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetType(const string &type) { type_ = type; } 624f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 625f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual uint64 Properties() const { return properties_; } 626f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 627f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual uint64 Properties(uint64 mask) const { return properties_ & mask; } 628f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 629f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetProperties(uint64 props) { 630f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson properties_ &= kError; // kError can't be cleared 631f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson properties_ |= props; 632f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 633f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 634f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetProperties(uint64 props, uint64 mask) { 635f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson properties_ &= ~mask | kError; // kError can't be cleared 636f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson properties_ |= props & mask; 637f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 638f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 639f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Allows (only) setting error bit on const FST impls 
640f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetProperties(uint64 props, uint64 mask) const { 641f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (mask != kError) 642f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "FstImpl::SetProperties() const: can only set kError"; 643f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson properties_ |= kError; 644f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 645f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 646f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* InputSymbols() const { return isymbols_; } 647f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 648f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable* OutputSymbols() const { return osymbols_; } 649f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 650f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SymbolTable* InputSymbols() { return isymbols_; } 651f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 652f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SymbolTable* OutputSymbols() { return osymbols_; } 653f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 654f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetInputSymbols(const SymbolTable* isyms) { 655f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (isymbols_) delete isymbols_; 656f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson isymbols_ = isyms ? isyms->Copy() : 0; 657f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 658f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 659f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetOutputSymbols(const SymbolTable* osyms) { 660f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (osymbols_) delete osymbols_; 661f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson osymbols_ = osyms ? 
osyms->Copy() : 0; 662f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 663f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 664f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int RefCount() const { 665f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return ref_count_.count(); 666f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 667f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 668f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int IncrRefCount() { 669f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return ref_count_.Incr(); 670f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 671f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 672f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int DecrRefCount() { 673f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return ref_count_.Decr(); 674f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 675f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 676f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Read-in header and symbols from input stream, initialize Fst, and 677f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // return the header. If opts.header is non-null, skip read-in and 678f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // use the option value. If opts.[io]symbols is non-null, read-in 679f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // (if present), but use the option value. 680f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ReadHeader(istream &strm, const FstReadOptions& opts, 681f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int min_version, FstHeader *hdr); 682f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 683f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Write-out header and symbols from output stream. 684f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If a opts.header is false, skip writing header. 685f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If opts.[io]symbols is false, skip writing those symbols. 
686f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // This method is needed for Impl's that implement Write methods. 687f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void WriteHeader(ostream &strm, const FstWriteOptions& opts, 688f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int version, FstHeader *hdr) const { 689f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.write_header) { 690f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetFstType(type_); 691f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetArcType(A::Type()); 692f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetVersion(version); 693f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetProperties(properties_); 694f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int32 file_flags = 0; 695f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (isymbols_ && opts.write_isymbols) 696f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson file_flags |= FstHeader::HAS_ISYMBOLS; 697f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (osymbols_ && opts.write_osymbols) 698f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson file_flags |= FstHeader::HAS_OSYMBOLS; 699f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.align) 700f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson file_flags |= FstHeader::IS_ALIGNED; 701f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetFlags(file_flags); 702f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->Write(strm, opts.source); 703f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 704f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm); 705f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm); 706f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 707f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 708f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Write-out header and symbols to output 
stream. 709f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If a opts.header is false, skip writing header. 710f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If opts.[io]symbols is false, skip writing those symbols. 711f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // type is the Fst type being written. 712f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // This method is used in the cross-type serialization methods Fst::WriteFst. 713f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static void WriteFstHeader(const Fst<A> &fst, ostream &strm, 714f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FstWriteOptions& opts, int version, 715dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin const string &type, uint64 properties, 716dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin FstHeader *hdr) { 717f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.write_header) { 718f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetFstType(type); 719f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetArcType(A::Type()); 720f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetVersion(version); 721dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin hdr->SetProperties(properties); 722f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int32 file_flags = 0; 723f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst.InputSymbols() && opts.write_isymbols) 724f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson file_flags |= FstHeader::HAS_ISYMBOLS; 725f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst.OutputSymbols() && opts.write_osymbols) 726f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson file_flags |= FstHeader::HAS_OSYMBOLS; 727f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.align) 728f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson file_flags |= FstHeader::IS_ALIGNED; 729f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->SetFlags(file_flags); 
730f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hdr->Write(strm, opts.source); 731f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 732f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst.InputSymbols() && opts.write_isymbols) { 733f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst.InputSymbols()->Write(strm); 734f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 735f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst.OutputSymbols() && opts.write_osymbols) { 736f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst.OutputSymbols()->Write(strm); 737f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 738f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 739f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 740f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // In serialization routines where the header cannot be written until after 741f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the machine has been serialized, this routine can be called to seek to 742f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the beginning of the file an rewrite the header with updated fields. 743f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // It repositions the file pointer back at the end of the file. 744f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // returns true on success, false on failure. 
745f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm, 746f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FstWriteOptions& opts, int version, 747dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin const string &type, uint64 properties, 748dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin FstHeader *hdr, size_t header_offset) { 749f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strm.seekp(header_offset); 750f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!strm) { 751f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; 752f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 753f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 754dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin WriteFstHeader(fst, strm, opts, version, type, properties, hdr); 755f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!strm) { 756f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; 757f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 758f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 759f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strm.seekp(0, ios_base::end); 760f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!strm) { 761f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; 762f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 763f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 764f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 765f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 766f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 767f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected: 768f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable uint64 properties_; // Property bits 
769f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 770f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 771f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string type_; // Unique name of Fst class 772f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SymbolTable *isymbols_; // Ilabel symbol table 773f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SymbolTable *osymbols_; // Olabel symbol table 774f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson RefCounter ref_count_; // Reference count 775f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 776f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const FstImpl<A> &impl); // disallow 777f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 778f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 779f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 780f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts, 781f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int min_version, FstHeader *hdr) { 782f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.header) 783f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *hdr = *opts.header; 784f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else if (!hdr->Read(strm, opts.source)) 785f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 786f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 787f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (FLAGS_v >= 2) { 788f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source 789f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", fst_type: " << hdr->FstType() 790f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", arc_type: " << A::Type() 791f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", version: " << hdr->Version() 792f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", flags: " << hdr->GetFlags(); 
793f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 794f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 795f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (hdr->FstType() != type_) { 796f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_ 797f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "\": " << opts.source; 798f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 799f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 800f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (hdr->ArcType() != A::Type()) { 801f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type() 802f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "\": " << opts.source; 803f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 804f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 805f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (hdr->Version() < min_version) { 806f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_ 807f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " Fst version: " << opts.source; 808f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 809f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 810f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson properties_ = hdr->Properties(); 811f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS) 812f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson isymbols_ = SymbolTable::Read(strm, opts.source); 813f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS) 814f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson osymbols_ =SymbolTable::Read(strm, opts.source); 815f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 816f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.isymbols) { 
817f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete isymbols_; 818f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson isymbols_ = opts.isymbols->Copy(); 819f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 820f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.osymbols) { 821f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete osymbols_; 822f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson osymbols_ = opts.osymbols->Copy(); 823f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 824f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 825f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 826f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 827f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 828f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 829f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonuint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known); 830f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 831f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 832f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This is a helper class template useful for attaching an Fst interface to 833f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// its implementation, handling reference counting. 
834f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate < class I, class F = Fst<typename I::Arc> > 835f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ImplToFst : public F { 836f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 837f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename I::Arc Arc; 838f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Weight Weight; 839f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 840f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 841f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_; } 842f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 843f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual StateId Start() const { return impl_->Start(); } 844f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 845f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual Weight Final(StateId s) const { return impl_->Final(s); } 846f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 847f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); } 848f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 849f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual size_t NumInputEpsilons(StateId s) const { 850f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return impl_->NumInputEpsilons(s); 851f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 852f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 853f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual size_t NumOutputEpsilons(StateId s) const { 854f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return impl_->NumOutputEpsilons(s); 855f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 856f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 857f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual uint64 Properties(uint64 mask, bool test) const { 
858f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (test) { 859f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops); 860f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_->SetProperties(testprops, knownprops); 861f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return testprops & mask; 862f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 863f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return impl_->Properties(mask); 864f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 865f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 866f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 867f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const string& Type() const { return impl_->Type(); } 868f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 869f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const SymbolTable* InputSymbols() const { 870f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return impl_->InputSymbols(); 871f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 872f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 873f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const SymbolTable* OutputSymbols() const { 874f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return impl_->OutputSymbols(); 875f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 876f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 877f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected: 878f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ImplToFst() : impl_(0) {} 879f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 880f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ImplToFst(I *impl) : impl_(impl) {} 881f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 882f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ImplToFst(const ImplToFst<I, F> &fst) { 883f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_ = fst.impl_; 
884f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_->IncrRefCount(); 885f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 886f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 887f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // This constructor presumes there is a copy constructor for the 888f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // implementation. 889f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ImplToFst(const ImplToFst<I, F> &fst, bool safe) { 890f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (safe) { 891f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_ = new I(*(fst.impl_)); 892f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 893f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_ = fst.impl_; 894f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_->IncrRefCount(); 895f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 896f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 897f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 898f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I *GetImpl() const { return impl_; } 899f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 900f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Change Fst implementation pointer. If 'own_impl' is true, 901f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // ownership of the input implementation is given to this 902f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // object; otherwise, the input implementation's reference count 903f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // should be incremented. 
904f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetImpl(I *impl, bool own_impl = true) { 905f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!own_impl) 906f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl->IncrRefCount(); 907f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (impl_ && !impl_->DecrRefCount()) delete impl_; 908f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_ = impl; 909f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 910f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 911f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 912f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Disallow 913f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst); 914f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 915f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ImplToFst<I, F> &operator=(const Fst<Arc> &fst) { 916f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ImplToFst: Assignment operator disallowed"; 917f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->SetProperties(kError, kError); 918f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *this; 919f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 920f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 921f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I *impl_; 922f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 923f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 924f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 925f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Converts FSTs by casting their implementations, where this makes 926f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// sense (which excludes implementations with weight-dependent virtual 927f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// methods). 
Must be a friend of the Fst classes involved (currently 928f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the concrete Fsts: VectorFst, ConstFst, CompactFst). 929f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class F, class G> void Cast(const F &ifst, G *ofst) { 930f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false); 931f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 932f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 933f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst Serialization 934f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 935f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid FstToString(const Fst<A> &fst, string *result) { 936f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostringstream ostrm; 937f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst.Write(ostrm, FstWriteOptions("FstToString")); 938f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *result = ostrm.str(); 939f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 940f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 941f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 942f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonFst<A> *StringToFst(const string &s) { 943f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson istringstream istrm(s); 944f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return Fst<A>::Read(istrm, FstReadOptions("StringToFst")); 945f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 946f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 947f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 948f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 949f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_FST_H__ 950