1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// replace.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: johans@google.com (Johan Schalkwyk) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functions and classes for the recursive replacement of Fsts. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_REPLACE_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_REPLACE_H__ 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 253da1eb108d36da35333b2d655202791af854996bPrzemyslaw Szczepaniak#include <tr1/unordered_map> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap; 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <set> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair; 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h> 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/expanded-fst.h> 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h> 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/matcher.h> 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/replace-util.h> 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/state-table.h> 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h> 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// REPLACE STATE TUPLES AND TABLES 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The replace state table has the form 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// template <class A, class P> 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// class ReplaceStateTable { 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// public: 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// typedef A Arc; 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// typedef P PrefixId; 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// typedef typename A::StateId StateId; 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// typedef ReplaceStateTuple<StateId, PrefixId> StateTuple; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// typedef typename A::Label Label; 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Required constuctor 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ReplaceStateTable(const vector<pair<Label, const Fst<A>*> > &fst_tuples, 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Label root); 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Required copy constructor that does not copy state 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ReplaceStateTable(const ReplaceStateTable<A,P> &table); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Lookup state ID by tuple. If it doesn't exist, then add it. 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// StateId FindState(const StateTuple &tuple); 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Lookup state tuple by ID. 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// const StateTuple &Tuple(StateId id) const; 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// }; 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \struct ReplaceStateTuple 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \brief Tuple of information that uniquely defines a state in replace 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P> 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ReplaceStateTuple { 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef S StateId; 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef P PrefixId; 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceStateTuple() 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : prefix_id(-1), fst_id(kNoStateId), fst_state(kNoStateId) {} 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceStateTuple(PrefixId p, StateId f, StateId s) 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : prefix_id(p), fst_id(f), fst_state(s) {} 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixId prefix_id; // index in prefix table 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId fst_id; // current fst being walked 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId fst_state; // current state in fst being walked, not to be 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // confused with the state_id of the combined fst 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Equality of replace state tuples. 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P> 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsoninline bool operator==(const ReplaceStateTuple<S, P>& x, 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ReplaceStateTuple<S, P>& y) { 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return x.prefix_id == y.prefix_id && 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson x.fst_id == y.fst_id && 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson x.fst_state == y.fst_state; 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceRootSelector 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functor returning true for tuples corresponding to states in the root FST 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P> 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceRootSelector { 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(const ReplaceStateTuple<S, P> &tuple) const { 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return tuple.prefix_id == 0; 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceFingerprint 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fingerprint for general replace state tuples. 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P> 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFingerprint { 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFingerprint(const vector<uint64> *size_array) 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : cumulative_size_array_(size_array) {} 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 operator()(const ReplaceStateTuple<S, P> &tuple) const { 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return tuple.prefix_id * (cumulative_size_array_->back()) + 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cumulative_size_array_->at(tuple.fst_id - 1) + 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson tuple.fst_state; 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<uint64> *cumulative_size_array_; 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceFstStateFingerprint 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Useful when the fst_state uniquely define the tuple. 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P> 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFstStateFingerprint { 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 operator()(const ReplaceStateTuple<S, P>& tuple) const { 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return tuple.fst_state; 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceHash 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A generic hash function for replace state tuples. 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename S, typename P> 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceHash { 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t operator()(const ReplaceStateTuple<S, P>& t) const { 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return t.prefix_id + t.fst_id * kPrime0 + t.fst_state * kPrime1; 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const size_t kPrime0; 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const size_t kPrime1; 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename S, typename P> 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst size_t ReplaceHash<S, P>::kPrime0 = 7853; 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename S, typename P> 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst size_t ReplaceHash<S, P>::kPrime1 = 7867; 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T> class ReplaceFstMatcher; 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class VectorHashReplaceStateTable 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A two-level state table for replace. 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Warning: calls CountStates to compute the number of states of each 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// component Fst. 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class P = ssize_t> 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass VectorHashReplaceStateTable { 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef P PrefixId; 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ReplaceStateTuple<StateId, P> StateTuple; 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef VectorHashStateTable<ReplaceStateTuple<StateId, P>, 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceRootSelector<StateId, P>, 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstStateFingerprint<StateId, P>, 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFingerprint<StateId, P> > StateTable; 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VectorHashReplaceStateTable( 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<Label, const Fst<A>*> > &fst_tuples, 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label root) : root_size_(0) { 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cumulative_size_array_.push_back(0); 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < fst_tuples.size(); ++i) { 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_tuples[i].first == root) { 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson root_size_ = CountStates(*(fst_tuples[i].second)); 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cumulative_size_array_.push_back(cumulative_size_array_.back()); 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cumulative_size_array_.push_back(cumulative_size_array_.back() + 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CountStates(*(fst_tuples[i].second))); 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_ = new StateTable( 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new ReplaceRootSelector<StateId, P>, 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new ReplaceFstStateFingerprint<StateId, P>, 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new ReplaceFingerprint<StateId, P>(&cumulative_size_array_), 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson root_size_, 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson root_size_ + cumulative_size_array_.back()); 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VectorHashReplaceStateTable(const VectorHashReplaceStateTable<A, P> &table) 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : root_size_(table.root_size_), 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cumulative_size_array_(table.cumulative_size_array_) { 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_ = new StateTable( 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new ReplaceRootSelector<StateId, P>, 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new ReplaceFstStateFingerprint<StateId, P>, 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new ReplaceFingerprint<StateId, P>(&cumulative_size_array_), 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson root_size_, 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson root_size_ + cumulative_size_array_.back()); 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~VectorHashReplaceStateTable() { 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete state_table_; 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId FindState(const StateTuple &tuple) { 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return state_table_->FindState(tuple); 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const StateTuple &Tuple(StateId id) const { 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return state_table_->Tuple(id); 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId root_size_; 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<uint64> cumulative_size_array_; 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTable *state_table_; 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class DefaultReplaceStateTable 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Default replace state table 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class P = ssize_t> 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass DefaultReplaceStateTable : public CompactHashStateTable< 239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceStateTuple<typename A::StateId, P>, 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceHash<typename A::StateId, P> > { 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef P PrefixId; 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ReplaceStateTuple<StateId, P> StateTuple; 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CompactHashStateTable<StateTuple, 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceHash<StateId, PrefixId> > StateTable; 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using StateTable::FindState; 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using StateTable::Tuple; 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DefaultReplaceStateTable( 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<Label, const Fst<A>*> > &fst_tuples, 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label root) {} 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DefaultReplaceStateTable(const DefaultReplaceStateTable<A, P> &table) 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : StateTable() {} 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// REPLACE FST CLASS 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// By default ReplaceFst will copy the input label of the 'replace arc'. 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For acceptors we do not want this behaviour. Instead we need to 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// create an epsilon arc when recursing into the appropriate Fst. 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The 'epsilon_on_replace' option can be used to toggle this behaviour. 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T = DefaultReplaceStateTable<A> > 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ReplaceFstOptions : CacheOptions { 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 root; // root rule for expansion 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool epsilon_on_replace; 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool take_ownership; // take ownership of input Fst(s) 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T* state_table; 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstOptions(const CacheOptions &opts, int64 r) 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheOptions(opts), 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson root(r), 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson epsilon_on_replace(false), 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson take_ownership(false), 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table(0) {} 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit ReplaceFstOptions(int64 r) 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : root(r), 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson epsilon_on_replace(false), 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson take_ownership(false), 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table(0) {} 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstOptions(int64 r, bool epsilon_replace_arc) 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : root(r), 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson epsilon_on_replace(epsilon_replace_arc), 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson take_ownership(false), 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table(0) {} 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstOptions() 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : root(kNoLabel), 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson epsilon_on_replace(false), 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson take_ownership(false), 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table(0) {} 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceFstImpl 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \brief Implementation class for replace class Fst 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The replace implementation class supports a dynamic 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// expansion of a recursive transition network represented as Fst 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// with dynamic replacable arcs. 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T> 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFstImpl : public CacheImpl<A> { 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class ReplaceFstMatcher<A, T>; 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetType; 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetProperties; 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::WriteHeader; 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetInputSymbols; 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetOutputSymbols; 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::InputSymbols; 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::OutputSymbols; 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheImpl<A>::PushArc; 321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheImpl<A>::HasArcs; 322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheImpl<A>::HasFinal; 323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheImpl<A>::HasStart; 324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheImpl<A>::SetArcs; 325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheImpl<A>::SetFinal; 326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheImpl<A>::SetStart; 327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CacheState<A> State; 332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef unordered_map<Label, Label> NonTerminalHash; 334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef T StateTable; 336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename T::PrefixId PrefixId; 337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ReplaceStateTuple<StateId, PrefixId> StateTuple; 338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // constructor for replace class implementation. 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \param fst_tuples array of label/fst tuples, one for each non-terminal 341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstImpl(const vector< pair<Label, const Fst<A>* > >& fst_tuples, 342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ReplaceFstOptions<A, T> &opts) 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheImpl<A>(opts), 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson epsilon_on_replace_(opts.epsilon_on_replace), 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_(opts.state_table ? opts.state_table : 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new StateTable(fst_tuples, opts.root)) { 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetType("replace"); 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_tuples.size() > 0) { 351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetInputSymbols(fst_tuples[0].second->InputSymbols()); 352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetOutputSymbols(fst_tuples[0].second->OutputSymbols()); 353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool all_negative = true; // all nonterminals are negative? 356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool dense_range = true; // all nonterminals are positive 357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // and form a dense range containing 1? 358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < fst_tuples.size(); ++i) { 359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label nonterminal = fst_tuples[i].first; 360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (nonterminal >= 0) 361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson all_negative = false; 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (nonterminal > fst_tuples.size() || nonterminal <= 0) 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dense_range = false; 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<uint64> inprops; 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool all_ilabel_sorted = true; 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool all_olabel_sorted = true; 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool all_non_empty = true; 370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_array_.push_back(0); 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < fst_tuples.size(); ++i) { 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label label = fst_tuples[i].first; 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A> *fst = fst_tuples[i].second; 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nonterminal_hash_[label] = fst_array_.size(); 375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nonterminal_set_.insert(label); 376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_array_.push_back(opts.take_ownership ? fst : fst->Copy()); 377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst->Start() == kNoStateId) 378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson all_non_empty = false; 379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if(!fst->Properties(kILabelSorted, false)) 380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson all_ilabel_sorted = false; 381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if(!fst->Properties(kOLabelSorted, false)) 382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson all_olabel_sorted = false; 383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson inprops.push_back(fst->Properties(kCopyProperties, false)); 384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (i) { 385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!CompatSymbols(InputSymbols(), fst->InputSymbols())) { 386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ReplaceFstImpl: input symbols of Fst " << i 387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " does not match input symbols of base Fst (0'th fst)"; 388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kError, kError); 389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!CompatSymbols(OutputSymbols(), fst->OutputSymbols())) { 391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ReplaceFstImpl: output symbols of Fst " << i 392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " does not match output symbols of base Fst " 393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "(0'th fst)"; 394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kError, kError); 395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label nonterminal = nonterminal_hash_[opts.root]; 399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((nonterminal == 0) && (fst_array_.size() > 1)) { 400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ReplaceFstImpl: no Fst corresponding to root label '" 401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << opts.root << "' in the input tuple vector"; 402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kError, kError); 403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson root_ = (nonterminal > 0) ? nonterminal : 1; 405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(ReplaceProperties(inprops, root_ - 1, epsilon_on_replace_, 407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson all_non_empty)); 408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // We assume that all terminals are positive. The resulting 409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // ReplaceFst is known to be kILabelSorted when all sub-FSTs are 410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // kILabelSorted and one of the 3 following conditions is satisfied: 411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 1. 'epsilon_on_replace' is false, or 412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 2. all non-terminals are negative, or 413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 3. all non-terninals are positive and form a dense range containing 1. 414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (all_ilabel_sorted && 415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (!epsilon_on_replace_ || all_negative || dense_range)) 416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kILabelSorted, kILabelSorted); 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Similarly, the resulting ReplaceFst is known to be 418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // kOLabelSorted when all sub-FSTs are kOLabelSorted and one of 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the 2 following conditions is satisfied: 420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 1. all non-terminals are negative, or 421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 2. all non-terninals are positive and form a dense range containing 1. 422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (all_olabel_sorted && (all_negative || dense_range)) 423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kOLabelSorted, kOLabelSorted); 424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Enable optional caching as long as sorted and all non empty. 426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (Properties(kILabelSorted | kOLabelSorted) && all_non_empty) 427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson always_cache_ = false; 428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson always_cache_ = true; 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "ReplaceFstImpl::ReplaceFstImpl: always_cache = " 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << (always_cache_ ? "true" : "false"); 432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstImpl(const ReplaceFstImpl& impl) 435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheImpl<A>(impl), 436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson epsilon_on_replace_(impl.epsilon_on_replace_), 437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson always_cache_(impl.always_cache_), 438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_(new StateTable(*(impl.state_table_))), 439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nonterminal_set_(impl.nonterminal_set_), 440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nonterminal_hash_(impl.nonterminal_hash_), 441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson root_(impl.root_) { 442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetType("replace"); 443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(impl.Properties(), kCopyProperties); 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetInputSymbols(impl.InputSymbols()); 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetOutputSymbols(impl.OutputSymbols()); 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_array_.reserve(impl.fst_array_.size()); 447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_array_.push_back(0); 448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 1; i < impl.fst_array_.size(); ++i) { 449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_array_.push_back(impl.fst_array_[i]->Copy(true)); 450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~ReplaceFstImpl() { 454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "~ReplaceFstImpl: gc = " 455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << (CacheImpl<A>::GetCacheGc() ? "true" : "false") 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", gc_size = " << CacheImpl<A>::GetCacheSize() 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", gc_limit = " << CacheImpl<A>::GetCacheLimit(); 458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete state_table_; 460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 1; i < fst_array_.size(); ++i) { 461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete fst_array_[i]; 462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Computes the dependency graph of the replace class and returns 466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // true if the dependencies are cyclic. Cyclic dependencies will result 467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // in an un-expandable replace fst. 468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool CyclicDependencies() const { 469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceUtil<A> replace_util(fst_array_, nonterminal_hash_, root_); 470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return replace_util.CyclicDependencies(); 471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Return or compute start state of replace fst 474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId Start() { 475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasStart()) { 476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_array_.size() == 1) { // no fsts defined for replace 477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetStart(kNoStateId); 478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return kNoStateId; 479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* fst = fst_array_[root_]; 481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId fst_start = fst->Start(); 482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_start == kNoStateId) // root Fst is empty 483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return kNoStateId; 484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixId prefix = GetPrefixId(StackPrefix()); 486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId start = state_table_->FindState( 487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple(prefix, root_, fst_start)); 488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetStart(start); 489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return start; 490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::Start(); 493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // return final weight of state (kInfWeight means state is not final) 497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight Final(StateId s) { 498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasFinal(s)) { 499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const StateTuple& tuple = state_table_->Tuple(s); 500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const StackPrefix& stack = stackprefix_array_[tuple.prefix_id]; 501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* fst = fst_array_[tuple.fst_id]; 502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId fst_state = tuple.fst_state; 503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst->Final(fst_state) != Weight::Zero() && stack.Depth() == 0) 505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinal(s, fst->Final(fst_state)); 506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinal(s, Weight::Zero()); 508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::Final(s); 510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumArcs(StateId s) { 513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (HasArcs(s)) { // If state cached, use the cached value. 514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumArcs(s); 515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (always_cache_) { // If always caching, expand and cache state. 516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumArcs(s); 518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { // Otherwise compute the number of arcs without expanding. 519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple tuple = state_table_->Tuple(s); 520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (tuple.fst_state == kNoStateId) 521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* fst = fst_array_[tuple.fst_id]; 524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t num_arcs = fst->NumArcs(tuple.fst_state); 525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ComputeFinalArc(tuple, 0)) 526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson num_arcs++; 527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return num_arcs; 529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Returns whether a given label is a non terminal 533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool IsNonTerminal(Label l) const { 534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // TODO(allauzen): be smarter and take advantage of 535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // all_dense or all_negative. 536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Use also in ComputeArc, this would require changes to replace 537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // so that recursing into an empty fst lead to a non co-accessible 538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // state instead of deleting the arc as done currently. 539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Current use correct, since i/olabel sorted iff all_non_empty. 540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename NonTerminalHash::const_iterator it = 541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nonterminal_hash_.find(l); 542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return it != nonterminal_hash_.end(); 543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumInputEpsilons(StateId s) { 546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (HasArcs(s)) { 547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If state cached, use the cached value. 548f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumInputEpsilons(s); 549f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (always_cache_ || !Properties(kILabelSorted)) { 550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If always caching or if the number of input epsilons is too expensive 551f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // to compute without caching (i.e. not ilabel sorted), 552f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // then expand and cache state. 553f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 554f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumInputEpsilons(s); 555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 556f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Otherwise, compute the number of input epsilons without caching. 557f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple tuple = state_table_->Tuple(s); 558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (tuple.fst_state == kNoStateId) 559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* fst = fst_array_[tuple.fst_id]; 561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t num = 0; 562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!epsilon_on_replace_) { 563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If epsilon_on_replace is false, all input epsilon arcs 564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // are also input epsilons arcs in the underlying machine. 565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->NumInputEpsilons(tuple.fst_state); 566f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 567f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Otherwise, one need to consider that all non-terminal arcs 568f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // in the underlying machine also become input epsilon arc. 569f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state); 570f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; !aiter.Done() && 571f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ((aiter.Value().ilabel == 0) || 572f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IsNonTerminal(aiter.Value().olabel)); 573f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) 574f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++num; 575f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 576f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ComputeFinalArc(tuple, 0)) 577f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson num++; 578f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return num; 579f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 580f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 581f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 582f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumOutputEpsilons(StateId s) { 583f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (HasArcs(s)) { 584f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If state cached, use the cached value. 585f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumOutputEpsilons(s); 586f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if(always_cache_ || !Properties(kOLabelSorted)) { 587f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If always caching or if the number of output epsilons is too expensive 588f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // to compute without caching (i.e. not olabel sorted), 589f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // then expand and cache state. 590f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 591f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumOutputEpsilons(s); 592f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 593f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Otherwise, compute the number of output epsilons without caching. 594f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple tuple = state_table_->Tuple(s); 595f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (tuple.fst_state == kNoStateId) 596f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 597f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* fst = fst_array_[tuple.fst_id]; 598f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t num = 0; 599f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state); 600f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; !aiter.Done() && 601f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ((aiter.Value().olabel == 0) || 602f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IsNonTerminal(aiter.Value().olabel)); 603f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) 604f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++num; 605f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ComputeFinalArc(tuple, 0)) 606f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson num++; 607f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return num; 608f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 609f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 610f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 611f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 Properties() const { return Properties(kFstProperties); } 612f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 613f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set error if found; return FST impl properties. 614f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 Properties(uint64 mask) const { 615f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (mask & kError) { 616f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 1; i < fst_array_.size(); ++i) { 617f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_array_[i]->Properties(kError, false)) 618f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kError, kError); 619f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 620f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 621f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return FstImpl<Arc>::Properties(mask); 622f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 623f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 624f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // return the base arc iterator, if arcs have not been computed yet, 625f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // extend/recurse for new arcs. 626f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void InitArcIterator(StateId s, ArcIteratorData<A> *data) { 627f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 628f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 629f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CacheImpl<A>::InitArcIterator(s, data); 630f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // TODO(allauzen): Set behaviour of generic iterator 631f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Warning: ArcIterator<ReplaceFst<A> >::InitCache() 632f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // relies on current behaviour. 633f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 634f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 635f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 636f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Extend current state (walk arcs one level deep) 637f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Expand(StateId s) { 638f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple tuple = state_table_->Tuple(s); 639f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 640f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If local fst is empty 641f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (tuple.fst_state == kNoStateId) { 642f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetArcs(s); 643f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 644f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 645f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 646f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator< Fst<A> > aiter( 647f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *(fst_array_[tuple.fst_id]), tuple.fst_state); 648f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc; 649f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 650f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Create a final arc when needed 651f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ComputeFinalArc(tuple, &arc)) 652f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PushArc(s, arc); 653f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 654f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Expand all arcs leaving the state 655f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (;!aiter.Done(); aiter.Next()) { 656f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ComputeArc(tuple, aiter.Value(), &arc)) 657f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PushArc(s, arc); 658f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 659f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 660f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetArcs(s); 661f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 662f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 663f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Expand(StateId s, const StateTuple &tuple, 664f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ArcIteratorData<A> &data) { 665f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If local fst is empty 666f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (tuple.fst_state == kNoStateId) { 667f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetArcs(s); 668f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 669f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 670f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 671f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator< Fst<A> > aiter(data); 672f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc; 673f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 674f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Create a final arc when needed 675f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ComputeFinalArc(tuple, &arc)) 676f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AddArc(s, arc); 677f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 678f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Expand all arcs leaving the state 679f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; !aiter.Done(); aiter.Next()) { 680f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ComputeArc(tuple, aiter.Value(), &arc)) 681f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AddArc(s, arc); 682f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 683f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 684f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetArcs(s); 685f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 686f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 687f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If arcp == 0, only returns if a final arc is required, does not 688f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // actually compute it. 689f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ComputeFinalArc(const StateTuple &tuple, A* arcp, 690f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 flags = kArcValueFlags) { 691f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* fst = fst_array_[tuple.fst_id]; 692f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId fst_state = tuple.fst_state; 693f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_state == kNoStateId) 694f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 695f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 696f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // if state is final, pop up stack 697f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const StackPrefix& stack = stackprefix_array_[tuple.prefix_id]; 698f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst->Final(fst_state) != Weight::Zero() && stack.Depth()) { 699f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (arcp) { 700f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcp->ilabel = 0; 701f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcp->olabel = 0; 702f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (flags & kArcNextStateValue) { 703f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixId prefix_id = PopPrefix(stack); 704f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const PrefixTuple& top = stack.Top(); 705f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcp->nextstate = state_table_->FindState( 706f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple(prefix_id, top.fst_id, top.nextstate)); 707f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 708f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (flags & kArcWeightValue) 709f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcp->weight = fst->Final(fst_state); 710f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 711f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 712f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 713f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 714f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 715f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 716f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 717f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compute the arc in the replace fst corresponding to a given 718f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // in the underlying machine. Returns false if the underlying arc 719f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // corresponds to no arc in the replace. 720f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ComputeArc(const StateTuple &tuple, const A &arc, A* arcp, 721f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 flags = kArcValueFlags) { 722f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!epsilon_on_replace_ && 723f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (flags == (flags & (kArcILabelValue | kArcWeightValue)))) { 724f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *arcp = arc; 725f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 726f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 727f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 728f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (arc.olabel == 0) { // expand local fst 729f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId nextstate = flags & kArcNextStateValue 730f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ? state_table_->FindState( 731f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate)) 732f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : kNoStateId; 733f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate); 734f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 735f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // check for non terminal 736f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename NonTerminalHash::const_iterator it = 737f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nonterminal_hash_.find(arc.olabel); 738f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it != nonterminal_hash_.end()) { // recurse into non terminal 739f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label nonterminal = it->second; 740f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* nt_fst = fst_array_[nonterminal]; 741f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixId nt_prefix = PushPrefix(stackprefix_array_[tuple.prefix_id], 742f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson tuple.fst_id, arc.nextstate); 743f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 744f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // if start state is valid replace, else arc is implicitly 745f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // deleted 746f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId nt_start = nt_fst->Start(); 747f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (nt_start != kNoStateId) { 748f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId nt_nextstate = flags & kArcNextStateValue 749f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ? state_table_->FindState( 750f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple(nt_prefix, nonterminal, nt_start)) 751f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : kNoStateId; 752f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label ilabel = (epsilon_on_replace_) ? 0 : arc.ilabel; 753f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *arcp = A(ilabel, 0, arc.weight, nt_nextstate); 754f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 755f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 756f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 757f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 758f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId nextstate = flags & kArcNextStateValue 759f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ? state_table_->FindState( 760f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate)) 761f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : kNoStateId; 762f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate); 763f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 764f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 765f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 766f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 767f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 768f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Returns the arc iterator flags supported by this Fst. 769f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 ArcIteratorFlags() const { 770f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 flags = kArcValueFlags; 771f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!always_cache_) 772f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson flags |= kArcNoCache; 773f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return flags; 774f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 775f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 776f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T* GetStateTable() const { 777f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return state_table_; 778f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 779f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 780f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* GetFst(Label fst_id) const { 781f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst_array_[fst_id]; 782f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 783f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 784f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool EpsilonOnReplace() const { return epsilon_on_replace_; } 785f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 786f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // private helper classes 787f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 788f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const size_t kPrime0; 789f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 790f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \class PrefixTuple 791f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \brief Tuple of fst_id and destination state (entry in stack prefix) 792f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson struct PrefixTuple { 793f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixTuple(Label f, StateId s) : fst_id(f), nextstate(s) {} 794f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 795f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label fst_id; 796f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId nextstate; 797f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 798f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 799f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \class StackPrefix 800f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \brief Container for stack prefix. 801f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class StackPrefix { 802f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 803f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackPrefix() {} 804f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 805f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // copy constructor 806f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackPrefix(const StackPrefix& x) : 807f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson prefix_(x.prefix_) { 808f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 809f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 810f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Push(StateId fst_id, StateId nextstate) { 811f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson prefix_.push_back(PrefixTuple(fst_id, nextstate)); 812f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 813f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 814f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Pop() { 815f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson prefix_.pop_back(); 816f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 817f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 818f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const PrefixTuple& Top() const { 819f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return prefix_[prefix_.size()-1]; 820f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 821f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 822f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t Depth() const { 823f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return prefix_.size(); 824f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 825f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 826f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 827f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<PrefixTuple> prefix_; 828f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 829f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 830f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 831f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \class StackPrefixEqual 832f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \brief Compare two stack prefix classes for equality 833f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class StackPrefixEqual { 834f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 835f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(const StackPrefix& x, const StackPrefix& y) const { 836f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (x.prefix_.size() != y.prefix_.size()) return false; 837f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < x.prefix_.size(); ++i) { 838f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (x.prefix_[i].fst_id != y.prefix_[i].fst_id || 839f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson x.prefix_[i].nextstate != y.prefix_[i].nextstate) return false; 840f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 841f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 842f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 843f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 844f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 845f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 846f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \class StackPrefixKey 847f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // \brief Hash function for stack prefix to prefix id 848f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class StackPrefixKey { 849f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 850f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t operator()(const StackPrefix& x) const { 851f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t sum = 0; 852f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < x.prefix_.size(); ++i) { 853f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson sum += x.prefix_[i].fst_id + x.prefix_[i].nextstate*kPrime0; 854f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 855f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return sum; 856f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 857f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 858f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 859f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef unordered_map<StackPrefix, PrefixId, StackPrefixKey, StackPrefixEqual> 860f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackPrefixHash; 861f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 862f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // private methods 863f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 864f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // hash stack prefix (return unique index into stackprefix array) 865f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixId GetPrefixId(const StackPrefix& prefix) { 866f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename StackPrefixHash::iterator it = prefix_hash_.find(prefix); 867f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it == prefix_hash_.end()) { 868f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixId prefix_id = stackprefix_array_.size(); 869f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson stackprefix_array_.push_back(prefix); 870f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson prefix_hash_[prefix] = prefix_id; 871f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return prefix_id; 872f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 873f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return it->second; 874f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 875f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 876f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 877f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // prefix id after a stack pop 878f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixId PopPrefix(StackPrefix prefix) { 879f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson prefix.Pop(); 880f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return GetPrefixId(prefix); 881f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 882f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 883f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // prefix id after a stack push 884f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrefixId PushPrefix(StackPrefix prefix, Label fst_id, StateId nextstate) { 885f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson prefix.Push(fst_id, nextstate); 886f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return GetPrefixId(prefix); 887f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 888f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 889f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 890f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // private data 891f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 892f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // runtime options 893f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool epsilon_on_replace_; 894f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool always_cache_; // Optionally caching arc iterator disabled when true 895f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 896f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // state table 897f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTable *state_table_; 898f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 899f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // cross index of unique stack prefix 900f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // could potentially have one copy of prefix array 901f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackPrefixHash prefix_hash_; 902f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StackPrefix> stackprefix_array_; 903f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 904f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson set<Label> nonterminal_set_; 905f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NonTerminalHash nonterminal_hash_; 906f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<const Fst<A>*> fst_array_; 907f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label root_; 908f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 909f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const ReplaceFstImpl<A, T> &); // disallow 910f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 911f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 912f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 913f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T> 914f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst size_t ReplaceFstImpl<A, T>::kPrime0 = 7853; 915f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 916f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 917f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceFst 918f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \brief Recursivively replaces arcs in the root Fst with other Fsts. 919f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This version is a delayed Fst. 920f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 921f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ReplaceFst supports dynamic replacement of arcs in one Fst with 922f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// another Fst. This replacement is recursive. ReplaceFst can be used 923f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to support a variety of delayed constructions such as recursive 924f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transition networks, union, or closure. It is constructed with an 925f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// array of Fst(s). One Fst represents the root (or topology) 926f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// machine. The root Fst refers to other Fsts by recursively replacing 927f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// arcs labeled as non-terminals with the matching non-terminal 928f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst. Currently the ReplaceFst uses the output symbols of the arcs 929f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to determine whether the arc is a non-terminal arc or not. A 930f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// non-terminal can be any label that is not a non-zero terminal label 931f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// in the output alphabet. 932f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 933f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Note that the constructor uses a vector of pair<>. These correspond 934f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to the tuple of non-terminal Label and corresponding Fst. For example 935f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to implement the closure operation we need 2 Fsts. The first root 936f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst is a single Arc on the start State that self loops, it references 937f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the particular machine for which we are performing the closure operation. 938f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 939f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The ReplaceFst class supports an optionally caching arc iterator: 940f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ArcIterator< ReplaceFst<A> > 941f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The ReplaceFst need to be built such that it is known to be ilabel 942f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// or olabel sorted (see usage below). 943f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 944f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Observe that Matcher<Fst<A> > will use the optionally caching arc 945f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// iterator when available (Fst is ilabel sorted and matching on the 946f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// input, or Fst is olabel sorted and matching on the output). 947f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In order to obtain the most efficient behaviour, it is recommended 948f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to set 'epsilon_on_replace' to false (this means constructing acceptors 949f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// as transducers with epsilons on the input side of nonterminal arcs) 950f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// and matching on the input side. 951f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 952f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This class attaches interface to implementation and handles 953f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reference counting, delegating most methods to ImplToFst. 954f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T = DefaultReplaceStateTable<A> > 955f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFst : public ImplToFst< ReplaceFstImpl<A, T> > { 956f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 957f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class ArcIterator< ReplaceFst<A, T> >; 958f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class StateIterator< ReplaceFst<A, T> >; 959f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class ReplaceFstMatcher<A, T>; 960f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 961f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 962f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 963f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 964f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 965f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CacheState<A> State; 966f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ReplaceFstImpl<A, T> Impl; 967f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 968f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ImplToFst<Impl>::Properties; 969f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 970f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array, 971f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label root) 972f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(new Impl(fst_array, ReplaceFstOptions<A, T>(root))) {} 973f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 974f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array, 975f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ReplaceFstOptions<A, T> &opts) 976f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(new Impl(fst_array, opts)) {} 977f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 978f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // See Fst<>::Copy() for doc. 979f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFst(const ReplaceFst<A, T>& fst, bool safe = false) 980f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(fst, safe) {} 981f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 982f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Get a copy of this ReplaceFst. See Fst<>::Copy() for further doc. 983f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ReplaceFst<A, T> *Copy(bool safe = false) const { 984f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new ReplaceFst<A, T>(*this, safe); 985f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 986f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 987f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual inline void InitStateIterator(StateIteratorData<A> *data) const; 988f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 989f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 990f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->InitArcIterator(s, data); 991f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 992f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 993f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual MatcherBase<A> *InitMatcher(MatchType match_type) const { 994f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((GetImpl()->ArcIteratorFlags() & kArcNoCache) && 995f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ((match_type == MATCH_INPUT && Properties(kILabelSorted, false)) || 996f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (match_type == MATCH_OUTPUT && Properties(kOLabelSorted, false)))) { 997f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new ReplaceFstMatcher<A, T>(*this, match_type); 998f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 999f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else { 1000f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "Not using replace matcher"; 1001f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 1002f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1003f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1004f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1005f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool CyclicDependencies() const { 1006f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return GetImpl()->CyclicDependencies(); 1007f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1008f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1009f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 1010f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Makes visible to friends. 1011f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } 1012f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1013f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const ReplaceFst<A> &fst); // disallow 1014f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 1015f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1016f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1017f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ReplaceFst. 1018f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class A, class T> 1019f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< ReplaceFst<A, T> > 1020f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheStateIterator< ReplaceFst<A, T> > { 1021f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 1022f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit StateIterator(const ReplaceFst<A, T> &fst) 1023f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheStateIterator< ReplaceFst<A, T> >(fst, fst.GetImpl()) {} 1024f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1025f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 1026f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(StateIterator); 1027f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 1028f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1029f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1030f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ReplaceFst. 1031f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implements optional caching. It can be used as follows: 1032f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 1033f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ReplaceFst<A> replace; 1034f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ArcIterator< ReplaceFst<A> > aiter(replace, s); 1035f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Note: ArcIterator< Fst<A> > is always a caching arc iterator. 1036f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// aiter.SetFlags(kArcNoCache, kArcNoCache); 1037f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Use the arc iterator, no arc will be cached, no state will be expanded. 1038f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // The varied 'kArcValueFlags' can be used to decide which part 1039f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // of arc values needs to be computed. 1040f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// aiter.SetFlags(kArcILabelValue, kArcValueFlags); 1041f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Only want the ilabel for this arc 1042f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// aiter.Value(); // Does not compute the destination state. 1043f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// aiter.Next(); 1044f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// aiter.SetFlags(kArcNextStateValue, kArcNextStateValue); 1045f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Want both ilabel and nextstate for that arc 1046f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// aiter.Value(); // Does compute the destination state and inserts it 1047f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // in the replace state table. 1048f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // No Arc has been cached at that point. 1049f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 1050f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T> 1051f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< ReplaceFst<A, T> > { 1052f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 1053f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 1054f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 1055f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1056f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator(const ReplaceFst<A, T> &fst, StateId s) 1057f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : fst_(fst), state_(s), pos_(0), offset_(0), flags_(0), arcs_(0), 1058f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_flags_(0), final_flags_(0) { 1059f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cache_data_.ref_count = 0; 1060f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson local_data_.ref_count = 0; 1061f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1062f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If FST does not support optional caching, force caching. 1063f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if(!(fst_.GetImpl()->ArcIteratorFlags() & kArcNoCache) && 1064f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !(fst_.GetImpl()->HasArcs(state_))) 1065f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_.GetImpl()->Expand(state_); 1066f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1067f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If state is already cached, use cached arcs array. 1068f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_.GetImpl()->HasArcs(state_)) { 1069f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (fst_.GetImpl())->template CacheImpl<A>::InitArcIterator(state_, 1070f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson &cache_data_); 1071f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson num_arcs_ = cache_data_.narcs; 1072f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcs_ = cache_data_.arcs; // 'arcs_' is a ptr to the cached arcs. 1073f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_flags_ = kArcValueFlags; // All the arc member values are valid. 1074f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { // Otherwise delay decision until Value() is called. 1075f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson tuple_ = fst_.GetImpl()->GetStateTable()->Tuple(state_); 1076f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (tuple_.fst_state == kNoStateId) { 1077f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson num_arcs_ = 0; 1078f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 1079f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // The decision to cache or not to cache has been defered 1080f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // until Value() or SetFlags() is called. However, the arc 1081f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // iterator is set up now to be ready for non-caching in order 1082f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // to keep the Value() method simple and efficient. 1083f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A>* fst = fst_.GetImpl()->GetFst(tuple_.fst_id); 1084f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->InitArcIterator(tuple_.fst_state, &local_data_); 1085f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'arcs_' is a pointer to the arcs in the underlying machine. 1086f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcs_ = local_data_.arcs; 1087f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compute the final arc (but not its destination state) 1088f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // if a final arc is required. 1089f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool has_final_arc = fst_.GetImpl()->ComputeFinalArc( 1090f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson tuple_, 1091f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson &final_arc_, 1092f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kArcValueFlags & ~kArcNextStateValue); 1093f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set the arc value flags that hold for 'final_arc_'. 1094f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson final_flags_ = kArcValueFlags & ~kArcNextStateValue; 1095f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compute the number of arcs. 1096f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson num_arcs_ = local_data_.narcs; 1097f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (has_final_arc) 1098f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++num_arcs_; 1099f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set the offset between the underlying arc positions and 1100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the positions in the arc iterator. 1101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson offset_ = num_arcs_ - local_data_.narcs; 1102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Defers the decision to cache or not until Value() or 1103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // SetFlags() is called. 1104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_flags_ = 0; 1105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~ArcIterator() { 1110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (cache_data_.ref_count) 1111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson --(*cache_data_.ref_count); 1112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (local_data_.ref_count) 1113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson --(*local_data_.ref_count); 1114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void ExpandAndCache() const { 1117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // TODO(allauzen): revisit this 1118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // fst_.GetImpl()->Expand(state_, tuple_, local_data_); 1119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // (fst_.GetImpl())->CacheImpl<A>*>::InitArcIterator(state_, 1120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // &cache_data_); 1121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 1122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_.InitArcIterator(state_, &cache_data_); // Expand and cache state. 1123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcs_ = cache_data_.arcs; // 'arcs_' is a pointer to the cached arcs. 1124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_flags_ = kArcValueFlags; // All the arc member values are valid. 1125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson offset_ = 0; // No offset 1126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Init() { 1130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (flags_ & kArcNoCache) { // If caching is disabled 1131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'arcs_' is a pointer to the arcs in the underlying machine. 1132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcs_ = local_data_.arcs; 1133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set the arcs value flags that hold for 'arcs_'. 1134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_flags_ = kArcWeightValue; 1135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst_.GetImpl()->EpsilonOnReplace()) 1136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_flags_ |= kArcILabelValue; 1137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set the offset between the underlying arc positions and 1138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the positions in the arc iterator. 1139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson offset_ = num_arcs_ - local_data_.narcs; 1140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { // Otherwise, expand and cache 1141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandAndCache(); 1142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Done() const { return pos_ >= num_arcs_; } 1146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A& Value() const { 1148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If 'data_flags_' was set to 0, non-caching was not requested 1149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!data_flags_) { 1150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // TODO(allauzen): revisit this. 1151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (flags_ & kArcNoCache) { 1152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Should never happen. 1153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ReplaceFst: inconsistent arc iterator flags"; 1154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandAndCache(); // Expand and cache. 1156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (pos_ - offset_ >= 0) { // The requested arc is not the 'final' arc. 1159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A& arc = arcs_[pos_ - offset_]; 1160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((data_flags_ & flags_) == (flags_ & kArcValueFlags)) { 1161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If the value flags for 'arc' match the recquired value flags 1162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // then return 'arc'. 1163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return arc; 1164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 1165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Otherwise, compute the corresponding arc on-the-fly. 1166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_.GetImpl()->ComputeArc(tuple_, arc, &arc_, flags_ & kArcValueFlags); 1167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return arc_; 1168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { // The requested arc is the 'final' arc. 1170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((final_flags_ & flags_) != (flags_ & kArcValueFlags)) { 1171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If the arc value flags that hold for the final arc 1172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // do not match the requested value flags, then 1173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'final_arc_' needs to be updated. 1174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_.GetImpl()->ComputeFinalArc(tuple_, &final_arc_, 1175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson flags_ & kArcValueFlags); 1176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson final_flags_ = flags_ & kArcValueFlags; 1177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return final_arc_; 1179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Next() { ++pos_; } 1183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t Position() const { return pos_; } 1185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Reset() { pos_ = 0; } 1187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Seek(size_t pos) { pos_ = pos; } 1189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 Flags() const { return flags_; } 1191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetFlags(uint32 f, uint32 mask) { 1193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Update the flags taking into account what flags are supported 1194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // by the Fst. 1195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson flags_ &= ~mask; 1196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson flags_ |= (f & fst_.GetImpl()->ArcIteratorFlags()); 1197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If non-caching is not requested (and caching has not already 1198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // been performed), then flush 'data_flags_' to request caching 1199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // during the next call to Value(). 1200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!(flags_ & kArcNoCache) && data_flags_ != kArcValueFlags) { 1201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst_.GetImpl()->HasArcs(state_)) 1202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_flags_ = 0; 1203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If 'data_flags_' has been flushed but non-caching is requested 1205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // before calling Value(), then set up the iterator for non-caching. 1206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((f & kArcNoCache) && (!data_flags_)) 1207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Init(); 1208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 1211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ReplaceFst<A, T> &fst_; // Reference to the FST 1212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId state_; // State in the FST 1213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable typename T::StateTuple tuple_; // Tuple corresponding to state_ 1214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t pos_; // Current position 1216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable ssize_t offset_; // Offset between position in iterator and in arcs_ 1217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t num_arcs_; // Number of arcs at state_ 1218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 flags_; // Behavorial flags for the arc iterator 1219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable Arc arc_; // Memory to temporarily store computed arcs 1220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable ArcIteratorData<Arc> cache_data_; // Arc iterator data in cache 1222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable ArcIteratorData<Arc> local_data_; // Arc iterator data in local fst 1223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable const A* arcs_; // Array of arcs 1225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable uint32 data_flags_; // Arc value flags valid for data in arcs_ 1226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable Arc final_arc_; // Final arc (when required) 1227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable uint32 final_flags_; // Arc value flags valid for final_arc_ 1228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(ArcIterator); 1230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 1231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T> 1234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFstMatcher : public MatcherBase<A> { 1235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 1236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 1237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 1238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 1239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef MultiEpsMatcher<Matcher<Fst<A> > > LocalMatcher; 1240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstMatcher(const ReplaceFst<A, T> &fst, fst::MatchType match_type) 1242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : fst_(fst), 1243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_(fst_.GetImpl()), 1244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_(fst::kNoStateId), 1245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson match_type_(match_type), 1246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_loop_(false), 1247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson final_arc_(false), 1248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) { 1249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (match_type_ == fst::MATCH_OUTPUT) 1250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson swap(loop_.ilabel, loop_.olabel); 1251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson InitMatchers(); 1252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstMatcher(const ReplaceFstMatcher<A, T> &matcher, bool safe = false) 1255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : fst_(matcher.fst_), 1256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_(fst_.GetImpl()), 1257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_(fst::kNoStateId), 1258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson match_type_(matcher.match_type_), 1259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_loop_(false), 1260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) { 1261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (match_type_ == fst::MATCH_OUTPUT) 1262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson swap(loop_.ilabel, loop_.olabel); 1263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson InitMatchers(); 1264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Create a local matcher for each component Fst of replace. 1267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // LocalMatcher is a multi epsilon wrapper matcher. MultiEpsilonMatcher 1268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // is used to match each non-terminal arc, since these non-terminal 1269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // turn into epsilons on recursion. 1270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void InitMatchers() { 1271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<const Fst<A>*>& fst_array = impl_->fst_array_; 1272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson matcher_.resize(fst_array.size(), 0); 1273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < fst_array.size(); ++i) { 1274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_array[i]) { 1275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson matcher_[i] = 1276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new LocalMatcher(*fst_array[i], match_type_, kMultiEpsList); 1277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename set<Label>::iterator it = impl_->nonterminal_set_.begin(); 1279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; it != impl_->nonterminal_set_.end(); ++it) { 1280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson matcher_[i]->AddMultiEpsLabel(*it); 1281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ReplaceFstMatcher<A, T> *Copy(bool safe = false) const { 1287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new ReplaceFstMatcher<A, T>(*this, safe); 1288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ~ReplaceFstMatcher() { 1291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < matcher_.size(); ++i) 1292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete matcher_[i]; 1293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual MatchType Type(bool test) const { 1296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (match_type_ == MATCH_NONE) 1297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return match_type_; 1298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 true_prop = match_type_ == MATCH_INPUT ? 1300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kILabelSorted : kOLabelSorted; 1301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 false_prop = match_type_ == MATCH_INPUT ? 1302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kNotILabelSorted : kNotOLabelSorted; 1303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 props = fst_.Properties(true_prop | false_prop, test); 1304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (props & true_prop) 1306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return match_type_; 1307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else if (props & false_prop) 1308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return MATCH_NONE; 1309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 1310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return MATCH_UNKNOWN; 1311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const Fst<A> &GetFst() const { 1314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fst_; 1315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual uint64 Properties(uint64 props) const { 1318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return props; 1319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 1322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set the sate from which our matching happens. 1323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void SetState_(StateId s) { 1324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s_ == s) return; 1325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_ = s; 1327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson tuple_ = impl_->GetStateTable()->Tuple(s_); 1328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (tuple_.fst_state == kNoStateId) { 1329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson done_ = true; 1330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 1331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Get current matcher. Used for non epsilon matching 1333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_matcher_ = matcher_[tuple_.fst_id]; 1334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_matcher_->SetState(tuple_.fst_state); 1335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson loop_.nextstate = s_; 1336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson final_arc_ = false; 1338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Search for label, from previous set state. If label == 0, first 1341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // hallucinate and epsilon loop, else use the underlying matcher to 1342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // search for the label or epsilons. 1343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // - Note since the ReplaceFST recursion on non-terminal arcs causes 1344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // epsilon transitions to be created we use the MultiEpsilonMatcher 1345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // to search for possible matches of non terminals. 1346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // - If the component Fst reaches a final state we also need to add 1347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the exiting final arc. 1348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual bool Find_(Label label) { 1349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool found = false; 1350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson label_ = label; 1351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label_ == 0 || label_ == kNoLabel) { 1352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compute loop directly, saving Replace::ComputeArc 1353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label_ == 0) { 1354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_loop_ = true; 1355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson found = true; 1356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Search for matching multi epsilons 1358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson final_arc_ = impl_->ComputeFinalArc(tuple_, 0); 1359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson found = current_matcher_->Find(kNoLabel) || final_arc_ || found; 1360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 1361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Search on sub machine directly using sub machine matcher. 1362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson found = current_matcher_->Find(label_); 1363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return found; 1365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual bool Done_() const { 1368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return !current_loop_ && !final_arc_ && current_matcher_->Done(); 1369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual const Arc& Value_() const { 1372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (current_loop_) { 1373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return loop_; 1374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (final_arc_) { 1376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_->ComputeFinalArc(tuple_, &arc_); 1377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return arc_; 1378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Arc& component_arc = current_matcher_->Value(); 1380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl_->ComputeArc(tuple_, component_arc, &arc_); 1381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return arc_; 1382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void Next_() { 1385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (current_loop_) { 1386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_loop_ = false; 1387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 1388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (final_arc_) { 1390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson final_arc_ = false; 1391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 1392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_matcher_->Next(); 1394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 1395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ReplaceFst<A, T>& fst_; 1397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstImpl<A, T> *impl_; 1398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LocalMatcher* current_matcher_; 1399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<LocalMatcher*> matcher_; 1400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s_; // Current state 1402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label label_; // Current label 1403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MatchType match_type_; // Supplied by caller 1405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable bool done_; 1406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable bool current_loop_; // Current arc is the implicit loop 1407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable bool final_arc_; // Current arc for exiting recursion 1408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable typename T::StateTuple tuple_; // Tuple corresponding to state_ 1409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable Arc arc_; 1410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc loop_; 1411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 1412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T> inline 1414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ReplaceFst<A, T>::InitStateIterator(StateIteratorData<A> *data) const { 1415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data->base = new StateIterator< ReplaceFst<A, T> >(*this); 1416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 1417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ReplaceFst<StdArc> StdReplaceFst; 1419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Recursivively replaces arcs in the root Fst with other Fsts. 1422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This version writes the result of replacement to an output MutableFst. 1423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 1424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Replace supports replacement of arcs in one Fst with another 1425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst. This replacement is recursive. Replace takes an array of 1426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst(s). One Fst represents the root (or topology) machine. The root 1427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst refers to other Fsts by recursively replacing arcs labeled as 1428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// non-terminals with the matching non-terminal Fst. Currently Replace 1429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// uses the output symbols of the arcs to determine whether the arc is 1430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// a non-terminal arc or not. A non-terminal can be any label that is 1431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// not a non-zero terminal label in the output alphabet. Note that 1432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// input argument is a vector of pair<>. These correspond to the tuple 1433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// of non-terminal Label and corresponding Fst. 1434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 1435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Replace(const vector<pair<typename Arc::Label, 1436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<Arc>* > >& ifst_array, 1437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, typename Arc::Label root, 1438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool epsilon_on_replace) { 1439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReplaceFstOptions<Arc> opts(root, epsilon_on_replace); 1440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson opts.gc_limit = 0; // Cache only the last state for fastest copy. 1441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = ReplaceFst<Arc>(ifst_array, opts); 1442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 1443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 1445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Replace(const vector<pair<typename Arc::Label, 1446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<Arc>* > >& ifst_array, 1447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, typename Arc::Label root) { 1448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Replace(ifst_array, ofst, root, false); 1449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 1450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 1452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_REPLACE_H__ 1454