1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// compose.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to compute the composition of two FSTs 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_COMPOSE_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_COMPOSE_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compose-filter.h> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/lookahead-filter.h> 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/matcher.h> 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/state-table.h> 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h> 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Delayed composition options templated on the arc type, the matcher, 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the composition filter, and the composition state table. By 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// default, the matchers, filter, and state table are constructed by 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// composition. If set below, the user can instead pass in these 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// objects; in that case, ComposeFst takes their ownership. This 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version controls composition implemented between generic Fst<Arc> 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// types and a shared matcher type M for Fst<Arc>. This should be 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// adequate for most applications, giving a reasonable tradeoff 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// between efficiency and code sharing (but see ComposeFstImplOptions). 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class M = Matcher<Fst<A> >, 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class F = SequenceComposeFilter<M>, 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class T = GenericComposeStateTable<A, typename F::FilterState> > 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ComposeFstOptions : public CacheOptions { 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson M *matcher1; // FST1 matcher (see matcher.h) 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson M *matcher2; // FST2 matcher 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson F *filter; // Composition filter (see compose-filter.h) 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T *state_table; // Composition state table (see compose-state-table.h) 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit ComposeFstOptions(const CacheOptions &opts, 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson M *mat1 = 0, M *mat2 = 0, 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson F *filt = 0, T *sttable= 0) 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheOptions(opts), matcher1(mat1), matcher2(mat2), 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filter(filt), state_table(sttable) {} 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {} 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Delayed composition options templated on the two matcher types, the 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// composition filter, and the composition state table. By default, 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the matchers, filter, and state table are constructed by 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// composition. If set below, the user can instead pass in these 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// objects; in that case, ComposeFst takes their ownership. This 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version controls composition implemented using arbitrary matchers 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (of the same Arc type but otherwise arbitrary Fst type). The user 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// must ensure the matchers are compatible. These options permit the 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// most efficient use, but shares the least code. This is for advanced 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// use only in the most demanding or specialized applications that can 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// benefit from it (o.w. prefer ComposeFstOptions). 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class M1, class M2, 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class F = SequenceComposeFilter<M1, M2>, 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class T = GenericComposeStateTable<typename M1::Arc, 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename F::FilterState> > 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ComposeFstImplOptions : public CacheOptions { 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson M1 *matcher1; // FST1 matcher (see matcher.h) 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson M2 *matcher2; // FST2 matcher 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson F *filter; // Composition filter (see compose-filter.h) 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T *state_table; // Composition state table (see compose-state-table.h) 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit ComposeFstImplOptions(const CacheOptions &opts, 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson M1 *mat1 = 0, M2 *mat2 = 0, 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson F *filt = 0, T *sttable= 0) 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheOptions(opts), matcher1(mat1), matcher2(mat2), 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filter(filt), state_table(sttable) {} 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstImplOptions() 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : matcher1(0), matcher2(0), filter(0), state_table(0) {} 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementation of delayed composition. This base class is 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// common to the variants with different matchers, composition filters 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// and state tables. 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ComposeFstImplBase : public CacheImpl<A> { 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetType; 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetProperties; 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::Properties; 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetInputSymbols; 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetOutputSymbols; 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasStart; 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasFinal; 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasArcs; 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::SetFinal; 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::SetStart; 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CacheState<A> State; 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2, 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CacheOptions &opts) 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson :CacheImpl<A>(opts) { 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "ComposeFst(" << this << "): Begin"; 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetType("compose"); 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) { 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ComposeFst: output symbol table of 1st argument " 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "does not match input symbol table of 2nd argument"; 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kError, kError); 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetInputSymbols(fst1.InputSymbols()); 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetOutputSymbols(fst2.OutputSymbols()); 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstImplBase(const ComposeFstImplBase<A> &impl) 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheImpl<A>(impl) { 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(impl.Properties(), kCopyProperties); 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetInputSymbols(impl.InputSymbols()); 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetOutputSymbols(impl.OutputSymbols()); 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ComposeFstImplBase<A> *Copy() = 0; 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ~ComposeFstImplBase() {} 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId Start() { 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasStart()) { 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId start = ComputeStart(); 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (start != kNoStateId) { 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetStart(start); 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::Start(); 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight Final(StateId s) { 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasFinal(s)) { 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight final = ComputeFinal(s); 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinal(s, final); 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::Final(s); 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void Expand(StateId s) = 0; 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumArcs(StateId s) { 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumArcs(s); 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumInputEpsilons(StateId s) { 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumInputEpsilons(s); 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumOutputEpsilons(StateId s) { 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumOutputEpsilons(s); 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void InitArcIterator(StateId s, ArcIteratorData<A> *data) { 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CacheImpl<A>::InitArcIterator(s, data); 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected: 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual StateId ComputeStart() = 0; 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual Weight ComputeFinal(StateId s) = 0; 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementaion of delayed composition templated on the matchers (see 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// matcher.h), composition filter (see compose-filter-inl.h) and 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the composition state table (see compose-state-table.h). 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class M1, class M2, class F, class T> 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> { 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename M1::FST FST1; 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename M2::FST FST2; 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename M1::Arc Arc; 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Label Label; 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Weight Weight; 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename F::FilterState FilterState; 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename F::Matcher1 Matcher1; 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename F::Matcher2 Matcher2; 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl<CacheState<Arc> >::SetArcs; 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<Arc>::SetType; 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<Arc>::SetProperties; 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ComposeStateTuple<StateId, FilterState> StateTuple; 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstImpl(const FST1 &fst1, const FST2 &fst2, 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ComposeFstImplOptions<M1, M2, F, T> &opts); 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl) 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ComposeFstImplBase<Arc>(impl), 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filter_(new F(*impl.filter_, true)), 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson matcher1_(filter_->GetMatcher1()), 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson matcher2_(filter_->GetMatcher2()), 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst1_(matcher1_->GetFst()), 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst2_(matcher2_->GetFst()), 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_(new T(*impl.state_table_)), 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson match_type_(impl.match_type_) {} 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~ComposeFstImpl() { 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "ComposeFst(" << this 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "): End: # of visited states: " << state_table_->Size(); 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete filter_; 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete state_table_; 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ComposeFstImpl<M1, M2, F, T> *Copy() { 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new ComposeFstImpl<M1, M2, F, T>(*this); 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 Properties() const { return Properties(kFstProperties); } 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set error if found; return FST impl properties. 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 Properties(uint64 mask) const { 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((mask & kError) && 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (fst1_.Properties(kError, false) || 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst2_.Properties(kError, false) || 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (matcher1_->Properties(0) & kError) || 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (matcher2_->Properties(0) & kError) | 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (filter_->Properties(0) & kError) || 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_->Error())) { 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kError, kError); 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return FstImpl<Arc>::Properties(mask); 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Arranges it so that the first arg to OrderedExpand is the Fst 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // that will be matched on. 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Expand(StateId s) { 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const StateTuple &tuple = state_table_->Tuple(s); 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s1 = tuple.state_id1; 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s2 = tuple.state_id2; 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filter_->SetState(s1, s2, tuple.filter_state); 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (match_type_ == MATCH_OUTPUT || 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (match_type_ == MATCH_BOTH && 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2))) 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false); 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true); 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // This does that actual matching of labels in the composition. The 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // arguments are ordered so matching is called on state 'sa' of 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // determines whether the input or output label of arcs at 'sb' is 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the one to match on. 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class FST, class Matcher> 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa, 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FST &fstb, StateId sb, 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Matcher *matchera, bool match_input) { 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson matchera->SetState(sa); 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // First process non-consuming symbols (e.g., epsilons) on FSTA. 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0, 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight::One(), sb); 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MatchArc(s, matchera, loop, match_input); 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Then process matches on FSTB. 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next()) 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MatchArc(s, matchera, iterb.Value(), match_input); 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetArcs(s); 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Matches a single transition from 'fstb' against 'fata' at 's'. 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class Matcher> 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void MatchArc(StateId s, Matcher *matchera, 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Arc &arc, bool match_input) { 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) { 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; !matchera->Done(); matchera->Next()) { 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arca = matchera->Value(); 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arcb = arc; 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (match_input) { 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FilterState &f = filter_->FilterArc(&arcb, &arca); 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (f != FilterState::NoState()) 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AddArc(s, arcb, arca, f); 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FilterState &f = filter_->FilterArc(&arca, &arcb); 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (f != FilterState::NoState()) 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AddArc(s, arca, arcb, f); 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Add a matching transition at 's'. 324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void AddArc(StateId s, const Arc &arc1, const Arc &arc2, 325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FilterState &f) { 326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple tuple(arc1.nextstate, arc2.nextstate, f); 327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight), 328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_->FindState(tuple)); 329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CacheImpl<Arc>::PushArc(s, oarc); 330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId ComputeStart() { 333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s1 = fst1_.Start(); 334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s1 == kNoStateId) 335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return kNoStateId; 336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s2 = fst2_.Start(); 338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s2 == kNoStateId) 339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return kNoStateId; 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FilterState &f = filter_->Start(); 342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple tuple(s1, s2, f); 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return state_table_->FindState(tuple); 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight ComputeFinal(StateId s) { 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const StateTuple &tuple = state_table_->Tuple(s); 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s1 = tuple.state_id1; 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight final1 = internal::Final(fst1_, s1); 350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (final1 == Weight::Zero()) 351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return final1; 352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s2 = tuple.state_id2; 354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight final2 = internal::Final(fst2_, s2); 355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (final2 == Weight::Zero()) 356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return final2; 357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filter_->SetState(s1, s2, tuple.filter_state); 359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filter_->FilterFinal(&final1, &final2); 360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return Times(final1, final2); 361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson F *filter_; 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Matcher1 *matcher1_; 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Matcher2 *matcher2_; 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FST1 &fst1_; 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FST2 &fst2_; 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T *state_table_; 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MatchType match_type_; 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const ComposeFstImpl<M1, M2, F, T> &); // disallow 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class M1, class M2, class F, class T> inline 376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonComposeFstImpl<M1, M2, F, T>::ComposeFstImpl( 377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FST1 &fst1, const FST2 &fst2, 378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ComposeFstImplOptions<M1, M2, F, T> &opts) 379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ComposeFstImplBase<Arc>(fst1, fst2, opts), 380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filter_(opts.filter ? opts.filter : 381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new F(fst1, fst2, opts.matcher1, opts.matcher2)), 382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson matcher1_(filter_->GetMatcher1()), 383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson matcher2_(filter_->GetMatcher2()), 384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst1_(matcher1_->GetFst()), 385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst2_(matcher2_->GetFst()), 386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_(opts.state_table ? opts.state_table : 387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new T(fst1_, fst2_)) { 388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MatchType type1 = matcher1_->Type(false); 389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MatchType type2 = matcher2_->Type(false); 390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) { 391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson match_type_ = MATCH_BOTH; 392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (type1 == MATCH_OUTPUT) { 393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson match_type_ = MATCH_OUTPUT; 394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (type2 == MATCH_INPUT) { 395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson match_type_ = MATCH_INPUT; 396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (matcher1_->Type(true) == MATCH_OUTPUT) { 397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson match_type_ = MATCH_OUTPUT; 398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (matcher2_->Type(true) == MATCH_INPUT) { 399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson match_type_ = MATCH_INPUT; 400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ComposeFst: 1st argument cannot match on output labels " 402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "and 2nd argument cannot match on input labels (sort?)."; 403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kError, kError); 404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 fprops1 = fst1.Properties(kFstProperties, false); 406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 fprops2 = fst2.Properties(kFstProperties, false); 407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 mprops1 = matcher1_->Properties(fprops1); 408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 mprops2 = matcher2_->Properties(fprops2); 409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 cprops = ComposeProperties(mprops1, mprops2); 410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(filter_->Properties(cprops), kCopyProperties); 411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (state_table_->Error()) SetProperties(kError, kError); 412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "ComposeFst(" << this << "): Initialized"; 413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the composition of two transducers. This version is a 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// delayed Fst. If FST1 transduces string x to y with weight a and FST2 418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transduces y to z with weight b, then their composition transduces 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// string x to z with weight Times(x, z). 420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The output labels of the first transducer or the input labels of 422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the second transducer must be sorted (with the default matcher). 423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to form a commutative semiring (valid for 424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// TropicalWeight and LogWeight). 425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: 427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Assuming the first FST is unsorted and the second is sorted: 428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Time: O(v1 v2 d1 (log d2 + m2)), 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Space: O(v1 v2) 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// where vi = # of states visited, di = maximum out-degree, and mi the 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// maximum multiplicity of the states visited for the ith 432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// FST. Constant time and space to visit an input state or arc is 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// assumed and exclusive of caching. 434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats: 436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - ComposeFst does not trim its output (since it is a delayed operation). 437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - The efficiency of composition can be strongly affected by several factors: 438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - the choice of which tnansducer is sorted - prefer sorting the FST 439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// that has the greater average out-degree. 440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - the amount of non-determinism 441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - the presence and location of epsilon transitions - avoid epsilon 442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transitions on the output side of the first transducer or 443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the input side of the second transducer or prefer placing 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// them later in a path since they delay matching and can 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// introduce non-coaccessible states and transitions. 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This class attaches interface to implementation and handles 448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reference counting, delegating most methods to ImplToFst. 449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ComposeFst : public ImplToFst< ComposeFstImplBase<A> > { 451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class ArcIterator< ComposeFst<A> >; 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class StateIterator< ComposeFst<A> >; 454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CacheState<A> State; 459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ComposeFstImplBase<A> Impl; 460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ImplToFst<Impl>::SetImpl; 462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compose specifying only caching options. 464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2, 465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CacheOptions &opts = CacheOptions()) 466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {} 467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compose specifying one shared matcher type M. Requires input 469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for 470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // best code-sharing and matcher compatiblity. 471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class M, class F, class T> 472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2, 473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ComposeFstOptions<A, M, F, T> &opts) 474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {} 475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compose specifying two matcher types M1 and M2. Requires input 477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Fsts (of the same Arc type but o.w. arbitrary) match the 478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // corresponding matcher FST types (M1::FST, M2::FST). Recommended 479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // only for advanced use in demanding or specialized applications 480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // due to potential code bloat and matcher incompatibilities. 481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class M1, class M2, class F, class T> 482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2, 483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ComposeFstImplOptions<M1, M2, F, T> &opts) 484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {} 485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // See Fst<>::Copy() for doc. 487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFst(const ComposeFst<A> &fst, bool safe = false) { 488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (safe) 489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetImpl(fst.GetImpl()->Copy()); 490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetImpl(fst.GetImpl(), false); 492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc. 495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ComposeFst<A> *Copy(bool safe = false) const { 496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new ComposeFst<A>(*this, safe); 497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual inline void InitStateIterator(StateIteratorData<A> *data) const; 500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->InitArcIterator(s, data); 503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected: 506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFst() {} 507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Create compose implementation specifying two matcher types. 509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class M1, class M2, class F, class T> 510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static Impl *CreateBase2( 511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const typename M1::FST &fst1, const typename M2::FST &fst2, 512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ComposeFstImplOptions<M1, M2, F, T> &opts) { 513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts); 514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!(Weight::Properties() & kCommutative)) { 515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 props1 = fst1.Properties(kUnweighted, true); 516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 props2 = fst2.Properties(kUnweighted, true); 517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) { 518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ComposeFst: Weights must be a commutative semiring: " 519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << Weight::Type(); 520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson impl->SetProperties(kError, kError); 521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return impl; 524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Create compose implementation specifying one matcher type. 527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Requires input Fsts and matcher FST type (M::FST) be Fst<A> 528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class M, class F, class T> 529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2, 530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ComposeFstOptions<A, M, F, T> &opts) { 531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2, 532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson opts.filter, opts.state_table); 533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CreateBase2(fst1, fst2, nopts); 534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Create compose implementation specifying no matcher type. 537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2, 538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CacheOptions &opts) { 539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson switch (LookAheadMatchType(fst1, fst2)) { // Check for lookahead matchers 540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson default: 541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson case MATCH_NONE: { // Default composition (no look-ahead) 542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions<Arc> nopts(opts); 543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CreateBase1(fst1, fst2, nopts); 544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson case MATCH_OUTPUT: { // Lookahead on fst1 546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M; 547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F; 548f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions<Arc, M, F> nopts(opts); 549f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CreateBase1(fst1, fst2, nopts); 550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 551f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson case MATCH_INPUT: { // Lookahead on fst2 552f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M; 553f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F; 554f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions<Arc, M, F> nopts(opts); 555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CreateBase1(fst1, fst2, nopts); 556f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 557f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Makes visible to friends. 562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } 563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const ComposeFst<A> &fst); // disallow 565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 566f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 567f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 568f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ComposeFst. 569f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class A> 570f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< ComposeFst<A> > 571f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheStateIterator< ComposeFst<A> > { 572f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 573f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit StateIterator(const ComposeFst<A> &fst) 574f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {} 575f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 576f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 577f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 578f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ComposeFst. 579f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 580f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< ComposeFst<A> > 581f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheArcIterator< ComposeFst<A> > { 582f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 583f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 584f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 585f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator(const ComposeFst<A> &fst, StateId s) 586f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) { 587f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst.GetImpl()->HasArcs(s)) 588f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst.GetImpl()->Expand(s); 589f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 590f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 591f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 592f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(ArcIterator); 593f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 594f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 595f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 596f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const { 597f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data->base = new StateIterator< ComposeFst<A> >(*this); 598f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 599f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 600f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Useful alias when using StdArc. 601f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ComposeFst<StdArc> StdComposeFst; 602f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 603f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonenum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER, 604f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MATCH_FILTER }; 605f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 606f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ComposeOptions { 607f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool connect; // Connect output 608f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFilter filter_type; // Which pre-defined filter to use 609f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 610f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER) 611f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : connect(c), filter_type(ft) {} 612f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {} 613f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 614f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 615f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the composition of two transducers. This version writes 616f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the composed FST into a MurableFst. If FST1 transduces string x to 617f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// y with weight a and FST2 transduces y to z with weight b, then 618f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// their composition transduces string x to z with weight 619f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Times(x, z). 620f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 621f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The output labels of the first transducer or the input labels of 622f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the second transducer must be sorted. The weights need to form a 623f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// commutative semiring (valid for TropicalWeight and LogWeight). 624f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 625f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: 626f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Assuming the first FST is unsorted and the second is sorted: 627f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Time: O(V1 V2 D1 (log D2 + M2)), 628f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Space: O(V1 V2 D1 M2) 629f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// where Vi = # of states, Di = maximum out-degree, and Mi is 630f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the maximum multiplicity for the ith FST. 631f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 632f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats: 633f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Compose trims its output. 634f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - The efficiency of composition can be strongly affected by several factors: 635f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - the choice of which tnansducer is sorted - prefer sorting the FST 636f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// that has the greater average out-degree. 637f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - the amount of non-determinism 638f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - the presence and location of epsilon transitions - avoid epsilon 639f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transitions on the output side of the first transducer or 640f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the input side of the second transducer or prefer placing 641f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// them later in a path since they delay matching and can 642f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// introduce non-coaccessible states and transitions. 643f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 644f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, 645f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 646f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ComposeOptions &opts = ComposeOptions()) { 647f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef Matcher< Fst<Arc> > M; 648f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 649f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.filter_type == AUTO_FILTER) { 650f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CacheOptions nopts; 651f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nopts.gc_limit = 0; // Cache only the last state for fastest copy. 652f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts); 653f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == SEQUENCE_FILTER) { 654f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions<Arc> copts; 655f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson copts.gc_limit = 0; // Cache only the last state for fastest copy. 656f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); 657f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { 658f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> > copts; 659f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson copts.gc_limit = 0; // Cache only the last state for fastest copy. 660f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); 661f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == MATCH_FILTER) { 662f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions<Arc, M, MatchComposeFilter<M> > copts; 663f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson copts.gc_limit = 0; // Cache only the last state for fastest copy. 664f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); 665f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 666f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 667f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.connect) 668f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Connect(ofst); 669f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 670f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 671f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 672f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 673f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_COMPOSE_H__ 674