1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// compose.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to compute the composition of two FSTs
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_COMPOSE_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_COMPOSE_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compose-filter.h>
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/lookahead-filter.h>
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/matcher.h>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/state-table.h>
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Delayed composition options templated on the arc type, the matcher,
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the composition filter, and the composition state table.  By
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// default, the matchers, filter, and state table are constructed by
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// composition. If set below, the user can instead pass in these
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// objects; in that case, ComposeFst takes their ownership. This
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version controls composition implemented between generic Fst<Arc>
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// types and a shared matcher type M for Fst<Arc>. This should be
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// adequate for most applications, giving a reasonable tradeoff
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// between efficiency and code sharing (but see ComposeFstImplOptions).
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A,
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class M = Matcher<Fst<A> >,
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class F = SequenceComposeFilter<M>,
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class T = GenericComposeStateTable<A, typename F::FilterState> >
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ComposeFstOptions : public CacheOptions {
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  M *matcher1;      // FST1 matcher (see matcher.h)
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  M *matcher2;      // FST2 matcher
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  F *filter;        // Composition filter (see compose-filter.h)
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T *state_table;   // Composition state table (see compose-state-table.h)
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit ComposeFstOptions(const CacheOptions &opts,
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                             M *mat1 = 0, M *mat2 = 0,
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                             F *filt = 0, T *sttable= 0)
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        filter(filt), state_table(sttable) {}
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {}
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Delayed composition options templated on the two matcher types, the
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// composition filter, and the composition state table.  By default,
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the matchers, filter, and state table are constructed by
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// composition. If set below, the user can instead pass in these
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// objects; in that case, ComposeFst takes their ownership. This
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version controls composition implemented using arbitrary matchers
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (of the same Arc type but otherwise arbitrary Fst type). The user
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// must ensure the matchers are compatible. These options permit the
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// most efficient use, but shares the least code. This is for advanced
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// use only in the most demanding or specialized applications that can
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// benefit from it (o.w. prefer ComposeFstOptions).
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class M1, class M2,
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class F = SequenceComposeFilter<M1, M2>,
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class T = GenericComposeStateTable<typename M1::Arc,
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                             typename F::FilterState> >
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ComposeFstImplOptions : public CacheOptions {
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  M1 *matcher1;     // FST1 matcher (see matcher.h)
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  M2 *matcher2;     // FST2 matcher
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  F *filter;        // Composition filter (see compose-filter.h)
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T *state_table;   // Composition state table (see compose-state-table.h)
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit ComposeFstImplOptions(const CacheOptions &opts,
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                 M1 *mat1 = 0, M2 *mat2 = 0,
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                 F *filt = 0, T *sttable= 0)
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        filter(filt), state_table(sttable) {}
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFstImplOptions()
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  : matcher1(0), matcher2(0), filter(0), state_table(0) {}
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementation of delayed composition. This base class is
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// common to the variants with different matchers, composition filters
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// and state tables.
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ComposeFstImplBase : public CacheImpl<A> {
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetType;
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetProperties;
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::Properties;
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetInputSymbols;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetOutputSymbols;
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::HasStart;
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::HasFinal;
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::HasArcs;
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::SetFinal;
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::SetStart;
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CacheState<A> State;
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     const CacheOptions &opts)
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      :CacheImpl<A>(opts) {
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    VLOG(2) << "ComposeFst(" << this << "): Begin";
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType("compose");
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) {
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "ComposeFst: output symbol table of 1st argument "
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << "does not match input symbol table of 2nd argument";
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kError, kError);
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetInputSymbols(fst1.InputSymbols());
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetOutputSymbols(fst2.OutputSymbols());
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFstImplBase(const ComposeFstImplBase<A> &impl)
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(impl) {
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(impl.Properties(), kCopyProperties);
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetInputSymbols(impl.InputSymbols());
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetOutputSymbols(impl.OutputSymbols());
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ComposeFstImplBase<A> *Copy() = 0;
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ~ComposeFstImplBase() {}
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId Start() {
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasStart()) {
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId start = ComputeStart();
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (start != kNoStateId) {
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        SetStart(start);
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::Start();
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight Final(StateId s) {
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasFinal(s)) {
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Weight final = ComputeFinal(s);
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetFinal(s, final);
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::Final(s);
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void Expand(StateId s) = 0;
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumArcs(StateId s) {
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::NumArcs(s);
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumInputEpsilons(StateId s) {
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::NumInputEpsilons(s);
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumOutputEpsilons(StateId s) {
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::NumOutputEpsilons(s);
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    CacheImpl<A>::InitArcIterator(s, data);
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected:
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual StateId ComputeStart() = 0;
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual Weight ComputeFinal(StateId s) = 0;
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementaion of delayed composition templated on the matchers (see
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// matcher.h), composition filter (see compose-filter-inl.h) and
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the composition state table (see compose-state-table.h).
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class M1, class M2, class F, class T>
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> {
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename M1::FST FST1;
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename M2::FST FST2;
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename M1::Arc Arc;
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Label Label;
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename F::FilterState FilterState;
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename F::Matcher1 Matcher1;
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename F::Matcher2 Matcher2;
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl<CacheState<Arc> >::SetArcs;
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<Arc>::SetType;
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<Arc>::SetProperties;
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ComposeStateTuple<StateId, FilterState> StateTuple;
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 const ComposeFstImplOptions<M1, M2, F, T> &opts);
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl)
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ComposeFstImplBase<Arc>(impl),
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        filter_(new F(*impl.filter_, true)),
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        matcher1_(filter_->GetMatcher1()),
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        matcher2_(filter_->GetMatcher2()),
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst1_(matcher1_->GetFst()),
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst2_(matcher2_->GetFst()),
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state_table_(new T(*impl.state_table_)),
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        match_type_(impl.match_type_) {}
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~ComposeFstImpl() {
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    VLOG(2) << "ComposeFst(" << this
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            << "): End: # of visited states: " << state_table_->Size();
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete filter_;
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete state_table_;
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ComposeFstImpl<M1, M2, F, T> *Copy() {
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new ComposeFstImpl<M1, M2, F, T>(*this);
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 Properties() const { return Properties(kFstProperties); }
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Set error if found; return FST impl properties.
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 Properties(uint64 mask) const {
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((mask & kError) &&
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (fst1_.Properties(kError, false) ||
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         fst2_.Properties(kError, false) ||
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         (matcher1_->Properties(0) & kError) ||
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         (matcher2_->Properties(0) & kError) |
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         (filter_->Properties(0) & kError) ||
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         state_table_->Error())) {
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kError, kError);
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FstImpl<Arc>::Properties(mask);
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Arranges it so that the first arg to OrderedExpand is the Fst
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // that will be matched on.
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Expand(StateId s) {
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const StateTuple &tuple = state_table_->Tuple(s);
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s1 = tuple.state_id1;
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s2 = tuple.state_id2;
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    filter_->SetState(s1, s2, tuple.filter_state);
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (match_type_ == MATCH_OUTPUT ||
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (match_type_ == MATCH_BOTH &&
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2)))
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // This does that actual matching of labels in the composition. The
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // arguments are ordered so matching is called on state 'sa' of
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // determines whether the input or output label of arcs at 'sb' is
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // the one to match on.
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class FST, class Matcher>
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     const FST &fstb, StateId sb,
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     Matcher *matchera,  bool match_input) {
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    matchera->SetState(sa);
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // First process non-consuming symbols (e.g., epsilons) on FSTA.
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           Weight::One(), sb);
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    MatchArc(s, matchera, loop, match_input);
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Then process matches on FSTB.
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      MatchArc(s, matchera, iterb.Value(), match_input);
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetArcs(s);
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Matches a single transition from 'fstb' against 'fata' at 's'.
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Matcher>
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void MatchArc(StateId s, Matcher *matchera,
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                const Arc &arc, bool match_input) {
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (; !matchera->Done(); matchera->Next()) {
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Arc arca = matchera->Value();
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Arc arcb = arc;
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (match_input) {
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          const FilterState &f = filter_->FilterArc(&arcb, &arca);
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (f != FilterState::NoState())
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            AddArc(s, arcb, arca, f);
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        } else {
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          const FilterState &f = filter_->FilterArc(&arca, &arcb);
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (f != FilterState::NoState())
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            AddArc(s, arca, arcb, f);
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Add a matching transition at 's'.
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const FilterState &f) {
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           state_table_->FindState(tuple));
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    CacheImpl<Arc>::PushArc(s, oarc);
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId ComputeStart() {
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s1 = fst1_.Start();
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (s1 == kNoStateId)
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return kNoStateId;
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s2 = fst2_.Start();
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (s2 == kNoStateId)
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return kNoStateId;
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const FilterState &f = filter_->Start();
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateTuple tuple(s1, s2, f);
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return state_table_->FindState(tuple);
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight ComputeFinal(StateId s) {
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const StateTuple &tuple = state_table_->Tuple(s);
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s1 = tuple.state_id1;
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight final1 = internal::Final(fst1_, s1);
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (final1 == Weight::Zero())
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return final1;
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s2 = tuple.state_id2;
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight final2 = internal::Final(fst2_, s2);
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (final2 == Weight::Zero())
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return final2;
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    filter_->SetState(s1, s2, tuple.filter_state);
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    filter_->FilterFinal(&final1, &final2);
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return Times(final1, final2);
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  F *filter_;
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Matcher1 *matcher1_;
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Matcher2 *matcher2_;
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const FST1 &fst1_;
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const FST2 &fst2_;
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T *state_table_;
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  MatchType match_type_;
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const ComposeFstImpl<M1, M2, F, T> &);  // disallow
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class M1, class M2, class F, class T> inline
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const FST1 &fst1, const FST2 &fst2,
378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const ComposeFstImplOptions<M1, M2, F, T> &opts)
379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : ComposeFstImplBase<Arc>(fst1, fst2, opts),
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      filter_(opts.filter ? opts.filter :
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              new F(fst1, fst2, opts.matcher1, opts.matcher2)),
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      matcher1_(filter_->GetMatcher1()),
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      matcher2_(filter_->GetMatcher2()),
384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst1_(matcher1_->GetFst()),
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst2_(matcher2_->GetFst()),
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      state_table_(opts.state_table ? opts.state_table :
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   new T(fst1_, fst2_)) {
388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  MatchType type1 = matcher1_->Type(false);
389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  MatchType type2 = matcher2_->Type(false);
390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (type1 == MATCH_OUTPUT && type2  == MATCH_INPUT) {
391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    match_type_ = MATCH_BOTH;
392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (type1 == MATCH_OUTPUT) {
393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    match_type_ = MATCH_OUTPUT;
394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (type2 == MATCH_INPUT) {
395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    match_type_ = MATCH_INPUT;
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    match_type_ = MATCH_OUTPUT;
398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (matcher2_->Type(true) == MATCH_INPUT) {
399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    match_type_ = MATCH_INPUT;
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << "and 2nd argument cannot match on input labels (sort?).";
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(kError, kError);
404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 fprops1 = fst1.Properties(kFstProperties, false);
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 fprops2 = fst2.Properties(kFstProperties, false);
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 mprops1 = matcher1_->Properties(fprops1);
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 mprops2 = matcher2_->Properties(fprops2);
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 cprops = ComposeProperties(mprops1, mprops2);
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SetProperties(filter_->Properties(cprops), kCopyProperties);
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (state_table_->Error()) SetProperties(kError, kError);
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  VLOG(2) << "ComposeFst(" << this << "): Initialized";
413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the composition of two transducers. This version is a
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// delayed Fst. If FST1 transduces string x to y with weight a and FST2
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transduces y to z with weight b, then their composition transduces
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// string x to z with weight Times(x, z).
420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The output labels of the first transducer or the input labels of
422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the second transducer must be sorted (with the default matcher).
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to form a commutative semiring (valid for
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// TropicalWeight and LogWeight).
425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity:
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Assuming the first FST is unsorted and the second is sorted:
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Time: O(v1 v2 d1 (log d2 + m2)),
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Space: O(v1 v2)
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// where vi = # of states visited, di = maximum out-degree, and mi the
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// maximum multiplicity of the states visited for the ith
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// FST. Constant time and space to visit an input state or arc is
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// assumed and exclusive of caching.
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats:
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - ComposeFst does not trim its output (since it is a delayed operation).
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - The efficiency of composition can be strongly affected by several factors:
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   - the choice of which tnansducer is sorted - prefer sorting the FST
439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     that has the greater average out-degree.
440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   - the amount of non-determinism
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   - the presence and location of epsilon transitions - avoid epsilon
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     transitions on the output side of the first transducer or
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     the input side of the second transducer or prefer placing
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     them later in a path since they delay matching and can
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     introduce non-coaccessible states and transitions.
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This class attaches interface to implementation and handles
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reference counting, delegating most methods to ImplToFst.
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ComposeFst : public ImplToFst< ComposeFstImplBase<A> > {
451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class ArcIterator< ComposeFst<A> >;
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class StateIterator< ComposeFst<A> >;
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CacheState<A> State;
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ComposeFstImplBase<A> Impl;
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ImplToFst<Impl>::SetImpl;
462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Compose specifying only caching options.
464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const CacheOptions &opts = CacheOptions())
466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Compose specifying one shared matcher type M.  Requires input
469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // best code-sharing and matcher compatiblity.
471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class M, class F, class T>
472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const ComposeFstOptions<A, M, F, T> &opts)
474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Compose specifying two matcher types M1 and M2.  Requires input
477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Fsts (of the same Arc type but o.w. arbitrary) match the
478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // corresponding matcher FST types (M1::FST, M2::FST). Recommended
479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // only for advanced use in demanding or specialized applications
480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // due to potential code bloat and matcher incompatibilities.
481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class M1, class M2, class F, class T>
482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const ComposeFstImplOptions<M1, M2, F, T> &opts)
484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // See Fst<>::Copy() for doc.
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFst(const ComposeFst<A> &fst, bool safe = false) {
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (safe)
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetImpl(fst.GetImpl()->Copy());
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetImpl(fst.GetImpl(), false);
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ComposeFst<A> *Copy(bool safe = false) const {
496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new ComposeFst<A>(*this, safe);
497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GetImpl()->InitArcIterator(s, data);
503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected:
506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFst() {}
507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Create compose implementation specifying two matcher types.
509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class M1, class M2, class F, class T>
510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static Impl *CreateBase2(
511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const typename M1::FST &fst1, const typename M2::FST &fst2,
512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const ComposeFstImplOptions<M1, M2, F, T> &opts) {
513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts);
514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!(Weight::Properties() & kCommutative)) {
515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      int64 props1 = fst1.Properties(kUnweighted, true);
516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      int64 props2 = fst2.Properties(kUnweighted, true);
517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << Weight::Type();
520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        impl->SetProperties(kError, kError);
521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return impl;
524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Create compose implementation specifying one matcher type.
527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //  Requires input Fsts and matcher FST type (M::FST) be Fst<A>
528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class M, class F, class T>
529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                           const ComposeFstOptions<A, M, F, T> &opts) {
531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2,
532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                            opts.filter, opts.state_table);
533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CreateBase2(fst1, fst2, nopts);
534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Create compose implementation specifying no matcher type.
537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                          const CacheOptions &opts) {
539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    switch (LookAheadMatchType(fst1, fst2)) {  // Check for lookahead matchers
540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      default:
541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      case MATCH_NONE: {     // Default composition (no look-ahead)
542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ComposeFstOptions<Arc> nopts(opts);
543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return CreateBase1(fst1, fst2, nopts);
544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      case MATCH_OUTPUT: {   // Lookahead on fst1
546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
548f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ComposeFstOptions<Arc, M, F> nopts(opts);
549f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return CreateBase1(fst1, fst2, nopts);
550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
551f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      case MATCH_INPUT: {    // Lookahead on fst2
552f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
553f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
554f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ComposeFstOptions<Arc, M, F> nopts(opts);
555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return CreateBase1(fst1, fst2, nopts);
556f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
557f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Makes visible to friends.
562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const ComposeFst<A> &fst);  // disallow
565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
566f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
567f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
568f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ComposeFst.
569f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class A>
570f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< ComposeFst<A> >
571f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public CacheStateIterator< ComposeFst<A> > {
572f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
573f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit StateIterator(const ComposeFst<A> &fst)
574f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {}
575f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
576f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
577f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
578f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ComposeFst.
579f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
580f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< ComposeFst<A> >
581f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public CacheArcIterator< ComposeFst<A> > {
582f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
583f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
584f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
585f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcIterator(const ComposeFst<A> &fst, StateId s)
586f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) {
587f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!fst.GetImpl()->HasArcs(s))
588f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst.GetImpl()->Expand(s);
589f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
590f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
591f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
592f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
593f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
594f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
595f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
596f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
597f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  data->base = new StateIterator< ComposeFst<A> >(*this);
598f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
599f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
600f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Useful alias when using StdArc.
601f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ComposeFst<StdArc> StdComposeFst;
602f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
603f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonenum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER,
604f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     MATCH_FILTER };
605f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
606f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ComposeOptions {
607f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool connect;  // Connect output
608f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeFilter filter_type;  // Which pre-defined filter to use
609f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
610f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
611f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : connect(c), filter_type(ft) {}
612f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {}
613f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
614f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
615f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the composition of two transducers. This version writes
616f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the composed FST into a MurableFst. If FST1 transduces string x to
617f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// y with weight a and FST2 transduces y to z with weight b, then
618f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// their composition transduces string x to z with weight
619f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Times(x, z).
620f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
621f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The output labels of the first transducer or the input labels of
622f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the second transducer must be sorted.  The weights need to form a
623f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// commutative semiring (valid for TropicalWeight and LogWeight).
624f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
625f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity:
626f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Assuming the first FST is unsorted and the second is sorted:
627f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Time: O(V1 V2 D1 (log D2 + M2)),
628f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Space: O(V1 V2 D1 M2)
629f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// where Vi = # of states, Di = maximum out-degree, and Mi is
630f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the maximum multiplicity for the ith FST.
631f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
632f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats:
633f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Compose trims its output.
634f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - The efficiency of composition can be strongly affected by several factors:
635f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   - the choice of which tnansducer is sorted - prefer sorting the FST
636f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     that has the greater average out-degree.
637f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   - the amount of non-determinism
638f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   - the presence and location of epsilon transitions - avoid epsilon
639f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     transitions on the output side of the first transducer or
640f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     the input side of the second transducer or prefer placing
641f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     them later in a path since they delay matching and can
642f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     introduce non-coaccessible states and transitions.
643f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
644f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
645f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             MutableFst<Arc> *ofst,
646f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const ComposeOptions &opts = ComposeOptions()) {
647f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef Matcher< Fst<Arc> > M;
648f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
649f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.filter_type == AUTO_FILTER) {
650f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson     CacheOptions nopts;
651f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson     nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
652f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson     *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts);
653f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == SEQUENCE_FILTER) {
654f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ComposeFstOptions<Arc> copts;
655f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    copts.gc_limit = 0;  // Cache only the last state for fastest copy.
656f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
657f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
658f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ComposeFstOptions<Arc, M,  AltSequenceComposeFilter<M> > copts;
659f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    copts.gc_limit = 0;  // Cache only the last state for fastest copy.
660f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
661f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == MATCH_FILTER) {
662f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ComposeFstOptions<Arc, M,  MatchComposeFilter<M> > copts;
663f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    copts.gc_limit = 0;  // Cache only the last state for fastest copy.
664f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
665f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
666f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
667f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.connect)
668f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Connect(ofst);
669f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
670f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
671f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
672f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
673f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_COMPOSE_H__
674