1// compose.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Class to compute the composition of two FSTs
20
21#ifndef FST_LIB_COMPOSE_H__
22#define FST_LIB_COMPOSE_H__
23
24#include <algorithm>
25#include <string>
26#include <vector>
27using std::vector;
28
29#include <fst/cache.h>
30#include <fst/compose-filter.h>
31#include <fst/lookahead-filter.h>
32#include <fst/matcher.h>
33#include <fst/state-table.h>
34#include <fst/test-properties.h>
35
36
37namespace fst {
38
39// Delayed composition options templated on the arc type, the matcher,
40// the composition filter, and the composition state table.  By
41// default, the matchers, filter, and state table are constructed by
42// composition. If set below, the user can instead pass in these
43// objects; in that case, ComposeFst takes their ownership. This
44// version controls composition implemented between generic Fst<Arc>
45// types and a shared matcher type M for Fst<Arc>. This should be
46// adequate for most applications, giving a reasonable tradeoff
47// between efficiency and code sharing (but see ComposeFstImplOptions).
48template <class A,
49          class M = Matcher<Fst<A> >,
50          class F = SequenceComposeFilter<M>,
51          class T = GenericComposeStateTable<A, typename F::FilterState> >
52struct ComposeFstOptions : public CacheOptions {
53  M *matcher1;      // FST1 matcher (see matcher.h)
54  M *matcher2;      // FST2 matcher
55  F *filter;        // Composition filter (see compose-filter.h)
56  T *state_table;   // Composition state table (see compose-state-table.h)
57
58  explicit ComposeFstOptions(const CacheOptions &opts,
59                             M *mat1 = 0, M *mat2 = 0,
60                             F *filt = 0, T *sttable= 0)
61      : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
62        filter(filt), state_table(sttable) {}
63
64  ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {}
65};
66
67
68// Delayed composition options templated on the two matcher types, the
69// composition filter, and the composition state table.  By default,
70// the matchers, filter, and state table are constructed by
71// composition. If set below, the user can instead pass in these
72// objects; in that case, ComposeFst takes their ownership. This
73// version controls composition implemented using arbitrary matchers
74// (of the same Arc type but otherwise arbitrary Fst type). The user
75// must ensure the matchers are compatible. These options permit the
76// most efficient use, but shares the least code. This is for advanced
77// use only in the most demanding or specialized applications that can
78// benefit from it (o.w. prefer ComposeFstOptions).
79template <class M1, class M2,
80          class F = SequenceComposeFilter<M1, M2>,
81          class T = GenericComposeStateTable<typename M1::Arc,
82                                             typename F::FilterState> >
83struct ComposeFstImplOptions : public CacheOptions {
84  M1 *matcher1;     // FST1 matcher (see matcher.h)
85  M2 *matcher2;     // FST2 matcher
86  F *filter;        // Composition filter (see compose-filter.h)
87  T *state_table;   // Composition state table (see compose-state-table.h)
88
89  explicit ComposeFstImplOptions(const CacheOptions &opts,
90                                 M1 *mat1 = 0, M2 *mat2 = 0,
91                                 F *filt = 0, T *sttable= 0)
92      : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
93        filter(filt), state_table(sttable) {}
94
95  ComposeFstImplOptions()
96  : matcher1(0), matcher2(0), filter(0), state_table(0) {}
97};
98
99
100// Implementation of delayed composition. This base class is
101// common to the variants with different matchers, composition filters
102// and state tables.
103template <class A>
104class ComposeFstImplBase : public CacheImpl<A> {
105 public:
106  using FstImpl<A>::SetType;
107  using FstImpl<A>::SetProperties;
108  using FstImpl<A>::Properties;
109  using FstImpl<A>::SetInputSymbols;
110  using FstImpl<A>::SetOutputSymbols;
111
112  using CacheBaseImpl< CacheState<A> >::HasStart;
113  using CacheBaseImpl< CacheState<A> >::HasFinal;
114  using CacheBaseImpl< CacheState<A> >::HasArcs;
115  using CacheBaseImpl< CacheState<A> >::SetFinal;
116  using CacheBaseImpl< CacheState<A> >::SetStart;
117
118  typedef typename A::Label Label;
119  typedef typename A::Weight Weight;
120  typedef typename A::StateId StateId;
121  typedef CacheState<A> State;
122
123  ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
124                     const CacheOptions &opts)
125      : CacheImpl<A>(opts) {
126    VLOG(2) << "ComposeFst(" << this << "): Begin";
127    SetType("compose");
128
129    if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) {
130      FSTERROR() << "ComposeFst: output symbol table of 1st argument "
131                 << "does not match input symbol table of 2nd argument";
132      SetProperties(kError, kError);
133    }
134
135    SetInputSymbols(fst1.InputSymbols());
136    SetOutputSymbols(fst2.OutputSymbols());
137  }
138
139  ComposeFstImplBase(const ComposeFstImplBase<A> &impl)
140      : CacheImpl<A>(impl, true) {
141    SetProperties(impl.Properties(), kCopyProperties);
142    SetInputSymbols(impl.InputSymbols());
143    SetOutputSymbols(impl.OutputSymbols());
144  }
145
146  virtual ComposeFstImplBase<A> *Copy() = 0;
147
148  virtual ~ComposeFstImplBase() {}
149
150  StateId Start() {
151    if (!HasStart()) {
152      StateId start = ComputeStart();
153      if (start != kNoStateId) {
154        SetStart(start);
155      }
156    }
157    return CacheImpl<A>::Start();
158  }
159
160  Weight Final(StateId s) {
161    if (!HasFinal(s)) {
162      Weight final = ComputeFinal(s);
163      SetFinal(s, final);
164    }
165    return CacheImpl<A>::Final(s);
166  }
167
168  virtual void Expand(StateId s) = 0;
169
170  size_t NumArcs(StateId s) {
171    if (!HasArcs(s))
172      Expand(s);
173    return CacheImpl<A>::NumArcs(s);
174  }
175
176  size_t NumInputEpsilons(StateId s) {
177    if (!HasArcs(s))
178      Expand(s);
179    return CacheImpl<A>::NumInputEpsilons(s);
180  }
181
182  size_t NumOutputEpsilons(StateId s) {
183    if (!HasArcs(s))
184      Expand(s);
185    return CacheImpl<A>::NumOutputEpsilons(s);
186  }
187
188  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
189    if (!HasArcs(s))
190      Expand(s);
191    CacheImpl<A>::InitArcIterator(s, data);
192  }
193
194 protected:
195  virtual StateId ComputeStart() = 0;
196  virtual Weight ComputeFinal(StateId s) = 0;
197};
198
199
200// Implementaion of delayed composition templated on the matchers (see
201// matcher.h), composition filter (see compose-filter-inl.h) and
202// the composition state table (see compose-state-table.h).
203template <class M1, class M2, class F, class T>
204class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> {
205  typedef typename M1::FST FST1;
206  typedef typename M2::FST FST2;
207  typedef typename M1::Arc Arc;
208  typedef typename Arc::StateId StateId;
209  typedef typename Arc::Label Label;
210  typedef typename Arc::Weight Weight;
211  typedef typename F::FilterState FilterState;
212  typedef typename F::Matcher1 Matcher1;
213  typedef typename F::Matcher2 Matcher2;
214
215  using CacheBaseImpl<CacheState<Arc> >::SetArcs;
216  using FstImpl<Arc>::SetType;
217  using FstImpl<Arc>::SetProperties;
218
219  typedef ComposeStateTuple<StateId, FilterState> StateTuple;
220
221 public:
222  ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
223                 const ComposeFstImplOptions<M1, M2, F, T> &opts);
224
225  ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl)
226      : ComposeFstImplBase<Arc>(impl),
227        filter_(new F(*impl.filter_, true)),
228        matcher1_(filter_->GetMatcher1()),
229        matcher2_(filter_->GetMatcher2()),
230        fst1_(matcher1_->GetFst()),
231        fst2_(matcher2_->GetFst()),
232        state_table_(new T(*impl.state_table_)),
233        match_type_(impl.match_type_) {}
234
235  ~ComposeFstImpl() {
236    VLOG(2) << "ComposeFst(" << this
237            << "): End: # of visited states: " << state_table_->Size();
238
239    delete filter_;
240    delete state_table_;
241  }
242
243  virtual ComposeFstImpl<M1, M2, F, T> *Copy() {
244    return new ComposeFstImpl<M1, M2, F, T>(*this);
245  }
246
247  uint64 Properties() const { return Properties(kFstProperties); }
248
249  // Set error if found; return FST impl properties.
250  uint64 Properties(uint64 mask) const {
251    if ((mask & kError) &&
252        (fst1_.Properties(kError, false) ||
253         fst2_.Properties(kError, false) ||
254         (matcher1_->Properties(0) & kError) ||
255         (matcher2_->Properties(0) & kError) |
256         (filter_->Properties(0) & kError) ||
257         state_table_->Error())) {
258      SetProperties(kError, kError);
259    }
260    return FstImpl<Arc>::Properties(mask);
261  }
262
263  // Arranges it so that the first arg to OrderedExpand is the Fst
264  // that will be matched on.
265  void Expand(StateId s) {
266    const StateTuple &tuple = state_table_->Tuple(s);
267    StateId s1 = tuple.state_id1;
268    StateId s2 = tuple.state_id2;
269    filter_->SetState(s1, s2, tuple.filter_state);
270    if (match_type_ == MATCH_OUTPUT ||
271        (match_type_ == MATCH_BOTH &&
272         internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2)))
273      OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
274    else
275      OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
276  }
277
278  const FST1 &GetFst1() { return fst1_; }
279  const FST2 &GetFst2() { return fst2_; }
280  M1 *GetMatcher1() { return matcher1_; }
281  M2 *GetMatcher2() { return matcher2_; }
282  F *GetFilter() { return filter_; }
283  T *GetStateTable() { return state_table_; }
284
285 private:
286  // This does that actual matching of labels in the composition. The
287  // arguments are ordered so matching is called on state 'sa' of
288  // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg
289  // determines whether the input or output label of arcs at 'sb' is
290  // the one to match on.
291  template <class FST, class Matcher>
292  void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
293                     const FST &fstb, StateId sb,
294                     Matcher *matchera,  bool match_input) {
295    matchera->SetState(sa);
296
297    // First process non-consuming symbols (e.g., epsilons) on FSTA.
298    Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
299           Weight::One(), sb);
300    MatchArc(s, matchera, loop, match_input);
301
302    // Then process matches on FSTB.
303    for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
304      MatchArc(s, matchera, iterb.Value(), match_input);
305
306    SetArcs(s);
307  }
308
309  // Matches a single transition from 'fstb' against 'fata' at 's'.
310  template <class Matcher>
311  void MatchArc(StateId s, Matcher *matchera,
312                const Arc &arc, bool match_input) {
313    if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
314      for (; !matchera->Done(); matchera->Next()) {
315        Arc arca = matchera->Value();
316        Arc arcb = arc;
317        if (match_input) {
318          const FilterState &f = filter_->FilterArc(&arcb, &arca);
319          if (f != FilterState::NoState())
320            AddArc(s, arcb, arca, f);
321        } else {
322          const FilterState &f = filter_->FilterArc(&arca, &arcb);
323          if (f != FilterState::NoState())
324            AddArc(s, arca, arcb, f);
325        }
326      }
327    }
328  }
329
330  // Add a matching transition at 's'.
331   void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
332               const FilterState &f) {
333    StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
334    Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
335           state_table_->FindState(tuple));
336    CacheImpl<Arc>::PushArc(s, oarc);
337  }
338
339  StateId ComputeStart() {
340    StateId s1 = fst1_.Start();
341    if (s1 == kNoStateId)
342      return kNoStateId;
343
344    StateId s2 = fst2_.Start();
345    if (s2 == kNoStateId)
346      return kNoStateId;
347
348    const FilterState &f = filter_->Start();
349    StateTuple tuple(s1, s2, f);
350    return state_table_->FindState(tuple);
351  }
352
353  Weight ComputeFinal(StateId s) {
354    const StateTuple &tuple = state_table_->Tuple(s);
355    StateId s1 = tuple.state_id1;
356    Weight final1 = internal::Final(fst1_, s1);
357    if (final1 == Weight::Zero())
358      return final1;
359
360    StateId s2 = tuple.state_id2;
361    Weight final2 = internal::Final(fst2_, s2);
362    if (final2 == Weight::Zero())
363      return final2;
364
365    filter_->SetState(s1, s2, tuple.filter_state);
366    filter_->FilterFinal(&final1, &final2);
367    return Times(final1, final2);
368  }
369
370  // Identifies and verifies the capabilities of the matcher to be used for
371  // composition.
372  void SetMatchType();
373
374  F *filter_;
375  Matcher1 *matcher1_;
376  Matcher2 *matcher2_;
377  const FST1 &fst1_;
378  const FST2 &fst2_;
379  T *state_table_;
380
381  MatchType match_type_;
382
383  void operator=(const ComposeFstImpl<M1, M2, F, T> &);  // disallow
384};
385
386template <class M1, class M2, class F, class T> inline
387ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
388    const FST1 &fst1, const FST2 &fst2,
389    const ComposeFstImplOptions<M1, M2, F, T> &opts)
390    : ComposeFstImplBase<Arc>(fst1, fst2, opts),
391      filter_(opts.filter ? opts.filter :
392              new F(fst1, fst2, opts.matcher1, opts.matcher2)),
393      matcher1_(filter_->GetMatcher1()),
394      matcher2_(filter_->GetMatcher2()),
395      fst1_(matcher1_->GetFst()),
396      fst2_(matcher2_->GetFst()),
397      state_table_(opts.state_table ? opts.state_table :
398                   new T(fst1_, fst2_)) {
399  SetMatchType();
400  if (match_type_ == MATCH_NONE)
401    SetProperties(kError, kError);
402  VLOG(2) << "ComposeFst(" << this << "): Match type: "
403          << (match_type_ == MATCH_OUTPUT ? "output" :
404              (match_type_ == MATCH_INPUT ? "input" :
405               (match_type_ == MATCH_BOTH ? "both" :
406                (match_type_ == MATCH_NONE ? "none" : "unknown"))));
407
408  uint64 fprops1 = fst1.Properties(kFstProperties, false);
409  uint64 fprops2 = fst2.Properties(kFstProperties, false);
410  uint64 mprops1 = matcher1_->Properties(fprops1);
411  uint64 mprops2 = matcher2_->Properties(fprops2);
412  uint64 cprops = ComposeProperties(mprops1, mprops2);
413  SetProperties(filter_->Properties(cprops), kCopyProperties);
414  if (state_table_->Error()) SetProperties(kError, kError);
415  VLOG(2) << "ComposeFst(" << this << "): Initialized";
416}
417
418template <class M1, class M2, class F, class T>
419void ComposeFstImpl<M1, M2, F, T>::SetMatchType() {
420  MatchType type1 = matcher1_->Type(false);
421  MatchType type2 = matcher2_->Type(false);
422  uint32 flags1 = matcher1_->Flags();
423  uint32 flags2 = matcher2_->Flags();
424  if (flags1 & flags2 & kRequireMatch) {
425    FSTERROR() << "ComposeFst: only one argument can require matching.";
426    match_type_ = MATCH_NONE;
427  } else if (flags1 & kRequireMatch) {
428    if (matcher1_->Type(true) != MATCH_OUTPUT) {
429      FSTERROR() << "ComposeFst: 1st argument requires matching but cannot.";
430      match_type_ = MATCH_NONE;
431    }
432    match_type_ = MATCH_OUTPUT;
433  } else if (flags2 & kRequireMatch) {
434    if (matcher2_->Type(true) != MATCH_INPUT) {
435      FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot.";
436      match_type_ = MATCH_NONE;
437    }
438    match_type_ = MATCH_INPUT;
439  } else if (flags1 & flags2 & kPreferMatch &&
440             type1 == MATCH_OUTPUT && type2  == MATCH_INPUT) {
441    match_type_ = MATCH_BOTH;
442  } else if (flags1 & kPreferMatch && type1 == MATCH_OUTPUT) {
443    match_type_ = MATCH_OUTPUT;
444  } else if  (flags2 & kPreferMatch && type2 == MATCH_INPUT) {
445    match_type_ = MATCH_INPUT;
446  } else if (type1 == MATCH_OUTPUT && type2  == MATCH_INPUT) {
447    match_type_ = MATCH_BOTH;
448  } else if (type1 == MATCH_OUTPUT) {
449    match_type_ = MATCH_OUTPUT;
450  } else if (type2 == MATCH_INPUT) {
451    match_type_ = MATCH_INPUT;
452  } else if (flags1 & kPreferMatch && matcher1_->Type(true) == MATCH_OUTPUT) {
453    match_type_ = MATCH_OUTPUT;
454  } else if  (flags2 & kPreferMatch && matcher2_->Type(true) == MATCH_INPUT) {
455    match_type_ = MATCH_INPUT;
456  } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
457    match_type_ = MATCH_OUTPUT;
458  } else if (matcher2_->Type(true) == MATCH_INPUT) {
459    match_type_ = MATCH_INPUT;
460  } else {
461    FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
462               << "and 2nd argument cannot match on input labels (sort?).";
463    match_type_ = MATCH_NONE;
464  }
465}
466
467
468// Computes the composition of two transducers. This version is a
469// delayed Fst. If FST1 transduces string x to y with weight a and FST2
470// transduces y to z with weight b, then their composition transduces
471// string x to z with weight Times(x, z).
472//
473// The output labels of the first transducer or the input labels of
474// the second transducer must be sorted (with the default matcher).
475// The weights need to form a commutative semiring (valid for
476// TropicalWeight and LogWeight).
477//
478// Complexity:
479// Assuming the first FST is unsorted and the second is sorted:
480// - Time: O(v1 v2 d1 (log d2 + m2)),
481// - Space: O(v1 v2)
482// where vi = # of states visited, di = maximum out-degree, and mi the
483// maximum multiplicity of the states visited for the ith
484// FST. Constant time and space to visit an input state or arc is
485// assumed and exclusive of caching.
486//
487// Caveats:
488// - ComposeFst does not trim its output (since it is a delayed operation).
489// - The efficiency of composition can be strongly affected by several factors:
490//   - the choice of which tnansducer is sorted - prefer sorting the FST
491//     that has the greater average out-degree.
492//   - the amount of non-determinism
493//   - the presence and location of epsilon transitions - avoid epsilon
494//     transitions on the output side of the first transducer or
495//     the input side of the second transducer or prefer placing
496//     them later in a path since they delay matching and can
497//     introduce non-coaccessible states and transitions.
498//
499// This class attaches interface to implementation and handles
500// reference counting, delegating most methods to ImplToFst.
501template <class A>
502class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > {
503 public:
504  friend class ArcIterator< ComposeFst<A> >;
505  friend class StateIterator< ComposeFst<A> >;
506
507  typedef A Arc;
508  typedef typename A::Weight Weight;
509  typedef typename A::StateId StateId;
510  typedef CacheState<A> State;
511  typedef ComposeFstImplBase<A> Impl;
512
513  using ImplToFst<Impl>::SetImpl;
514
515  // Compose specifying only caching options.
516  ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
517             const CacheOptions &opts = CacheOptions())
518      : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
519
520  // Compose specifying one shared matcher type M.  Requires input
521  // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
522  // best code-sharing and matcher compatiblity.
523  template <class M, class F, class T>
524  ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
525             const ComposeFstOptions<A, M, F, T> &opts)
526      : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
527
528  // Compose specifying two matcher types M1 and M2.  Requires input
529  // Fsts (of the same Arc type but o.w. arbitrary) match the
530  // corresponding matcher FST types (M1::FST, M2::FST). Recommended
531  // only for advanced use in demanding or specialized applications
532  // due to potential code bloat and matcher incompatibilities.
533  template <class M1, class M2, class F, class T>
534  ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
535             const ComposeFstImplOptions<M1, M2, F, T> &opts)
536      : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
537
538  // See Fst<>::Copy() for doc.
539  ComposeFst(const ComposeFst<A> &fst, bool safe = false) {
540    if (safe)
541      SetImpl(fst.GetImpl()->Copy());
542    else
543      SetImpl(fst.GetImpl(), false);
544  }
545
546  // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
547  virtual ComposeFst<A> *Copy(bool safe = false) const {
548    return new ComposeFst<A>(*this, safe);
549  }
550
551  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
552
553  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
554    GetImpl()->InitArcIterator(s, data);
555  }
556
557 protected:
558  ComposeFst() {}
559
560  // Create compose implementation specifying two matcher types.
561  template <class M1, class M2, class F, class T>
562  static Impl *CreateBase2(
563      const typename M1::FST &fst1, const typename M2::FST &fst2,
564      const ComposeFstImplOptions<M1, M2, F, T> &opts) {
565    Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts);
566    if (!(Weight::Properties() & kCommutative)) {
567      int64 props1 = fst1.Properties(kUnweighted, true);
568      int64 props2 = fst2.Properties(kUnweighted, true);
569      if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
570        FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
571                   << Weight::Type();
572        impl->SetProperties(kError, kError);
573      }
574    }
575    return impl;
576  }
577
578  // Create compose implementation specifying one matcher type.
579  //  Requires input Fsts and matcher FST type (M::FST) be Fst<A>
580  template <class M, class F, class T>
581  static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
582                           const ComposeFstOptions<A, M, F, T> &opts) {
583    ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2,
584                                            opts.filter, opts.state_table);
585    return CreateBase2(fst1, fst2, nopts);
586  }
587
588  // Create compose implementation specifying no matcher type.
589  static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
590                          const CacheOptions &opts) {
591    switch (LookAheadMatchType(fst1, fst2)) {  // Check for lookahead matchers
592      default:
593      case MATCH_NONE: {     // Default composition (no look-ahead)
594        VLOG(2) << "ComposeFst: Default composition (no look-ahead)";
595        ComposeFstOptions<Arc> nopts(opts);
596        return CreateBase1(fst1, fst2, nopts);
597      }
598      case MATCH_OUTPUT: {   // Lookahead on fst1
599        VLOG(2) << "ComposeFst: Lookahead on fst1";
600        typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
601        typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
602        ComposeFstOptions<Arc, M, F> nopts(opts);
603        return CreateBase1(fst1, fst2, nopts);
604      }
605      case MATCH_INPUT: {    // Lookahead on fst2
606        VLOG(2) << "ComposeFst: Lookahead on fst2";
607        typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
608        typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
609        ComposeFstOptions<Arc, M, F> nopts(opts);
610        return CreateBase1(fst1, fst2, nopts);
611      }
612    }
613  }
614
615 private:
616  // Makes visible to friends.
617  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
618
619  void operator=(const ComposeFst<A> &fst);  // disallow
620};
621
622
623// Specialization for ComposeFst.
624template<class A>
625class StateIterator< ComposeFst<A> >
626    : public CacheStateIterator< ComposeFst<A> > {
627 public:
628  explicit StateIterator(const ComposeFst<A> &fst)
629      : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {}
630};
631
632
633// Specialization for ComposeFst.
634template <class A>
635class ArcIterator< ComposeFst<A> >
636    : public CacheArcIterator< ComposeFst<A> > {
637 public:
638  typedef typename A::StateId StateId;
639
640  ArcIterator(const ComposeFst<A> &fst, StateId s)
641      : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) {
642    if (!fst.GetImpl()->HasArcs(s))
643      fst.GetImpl()->Expand(s);
644  }
645
646 private:
647  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
648};
649
650template <class A> inline
651void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
652  data->base = new StateIterator< ComposeFst<A> >(*this);
653}
654
655// Useful alias when using StdArc.
656typedef ComposeFst<StdArc> StdComposeFst;
657
658enum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER,
659                     MATCH_FILTER };
660
661struct ComposeOptions {
662  bool connect;  // Connect output
663  ComposeFilter filter_type;  // Which pre-defined filter to use
664
665  ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
666      : connect(c), filter_type(ft) {}
667  ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {}
668};
669
670// Computes the composition of two transducers. This version writes
671// the composed FST into a MurableFst. If FST1 transduces string x to
672// y with weight a and FST2 transduces y to z with weight b, then
673// their composition transduces string x to z with weight
674// Times(x, z).
675//
676// The output labels of the first transducer or the input labels of
677// the second transducer must be sorted.  The weights need to form a
678// commutative semiring (valid for TropicalWeight and LogWeight).
679//
680// Complexity:
681// Assuming the first FST is unsorted and the second is sorted:
682// - Time: O(V1 V2 D1 (log D2 + M2)),
683// - Space: O(V1 V2 D1 M2)
684// where Vi = # of states, Di = maximum out-degree, and Mi is
685// the maximum multiplicity for the ith FST.
686//
687// Caveats:
688// - Compose trims its output.
689// - The efficiency of composition can be strongly affected by several factors:
690//   - the choice of which tnansducer is sorted - prefer sorting the FST
691//     that has the greater average out-degree.
692//   - the amount of non-determinism
693//   - the presence and location of epsilon transitions - avoid epsilon
694//     transitions on the output side of the first transducer or
695//     the input side of the second transducer or prefer placing
696//     them later in a path since they delay matching and can
697//     introduce non-coaccessible states and transitions.
698template<class Arc>
699void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
700             MutableFst<Arc> *ofst,
701             const ComposeOptions &opts = ComposeOptions()) {
702  typedef Matcher< Fst<Arc> > M;
703
704  if (opts.filter_type == AUTO_FILTER) {
705     CacheOptions nopts;
706     nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
707     *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts);
708  } else if (opts.filter_type == SEQUENCE_FILTER) {
709    ComposeFstOptions<Arc> copts;
710    copts.gc_limit = 0;  // Cache only the last state for fastest copy.
711    *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
712  } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
713    ComposeFstOptions<Arc, M,  AltSequenceComposeFilter<M> > copts;
714    copts.gc_limit = 0;  // Cache only the last state for fastest copy.
715    *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
716  } else if (opts.filter_type == MATCH_FILTER) {
717    ComposeFstOptions<Arc, M,  MatchComposeFilter<M> > copts;
718    copts.gc_limit = 0;  // Cache only the last state for fastest copy.
719    *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
720  }
721
722  if (opts.connect)
723    Connect(ofst);
724}
725
726}  // namespace fst
727
728#endif  // FST_LIB_COMPOSE_H__
729