1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// intersect.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to compute the intersection of two FSAs
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_INTERSECT_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_INTERSECT_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compose.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A,
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class M = Matcher<Fst<A> >,
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class F = SequenceComposeFilter<M>,
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class T = GenericComposeStateTable<A, typename F::FilterState> >
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct IntersectFstOptions : public ComposeFstOptions<A, M, F, T> {
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit IntersectFstOptions(const CacheOptions &opts,
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               M *mat1 = 0, M *mat2 = 0,
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               F *filt = 0, T *sttable= 0)
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { }
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  IntersectFstOptions() {}
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the intersection (Hadamard product) of two FSAs. This
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version is a delayed Fst.  Only strings that are in both automata
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are retained in the result.
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The two arguments must be acceptors. One of the arguments must be
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// label-sorted.
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: same as ComposeFst.
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats:  same as ComposeFst.
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass IntersectFst : public ComposeFst<A> {
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ComposeFst<A>::CreateBase;
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ComposeFst<A>::CreateBase1;
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ComposeFst<A>::Properties;
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ImplToFst< ComposeFstImplBase<A> >::GetImpl;
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ImplToFst< ComposeFstImplBase<A> >::SetImpl;
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2,
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const CacheOptions opts = CacheOptions()) {
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool acceptors = fst1.Properties(kAcceptor, true) &&
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst2.Properties(kAcceptor, true);
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetImpl(CreateBase(fst1, fst2, opts));
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!acceptors) {
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "IntersectFst: input FSTs are not acceptors";
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      GetImpl()->SetProperties(kError);
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class M, class F, class T>
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2,
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const IntersectFstOptions<A, M, F, T> &opts) {
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool acceptors = fst1.Properties(kAcceptor, true) &&
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst2.Properties(kAcceptor, true);
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetImpl(CreateBase1(fst1, fst2, opts));
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!acceptors) {
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "IntersectFst: input FSTs are not acceptors";
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      GetImpl()->SetProperties(kError);
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // See Fst<>::Copy() for doc.
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  IntersectFst(const IntersectFst<A> &fst, bool safe = false) :
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ComposeFst<A>(fst, safe) {}
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Get a copy of this IntersectFst. See Fst<>::Copy() for further doc.
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual IntersectFst<A> *Copy(bool safe = false) const {
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new IntersectFst<A>(*this, safe);
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for IntersectFst.
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< IntersectFst<A> >
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public StateIterator< ComposeFst<A> > {
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit StateIterator(const IntersectFst<A> &fst)
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : StateIterator< ComposeFst<A> >(fst) {}
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for IntersectFst.
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< IntersectFst<A> >
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public ArcIterator< ComposeFst<A> > {
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcIterator(const IntersectFst<A> &fst, StateId s)
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ArcIterator< ComposeFst<A> >(fst, s) {}
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Useful alias when using StdArc.
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef IntersectFst<StdArc> StdIntersectFst;
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ComposeOptions IntersectOptions;
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the intersection (Hadamard product) of two FSAs. This
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version writes the intersection to an output MurableFst. Only
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// strings that are in both automata are retained in the result.
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The two arguments must be acceptors. One of the arguments must be
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// label-sorted.
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: same as Compose.
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats:  same as Compose.
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Intersect(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             MutableFst<Arc> *ofst,
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const IntersectOptions &opts = IntersectOptions()) {
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef Matcher< Fst<Arc> > M;
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.filter_type == AUTO_FILTER) {
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    CacheOptions nopts;
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = IntersectFst<Arc>(ifst1, ifst2, nopts);
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == SEQUENCE_FILTER) {
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    IntersectFstOptions<Arc> iopts;
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    iopts.gc_limit = 0;  // Cache only the last state for fastest copy.
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    IntersectFstOptions<Arc, M, AltSequenceComposeFilter<M> > iopts;
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    iopts.gc_limit = 0;  // Cache only the last state for fastest copy.
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == MATCH_FILTER) {
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    IntersectFstOptions<Arc, M, MatchComposeFilter<M> > iopts;
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    iopts.gc_limit = 0;  // Cache only the last state for fastest copy.
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts);
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.connect)
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Connect(ofst);
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_INTERSECT_H__
173