1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// intersect.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to compute the intersection of two FSAs 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_INTERSECT_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_INTERSECT_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compose.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class M = Matcher<Fst<A> >, 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class F = SequenceComposeFilter<M>, 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class T = GenericComposeStateTable<A, typename F::FilterState> > 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct IntersectFstOptions : public ComposeFstOptions<A, M, F, T> { 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit IntersectFstOptions(const CacheOptions &opts, 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson M *mat1 = 0, M *mat2 = 0, 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson F *filt = 0, T *sttable= 0) 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { } 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntersectFstOptions() {} 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the intersection (Hadamard product) of two FSAs. This 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version is a delayed Fst. Only strings that are in both automata 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are retained in the result. 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The two arguments must be acceptors. One of the arguments must be 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// label-sorted. 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: same as ComposeFst. 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats: same as ComposeFst. 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass IntersectFst : public ComposeFst<A> { 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ComposeFst<A>::CreateBase; 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ComposeFst<A>::CreateBase1; 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ComposeFst<A>::Properties; 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ImplToFst< ComposeFstImplBase<A> >::GetImpl; 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ImplToFst< ComposeFstImplBase<A> >::SetImpl; 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2, 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CacheOptions opts = CacheOptions()) { 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool acceptors = fst1.Properties(kAcceptor, true) && 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst2.Properties(kAcceptor, true); 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetImpl(CreateBase(fst1, fst2, opts)); 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!acceptors) { 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "IntersectFst: input FSTs are not acceptors"; 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->SetProperties(kError); 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class M, class F, class T> 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2, 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const IntersectFstOptions<A, M, F, T> &opts) { 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool acceptors = fst1.Properties(kAcceptor, true) && 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst2.Properties(kAcceptor, true); 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetImpl(CreateBase1(fst1, fst2, opts)); 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!acceptors) { 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "IntersectFst: input FSTs are not acceptors"; 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->SetProperties(kError); 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // See Fst<>::Copy() for doc. 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntersectFst(const IntersectFst<A> &fst, bool safe = false) : 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFst<A>(fst, safe) {} 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Get a copy of this IntersectFst. See Fst<>::Copy() for further doc. 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual IntersectFst<A> *Copy(bool safe = false) const { 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new IntersectFst<A>(*this, safe); 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for IntersectFst. 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< IntersectFst<A> > 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public StateIterator< ComposeFst<A> > { 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit StateIterator(const IntersectFst<A> &fst) 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : StateIterator< ComposeFst<A> >(fst) {} 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for IntersectFst. 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< IntersectFst<A> > 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public ArcIterator< ComposeFst<A> > { 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator(const IntersectFst<A> &fst, StateId s) 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ArcIterator< ComposeFst<A> >(fst, s) {} 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Useful alias when using StdArc. 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef IntersectFst<StdArc> StdIntersectFst; 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ComposeOptions IntersectOptions; 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the intersection (Hadamard product) of two FSAs. This 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version writes the intersection to an output MurableFst. Only 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// strings that are in both automata are retained in the result. 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The two arguments must be acceptors. One of the arguments must be 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// label-sorted. 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: same as Compose. 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats: same as Compose. 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Intersect(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const IntersectOptions &opts = IntersectOptions()) { 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef Matcher< Fst<Arc> > M; 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.filter_type == AUTO_FILTER) { 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CacheOptions nopts; 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nopts.gc_limit = 0; // Cache only the last state for fastest copy. 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = IntersectFst<Arc>(ifst1, ifst2, nopts); 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == SEQUENCE_FILTER) { 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntersectFstOptions<Arc> iopts; 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iopts.gc_limit = 0; // Cache only the last state for fastest copy. 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntersectFstOptions<Arc, M, AltSequenceComposeFilter<M> > iopts; 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iopts.gc_limit = 0; // Cache only the last state for fastest copy. 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == MATCH_FILTER) { 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson IntersectFstOptions<Arc, M, MatchComposeFilter<M> > iopts; 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iopts.gc_limit = 0; // Cache only the last state for fastest copy. 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.connect) 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Connect(ofst); 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_INTERSECT_H__ 173