1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// difference.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to compute the difference between two FSAs 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_DIFFERENCE_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_DIFFERENCE_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compose.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/complement.h> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class M = Matcher<Fst<A> >, 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class F = SequenceComposeFilter<M>, 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class T = GenericComposeStateTable<A, typename F::FilterState> > 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct DifferenceFstOptions : public ComposeFstOptions<A, M, F, T> { 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit DifferenceFstOptions(const CacheOptions &opts, 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson M *mat1 = 0, M *mat2 = 0, 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson F *filt = 0, T *sttable= 0) 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ComposeFstOptions<A, M, F, T>(mat1, mat2, filt, sttable) { } 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DifferenceFstOptions() {} 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the difference between two FSAs. This version is a delayed 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst. Only strings that are in the first automaton but not in second 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are retained in the result. 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The first argument must be an acceptor; the second argument must be 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an unweighted, epsilon-free, deterministic acceptor. One of the 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// arguments must be label-sorted. 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: same as ComposeFst. 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats: same as ComposeFst. 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass DifferenceFst : public ComposeFst<A> { 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ImplToFst< ComposeFstImplBase<A> >::SetImpl; 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ImplToFst< ComposeFstImplBase<A> >::GetImpl; 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using ComposeFst<A>::CreateBase1; 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // A - B = A ^ B'. 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2, 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CacheOptions &opts = CacheOptions()) { 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef RhoMatcher< Matcher<Fst<A> > > R; 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComplementFst<A> cfst(fst2); 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions<A, R> copts(CacheOptions(), 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new R(fst1, MATCH_NONE), 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new R(cfst, MATCH_INPUT, 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComplementFst<A>::kRhoLabel)); 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetImpl(CreateBase1(fst1, cfst, copts)); 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst1.Properties(kAcceptor, true)) { 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "DifferenceFst: 1st argument not an acceptor"; 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->SetProperties(kError, kError); 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class M, class F, class T> 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2, 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const DifferenceFstOptions<A, M, F, T> &opts) { 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef RhoMatcher<M> R; 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComplementFst<A> cfst(fst2); 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ComposeFstOptions<A, R> copts(opts); 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson copts.matcher1 = new R(fst1, MATCH_NONE, kNoLabel, MATCHER_REWRITE_ALWAYS, 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson opts.matcher1); 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson copts.matcher2 = new R(cfst, MATCH_INPUT, ComplementFst<A>::kRhoLabel, 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MATCHER_REWRITE_ALWAYS, opts.matcher2); 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetImpl(CreateBase1(fst1, cfst, copts)); 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst1.Properties(kAcceptor, true)) { 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "DifferenceFst: 1st argument not an acceptor"; 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->SetProperties(kError, kError); 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // See Fst<>::Copy() for doc. 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DifferenceFst(const DifferenceFst<A> &fst, bool safe = false) 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ComposeFst<A>(fst, safe) {} 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Get a copy of this DifferenceFst. See Fst<>::Copy() for further doc. 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual DifferenceFst<A> *Copy(bool safe = false) const { 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new DifferenceFst<A>(*this, safe); 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for DifferenceFst. 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< DifferenceFst<A> > 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public StateIterator< ComposeFst<A> > { 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit StateIterator(const DifferenceFst<A> &fst) 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : StateIterator< ComposeFst<A> >(fst) {} 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for DifferenceFst. 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< DifferenceFst<A> > 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public ArcIterator< ComposeFst<A> > { 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator(const DifferenceFst<A> &fst, StateId s) 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ArcIterator< ComposeFst<A> >(fst, s) {} 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Useful alias when using StdArc. 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef DifferenceFst<StdArc> StdDifferenceFst; 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ComposeOptions DifferenceOptions; 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the difference between two FSAs. This version is writes 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the difference to an output MutableFst. Only strings that are in 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the first automaton but not in second are retained in the result. 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The first argument must be an acceptor; the second argument must be 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an unweighted, epsilon-free, deterministic acceptor. One of the 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// arguments must be label-sorted. 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: same as Compose. 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats: same as Compose. 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Difference(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const DifferenceOptions &opts = DifferenceOptions()) { 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef Matcher< Fst<Arc> > M; 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.filter_type == AUTO_FILTER) { 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CacheOptions nopts; 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nopts.gc_limit = 0; // Cache only the last state for fastest copy. 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = DifferenceFst<Arc>(ifst1, ifst2, nopts); 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == SEQUENCE_FILTER) { 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DifferenceFstOptions<Arc> dopts; 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dopts.gc_limit = 0; // Cache only the last state for fastest copy. 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DifferenceFstOptions<Arc, M, AltSequenceComposeFilter<M> > dopts; 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dopts.gc_limit = 0; // Cache only the last state for fastest copy. 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (opts.filter_type == MATCH_FILTER) { 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DifferenceFstOptions<Arc, M, MatchComposeFilter<M> > dopts; 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dopts.gc_limit = 0; // Cache only the last state for fastest copy. 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.connect) 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Connect(ofst); 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_DIFFERENCE_H__ 190