1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// difference.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to compute the difference between two FSAs
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_DIFFERENCE_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_DIFFERENCE_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compose.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/complement.h>
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A,
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class M = Matcher<Fst<A> >,
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class F = SequenceComposeFilter<M>,
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          class T = GenericComposeStateTable<A, typename F::FilterState> >
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct DifferenceFstOptions : public ComposeFstOptions<A, M, F, T> {
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit DifferenceFstOptions(const CacheOptions &opts,
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                M *mat1 = 0, M *mat2 = 0,
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                F *filt = 0, T *sttable= 0)
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ComposeFstOptions<A, M, F, T>(mat1, mat2, filt, sttable) { }
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DifferenceFstOptions() {}
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the difference between two FSAs. This version is a delayed
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst. Only strings that are in the first automaton but not in second
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are retained in the result.
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The first argument must be an acceptor; the second argument must be
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an unweighted, epsilon-free, deterministic acceptor. One of the
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// arguments must be label-sorted.
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: same as ComposeFst.
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats: same as ComposeFst.
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass DifferenceFst : public ComposeFst<A> {
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ImplToFst< ComposeFstImplBase<A> >::SetImpl;
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ImplToFst< ComposeFstImplBase<A> >::GetImpl;
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ComposeFst<A>::CreateBase1;
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // A - B = A ^ B'.
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2,
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                const CacheOptions &opts = CacheOptions()) {
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typedef RhoMatcher< Matcher<Fst<A> > > R;
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ComplementFst<A> cfst(fst2);
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ComposeFstOptions<A, R> copts(CacheOptions(),
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                  new R(fst1, MATCH_NONE),
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                  new R(cfst, MATCH_INPUT,
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                        ComplementFst<A>::kRhoLabel));
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetImpl(CreateBase1(fst1, cfst, copts));
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!fst1.Properties(kAcceptor, true)) {
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "DifferenceFst: 1st argument not an acceptor";
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      GetImpl()->SetProperties(kError, kError);
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class M, class F, class T>
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2,
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                const DifferenceFstOptions<A, M, F, T> &opts) {
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typedef RhoMatcher<M> R;
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ComplementFst<A> cfst(fst2);
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ComposeFstOptions<A, R> copts(opts);
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    copts.matcher1 = new R(fst1, MATCH_NONE, kNoLabel, MATCHER_REWRITE_ALWAYS,
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                           opts.matcher1);
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    copts.matcher2 = new R(cfst, MATCH_INPUT, ComplementFst<A>::kRhoLabel,
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                           MATCHER_REWRITE_ALWAYS, opts.matcher2);
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetImpl(CreateBase1(fst1, cfst, copts));
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!fst1.Properties(kAcceptor, true)) {
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "DifferenceFst: 1st argument not an acceptor";
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      GetImpl()->SetProperties(kError, kError);
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // See Fst<>::Copy() for doc.
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DifferenceFst(const DifferenceFst<A> &fst, bool safe = false)
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ComposeFst<A>(fst, safe) {}
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Get a copy of this DifferenceFst. See Fst<>::Copy() for further doc.
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual DifferenceFst<A> *Copy(bool safe = false) const {
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new DifferenceFst<A>(*this, safe);
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for DifferenceFst.
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< DifferenceFst<A> >
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public StateIterator< ComposeFst<A> > {
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit StateIterator(const DifferenceFst<A> &fst)
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : StateIterator< ComposeFst<A> >(fst) {}
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for DifferenceFst.
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< DifferenceFst<A> >
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public ArcIterator< ComposeFst<A> > {
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcIterator(const DifferenceFst<A> &fst, StateId s)
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ArcIterator< ComposeFst<A> >(fst, s) {}
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Useful alias when using StdArc.
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef DifferenceFst<StdArc> StdDifferenceFst;
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ComposeOptions DifferenceOptions;
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the difference between two FSAs. This version is writes
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the difference to an output MutableFst. Only strings that are in
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the first automaton but not in second are retained in the result.
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The first argument must be an acceptor; the second argument must be
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an unweighted, epsilon-free, deterministic acceptor.  One of the
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// arguments must be label-sorted.
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: same as Compose.
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Caveats: same as Compose.
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Difference(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             MutableFst<Arc> *ofst,
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const DifferenceOptions &opts = DifferenceOptions()) {
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef Matcher< Fst<Arc> > M;
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.filter_type == AUTO_FILTER) {
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    CacheOptions nopts;
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = DifferenceFst<Arc>(ifst1, ifst2, nopts);
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == SEQUENCE_FILTER) {
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    DifferenceFstOptions<Arc> dopts;
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    dopts.gc_limit = 0;  // Cache only the last state for fastest copy.
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    DifferenceFstOptions<Arc, M, AltSequenceComposeFilter<M> > dopts;
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    dopts.gc_limit = 0;  // Cache only the last state for fastest copy.
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (opts.filter_type == MATCH_FILTER) {
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    DifferenceFstOptions<Arc, M, MatchComposeFilter<M> > dopts;
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    dopts.gc_limit = 0;  // Cache only the last state for fastest copy.
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts);
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.connect)
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Connect(ofst);
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_DIFFERENCE_H__
190