equivalent.h revision 5b6dc79427b8f7eeb6a7ff68034ab8548ce670ea
1// equivalent.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: wojciech@google.com (Wojciech Skut)
17//
18// \file Functions and classes to determine the equivalence of two
19// FSTs.
20
21#ifndef FST_LIB_EQUIVALENT_H__
22#define FST_LIB_EQUIVALENT_H__
23
24#include <algorithm>
25#include <deque>
26using std::deque;
27#include <tr1/unordered_map>
28using std::tr1::unordered_map;
29using std::tr1::unordered_multimap;
30#include <utility>
31using std::pair; using std::make_pair;
32#include <vector>
33using std::vector;
34
35#include <fst/encode.h>
36#include <fst/push.h>
37#include <fst/union-find.h>
38#include <fst/vector-fst.h>
39
40
41namespace fst {
42
43// Traits-like struct holding utility functions/typedefs/constants for
44// the equivalence algorithm.
45//
46// Encoding device: in order to make the statesets of the two acceptors
47// disjoint, we map Arc::StateId on the type MappedId. The states of
48// the first acceptor are mapped on odd numbers (s -> 2s + 1), and
49// those of the second one on even numbers (s -> 2s + 2). The number 0
50// is reserved for an implicit (non-final) 'dead state' (required for
51// the correct treatment of non-coaccessible states; kNoStateId is
52// mapped to kDeadState for both acceptors). The union-find algorithm
53// operates on the mapped IDs.
54template <class Arc>
55struct EquivalenceUtil {
56  typedef typename Arc::StateId StateId;
57  typedef typename Arc::Weight Weight;
58  typedef StateId MappedId;  // ID for an equivalence class.
59
60  // MappedId for an implicit dead state.
61  static const MappedId kDeadState = 0;
62
63  // MappedId for lookup failure.
64  static const MappedId kInvalidId = -1;
65
66  // Maps state ID to the representative of the corresponding
67  // equivalence class. The parameter 'which_fst' takes the values 1
68  // and 2, identifying the input FST.
69  static MappedId MapState(StateId s, int32 which_fst) {
70    return
71      (kNoStateId == s)
72      ?
73      kDeadState
74      :
75      (static_cast<MappedId>(s) << 1) + which_fst;
76  }
77  // Maps set ID to State ID.
78  static StateId UnMapState(MappedId id) {
79    return static_cast<StateId>((--id) >> 1);
80  }
81  // Convenience function: checks if state with MappedId 's' is final
82  // in acceptor 'fa'.
83  static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
84    return
85      (kDeadState == s) ?
86      false : (fa.Final(UnMapState(s)) != Weight::Zero());
87  }
88  // Convenience function: returns the representative of 'id' in 'sets',
89  // creating a new set if needed.
90  static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) {
91    MappedId repr = sets->FindSet(id);
92    if (repr != kInvalidId) {
93      return repr;
94    } else {
95      sets->MakeSet(id);
96      return id;
97    }
98  }
99};
100
101template <class Arc> const
102typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState;
103
104template <class Arc> const
105typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId;
106
107
108// Equivalence checking algorithm: determines if the two FSTs
109// <code>fst1</code> and <code>fst2</code> are equivalent. The input
110// FSTs must be deterministic input-side epsilon-free acceptors,
111// unweighted or with weights over a left semiring. Two acceptors are
112// considered equivalent if they accept exactly the same set of
113// strings (with the same weights).
114//
115// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and
116// Analysis of Computer Programs") successively constructs sets of
117// states that can be reached by the same prefixes, starting with a
118// set containing the start states of both acceptors. A disjoint tree
119// forest (the union-find algorithm) is used to represent the sets of
120// states. The algorithm returns 'false' if one of the constructed
121// sets contains both final and non-final states. Returns optional error
122// value (when FLAGS_error_fatal = false).
123//
124// Complexity: quasi-linear, i.e. O(n G(n)), where
125//   n = |S1| + |S2| is the number of states in both acceptors
126//   G(n) is a very slowly growing function that can be approximated
127//        by 4 by all practical purposes.
128//
129template <class Arc>
130bool Equivalent(const Fst<Arc> &fst1,
131                const Fst<Arc> &fst2,
132                double delta = kDelta, bool *error = 0) {
133  typedef typename Arc::Weight Weight;
134  if (error) *error = false;
135
136  // Check that the symbol table are compatible
137  if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
138      !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
139    FSTERROR() << "Equivalent: input/output symbol tables of 1st argument "
140               << "do not match input/output symbol tables of 2nd argument";
141    if (error) *error = true;
142    return false;
143  }
144  // Check properties first:
145  uint64 props = kNoEpsilons | kIDeterministic | kAcceptor;
146  if (fst1.Properties(props, true) != props) {
147    FSTERROR() << "Equivalent: first argument not an"
148               << " epsilon-free deterministic acceptor";
149    if (error) *error = true;
150    return false;
151  }
152  if (fst2.Properties(props, true) != props) {
153    FSTERROR() << "Equivalent: second argument not an"
154               << " epsilon-free deterministic acceptor";
155    if (error) *error = true;
156    return false;
157  }
158
159  if ((fst1.Properties(kUnweighted , true) != kUnweighted)
160      || (fst2.Properties(kUnweighted , true) != kUnweighted)) {
161    VectorFst<Arc> efst1(fst1);
162    VectorFst<Arc> efst2(fst2);
163    Push(&efst1, REWEIGHT_TO_INITIAL, delta);
164    Push(&efst2, REWEIGHT_TO_INITIAL, delta);
165    ArcMap(&efst1, QuantizeMapper<Arc>(delta));
166    ArcMap(&efst2, QuantizeMapper<Arc>(delta));
167    EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE);
168    ArcMap(&efst1, &mapper);
169    ArcMap(&efst2, &mapper);
170    return Equivalent(efst1, efst2);
171  }
172
173  // Convenience typedefs:
174  typedef typename Arc::StateId StateId;
175  typedef EquivalenceUtil<Arc> Util;
176  typedef typename Util::MappedId MappedId;
177  enum { FST1 = 1, FST2 = 2 };  // Required by Util::MapState(...)
178
179  MappedId s1 = Util::MapState(fst1.Start(), FST1);
180  MappedId s2 = Util::MapState(fst2.Start(), FST2);
181
182  // The union-find structure.
183  UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
184
185  // Initialize the union-find structure.
186  eq_classes.MakeSet(s1);
187  eq_classes.MakeSet(s2);
188
189  // Data structure for the (partial) acceptor transition function of
190  // fst1 and fst2: input labels mapped to pairs of MappedId's
191  // representing destination states of the corresponding arcs in fst1
192  // and fst2, respectively.
193  typedef
194    unordered_map<typename Arc::Label, pair<MappedId, MappedId> >
195    Label2StatePairMap;
196
197  Label2StatePairMap arc_pairs;
198
199  // Pairs of MappedId's to be processed, organized in a queue.
200  deque<pair<MappedId, MappedId> > q;
201
202  bool ret = true;
203  // Early return if the start states differ w.r.t. being final.
204  if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) {
205    ret = false;
206  }
207
208  // Main loop: explores the two acceptors in a breadth-first manner,
209  // updating the equivalence relation on the statesets. Loop
210  // invariant: each block of states contains either final states only
211  // or non-final states only.
212  for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) {
213    s1 = q.front().first;
214    s2 = q.front().second;
215
216    // Representatives of the equivalence classes of s1/s2.
217    MappedId rep1 = Util::FindSet(&eq_classes, s1);
218    MappedId rep2 = Util::FindSet(&eq_classes, s2);
219
220    if (rep1 != rep2) {
221      eq_classes.Union(rep1, rep2);
222      arc_pairs.clear();
223
224      // Copy outgoing arcs starting at s1 into the hashtable.
225      if (Util::kDeadState != s1) {
226        ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1));
227        for (; !arc_iter.Done(); arc_iter.Next()) {
228          const Arc &arc = arc_iter.Value();
229          if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
230                                                   // are treated as
231                                                   // non-exisitent.
232            arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
233          }
234        }
235      }
236      // Copy outgoing arcs starting at s2 into the hashtable.
237      if (Util::kDeadState != s2) {
238        ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2));
239        for (; !arc_iter.Done(); arc_iter.Next()) {
240          const Arc &arc = arc_iter.Value();
241          if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
242                                                   // are treated as
243                                                   // non-existent.
244            arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
245          }
246        }
247      }
248      // Iterate through the hashtable and process pairs of target
249      // states.
250      for (typename Label2StatePairMap::const_iterator
251             arc_iter = arc_pairs.begin();
252           arc_iter != arc_pairs.end();
253           ++arc_iter) {
254        const pair<MappedId, MappedId> &p = arc_iter->second;
255        if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) {
256          // Detected inconsistency: return false.
257          ret = false;
258          break;
259        }
260        q.push_back(p);
261      }
262    }
263  }
264
265  if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
266    if (error) *error = true;
267    return false;
268  }
269
270  return ret;
271}
272
273}  // namespace fst
274
275#endif  // FST_LIB_EQUIVALENT_H__
276