equivalent.h revision 73018b4a1d088cdda0e7bd059fddf1f308a8195a
1// equivalent.h
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15//
16// \file Functions and classes to determine the equivalence of two
17// FSTs.
18
19#ifndef FST_LIB_EQUIVALENT_H__
20#define FST_LIB_EQUIVALENT_H__
21
22#include <algorithm>
23
24#include <unordered_map>
25
26#include "fst/lib/encode.h"
27#include "fst/lib/push.h"
28#include "fst/lib/union-find.h"
29#include "fst/lib/vector-fst.h"
30
31namespace fst {
32
33// Traits-like struct holding utility functions/typedefs/constants for
34// the equivalence algorithm.
35//
36// Encoding device: in order to make the statesets of the two acceptors
37// disjoint, we map Arc::StateId on the type MappedId. The states of
38// the first acceptor are mapped on odd numbers (s -> 2s + 1), and
39// those of the second one on even numbers (s -> 2s + 2). The number 0
40// is reserved for an implicit (non-final) 'dead state' (required for
41// the correct treatment of non-coaccessible states; kNoStateId is
42// mapped to kDeadState for both acceptors). The union-find algorithm
43// operates on the mapped IDs.
44template <class Arc>
45struct EquivalenceUtil {
46  typedef typename Arc::StateId StateId;
47  typedef typename Arc::Weight Weight;
48  typedef int32 MappedId;  // ID for an equivalence class.
49
50  // MappedId for an implicit dead state.
51  static const MappedId kDeadState = 0;
52
53  // MappedId for lookup failure.
54  static const MappedId kInvalidId = -1;
55
56  // Maps state ID to the representative of the corresponding
57  // equivalence class. The parameter 'which_fst' takes the values 1
58  // and 2, identifying the input FST.
59  static MappedId MapState(StateId s, int32 which_fst) {
60    return
61      (kNoStateId == s)
62      ?
63      kDeadState
64      :
65      (static_cast<MappedId>(s) << 1) + which_fst;
66  }
67  // Maps set ID to State ID.
68  static StateId UnMapState(MappedId id) {
69    return static_cast<StateId>((--id) >> 1);
70  }
71  // Convenience function: checks if state with MappedId 's' is final
72  // in acceptor 'fa'.
73  static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
74    return
75      (kDeadState == s) ?
76      false : (fa.Final(UnMapState(s)) != Weight::Zero());
77  }
78  // Convenience function: returns the representative of 'id' in 'sets',
79  // creating a new set if needed.
80  static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) {
81    MappedId repr = sets->FindSet(id);
82    if (repr != kInvalidId) {
83      return repr;
84    } else {
85      sets->MakeSet(id);
86      return id;
87    }
88  }
89};
90
91// Equivalence checking algorithm: determines if the two FSTs
92// <code>fst1</code> and <code>fst2</code> are equivalent. The input
93// FSTs must be deterministic input-side epsilon-free acceptors,
94// unweighted or with weights over a left semiring. Two acceptors are
95// considered equivalent if they accept exactly the same set of
96// strings (with the same weights).
97//
// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and
// Analysis of Computer Algorithms") successively constructs sets of
100// states that can be reached by the same prefixes, starting with a
101// set containing the start states of both acceptors. A disjoint tree
102// forest (the union-find algorithm) is used to represent the sets of
103// states. The algorithm returns 'false' if one of the constructed
104// sets contains both final and non-final states.
105//
106// Complexity: quasi-linear, i.e. O(n G(n)), where
107//   n = |S1| + |S2| is the number of states in both acceptors
//   G(n) is a very slowly growing function that can be approximated
//        by 4 for all practical purposes.
110//
111template <class Arc>
112bool Equivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {
113  typedef typename Arc::Weight Weight;
114  // Check properties first:
115  uint64 props = kNoEpsilons | kIDeterministic | kAcceptor;
116  if (fst1.Properties(props, true) != props) {
117    LOG(FATAL) << "Equivalent: first argument not an"
118               << " epsilon-free deterministic acceptor";
119  }
120  if (fst2.Properties(props, true) != props) {
121    LOG(FATAL) << "Equivalent: second argument not an"
122               << " epsilon-free deterministic acceptor";
123  }
124
125  if ((fst1.Properties(kUnweighted , true) != kUnweighted)
126      || (fst2.Properties(kUnweighted , true) != kUnweighted)) {
127    VectorFst<Arc> efst1(fst1);
128    VectorFst<Arc> efst2(fst2);
129    Push(&efst1, REWEIGHT_TO_INITIAL);
130    Push(&efst2, REWEIGHT_TO_INITIAL);
131    Map(&efst1, QuantizeMapper<Arc>());
132    Map(&efst2, QuantizeMapper<Arc>());
133    EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE);
134    Map(&efst1, &mapper);
135    Map(&efst2, &mapper);
136    return Equivalent(efst1, efst2);
137  }
138
139  // Convenience typedefs:
140  typedef typename Arc::StateId StateId;
141  typedef EquivalenceUtil<Arc> Util;
142  typedef typename Util::MappedId MappedId;
143  enum { FST1 = 1, FST2 = 2 };  // Required by Util::MapState(...)
144
145  MappedId s1 = Util::MapState(fst1.Start(), FST1);
146  MappedId s2 = Util::MapState(fst2.Start(), FST2);
147
148  // The union-find structure.
149  UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
150
151  // Initialize the union-find structure.
152  eq_classes.MakeSet(s1);
153  eq_classes.MakeSet(s2);
154
155  // Early return if the start states differ w.r.t. being final.
156  if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) {
157    return false;
158  }
159  // Data structure for the (partial) acceptor transition function of
160  // fst1 and fst2: input labels mapped to pairs of MappedId's
161  // representing destination states of the corresponding arcs in fst1
162  // and fst2, respectively.
163  typedef
164    std::unordered_map<typename Arc::Label, pair<MappedId, MappedId> >
165    Label2StatePairMap;
166
167  Label2StatePairMap arc_pairs;
168
169  // Pairs of MappedId's to be processed, organized in a queue.
170  deque<pair<MappedId, MappedId> > q;
171
172  // Main loop: explores the two acceptors in a breadth-first manner,
173  // updating the equivalence relation on the statesets. Loop
174  // invariant: each block of states contains either final states only
175  // or non-final states only.
176  for (q.push_back(make_pair(s1, s2)); !q.empty(); q.pop_front()) {
177    s1 = q.front().first;
178    s2 = q.front().second;
179
180    // Representatives of the equivalence classes of s1/s2.
181    MappedId rep1 = Util::FindSet(&eq_classes, s1);
182    MappedId rep2 = Util::FindSet(&eq_classes, s2);
183
184    if (rep1 != rep2) {
185      eq_classes.Union(rep1, rep2);
186      arc_pairs.clear();
187
188      // Copy outgoing arcs starting at s1 into the hashtable.
189      if (Util::kDeadState != s1) {
190        ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1));
191        for (; !arc_iter.Done(); arc_iter.Next()) {
192          const Arc &arc = arc_iter.Value();
193          if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
194                                                   // are treated as
195                                                   // non-exisitent.
196            arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
197          }
198        }
199      }
200      // Copy outgoing arcs starting at s2 into the hashtable.
201      if (Util::kDeadState != s2) {
202        ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2));
203        for (; !arc_iter.Done(); arc_iter.Next()) {
204          const Arc &arc = arc_iter.Value();
205          if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
206                                                   // are treated as
207                                                   // non-existent.
208            arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
209          }
210        }
211      }
212      // Iterate through the hashtable and process pairs of target
213      // states.
214      for (typename Label2StatePairMap::const_iterator
215             arc_iter = arc_pairs.begin();
216           arc_iter != arc_pairs.end();
217           ++arc_iter) {
218        const pair<MappedId, MappedId> &p = arc_iter->second;
219        if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) {
220          // Detected inconsistency: return false.
221          return false;
222        }
223        q.push_back(p);
224      }
225    }
226  }
227  return true;
228}
229
230}  // namespace fst
231
232#endif  // FST_LIB_EQUIVALENT_H__
233