// equivalent.h revision 8fc5a7f51e62cb4ae44a27bdf4176d04adc80ede
1// equivalent.h 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// 16// \file Functions and classes to determine the equivalence of two 17// FSTs. 18 19#ifndef FST_LIB_EQUIVALENT_H__ 20#define FST_LIB_EQUIVALENT_H__ 21 22#include <algorithm> 23 24#include <ext/hash_map> 25using __gnu_cxx::hash_map; 26 27#include "fst/lib/encode.h" 28#include "fst/lib/push.h" 29#include "fst/lib/union-find.h" 30#include "fst/lib/vector-fst.h" 31 32namespace fst { 33 34// Traits-like struct holding utility functions/typedefs/constants for 35// the equivalence algorithm. 36// 37// Encoding device: in order to make the statesets of the two acceptors 38// disjoint, we map Arc::StateId on the type MappedId. The states of 39// the first acceptor are mapped on odd numbers (s -> 2s + 1), and 40// those of the second one on even numbers (s -> 2s + 2). The number 0 41// is reserved for an implicit (non-final) 'dead state' (required for 42// the correct treatment of non-coaccessible states; kNoStateId is 43// mapped to kDeadState for both acceptors). The union-find algorithm 44// operates on the mapped IDs. 45template <class Arc> 46struct EquivalenceUtil { 47 typedef typename Arc::StateId StateId; 48 typedef typename Arc::Weight Weight; 49 typedef int32 MappedId; // ID for an equivalence class. 50 51 // MappedId for an implicit dead state. 52 static const MappedId kDeadState = 0; 53 54 // MappedId for lookup failure. 
55 static const MappedId kInvalidId = -1; 56 57 // Maps state ID to the representative of the corresponding 58 // equivalence class. The parameter 'which_fst' takes the values 1 59 // and 2, identifying the input FST. 60 static MappedId MapState(StateId s, int32 which_fst) { 61 return 62 (kNoStateId == s) 63 ? 64 kDeadState 65 : 66 (static_cast<MappedId>(s) << 1) + which_fst; 67 } 68 // Maps set ID to State ID. 69 static StateId UnMapState(MappedId id) { 70 return static_cast<StateId>((--id) >> 1); 71 } 72 // Convenience function: checks if state with MappedId 's' is final 73 // in acceptor 'fa'. 74 static bool IsFinal(const Fst<Arc> &fa, MappedId s) { 75 return 76 (kDeadState == s) ? 77 false : (fa.Final(UnMapState(s)) != Weight::Zero()); 78 } 79 // Convenience function: returns the representative of 'id' in 'sets', 80 // creating a new set if needed. 81 static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { 82 MappedId repr = sets->FindSet(id); 83 if (repr != kInvalidId) { 84 return repr; 85 } else { 86 sets->MakeSet(id); 87 return id; 88 } 89 } 90}; 91 92// Equivalence checking algorithm: determines if the two FSTs 93// <code>fst1</code> and <code>fst2</code> are equivalent. The input 94// FSTs must be deterministic input-side epsilon-free acceptors, 95// unweighted or with weights over a left semiring. Two acceptors are 96// considered equivalent if they accept exactly the same set of 97// strings (with the same weights). 98// 99// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and 100// Analysis of Computer Programs") successively constructs sets of 101// states that can be reached by the same prefixes, starting with a 102// set containing the start states of both acceptors. A disjoint tree 103// forest (the union-find algorithm) is used to represent the sets of 104// states. The algorithm returns 'false' if one of the constructed 105// sets contains both final and non-final states. 106// 107// Complexity: quasi-linear, i.e. 
O(n G(n)), where 108// n = |S1| + |S2| is the number of states in both acceptors 109// G(n) is a very slowly growing function that can be approximated 110// by 4 by all practical purposes. 111// 112template <class Arc> 113bool Equivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2) { 114 typedef typename Arc::Weight Weight; 115 // Check properties first: 116 uint64 props = kNoEpsilons | kIDeterministic | kAcceptor; 117 if (fst1.Properties(props, true) != props) { 118 LOG(FATAL) << "Equivalent: first argument not an" 119 << " epsilon-free deterministic acceptor"; 120 } 121 if (fst2.Properties(props, true) != props) { 122 LOG(FATAL) << "Equivalent: second argument not an" 123 << " epsilon-free deterministic acceptor"; 124 } 125 126 if ((fst1.Properties(kUnweighted , true) != kUnweighted) 127 || (fst2.Properties(kUnweighted , true) != kUnweighted)) { 128 VectorFst<Arc> efst1(fst1); 129 VectorFst<Arc> efst2(fst2); 130 Push(&efst1, REWEIGHT_TO_INITIAL); 131 Push(&efst2, REWEIGHT_TO_INITIAL); 132 Map(&efst1, QuantizeMapper<Arc>()); 133 Map(&efst2, QuantizeMapper<Arc>()); 134 EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); 135 Map(&efst1, &mapper); 136 Map(&efst2, &mapper); 137 return Equivalent(efst1, efst2); 138 } 139 140 // Convenience typedefs: 141 typedef typename Arc::StateId StateId; 142 typedef EquivalenceUtil<Arc> Util; 143 typedef typename Util::MappedId MappedId; 144 enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) 145 146 MappedId s1 = Util::MapState(fst1.Start(), FST1); 147 MappedId s2 = Util::MapState(fst2.Start(), FST2); 148 149 // The union-find structure. 150 UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); 151 152 // Initialize the union-find structure. 153 eq_classes.MakeSet(s1); 154 eq_classes.MakeSet(s2); 155 156 // Early return if the start states differ w.r.t. being final. 
157 if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) { 158 return false; 159 } 160 // Data structure for the (partial) acceptor transition function of 161 // fst1 and fst2: input labels mapped to pairs of MappedId's 162 // representing destination states of the corresponding arcs in fst1 163 // and fst2, respectively. 164 typedef 165 hash_map<typename Arc::Label, pair<MappedId, MappedId> > 166 Label2StatePairMap; 167 168 Label2StatePairMap arc_pairs; 169 170 // Pairs of MappedId's to be processed, organized in a queue. 171 deque<pair<MappedId, MappedId> > q; 172 173 // Main loop: explores the two acceptors in a breadth-first manner, 174 // updating the equivalence relation on the statesets. Loop 175 // invariant: each block of states contains either final states only 176 // or non-final states only. 177 for (q.push_back(make_pair(s1, s2)); !q.empty(); q.pop_front()) { 178 s1 = q.front().first; 179 s2 = q.front().second; 180 181 // Representatives of the equivalence classes of s1/s2. 182 MappedId rep1 = Util::FindSet(&eq_classes, s1); 183 MappedId rep2 = Util::FindSet(&eq_classes, s2); 184 185 if (rep1 != rep2) { 186 eq_classes.Union(rep1, rep2); 187 arc_pairs.clear(); 188 189 // Copy outgoing arcs starting at s1 into the hashtable. 190 if (Util::kDeadState != s1) { 191 ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1)); 192 for (; !arc_iter.Done(); arc_iter.Next()) { 193 const Arc &arc = arc_iter.Value(); 194 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 195 // are treated as 196 // non-exisitent. 197 arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); 198 } 199 } 200 } 201 // Copy outgoing arcs starting at s2 into the hashtable. 202 if (Util::kDeadState != s2) { 203 ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2)); 204 for (; !arc_iter.Done(); arc_iter.Next()) { 205 const Arc &arc = arc_iter.Value(); 206 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 207 // are treated as 208 // non-existent. 
209 arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); 210 } 211 } 212 } 213 // Iterate through the hashtable and process pairs of target 214 // states. 215 for (typename Label2StatePairMap::const_iterator 216 arc_iter = arc_pairs.begin(); 217 arc_iter != arc_pairs.end(); 218 ++arc_iter) { 219 const pair<MappedId, MappedId> &p = arc_iter->second; 220 if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) { 221 // Detected inconsistency: return false. 222 return false; 223 } 224 q.push_back(p); 225 } 226 } 227 } 228 return true; 229} 230 231} // namespace fst 232 233#endif // FST_LIB_EQUIVALENT_H__ 234