1// equivalent.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: wojciech@google.com (Wojciech Skut) 17// 18// \file Functions and classes to determine the equivalence of two 19// FSTs. 20 21#ifndef FST_LIB_EQUIVALENT_H__ 22#define FST_LIB_EQUIVALENT_H__ 23 24#include <algorithm> 25#include <deque> 26using std::deque; 27#include <tr1/unordered_map> 28using std::tr1::unordered_map; 29using std::tr1::unordered_multimap; 30#include <utility> 31using std::pair; using std::make_pair; 32#include <vector> 33using std::vector; 34 35#include <fst/encode.h> 36#include <fst/push.h> 37#include <fst/union-find.h> 38#include <fst/vector-fst.h> 39 40 41namespace fst { 42 43// Traits-like struct holding utility functions/typedefs/constants for 44// the equivalence algorithm. 45// 46// Encoding device: in order to make the statesets of the two acceptors 47// disjoint, we map Arc::StateId on the type MappedId. The states of 48// the first acceptor are mapped on odd numbers (s -> 2s + 1), and 49// those of the second one on even numbers (s -> 2s + 2). The number 0 50// is reserved for an implicit (non-final) 'dead state' (required for 51// the correct treatment of non-coaccessible states; kNoStateId is 52// mapped to kDeadState for both acceptors). The union-find algorithm 53// operates on the mapped IDs. 54template <class Arc> 55struct EquivalenceUtil { 56 typedef typename Arc::StateId StateId; 57 typedef typename Arc::Weight Weight; 58 typedef StateId MappedId; // ID for an equivalence class. 59 60 // MappedId for an implicit dead state. 61 static const MappedId kDeadState = 0; 62 63 // MappedId for lookup failure. 64 static const MappedId kInvalidId = -1; 65 66 // Maps state ID to the representative of the corresponding 67 // equivalence class. The parameter 'which_fst' takes the values 1 68 // and 2, identifying the input FST. 69 static MappedId MapState(StateId s, int32 which_fst) { 70 return 71 (kNoStateId == s) 72 ? 73 kDeadState 74 : 75 (static_cast<MappedId>(s) << 1) + which_fst; 76 } 77 // Maps set ID to State ID. 78 static StateId UnMapState(MappedId id) { 79 return static_cast<StateId>((--id) >> 1); 80 } 81 // Convenience function: checks if state with MappedId 's' is final 82 // in acceptor 'fa'. 83 static bool IsFinal(const Fst<Arc> &fa, MappedId s) { 84 return 85 (kDeadState == s) ? 86 false : (fa.Final(UnMapState(s)) != Weight::Zero()); 87 } 88 // Convenience function: returns the representative of 'id' in 'sets', 89 // creating a new set if needed. 90 static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { 91 MappedId repr = sets->FindSet(id); 92 if (repr != kInvalidId) { 93 return repr; 94 } else { 95 sets->MakeSet(id); 96 return id; 97 } 98 } 99}; 100 101template <class Arc> const 102typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState; 103 104template <class Arc> const 105typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId; 106 107 108// Equivalence checking algorithm: determines if the two FSTs 109// <code>fst1</code> and <code>fst2</code> are equivalent. The input 110// FSTs must be deterministic input-side epsilon-free acceptors, 111// unweighted or with weights over a left semiring. Two acceptors are 112// considered equivalent if they accept exactly the same set of 113// strings (with the same weights). 114// 115// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and 116// Analysis of Computer Programs") successively constructs sets of 117// states that can be reached by the same prefixes, starting with a 118// set containing the start states of both acceptors. A disjoint tree 119// forest (the union-find algorithm) is used to represent the sets of 120// states. The algorithm returns 'false' if one of the constructed 121// sets contains both final and non-final states. Returns optional error 122// value (when FLAGS_error_fatal = false). 123// 124// Complexity: quasi-linear, i.e. O(n G(n)), where 125// n = |S1| + |S2| is the number of states in both acceptors 126// G(n) is a very slowly growing function that can be approximated 127// by 4 by all practical purposes. 128// 129template <class Arc> 130bool Equivalent(const Fst<Arc> &fst1, 131 const Fst<Arc> &fst2, 132 double delta = kDelta, bool *error = 0) { 133 typedef typename Arc::Weight Weight; 134 if (error) *error = false; 135 136 // Check that the symbol table are compatible 137 if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || 138 !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { 139 FSTERROR() << "Equivalent: input/output symbol tables of 1st argument " 140 << "do not match input/output symbol tables of 2nd argument"; 141 if (error) *error = true; 142 return false; 143 } 144 // Check properties first: 145 uint64 props = kNoEpsilons | kIDeterministic | kAcceptor; 146 if (fst1.Properties(props, true) != props) { 147 FSTERROR() << "Equivalent: first argument not an" 148 << " epsilon-free deterministic acceptor"; 149 if (error) *error = true; 150 return false; 151 } 152 if (fst2.Properties(props, true) != props) { 153 FSTERROR() << "Equivalent: second argument not an" 154 << " epsilon-free deterministic acceptor"; 155 if (error) *error = true; 156 return false; 157 } 158 159 if ((fst1.Properties(kUnweighted , true) != kUnweighted) 160 || (fst2.Properties(kUnweighted , true) != kUnweighted)) { 161 VectorFst<Arc> efst1(fst1); 162 VectorFst<Arc> efst2(fst2); 163 Push(&efst1, REWEIGHT_TO_INITIAL, delta); 164 Push(&efst2, REWEIGHT_TO_INITIAL, delta); 165 ArcMap(&efst1, QuantizeMapper<Arc>(delta)); 166 ArcMap(&efst2, QuantizeMapper<Arc>(delta)); 167 EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); 168 ArcMap(&efst1, &mapper); 169 ArcMap(&efst2, &mapper); 170 return Equivalent(efst1, efst2); 171 } 172 173 // Convenience typedefs: 174 typedef typename Arc::StateId StateId; 175 typedef EquivalenceUtil<Arc> Util; 176 typedef typename Util::MappedId MappedId; 177 enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) 178 179 MappedId s1 = Util::MapState(fst1.Start(), FST1); 180 MappedId s2 = Util::MapState(fst2.Start(), FST2); 181 182 // The union-find structure. 183 UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); 184 185 // Initialize the union-find structure. 186 eq_classes.MakeSet(s1); 187 eq_classes.MakeSet(s2); 188 189 // Data structure for the (partial) acceptor transition function of 190 // fst1 and fst2: input labels mapped to pairs of MappedId's 191 // representing destination states of the corresponding arcs in fst1 192 // and fst2, respectively. 193 typedef 194 unordered_map<typename Arc::Label, pair<MappedId, MappedId> > 195 Label2StatePairMap; 196 197 Label2StatePairMap arc_pairs; 198 199 // Pairs of MappedId's to be processed, organized in a queue. 200 deque<pair<MappedId, MappedId> > q; 201 202 bool ret = true; 203 // Early return if the start states differ w.r.t. being final. 204 if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) { 205 ret = false; 206 } 207 208 // Main loop: explores the two acceptors in a breadth-first manner, 209 // updating the equivalence relation on the statesets. Loop 210 // invariant: each block of states contains either final states only 211 // or non-final states only. 212 for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) { 213 s1 = q.front().first; 214 s2 = q.front().second; 215 216 // Representatives of the equivalence classes of s1/s2. 217 MappedId rep1 = Util::FindSet(&eq_classes, s1); 218 MappedId rep2 = Util::FindSet(&eq_classes, s2); 219 220 if (rep1 != rep2) { 221 eq_classes.Union(rep1, rep2); 222 arc_pairs.clear(); 223 224 // Copy outgoing arcs starting at s1 into the hashtable. 225 if (Util::kDeadState != s1) { 226 ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1)); 227 for (; !arc_iter.Done(); arc_iter.Next()) { 228 const Arc &arc = arc_iter.Value(); 229 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 230 // are treated as 231 // non-exisitent. 232 arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); 233 } 234 } 235 } 236 // Copy outgoing arcs starting at s2 into the hashtable. 237 if (Util::kDeadState != s2) { 238 ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2)); 239 for (; !arc_iter.Done(); arc_iter.Next()) { 240 const Arc &arc = arc_iter.Value(); 241 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 242 // are treated as 243 // non-existent. 244 arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); 245 } 246 } 247 } 248 // Iterate through the hashtable and process pairs of target 249 // states. 250 for (typename Label2StatePairMap::const_iterator 251 arc_iter = arc_pairs.begin(); 252 arc_iter != arc_pairs.end(); 253 ++arc_iter) { 254 const pair<MappedId, MappedId> &p = arc_iter->second; 255 if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) { 256 // Detected inconsistency: return false. 257 ret = false; 258 break; 259 } 260 q.push_back(p); 261 } 262 } 263 } 264 265 if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { 266 if (error) *error = true; 267 return false; 268 } 269 270 return ret; 271} 272 273} // namespace fst 274 275#endif // FST_LIB_EQUIVALENT_H__ 276