1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// rmfinalepsilon.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: johans@google.com (Johan Schalkwyk) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Function to remove of final states that have epsilon only input arcs. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_RMFINALEPSILON_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_RMFINALEPSILON_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_set> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_set; 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multiset; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/connect.h> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/mutable-fst.h> 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class A> 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid RmFinalEpsilon(MutableFst<A>* fst) { 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Determine the coaccesibility of states. 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> access; 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> coaccess; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 props = 0; 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SccVisitor<A> scc_visitor(0, &access, &coaccess, &props); 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DfsVisit(*fst, &scc_visitor); 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Find potential list of removable final states. These are final states 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // that have no outgoing transitions or final states that have a 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // non-coaccessible future. Complexity O(S) 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_set<StateId> finals; 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) { 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = siter.Value(); 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst->Final(s) != Weight::Zero()) { 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool future_coaccess = false; 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) { 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A& arc = aiter.Value(); 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (coaccess[arc.nextstate]) { 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson future_coaccess = true; 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson break; 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!future_coaccess) { 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson finals.insert(s); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Move the final weight. Complexity O(E) 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<A> arcs; 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) { 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = siter.Value(); 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w(fst->Final(s)); 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcs.clear(); 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) { 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A& arc = aiter.Value(); 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // is next state in the list of finals 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (finals.find(arc.nextstate) != finals.end()) { 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // sum up all epsilon arcs 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (arc.ilabel == 0 && arc.olabel == 0) { 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson w = Plus(Times(fst->Final(arc.nextstate), arc.weight), w); 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcs.push_back(arc); 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arcs.push_back(arc); 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If some arcs (epsilon arcs) were deleted, delete all 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // arcs and add back only the non epsilon arcs 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (arcs.size() < fst->NumArcs(s)) { 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->DeleteArcs(s); 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetFinal(s, w); 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < arcs.size(); ++i) { 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->AddArc(s, arcs[i]); 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Connect(fst); 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_RMFINALEPSILON_H__ 108