1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// connect.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Classes and functions to remove unsuccessful paths from an Fst. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_CONNECT_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_CONNECT_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/dfs-visit.h> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/union-find.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/mutable-fst.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Finds and returns connected components. Use with Visit(). 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass CcVisitor { 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Weight Weight; 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // cc[i]: connected component number for state i. 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CcVisitor(vector<StateId> *cc) 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : comps_(new UnionFind<StateId>(0, kNoStateId)), 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cc_(cc), 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nstates_(0) { } 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // comps: connected components equiv classes. 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CcVisitor(UnionFind<StateId> *comps) 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : comps_(comps), 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cc_(0), 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nstates_(0) { } 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~CcVisitor() { 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (cc_) // own comps_? 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete comps_; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void InitVisit(const Fst<A> &fst) { } 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool InitState(StateId s, StateId root) { 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nstates_; 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (comps_->FindSet(s) == kNoStateId) 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson comps_->MakeSet(s); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool WhiteArc(StateId s, const A &arc) { 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson comps_->MakeSet(arc.nextstate); 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson comps_->Union(s, arc.nextstate); 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool GreyArc(StateId s, const A &arc) { 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson comps_->Union(s, arc.nextstate); 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool BlackArc(StateId s, const A &arc) { 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson comps_->Union(s, arc.nextstate); 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void FinishState(StateId s) { } 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void FinishVisit() { 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (cc_) 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetCcVector(cc_); 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // cc[i]: connected component number for state i. 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Returns number of components. 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int GetCcVector(vector<StateId> *cc) { 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cc->clear(); 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cc->resize(nstates_, kNoStateId); 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId ncomp = 0; 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateId i = 0; i < nstates_; ++i) { 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId rep = comps_->FindSet(i); 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId &comp = (*cc)[rep]; 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (comp == kNoStateId) { 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson comp = ncomp; 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++ncomp; 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*cc)[i] = comp; 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return ncomp; 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson UnionFind<StateId> *comps_; // Components 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> *cc_; // State's cc number 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId nstates_; // State count 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Finds and returns strongly-connected components, accessible and 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// coaccessible states and related properties. Uses Tarjan's single 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Algorithms", 189pp). Use with DfsVisit(); 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass SccVisitor { 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // scc[i]: strongly-connected component number for state i. 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // SCC numbers will be in topological order for acyclic input. 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // access[i]: accessibility of state i. 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // coaccess[i]: coaccessibility of state i. 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Any of above can be NULL. 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // props: related property bits (cyclicity, initial cyclicity, 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // accessibility, coaccessibility) set/cleared (o.w. unchanged). 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SccVisitor(vector<StateId> *scc, vector<bool> *access, 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> *coaccess, uint64 *props) 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {} 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SccVisitor(uint64 *props) 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : scc_(0), access_(0), coaccess_(0), props_(props) {} 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void InitVisit(const Fst<A> &fst); 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool InitState(StateId s, StateId root); 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool TreeArc(StateId s, const A &arc) { return true; } 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool BackArc(StateId s, const A &arc) { 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId t = arc.nextstate; 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*dfnumber_)[t] < (*lowlink_)[s]) 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*lowlink_)[s] = (*dfnumber_)[t]; 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*coaccess_)[t]) 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*coaccess_)[s] = true; 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ |= kCyclic; 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ &= ~kAcyclic; 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (arc.nextstate == start_) { 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ |= kInitialCyclic; 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ &= ~kInitialAcyclic; 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ForwardOrCrossArc(StateId s, const A &arc) { 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId t = arc.nextstate; 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ && 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s]) 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*lowlink_)[s] = (*dfnumber_)[t]; 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*coaccess_)[t]) 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*coaccess_)[s] = true; 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void FinishState(StateId s, StateId p, const A *); 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void FinishVisit() { 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Numbers SCC's in topological order when acyclic. 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (scc_) 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateId i = 0; i < scc_->size(); ++i) 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*scc_)[i] = nscc_ - 1 - (*scc_)[i]; 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (coaccess_internal_) 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete coaccess_; 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete dfnumber_; 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete lowlink_; 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete onstack_; 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete scc_stack_; 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> *scc_; // State's scc number 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> *access_; // State's accessibility 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> *coaccess_; // State's coaccessibility 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 *props_; 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A> *fst_; 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId start_; 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId nstates_; // State count 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId nscc_; // SCC count 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool coaccess_internal_; 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> *dfnumber_; // state discovery times 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> *lowlink_; // lowlink[s] == dfnumber[s] => SCC root 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> *onstack_; // is a state on the SCC stack 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> *scc_stack_; // SCC stack (w/ random access) 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid SccVisitor<A>::InitVisit(const Fst<A> &fst) { 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (scc_) 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson scc_->clear(); 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (access_) 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson access_->clear(); 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (coaccess_) { 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson coaccess_->clear(); 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson coaccess_internal_ = false; 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson coaccess_ = new vector<bool>; 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson coaccess_internal_ = true; 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible; 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible); 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_ = &fst; 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson start_ = fst.Start(); 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nstates_ = 0; 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nscc_ = 0; 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dfnumber_ = new vector<StateId>; 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson lowlink_ = new vector<StateId>; 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson onstack_ = new vector<bool>; 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson scc_stack_ = new vector<StateId>; 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool SccVisitor<A>::InitState(StateId s, StateId root) { 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson scc_stack_->push_back(s); 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (dfnumber_->size() <= s) { 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (scc_) 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson scc_->push_back(-1); 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (access_) 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson access_->push_back(false); 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson coaccess_->push_back(false); 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dfnumber_->push_back(-1); 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson lowlink_->push_back(-1); 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson onstack_->push_back(false); 239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*dfnumber_)[s] = nstates_; 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*lowlink_)[s] = nstates_; 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*onstack_)[s] = true; 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (root == start_) { 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (access_) 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*access_)[s] = true; 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (access_) 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*access_)[s] = false; 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ |= kNotAccessible; 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ &= ~kAccessible; 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nstates_; 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid SccVisitor<A>::FinishState(StateId s, StateId p, const A *) { 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fst_->Final(s) != Weight::Zero()) 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*coaccess_)[s] = true; 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*dfnumber_)[s] == (*lowlink_)[s]) { // root of new SCC 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool scc_coaccess = false; 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t i = scc_stack_->size(); 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId t; 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson do { 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson t = (*scc_stack_)[--i]; 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*coaccess_)[t]) 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson scc_coaccess = true; 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } while (s != t); 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson do { 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson t = scc_stack_->back(); 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (scc_) 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*scc_)[t] = nscc_; 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (scc_coaccess) 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*coaccess_)[t] = true; 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*onstack_)[t] = false; 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson scc_stack_->pop_back(); 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } while (s != t); 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!scc_coaccess) { 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ |= kNotCoAccessible; 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *props_ &= ~kCoAccessible; 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nscc_; 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (p != kNoStateId) { 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*coaccess_)[s]) 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*coaccess_)[p] = true; 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*lowlink_)[s] < (*lowlink_)[p]) 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (*lowlink_)[p] = (*lowlink_)[s]; 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Trims an FST, removing states and arcs that are not on successful 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// paths. This version modifies its input. 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Time: O(V + E) 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Space: O(V + E) 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// where V = # of states and E = # of arcs. 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Connect(MutableFst<Arc> *fst) { 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> access; 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> coaccess; 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 props = 0; 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props); 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DfsVisit(*fst, &scc_visitor); 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> dstates; 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateId s = 0; s < access.size(); ++s) 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!access[s] || !coaccess[s]) 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dstates.push_back(s); 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->DeleteStates(dstates); 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible); 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_CONNECT_H__ 320