1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// connect.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Classes and functions to remove unsuccessful paths from an Fst.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_CONNECT_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_CONNECT_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/dfs-visit.h>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/union-find.h>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/mutable-fst.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Finds and returns connected components. Use with Visit().
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass CcVisitor {
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // cc[i]: connected component number for state i.
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CcVisitor(vector<StateId> *cc)
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : comps_(new UnionFind<StateId>(0, kNoStateId)),
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        cc_(cc),
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nstates_(0) { }
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // comps: connected components equiv classes.
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CcVisitor(UnionFind<StateId> *comps)
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : comps_(comps),
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        cc_(0),
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nstates_(0) { }
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~CcVisitor() {
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (cc_)  // own comps_?
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete comps_;
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitVisit(const Fst<A> &fst) { }
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool InitState(StateId s, StateId root) {
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ++nstates_;
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (comps_->FindSet(s) == kNoStateId)
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      comps_->MakeSet(s);
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool WhiteArc(StateId s, const A &arc) {
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    comps_->MakeSet(arc.nextstate);
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    comps_->Union(s, arc.nextstate);
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool GreyArc(StateId s, const A &arc) {
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    comps_->Union(s, arc.nextstate);
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool BlackArc(StateId s, const A &arc) {
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    comps_->Union(s, arc.nextstate);
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FinishState(StateId s) { }
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FinishVisit() {
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (cc_)
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      GetCcVector(cc_);
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // cc[i]: connected component number for state i.
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Returns number of components.
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int GetCcVector(vector<StateId> *cc) {
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    cc->clear();
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    cc->resize(nstates_, kNoStateId);
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId ncomp = 0;
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (StateId i = 0; i < nstates_; ++i) {
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId rep = comps_->FindSet(i);
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId &comp = (*cc)[rep];
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (comp == kNoStateId) {
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        comp = ncomp;
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++ncomp;
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*cc)[i] = comp;
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return ncomp;
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  UnionFind<StateId> *comps_;   // Components
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> *cc_;         // State's cc number
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId nstates_;             // State count
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Finds and returns strongly-connected components, accessible and
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// coaccessible states and related properties. Uses Tarjan's single
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Algorithms", 189pp). Use with DfsVisit();
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass SccVisitor {
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // scc[i]: strongly-connected component number for state i.
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //   SCC numbers will be in topological order for acyclic input.
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // access[i]: accessibility of state i.
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // coaccess[i]: coaccessibility of state i.
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Any of above can be NULL.
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // props: related property bits (cyclicity, initial cyclicity,
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //   accessibility, coaccessibility) set/cleared (o.w. unchanged).
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SccVisitor(vector<StateId> *scc, vector<bool> *access,
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             vector<bool> *coaccess, uint64 *props)
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SccVisitor(uint64 *props)
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : scc_(0), access_(0), coaccess_(0), props_(props) {}
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitVisit(const Fst<A> &fst);
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool InitState(StateId s, StateId root);
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool TreeArc(StateId s, const A &arc) { return true; }
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool BackArc(StateId s, const A &arc) {
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId t = arc.nextstate;
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((*dfnumber_)[t] < (*lowlink_)[s])
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*lowlink_)[s] = (*dfnumber_)[t];
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((*coaccess_)[t])
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*coaccess_)[s] = true;
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *props_ |= kCyclic;
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *props_ &= ~kAcyclic;
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (arc.nextstate == start_) {
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *props_ |= kInitialCyclic;
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *props_ &= ~kInitialAcyclic;
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ForwardOrCrossArc(StateId s, const A &arc) {
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId t = arc.nextstate;
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*lowlink_)[s] = (*dfnumber_)[t];
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((*coaccess_)[t])
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*coaccess_)[s] = true;
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FinishState(StateId s, StateId p, const A *);
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FinishVisit() {
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Numbers SCC's in topological order when acyclic.
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (scc_)
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (StateId i = 0; i < scc_->size(); ++i)
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (coaccess_internal_)
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete coaccess_;
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete dfnumber_;
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete lowlink_;
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete onstack_;
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete scc_stack_;
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> *scc_;        // State's scc number
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<bool> *access_;        // State's accessibility
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<bool> *coaccess_;      // State's coaccessibility
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 *props_;
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Fst<A> *fst_;
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId start_;
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId nstates_;             // State count
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId nscc_;                // SCC count
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool coaccess_internal_;
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> *dfnumber_;   // state discovery times
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> *lowlink_;    // lowlink[s] == dfnumber[s] => SCC root
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<bool> *onstack_;       // is a state on the SCC stack
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> *scc_stack_;  // SCC stack (w/ random access)
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid SccVisitor<A>::InitVisit(const Fst<A> &fst) {
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (scc_)
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    scc_->clear();
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (access_)
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    access_->clear();
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (coaccess_) {
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    coaccess_->clear();
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    coaccess_internal_ = false;
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    coaccess_ = new vector<bool>;
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    coaccess_internal_ = true;
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible);
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst_ = &fst;
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  start_ = fst.Start();
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  nstates_ = 0;
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  nscc_ = 0;
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  dfnumber_ = new vector<StateId>;
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  lowlink_ = new vector<StateId>;
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  onstack_ = new vector<bool>;
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  scc_stack_ = new vector<StateId>;
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool SccVisitor<A>::InitState(StateId s, StateId root) {
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  scc_stack_->push_back(s);
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (dfnumber_->size() <= s) {
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (scc_)
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      scc_->push_back(-1);
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (access_)
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      access_->push_back(false);
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    coaccess_->push_back(false);
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    dfnumber_->push_back(-1);
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    lowlink_->push_back(-1);
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    onstack_->push_back(false);
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  (*dfnumber_)[s] = nstates_;
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  (*lowlink_)[s] = nstates_;
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  (*onstack_)[s] = true;
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (root == start_) {
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (access_)
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*access_)[s] = true;
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (access_)
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*access_)[s] = false;
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *props_ |= kNotAccessible;
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *props_ &= ~kAccessible;
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ++nstates_;
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return true;
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid SccVisitor<A>::FinishState(StateId s, StateId p, const A *) {
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (fst_->Final(s) != Weight::Zero())
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    (*coaccess_)[s] = true;
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if ((*dfnumber_)[s] == (*lowlink_)[s]) {  // root of new SCC
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool scc_coaccess = false;
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t i = scc_stack_->size();
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId t;
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    do {
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      t = (*scc_stack_)[--i];
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if ((*coaccess_)[t])
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        scc_coaccess = true;
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } while (s != t);
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    do {
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      t = scc_stack_->back();
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (scc_)
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (*scc_)[t] = nscc_;
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (scc_coaccess)
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (*coaccess_)[t] = true;
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*onstack_)[t] = false;
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      scc_stack_->pop_back();
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } while (s != t);
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!scc_coaccess) {
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *props_ |= kNotCoAccessible;
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *props_ &= ~kCoAccessible;
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ++nscc_;
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (p != kNoStateId) {
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((*coaccess_)[s])
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*coaccess_)[p] = true;
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((*lowlink_)[s] < (*lowlink_)[p])
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*lowlink_)[p] = (*lowlink_)[s];
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Trims an FST, removing states and arcs that are not on successful
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// paths. This version modifies its input.
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity:
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Time:  O(V + E)
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Space: O(V + E)
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// where V = # of states and E = # of arcs.
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Connect(MutableFst<Arc> *fst) {
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<bool> access;
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<bool> coaccess;
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 props = 0;
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props);
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DfsVisit(*fst, &scc_visitor);
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> dstates;
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (StateId s = 0; s < access.size(); ++s)
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!access[s] || !coaccess[s])
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      dstates.push_back(s);
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst->DeleteStates(dstates);
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible);
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_CONNECT_H__
320