1// connect.h
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15//
16// \file
17// Classes and functions to remove unsuccessful paths from an Fst.
18
19#ifndef FST_LIB_CONNECT_H__
20#define FST_LIB_CONNECT_H__
21
22#include "fst/lib/mutable-fst.h"
23
24namespace fst {
25
26// Finds and returns strongly-connected components, accessible and
27// coaccessible states and related properties. Uses Tarzan's single
28// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
29// Algorithms", 189pp).
30template <class A>
31class SccVisitor {
32 public:
33  typedef A Arc;
34  typedef typename Arc::Weight Weight;
35  typedef typename A::StateId StateId;
36
37  // scc[i]: strongly-connected component number for state i.
38  //   SCC numbers will be in topological order for acyclic input.
39  // access[i]: accessibility of state i.
40  // coaccess[i]: coaccessibility of state i.
41  // Any of above can be NULL.
42  // props: related property bits (cyclicity, initial cyclicity,
43  //   accessibiliity, coaccessiblity) set/cleared (o.w. unchanged).
44  SccVisitor(vector<StateId> *scc, vector<bool> *access,
45             vector<bool> *coaccess, uint64 *props)
46      : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
47  SccVisitor(uint64 *props)
48      : scc_(0), access_(0), coaccess_(0), props_(props) {}
49
50  void InitVisit(const Fst<A> &fst) {
51    if (scc_)
52      scc_->clear();
53    if (access_)
54      access_->clear();
55    if (coaccess_) {
56      coaccess_->clear();
57      coaccess_internal_ = false;
58    } else {
59      coaccess_ = new vector<bool>;
60      coaccess_internal_ = true;
61    }
62    *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
63    *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible);
64    fst_ = &fst;
65    start_ = fst.Start();
66    nstates_ = 0;
67    nscc_ = 0;
68    dfnumber_ = new vector<StateId>;
69    lowlink_ = new vector<StateId>;
70    onstack_ = new vector<bool>;
71    scc_stack_ = new vector<StateId>;
72  }
73
74  bool InitState(StateId s, StateId root) {
75    scc_stack_->push_back(s);
76    while ((StateId)dfnumber_->size() <= s) {
77      if (scc_)
78        scc_->push_back(-1);
79      if (access_)
80        access_->push_back(false);
81      coaccess_->push_back(false);
82      dfnumber_->push_back(-1);
83      lowlink_->push_back(-1);
84      onstack_->push_back(false);
85    }
86    (*dfnumber_)[s] = nstates_;
87    (*lowlink_)[s] = nstates_;
88    (*onstack_)[s] = true;
89    if (root == start_) {
90      if (access_)
91        (*access_)[s] = true;
92    } else {
93      if (access_)
94        (*access_)[s] = false;
95      *props_ |= kNotAccessible;
96      *props_ &= ~kAccessible;
97    }
98    ++nstates_;
99    return true;
100  }
101
102  bool TreeArc(StateId s, const A &arc) { return true; }
103
104  bool BackArc(StateId s, const A &arc) {
105    StateId t = arc.nextstate;
106    if ((*dfnumber_)[t] < (*lowlink_)[s])
107      (*lowlink_)[s] = (*dfnumber_)[t];
108    if ((*coaccess_)[t])
109      (*coaccess_)[s] = true;
110    *props_ |= kCyclic;
111    *props_ &= ~kAcyclic;
112    if (arc.nextstate == start_) {
113      *props_ |= kInitialCyclic;
114      *props_ &= ~kInitialAcyclic;
115    }
116    return true;
117  }
118
119  bool ForwardOrCrossArc(StateId s, const A &arc) {
120    StateId t = arc.nextstate;
121    if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
122        (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
123      (*lowlink_)[s] = (*dfnumber_)[t];
124    if ((*coaccess_)[t])
125      (*coaccess_)[s] = true;
126    return true;
127  }
128
129  void FinishState(StateId s, StateId p, const A *) {
130    if (fst_->Final(s) != Weight::Zero())
131      (*coaccess_)[s] = true;
132    if ((*dfnumber_)[s] == (*lowlink_)[s]) {  // root of new SCC
133      bool scc_coaccess = false;
134      size_t i = scc_stack_->size();
135      StateId t;
136      do {
137        t = (*scc_stack_)[--i];
138        if ((*coaccess_)[t])
139          scc_coaccess = true;
140      } while (s != t);
141      do {
142        t = scc_stack_->back();
143        if (scc_)
144          (*scc_)[t] = nscc_;
145        if (scc_coaccess)
146          (*coaccess_)[t] = true;
147        (*onstack_)[t] = false;
148        scc_stack_->pop_back();
149      } while (s != t);
150      if (!scc_coaccess) {
151        *props_ |= kNotCoAccessible;
152        *props_ &= ~kCoAccessible;
153      }
154      ++nscc_;
155    }
156    if (p != kNoStateId) {
157      if ((*coaccess_)[s])
158        (*coaccess_)[p] = true;
159      if ((*lowlink_)[s] < (*lowlink_)[p])
160        (*lowlink_)[p] = (*lowlink_)[s];
161    }
162  }
163
164  void FinishVisit() {
165    // Numbers SCC's in topological order when acyclic.
166    if (scc_)
167      for (StateId i = 0; i < (StateId)scc_->size(); ++i)
168        (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
169    if (coaccess_internal_)
170      delete coaccess_;
171    delete dfnumber_;
172    delete lowlink_;
173    delete onstack_;
174    delete scc_stack_;
175  }
176
177 private:
178  vector<StateId> *scc_;        // State's scc number
179  vector<bool> *access_;        // State's accessibility
180  vector<bool> *coaccess_;      // State's coaccessibility
181  uint64 *props_;
182  const Fst<A> *fst_;
183  StateId start_;
184  StateId nstates_;             // State count
185  StateId nscc_;                // SCC count
186  bool coaccess_internal_;
187  vector<StateId> *dfnumber_;   // state discovery times
188  vector<StateId> *lowlink_;    // lowlink[s] == dfnumber[s] => SCC root
189  vector<bool> *onstack_;       // is a state on the SCC stack
190  vector<StateId> *scc_stack_;  // SCC stack (w/ random access)
191};
192
193
194// Trims an FST, removing states and arcs that are not on successful
195// paths. This version modifies its input.
196//
197// Complexity:
198// - Time:  O(V + E)
199// - Space: O(V + E)
200// where V = # of states and E = # of arcs.
201template<class Arc>
202void Connect(MutableFst<Arc> *fst) {
203  typedef typename Arc::StateId StateId;
204
205  vector<bool> access;
206  vector<bool> coaccess;
207  uint64 props = 0;
208  SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props);
209  DfsVisit(*fst, &scc_visitor);
210  vector<StateId> dstates;
211  for (StateId s = 0; s < (StateId)access.size(); ++s)
212    if (!access[s] || !coaccess[s])
213      dstates.push_back(s);
214  fst->DeleteStates(dstates);
215  fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible);
216}
217
218}  // namespace fst
219
220#endif  // FST_LIB_CONNECT_H__
221