1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state-reachable.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to determine whether a given (final) state can be reached from some
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// other given state.
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_STATE_REACHABLE_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_STATE_REACHABLE_H__
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/dfs-visit.h>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/interval-set.h>
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Computes the (final) states reachable from a given state in an FST.
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// After this visitor has been called, a final state f can be reached
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// from a state s iff (*isets)[s].Member(state2index[f]) is true, where
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (*isets[s]) is a set of half-open inteval of final state indices
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// and state2index[f] maps from a final state to its index.
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// If state2index is empty, it is filled-in with suitable indices.
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// If it is non-empty, those indices are used; in this case, the
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// final states must have out-degree 0.
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, typename I = typename A::StateId>
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass IntervalReachVisitor {
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename IntervalSet<I>::Interval Interval;
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  IntervalReachVisitor(const Fst<A> &fst,
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       vector< IntervalSet<I> > *isets,
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       vector<I> *state2index)
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : fst_(fst),
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        isets_(isets),
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state2index_(state2index),
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        index_(state2index->empty() ? 1 : -1),
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        error_(false) {
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    isets_->clear();
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitVisit(const Fst<A> &fst) { error_ = false; }
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool InitState(StateId s, StateId r) {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    while (isets_->size() <= s)
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      isets_->push_back(IntervalSet<Label>());
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    while (state2index_->size() <= s)
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      state2index_->push_back(-1);
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst_.Final(s) != Weight::Zero()) {
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Create tree interval
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      vector<Interval> *intervals = (*isets_)[s].Intervals();
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (index_ < 0) {  // Use state2index_ map to set index
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (fst_.NumArcs(s) > 0) {
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          FSTERROR() << "IntervalReachVisitor: state2index map must be empty "
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     << "for this FST";
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          error_ = true;
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          return false;
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        I index = (*state2index_)[s];
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (index < 0) {
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          FSTERROR() << "IntervalReachVisitor: state2index map incomplete";
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          error_ = true;
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          return false;
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        intervals->push_back(Interval(index, index + 1));
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {           // Use pre-order index
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        intervals->push_back(Interval(index_, index_ + 1));
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (*state2index_)[s] = index_++;
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool TreeArc(StateId s, const A &arc) {
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool BackArc(StateId s, const A &arc) {
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "IntervalReachVisitor: cyclic input";
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    error_ = true;
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ForwardOrCrossArc(StateId s, const A &arc) {
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Non-tree interval
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    (*isets_)[s].Union((*isets_)[arc.nextstate]);
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FinishState(StateId s, StateId p, const A *arc) {
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (index_ >= 0 && fst_.Final(s) != Weight::Zero()) {
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      vector<Interval> *intervals = (*isets_)[s].Intervals();
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*intervals)[0].end = index_;      // Update tree interval end
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    (*isets_)[s].Normalize();
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (p != kNoStateId)
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*isets_)[p].Union((*isets_)[s]);  // Propagate intervals to parent
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FinishVisit() {}
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Error() const { return error_; }
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Fst<A> &fst_;
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector< IntervalSet<I> > *isets_;
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<I> *state2index_;
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I index_;
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool error_;
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Tests reachability of final states from a given state. To test for
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reachability from a state s, first do SetState(s). Then a final
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state f can be reached from state s of FST iff Reach(f) is true.
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, typename I = typename A::StateId>
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateReachable {
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef I Index;
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename IntervalSet<I>::Interval Interval;
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateReachable(const Fst<A> &fst)
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : error_(false) {
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    IntervalReachVisitor<Arc> reach_visitor(fst, &isets_, &state2index_);
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    DfsVisit(fst, &reach_visitor);
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (reach_visitor.Error()) error_ = true;
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateReachable(const StateReachable<A> &reachable) {
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "Copy constructor for state reachable class "
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << "not yet implemented.";
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    error_ = true;
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Set current state.
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetState(StateId s) { s_ = s; }
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Can reach this label from current state?
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Reach(StateId s) {
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (s >= state2index_.size())
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    I i =  state2index_[s];
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (i < 0) {
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "StateReachable: state non-final: " << s;
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return isets_[s_].Member(i);
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Access to the state-to-index mapping. Unassigned states have index -1.
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<I> &State2Index() { return state2index_; }
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Access to the interval sets. These specify the reachability
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // to the final states as intervals of the final state indices.
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector< IntervalSet<I> > &IntervalSets() { return isets_; }
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Error() const { return error_; }
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId s_;                                 // Current state
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector< IntervalSet<I> > isets_;            // Interval sets per state
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<I> state2index_;                     // Finds index for a final state
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool error_;
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const StateReachable<A> &);  // Disallow
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_STATE_REACHABLE_H__
199