1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// replace.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: johans@google.com (Johan Schalkwyk)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functions and classes for the recursive replacement of Fsts.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_REPLACE_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_REPLACE_H__
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_map>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap;
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <set>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility>
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair;
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h>
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/expanded-fst.h>
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h>
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/matcher.h>
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/replace-util.h>
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/state-table.h>
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h>
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// REPLACE STATE TUPLES AND TABLES
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The replace state table has the form
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// template <class A, class P>
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// class ReplaceStateTable {
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//  public:
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   typedef A Arc;
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   typedef P PrefixId;
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   typedef typename A::StateId StateId;
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   typedef ReplaceStateTuple<StateId, PrefixId> StateTuple;
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   typedef typename A::Label Label;
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Required constuctor
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   ReplaceStateTable(const vector<pair<Label, const Fst<A>*> > &fst_tuples,
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//                     Label root);
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Required copy constructor that does not copy state
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   ReplaceStateTable(const ReplaceStateTable<A,P> &table);
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Lookup state ID by tuple. If it doesn't exist, then add it.
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   StateId FindState(const StateTuple &tuple);
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Lookup state tuple by ID.
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   const StateTuple &Tuple(StateId id) const;
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// };
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \struct ReplaceStateTuple
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \brief Tuple of information that uniquely defines a state in replace
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P>
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ReplaceStateTuple {
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef S StateId;
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef P PrefixId;
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceStateTuple()
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : prefix_id(-1), fst_id(kNoStateId), fst_state(kNoStateId) {}
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceStateTuple(PrefixId p, StateId f, StateId s)
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : prefix_id(p), fst_id(f), fst_state(s) {}
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PrefixId prefix_id;  // index in prefix table
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId fst_id;      // current fst being walked
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId fst_state;   // current state in fst being walked, not to be
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       // confused with the state_id of the combined fst
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Equality of replace state tuples.
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P>
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsoninline bool operator==(const ReplaceStateTuple<S, P>& x,
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       const ReplaceStateTuple<S, P>& y) {
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return x.prefix_id == y.prefix_id &&
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      x.fst_id == y.fst_id &&
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      x.fst_state == y.fst_state;
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceRootSelector
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functor returning true for tuples corresponding to states in the root FST
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P>
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceRootSelector {
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator()(const ReplaceStateTuple<S, P> &tuple) const {
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return tuple.prefix_id == 0;
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceFingerprint
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fingerprint for general replace state tuples.
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P>
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFingerprint {
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFingerprint(const vector<uint64> *size_array)
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : cumulative_size_array_(size_array) {}
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 operator()(const ReplaceStateTuple<S, P> &tuple) const {
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return tuple.prefix_id * (cumulative_size_array_->back()) +
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        cumulative_size_array_->at(tuple.fst_id - 1) +
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        tuple.fst_state;
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<uint64> *cumulative_size_array_;
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceFstStateFingerprint
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Useful when the fst_state uniquely define the tuple.
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class P>
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFstStateFingerprint {
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 operator()(const ReplaceStateTuple<S, P>& tuple) const {
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return tuple.fst_state;
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceHash
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A generic hash function for replace state tuples.
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename S, typename P>
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceHash {
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t operator()(const ReplaceStateTuple<S, P>& t) const {
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return t.prefix_id + t.fst_id * kPrime0 + t.fst_state * kPrime1;
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const size_t kPrime0;
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const size_t kPrime1;
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename S, typename P>
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst size_t ReplaceHash<S, P>::kPrime0 = 7853;
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename S, typename P>
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst size_t ReplaceHash<S, P>::kPrime1 = 7867;
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T> class ReplaceFstMatcher;
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class VectorHashReplaceStateTable
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A two-level state table for replace.
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Warning: calls CountStates to compute the number of states of each
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// component Fst.
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class P = ssize_t>
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass VectorHashReplaceStateTable {
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef P PrefixId;
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ReplaceStateTuple<StateId, P> StateTuple;
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef VectorHashStateTable<ReplaceStateTuple<StateId, P>,
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               ReplaceRootSelector<StateId, P>,
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               ReplaceFstStateFingerprint<StateId, P>,
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               ReplaceFingerprint<StateId, P> > StateTable;
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  VectorHashReplaceStateTable(
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const vector<pair<Label, const Fst<A>*> > &fst_tuples,
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label root) : root_size_(0) {
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    cumulative_size_array_.push_back(0);
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < fst_tuples.size(); ++i) {
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (fst_tuples[i].first == root) {
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        root_size_ = CountStates(*(fst_tuples[i].second));
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        cumulative_size_array_.push_back(cumulative_size_array_.back());
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        cumulative_size_array_.push_back(cumulative_size_array_.back() +
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                         CountStates(*(fst_tuples[i].second)));
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    state_table_ = new StateTable(
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        new ReplaceRootSelector<StateId, P>,
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        new ReplaceFstStateFingerprint<StateId, P>,
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        new ReplaceFingerprint<StateId, P>(&cumulative_size_array_),
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        root_size_,
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        root_size_ + cumulative_size_array_.back());
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  VectorHashReplaceStateTable(const VectorHashReplaceStateTable<A, P> &table)
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : root_size_(table.root_size_),
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        cumulative_size_array_(table.cumulative_size_array_) {
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    state_table_ = new StateTable(
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        new ReplaceRootSelector<StateId, P>,
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        new ReplaceFstStateFingerprint<StateId, P>,
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        new ReplaceFingerprint<StateId, P>(&cumulative_size_array_),
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        root_size_,
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        root_size_ + cumulative_size_array_.back());
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~VectorHashReplaceStateTable() {
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete state_table_;
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId FindState(const StateTuple &tuple) {
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return state_table_->FindState(tuple);
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const StateTuple &Tuple(StateId id) const {
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return state_table_->Tuple(id);
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId root_size_;
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<uint64> cumulative_size_array_;
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateTable *state_table_;
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class DefaultReplaceStateTable
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Default replace state table
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class P = ssize_t>
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass DefaultReplaceStateTable : public CompactHashStateTable<
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceStateTuple<typename A::StateId, P>,
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceHash<typename A::StateId, P> > {
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef P PrefixId;
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ReplaceStateTuple<StateId, P> StateTuple;
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CompactHashStateTable<StateTuple,
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                ReplaceHash<StateId, PrefixId> > StateTable;
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using StateTable::FindState;
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using StateTable::Tuple;
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DefaultReplaceStateTable(
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const vector<pair<Label, const Fst<A>*> > &fst_tuples,
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label root) {}
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DefaultReplaceStateTable(const DefaultReplaceStateTable<A, P> &table)
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : StateTable() {}
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// REPLACE FST CLASS
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// By default ReplaceFst will copy the input label of the 'replace arc'.
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For acceptors we do not want this behaviour. Instead we need to
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// create an epsilon arc when recursing into the appropriate Fst.
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The 'epsilon_on_replace' option can be used to toggle this behaviour.
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T = DefaultReplaceStateTable<A> >
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ReplaceFstOptions : CacheOptions {
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 root;    // root rule for expansion
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool  epsilon_on_replace;
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool  take_ownership;  // take ownership of input Fst(s)
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T*    state_table;
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstOptions(const CacheOptions &opts, int64 r)
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheOptions(opts),
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        root(r),
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        epsilon_on_replace(false),
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        take_ownership(false),
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state_table(0) {}
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit ReplaceFstOptions(int64 r)
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : root(r),
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        epsilon_on_replace(false),
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        take_ownership(false),
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state_table(0) {}
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstOptions(int64 r, bool epsilon_replace_arc)
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : root(r),
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        epsilon_on_replace(epsilon_replace_arc),
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        take_ownership(false),
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state_table(0) {}
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstOptions()
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : root(kNoLabel),
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        epsilon_on_replace(false),
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        take_ownership(false),
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state_table(0) {}
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceFstImpl
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \brief Implementation class for replace class Fst
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The replace implementation class supports a dynamic
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// expansion of a recursive transition network represented as Fst
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// with dynamic replacable arcs.
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T>
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFstImpl : public CacheImpl<A> {
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class ReplaceFstMatcher<A, T>;
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetType;
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetProperties;
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::WriteHeader;
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetInputSymbols;
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetOutputSymbols;
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::InputSymbols;
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::OutputSymbols;
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::PushArc;
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::HasArcs;
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::HasFinal;
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::HasStart;
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::SetArcs;
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::SetFinal;
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::SetStart;
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label   Label;
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight  Weight;
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CacheState<A> State;
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_map<Label, Label> NonTerminalHash;
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef T StateTable;
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename T::PrefixId PrefixId;
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ReplaceStateTuple<StateId, PrefixId> StateTuple;
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // constructor for replace class implementation.
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \param fst_tuples array of label/fst tuples, one for each non-terminal
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstImpl(const vector< pair<Label, const Fst<A>* > >& fst_tuples,
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 const ReplaceFstOptions<A, T> &opts)
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(opts),
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        epsilon_on_replace_(opts.epsilon_on_replace),
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state_table_(opts.state_table ? opts.state_table :
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     new StateTable(fst_tuples, opts.root)) {
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType("replace");
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst_tuples.size() > 0) {
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetInputSymbols(fst_tuples[0].second->InputSymbols());
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetOutputSymbols(fst_tuples[0].second->OutputSymbols());
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool all_negative = true;  // all nonterminals are negative?
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool dense_range = true;   // all nonterminals are positive
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               // and form a dense range containing 1?
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < fst_tuples.size(); ++i) {
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label nonterminal = fst_tuples[i].first;
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (nonterminal >= 0)
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        all_negative = false;
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (nonterminal > fst_tuples.size() || nonterminal <= 0)
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        dense_range = false;
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<uint64> inprops;
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool all_ilabel_sorted = true;
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool all_olabel_sorted = true;
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool all_non_empty = true;
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst_array_.push_back(0);
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < fst_tuples.size(); ++i) {
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label label = fst_tuples[i].first;
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Fst<A> *fst = fst_tuples[i].second;
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      nonterminal_hash_[label] = fst_array_.size();
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      nonterminal_set_.insert(label);
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst_array_.push_back(opts.take_ownership ? fst : fst->Copy());
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (fst->Start() == kNoStateId)
378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        all_non_empty = false;
379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if(!fst->Properties(kILabelSorted, false))
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        all_ilabel_sorted = false;
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if(!fst->Properties(kOLabelSorted, false))
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        all_olabel_sorted = false;
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      inprops.push_back(fst->Properties(kCopyProperties, false));
384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (i) {
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (!CompatSymbols(InputSymbols(), fst->InputSymbols())) {
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          FSTERROR() << "ReplaceFstImpl: input symbols of Fst " << i
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     << " does not match input symbols of base Fst (0'th fst)";
388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          SetProperties(kError, kError);
389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (!CompatSymbols(OutputSymbols(), fst->OutputSymbols())) {
391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          FSTERROR() << "ReplaceFstImpl: output symbols of Fst " << i
392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     << " does not match output symbols of base Fst "
393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     << "(0'th fst)";
394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          SetProperties(kError, kError);
395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Label nonterminal = nonterminal_hash_[opts.root];
399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((nonterminal == 0) && (fst_array_.size() > 1)) {
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "ReplaceFstImpl: no Fst corresponding to root label '"
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << opts.root << "' in the input tuple vector";
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kError, kError);
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    root_ = (nonterminal > 0) ? nonterminal : 1;
405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(ReplaceProperties(inprops, root_ - 1, epsilon_on_replace_,
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                    all_non_empty));
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // We assume that all terminals are positive.  The resulting
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // ReplaceFst is known to be kILabelSorted when all sub-FSTs are
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // kILabelSorted and one of the 3 following conditions is satisfied:
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    //  1. 'epsilon_on_replace' is false, or
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    //  2. all non-terminals are negative, or
413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    //  3. all non-terninals are positive and form a dense range containing 1.
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (all_ilabel_sorted &&
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (!epsilon_on_replace_ || all_negative || dense_range))
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kILabelSorted, kILabelSorted);
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Similarly, the resulting ReplaceFst is known to be
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // kOLabelSorted when all sub-FSTs are kOLabelSorted and one of
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // the 2 following conditions is satisfied:
420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    //  1. all non-terminals are negative, or
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    //  2. all non-terninals are positive and form a dense range containing 1.
422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (all_olabel_sorted && (all_negative || dense_range))
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kOLabelSorted, kOLabelSorted);
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Enable optional caching as long as sorted and all non empty.
426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (Properties(kILabelSorted | kOLabelSorted) && all_non_empty)
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      always_cache_ = false;
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      always_cache_ = true;
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    VLOG(2) << "ReplaceFstImpl::ReplaceFstImpl: always_cache = "
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            << (always_cache_ ? "true" : "false");
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstImpl(const ReplaceFstImpl& impl)
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(impl),
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        epsilon_on_replace_(impl.epsilon_on_replace_),
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        always_cache_(impl.always_cache_),
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state_table_(new StateTable(*(impl.state_table_))),
439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nonterminal_set_(impl.nonterminal_set_),
440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nonterminal_hash_(impl.nonterminal_hash_),
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        root_(impl.root_) {
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType("replace");
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(impl.Properties(), kCopyProperties);
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetInputSymbols(impl.InputSymbols());
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetOutputSymbols(impl.OutputSymbols());
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst_array_.reserve(impl.fst_array_.size());
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst_array_.push_back(0);
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 1; i < impl.fst_array_.size(); ++i) {
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst_array_.push_back(impl.fst_array_[i]->Copy(true));
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~ReplaceFstImpl() {
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    VLOG(2) << "~ReplaceFstImpl: gc = "
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            << (CacheImpl<A>::GetCacheGc() ? "true" : "false")
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            << ", gc_size = " << CacheImpl<A>::GetCacheSize()
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            << ", gc_limit = " << CacheImpl<A>::GetCacheLimit();
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete state_table_;
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 1; i < fst_array_.size(); ++i) {
461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete fst_array_[i];
462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Computes the dependency graph of the replace class and returns
466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // true if the dependencies are cyclic. Cyclic dependencies will result
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // in an un-expandable replace fst.
468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool CyclicDependencies() const {
469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ReplaceUtil<A> replace_util(fst_array_, nonterminal_hash_, root_);
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return replace_util.CyclicDependencies();
471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Return or compute start state of replace fst
474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId Start() {
475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasStart()) {
476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (fst_array_.size() == 1) {      // no fsts defined for replace
477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        SetStart(kNoStateId);
478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return kNoStateId;
479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        const Fst<A>* fst = fst_array_[root_];
481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        StateId fst_start = fst->Start();
482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (fst_start == kNoStateId)  // root Fst is empty
483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          return kNoStateId;
484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        PrefixId prefix = GetPrefixId(StackPrefix());
486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        StateId start = state_table_->FindState(
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            StateTuple(prefix, root_, fst_start));
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        SetStart(start);
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return start;
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::Start();
493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // return final weight of state (kInfWeight means state is not final)
497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight Final(StateId s) {
498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasFinal(s)) {
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const StateTuple& tuple  = state_table_->Tuple(s);
500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const StackPrefix& stack = stackprefix_array_[tuple.prefix_id];
501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Fst<A>* fst = fst_array_[tuple.fst_id];
502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId fst_state = tuple.fst_state;
503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (fst->Final(fst_state) != Weight::Zero() && stack.Depth() == 0)
505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        SetFinal(s, fst->Final(fst_state));
506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      else
507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        SetFinal(s, Weight::Zero());
508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::Final(s);
510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumArcs(StateId s) {
513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (HasArcs(s)) {  // If state cached, use the cached value.
514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumArcs(s);
515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (always_cache_) {  // If always caching, expand and cache state.
516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumArcs(s);
518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {  // Otherwise compute the number of arcs without expanding.
519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateTuple tuple  = state_table_->Tuple(s);
520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (tuple.fst_state == kNoStateId)
521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 0;
522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Fst<A>* fst = fst_array_[tuple.fst_id];
524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      size_t num_arcs = fst->NumArcs(tuple.fst_state);
525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (ComputeFinalArc(tuple, 0))
526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        num_arcs++;
527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return num_arcs;
529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Returns whether a given label is a non terminal
533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool IsNonTerminal(Label l) const {
534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // TODO(allauzen): be smarter and take advantage of
535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // all_dense or all_negative.
536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Use also in ComputeArc, this would require changes to replace
537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // so that recursing into an empty fst lead to a non co-accessible
538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // state instead of deleting the arc as done currently.
539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Current use correct, since i/olabel sorted iff all_non_empty.
540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename NonTerminalHash::const_iterator it =
541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nonterminal_hash_.find(l);
542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return it != nonterminal_hash_.end();
543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumInputEpsilons(StateId s) {
546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (HasArcs(s)) {
547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // If state cached, use the cached value.
548f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumInputEpsilons(s);
549f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (always_cache_ || !Properties(kILabelSorted)) {
550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // If always caching or if the number of input epsilons is too expensive
551f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // to compute without caching (i.e. not ilabel sorted),
552f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // then expand and cache state.
553f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
554f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumInputEpsilons(s);
555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
556f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Otherwise, compute the number of input epsilons without caching.
557f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateTuple tuple  = state_table_->Tuple(s);
558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (tuple.fst_state == kNoStateId)
559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 0;
560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Fst<A>* fst = fst_array_[tuple.fst_id];
561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      size_t num  = 0;
562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!epsilon_on_replace_) {
563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // If epsilon_on_replace is false, all input epsilon arcs
564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // are also input epsilons arcs in the underlying machine.
565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst->NumInputEpsilons(tuple.fst_state);
566f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
567f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Otherwise, one need to consider that all non-terminal arcs
568f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // in the underlying machine also become input epsilon arc.
569f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state);
570f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        for (; !aiter.Done() &&
571f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 ((aiter.Value().ilabel == 0) ||
572f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  IsNonTerminal(aiter.Value().olabel));
573f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             aiter.Next())
574f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ++num;
575f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
576f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (ComputeFinalArc(tuple, 0))
577f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        num++;
578f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return num;
579f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
580f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
581f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
582f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumOutputEpsilons(StateId s) {
583f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (HasArcs(s)) {
584f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // If state cached, use the cached value.
585f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumOutputEpsilons(s);
586f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if(always_cache_ || !Properties(kOLabelSorted)) {
587f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // If always caching or if the number of output epsilons is too expensive
588f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // to compute without caching (i.e. not olabel sorted),
589f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // then expand and cache state.
590f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
591f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumOutputEpsilons(s);
592f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
593f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Otherwise, compute the number of output epsilons without caching.
594f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateTuple tuple  = state_table_->Tuple(s);
595f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (tuple.fst_state == kNoStateId)
596f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return 0;
597f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Fst<A>* fst = fst_array_[tuple.fst_id];
598f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      size_t num  = 0;
599f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state);
600f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (; !aiter.Done() &&
601f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               ((aiter.Value().olabel == 0) ||
602f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                IsNonTerminal(aiter.Value().olabel));
603f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           aiter.Next())
604f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++num;
605f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (ComputeFinalArc(tuple, 0))
606f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        num++;
607f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return num;
608f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
609f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
610f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
611f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 Properties() const { return Properties(kFstProperties); }
612f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
613f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Set error if found; return FST impl properties.
614f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 Properties(uint64 mask) const {
615f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (mask & kError) {
616f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 1; i < fst_array_.size(); ++i) {
617f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (fst_array_[i]->Properties(kError, false))
618f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          SetProperties(kError, kError);
619f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
620f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
621f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FstImpl<Arc>::Properties(mask);
622f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
623f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
624f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // return the base arc iterator, if arcs have not been computed yet,
625f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // extend/recurse for new arcs.
626f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
627f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
628f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
629f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    CacheImpl<A>::InitArcIterator(s, data);
630f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // TODO(allauzen): Set behaviour of generic iterator
631f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Warning: ArcIterator<ReplaceFst<A> >::InitCache()
632f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // relies on current behaviour.
633f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
634f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
635f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
636f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Extend current state (walk arcs one level deep)
637f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Expand(StateId s) {
638f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateTuple tuple = state_table_->Tuple(s);
639f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
640f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // If local fst is empty
641f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (tuple.fst_state == kNoStateId) {
642f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetArcs(s);
643f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
644f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
645f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
646f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ArcIterator< Fst<A> > aiter(
647f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        *(fst_array_[tuple.fst_id]), tuple.fst_state);
648f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Arc arc;
649f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
650f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Create a final arc when needed
651f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ComputeFinalArc(tuple, &arc))
652f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      PushArc(s, arc);
653f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
654f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Expand all arcs leaving the state
655f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (;!aiter.Done(); aiter.Next()) {
656f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (ComputeArc(tuple, aiter.Value(), &arc))
657f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        PushArc(s, arc);
658f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
659f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
660f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetArcs(s);
661f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
662f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
663f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Expand(StateId s, const StateTuple &tuple,
664f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              const ArcIteratorData<A> &data) {
665f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson     // If local fst is empty
666f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (tuple.fst_state == kNoStateId) {
667f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetArcs(s);
668f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
669f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
670f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
671f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ArcIterator< Fst<A> > aiter(data);
672f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Arc arc;
673f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
674f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Create a final arc when needed
675f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ComputeFinalArc(tuple, &arc))
676f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      AddArc(s, arc);
677f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
678f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Expand all arcs leaving the state
679f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (; !aiter.Done(); aiter.Next()) {
680f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (ComputeArc(tuple, aiter.Value(), &arc))
681f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        AddArc(s, arc);
682f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
683f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
684f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetArcs(s);
685f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
686f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
687f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If arcp == 0, only returns if a final arc is required, does not
688f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // actually compute it.
689f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ComputeFinalArc(const StateTuple &tuple, A* arcp,
690f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                       uint32 flags = kArcValueFlags) {
691f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const Fst<A>* fst = fst_array_[tuple.fst_id];
692f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId fst_state = tuple.fst_state;
693f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst_state == kNoStateId)
694f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
695f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
696f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   // if state is final, pop up stack
697f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const StackPrefix& stack = stackprefix_array_[tuple.prefix_id];
698f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst->Final(fst_state) != Weight::Zero() && stack.Depth()) {
699f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (arcp) {
700f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        arcp->ilabel = 0;
701f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        arcp->olabel = 0;
702f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (flags & kArcNextStateValue) {
703f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          PrefixId prefix_id = PopPrefix(stack);
704f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          const PrefixTuple& top = stack.Top();
705f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          arcp->nextstate = state_table_->FindState(
706f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              StateTuple(prefix_id, top.fst_id, top.nextstate));
707f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
708f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (flags & kArcWeightValue)
709f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          arcp->weight = fst->Final(fst_state);
710f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
711f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return true;
712f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
713f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
714f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
715f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
716f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
717f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Compute the arc in the replace fst corresponding to a given
718f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // in the underlying machine. Returns false if the underlying arc
719f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // corresponds to no arc in the replace.
720f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ComputeArc(const StateTuple &tuple, const A &arc, A* arcp,
721f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  uint32 flags = kArcValueFlags) {
722f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!epsilon_on_replace_ &&
723f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (flags == (flags & (kArcILabelValue | kArcWeightValue)))) {
724f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *arcp = arc;
725f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return true;
726f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
727f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
728f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (arc.olabel == 0) {  // expand local fst
729f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId nextstate = flags & kArcNextStateValue
730f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ? state_table_->FindState(
731f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate))
732f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          : kNoStateId;
733f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate);
734f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
735f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // check for non terminal
736f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      typename NonTerminalHash::const_iterator it =
737f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          nonterminal_hash_.find(arc.olabel);
738f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (it != nonterminal_hash_.end()) {  // recurse into non terminal
739f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Label nonterminal = it->second;
740f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        const Fst<A>* nt_fst = fst_array_[nonterminal];
741f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        PrefixId nt_prefix = PushPrefix(stackprefix_array_[tuple.prefix_id],
742f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                        tuple.fst_id, arc.nextstate);
743f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
744f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // if start state is valid replace, else arc is implicitly
745f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // deleted
746f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        StateId nt_start = nt_fst->Start();
747f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (nt_start != kNoStateId) {
748f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          StateId nt_nextstate =  flags & kArcNextStateValue
749f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              ? state_table_->FindState(
750f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  StateTuple(nt_prefix, nonterminal, nt_start))
751f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              : kNoStateId;
752f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          Label ilabel = (epsilon_on_replace_) ? 0 : arc.ilabel;
753f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          *arcp = A(ilabel, 0, arc.weight, nt_nextstate);
754f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        } else {
755f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          return false;
756f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
757f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
758f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        StateId nextstate = flags & kArcNextStateValue
759f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            ? state_table_->FindState(
760f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate))
761f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            : kNoStateId;
762f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate);
763f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
764f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
765f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
766f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
767f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
768f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Returns the arc iterator flags supported by this Fst.
769f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint32 ArcIteratorFlags() const {
770f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    uint32 flags = kArcValueFlags;
771f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!always_cache_)
772f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      flags |= kArcNoCache;
773f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return flags;
774f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
775f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
776f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  T* GetStateTable() const {
777f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return state_table_;
778f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
779f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
780f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Fst<A>* GetFst(Label fst_id) const {
781f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return fst_array_[fst_id];
782f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
783f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
784f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool EpsilonOnReplace() const { return epsilon_on_replace_; }
785f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
786f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // private helper classes
787f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
788f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const size_t kPrime0;
789f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
790f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \class PrefixTuple
791f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \brief Tuple of fst_id and destination state (entry in stack prefix)
792f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  struct PrefixTuple {
793f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    PrefixTuple(Label f, StateId s) : fst_id(f), nextstate(s) {}
794f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
795f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Label   fst_id;
796f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId nextstate;
797f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
798f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
799f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \class StackPrefix
800f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \brief Container for stack prefix.
801f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class StackPrefix {
802f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
803f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StackPrefix() {}
804f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
805f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // copy constructor
806f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StackPrefix(const StackPrefix& x) :
807f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        prefix_(x.prefix_) {
808f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
809f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
810f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    void Push(StateId fst_id, StateId nextstate) {
811f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      prefix_.push_back(PrefixTuple(fst_id, nextstate));
812f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
813f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
814f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    void Pop() {
815f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      prefix_.pop_back();
816f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
817f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
818f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const PrefixTuple& Top() const {
819f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return prefix_[prefix_.size()-1];
820f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
821f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
822f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t Depth() const {
823f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return prefix_.size();
824f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
825f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
826f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
827f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<PrefixTuple> prefix_;
828f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
829f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
830f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
831f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \class StackPrefixEqual
832f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \brief Compare two stack prefix classes for equality
833f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class StackPrefixEqual {
834f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
835f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator()(const StackPrefix& x, const StackPrefix& y) const {
836f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (x.prefix_.size() != y.prefix_.size()) return false;
837f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < x.prefix_.size(); ++i) {
838f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (x.prefix_[i].fst_id    != y.prefix_[i].fst_id ||
839f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           x.prefix_[i].nextstate != y.prefix_[i].nextstate) return false;
840f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
841f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return true;
842f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
843f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
844f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
845f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //
846f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \class StackPrefixKey
847f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // \brief Hash function for stack prefix to prefix id
848f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class StackPrefixKey {
849f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
850f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t operator()(const StackPrefix& x) const {
851f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      size_t sum = 0;
852f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < x.prefix_.size(); ++i) {
853f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        sum += x.prefix_[i].fst_id + x.prefix_[i].nextstate*kPrime0;
854f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
855f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return sum;
856f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
857f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
858f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
859f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_map<StackPrefix, PrefixId, StackPrefixKey, StackPrefixEqual>
860f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StackPrefixHash;
861f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
862f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // private methods
863f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
864f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // hash stack prefix (return unique index into stackprefix array)
865f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PrefixId GetPrefixId(const StackPrefix& prefix) {
866f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename StackPrefixHash::iterator it = prefix_hash_.find(prefix);
867f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (it == prefix_hash_.end()) {
868f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      PrefixId prefix_id = stackprefix_array_.size();
869f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      stackprefix_array_.push_back(prefix);
870f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      prefix_hash_[prefix] = prefix_id;
871f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return prefix_id;
872f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
873f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return it->second;
874f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
875f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
876f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
877f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // prefix id after a stack pop
878f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PrefixId PopPrefix(StackPrefix prefix) {
879f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    prefix.Pop();
880f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return GetPrefixId(prefix);
881f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
882f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
883f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // prefix id after a stack push
884f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PrefixId PushPrefix(StackPrefix prefix, Label fst_id, StateId nextstate) {
885f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    prefix.Push(fst_id, nextstate);
886f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return GetPrefixId(prefix);
887f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
888f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
889f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
890f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // private data
891f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
892f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // runtime options
893f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool epsilon_on_replace_;
894f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool always_cache_;  // Optionally caching arc iterator disabled when true
895f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
896f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // state table
897f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateTable *state_table_;
898f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
899f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // cross index of unique stack prefix
900f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // could potentially have one copy of prefix array
901f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StackPrefixHash prefix_hash_;
902f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StackPrefix> stackprefix_array_;
903f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
904f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  set<Label> nonterminal_set_;
905f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  NonTerminalHash nonterminal_hash_;
906f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<const Fst<A>*> fst_array_;
907f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Label root_;
908f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
909f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const ReplaceFstImpl<A, T> &);  // disallow
910f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
911f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
912f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
913f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T>
914f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst size_t ReplaceFstImpl<A, T>::kPrime0 = 7853;
915f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
916f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
917f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \class ReplaceFst
918f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \brief Recursivively replaces arcs in the root Fst with other Fsts.
919f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This version is a delayed Fst.
920f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
921f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ReplaceFst supports dynamic replacement of arcs in one Fst with
922f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// another Fst. This replacement is recursive.  ReplaceFst can be used
923f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to support a variety of delayed constructions such as recursive
924f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transition networks, union, or closure.  It is constructed with an
925f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// array of Fst(s). One Fst represents the root (or topology)
926f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// machine. The root Fst refers to other Fsts by recursively replacing
927f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// arcs labeled as non-terminals with the matching non-terminal
928f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst. Currently the ReplaceFst uses the output symbols of the arcs
929f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to determine whether the arc is a non-terminal arc or not. A
930f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// non-terminal can be any label that is not a non-zero terminal label
931f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// in the output alphabet.
932f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
933f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Note that the constructor uses a vector of pair<>. These correspond
934f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to the tuple of non-terminal Label and corresponding Fst. For example
935f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to implement the closure operation we need 2 Fsts. The first root
936f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst is a single Arc on the start State that self loops, it references
937f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the particular machine for which we are performing the closure operation.
938f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
939f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The ReplaceFst class supports an optionally caching arc iterator:
940f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//    ArcIterator< ReplaceFst<A> >
941f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The ReplaceFst need to be built such that it is known to be ilabel
942f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// or olabel sorted (see usage below).
943f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
944f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Observe that Matcher<Fst<A> > will use the optionally caching arc
945f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// iterator when available (Fst is ilabel sorted and matching on the
946f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// input, or Fst is olabel sorted and matching on the output).
947f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In order to obtain the most efficient behaviour, it is recommended
948f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to set 'epsilon_on_replace' to false (this means constructing acceptors
949f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// as transducers with epsilons on the input side of nonterminal arcs)
950f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// and matching on the input side.
951f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
952f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This class attaches interface to implementation and handles
953f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reference counting, delegating most methods to ImplToFst.
954f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T = DefaultReplaceStateTable<A> >
955f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFst : public ImplToFst< ReplaceFstImpl<A, T> > {
956f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
957f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class ArcIterator< ReplaceFst<A, T> >;
958f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class StateIterator< ReplaceFst<A, T> >;
959f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class ReplaceFstMatcher<A, T>;
960f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
961f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
962f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label   Label;
963f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight  Weight;
964f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
965f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CacheState<A> State;
966f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ReplaceFstImpl<A, T> Impl;
967f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
968f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using ImplToFst<Impl>::Properties;
969f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
970f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array,
971f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             Label root)
972f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(new Impl(fst_array, ReplaceFstOptions<A, T>(root))) {}
973f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
974f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array,
975f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const ReplaceFstOptions<A, T> &opts)
976f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(new Impl(fst_array, opts)) {}
977f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
978f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // See Fst<>::Copy() for doc.
979f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFst(const ReplaceFst<A, T>& fst, bool safe = false)
980f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(fst, safe) {}
981f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
982f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Get a copy of this ReplaceFst. See Fst<>::Copy() for further doc.
983f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ReplaceFst<A, T> *Copy(bool safe = false) const {
984f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new ReplaceFst<A, T>(*this, safe);
985f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
986f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
987f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
988f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
989f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
990f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GetImpl()->InitArcIterator(s, data);
991f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
992f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
993f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
994f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((GetImpl()->ArcIteratorFlags() & kArcNoCache) &&
995f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ((match_type == MATCH_INPUT && Properties(kILabelSorted, false)) ||
996f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         (match_type == MATCH_OUTPUT && Properties(kOLabelSorted, false)))) {
997f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return new ReplaceFstMatcher<A, T>(*this, match_type);
998f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
999f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else {
1000f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      VLOG(2) << "Not using replace matcher";
1001f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
1002f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1003f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1004f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1005f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool CyclicDependencies() const {
1006f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return GetImpl()->CyclicDependencies();
1007f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1008f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1009f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
1010f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Makes visible to friends.
1011f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
1012f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1013f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const ReplaceFst<A> &fst);  // disallow
1014f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
1015f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1016f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1017f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ReplaceFst.
1018f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class A, class T>
1019f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< ReplaceFst<A, T> >
1020f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public CacheStateIterator< ReplaceFst<A, T> > {
1021f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
1022f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit StateIterator(const ReplaceFst<A, T> &fst)
1023f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheStateIterator< ReplaceFst<A, T> >(fst, fst.GetImpl()) {}
1024f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1025f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
1026f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(StateIterator);
1027f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
1028f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1029f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1030f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ReplaceFst.
1031f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implements optional caching. It can be used as follows:
1032f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
1033f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   ReplaceFst<A> replace;
1034f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   ArcIterator< ReplaceFst<A> > aiter(replace, s);
1035f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Note: ArcIterator< Fst<A> > is always a caching arc iterator.
1036f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   aiter.SetFlags(kArcNoCache, kArcNoCache);
1037f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Use the arc iterator, no arc will be cached, no state will be expanded.
1038f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // The varied 'kArcValueFlags' can be used to decide which part
1039f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // of arc values needs to be computed.
1040f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   aiter.SetFlags(kArcILabelValue, kArcValueFlags);
1041f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Only want the ilabel for this arc
1042f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   aiter.Value();  // Does not compute the destination state.
1043f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   aiter.Next();
1044f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   aiter.SetFlags(kArcNextStateValue, kArcNextStateValue);
1045f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Want both ilabel and nextstate for that arc
1046f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   aiter.Value();  // Does compute the destination state and inserts it
1047f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//                   // in the replace state table.
1048f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // No Arc has been cached at that point.
1049f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
1050f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T>
1051f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< ReplaceFst<A, T> > {
1052f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
1053f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
1054f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
1055f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1056f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcIterator(const ReplaceFst<A, T> &fst, StateId s)
1057f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : fst_(fst), state_(s), pos_(0), offset_(0), flags_(0), arcs_(0),
1058f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_flags_(0), final_flags_(0) {
1059f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    cache_data_.ref_count = 0;
1060f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    local_data_.ref_count = 0;
1061f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1062f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // If FST does not support optional caching, force caching.
1063f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if(!(fst_.GetImpl()->ArcIteratorFlags() & kArcNoCache) &&
1064f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       !(fst_.GetImpl()->HasArcs(state_)))
1065f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       fst_.GetImpl()->Expand(state_);
1066f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1067f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // If state is already cached, use cached arcs array.
1068f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst_.GetImpl()->HasArcs(state_)) {
1069f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (fst_.GetImpl())->template CacheImpl<A>::InitArcIterator(state_,
1070f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                                               &cache_data_);
1071f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      num_arcs_ = cache_data_.narcs;
1072f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      arcs_ = cache_data_.arcs;      // 'arcs_' is a ptr to the cached arcs.
1073f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_flags_ = kArcValueFlags;  // All the arc member values are valid.
1074f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {  // Otherwise delay decision until Value() is called.
1075f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      tuple_ = fst_.GetImpl()->GetStateTable()->Tuple(state_);
1076f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (tuple_.fst_state == kNoStateId) {
1077f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        num_arcs_ = 0;
1078f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
1079f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // The decision to cache or not to cache has been defered
1080f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // until Value() or SetFlags() is called. However, the arc
1081f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // iterator is set up now to be ready for non-caching in order
1082f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // to keep the Value() method simple and efficient.
1083f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        const Fst<A>* fst = fst_.GetImpl()->GetFst(tuple_.fst_id);
1084f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst->InitArcIterator(tuple_.fst_state, &local_data_);
1085f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // 'arcs_' is a pointer to the arcs in the underlying machine.
1086f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        arcs_ = local_data_.arcs;
1087f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Compute the final arc (but not its destination state)
1088f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // if a final arc is required.
1089f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        bool has_final_arc = fst_.GetImpl()->ComputeFinalArc(
1090f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            tuple_,
1091f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            &final_arc_,
1092f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            kArcValueFlags & ~kArcNextStateValue);
1093f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Set the arc value flags that hold for 'final_arc_'.
1094f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        final_flags_ = kArcValueFlags & ~kArcNextStateValue;
1095f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Compute the number of arcs.
1096f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        num_arcs_ = local_data_.narcs;
1097f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (has_final_arc)
1098f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ++num_arcs_;
1099f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Set the offset between the underlying arc positions and
1100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // the positions in the arc iterator.
1101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        offset_ = num_arcs_ - local_data_.narcs;
1102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Defers the decision to cache or not until Value() or
1103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // SetFlags() is called.
1104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_flags_ = 0;
1105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
1106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~ArcIterator() {
1110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (cache_data_.ref_count)
1111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      --(*cache_data_.ref_count);
1112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (local_data_.ref_count)
1113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      --(*local_data_.ref_count);
1114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void ExpandAndCache() const   {
1117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // TODO(allauzen): revisit this
1118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // fst_.GetImpl()->Expand(state_, tuple_, local_data_);
1119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // (fst_.GetImpl())->CacheImpl<A>*>::InitArcIterator(state_,
1120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    //                                               &cache_data_);
1121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    //
1122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst_.InitArcIterator(state_, &cache_data_);  // Expand and cache state.
1123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    arcs_ = cache_data_.arcs;  // 'arcs_' is a pointer to the cached arcs.
1124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data_flags_ = kArcValueFlags;  // All the arc member values are valid.
1125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    offset_ = 0;  // No offset
1126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Init() {
1130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (flags_ & kArcNoCache) {  // If caching is disabled
1131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // 'arcs_' is a pointer to the arcs in the underlying machine.
1132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      arcs_ = local_data_.arcs;
1133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Set the arcs value flags that hold for 'arcs_'.
1134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_flags_ = kArcWeightValue;
1135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!fst_.GetImpl()->EpsilonOnReplace())
1136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          data_flags_ |= kArcILabelValue;
1137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Set the offset between the underlying arc positions and
1138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // the positions in the arc iterator.
1139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      offset_ = num_arcs_ - local_data_.narcs;
1140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {  // Otherwise, expand and cache
1141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ExpandAndCache();
1142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Done() const { return pos_ >= num_arcs_; }
1146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const A& Value() const {
1148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // If 'data_flags_' was set to 0, non-caching was not requested
1149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!data_flags_) {
1150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // TODO(allauzen): revisit this.
1151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (flags_ & kArcNoCache) {
1152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Should never happen.
1153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        FSTERROR() << "ReplaceFst: inconsistent arc iterator flags";
1154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
1155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ExpandAndCache();  // Expand and cache.
1156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (pos_ - offset_ >= 0) {  // The requested arc is not the 'final' arc.
1159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const A& arc = arcs_[pos_ - offset_];
1160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if ((data_flags_ & flags_) == (flags_ & kArcValueFlags)) {
1161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // If the value flags for 'arc' match the recquired value flags
1162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // then return 'arc'.
1163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return arc;
1164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
1165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Otherwise, compute the corresponding arc on-the-fly.
1166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst_.GetImpl()->ComputeArc(tuple_, arc, &arc_, flags_ & kArcValueFlags);
1167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return arc_;
1168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
1169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {  // The requested arc is the 'final' arc.
1170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if ((final_flags_ & flags_) != (flags_ & kArcValueFlags)) {
1171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // If the arc value flags that hold for the final arc
1172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // do not match the requested value flags, then
1173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // 'final_arc_' needs to be updated.
1174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst_.GetImpl()->ComputeFinalArc(tuple_, &final_arc_,
1175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                    flags_ & kArcValueFlags);
1176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        final_flags_ = flags_ & kArcValueFlags;
1177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
1178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return final_arc_;
1179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Next() { ++pos_; }
1183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t Position() const { return pos_; }
1185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Reset() { pos_ = 0;  }
1187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Seek(size_t pos) { pos_ = pos; }
1189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint32 Flags() const { return flags_; }
1191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetFlags(uint32 f, uint32 mask) {
1193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Update the flags taking into account what flags are supported
1194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // by the Fst.
1195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    flags_ &= ~mask;
1196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    flags_ |= (f & fst_.GetImpl()->ArcIteratorFlags());
1197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // If non-caching is not requested (and caching has not already
1198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // been performed), then flush 'data_flags_' to request caching
1199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // during the next call to Value().
1200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!(flags_ & kArcNoCache) && data_flags_ != kArcValueFlags) {
1201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!fst_.GetImpl()->HasArcs(state_))
1202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         data_flags_ = 0;
1203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // If 'data_flags_' has been flushed but non-caching is requested
1205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // before calling Value(), then set up the iterator for non-caching.
1206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((f & kArcNoCache) && (!data_flags_))
1207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Init();
1208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
1211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const ReplaceFst<A, T> &fst_;           // Reference to the FST
1212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId state_;                         // State in the FST
1213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable typename T::StateTuple tuple_;  // Tuple corresponding to state_
1214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t pos_;             // Current position
1216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable ssize_t offset_;  // Offset between position in iterator and in arcs_
1217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t num_arcs_;        // Number of arcs at state_
1218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint32 flags_;            // Behavorial flags for the arc iterator
1219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable Arc arc_;         // Memory to temporarily store computed arcs
1220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable ArcIteratorData<Arc> cache_data_;  // Arc iterator data in cache
1222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable ArcIteratorData<Arc> local_data_;  // Arc iterator data in local fst
1223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable const A* arcs_;       // Array of arcs
1225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable uint32 data_flags_;   // Arc value flags valid for data in arcs_
1226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable Arc final_arc_;       // Final arc (when required)
1227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable uint32 final_flags_;  // Arc value flags valid for final_arc_
1228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
1230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
1231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T>
1234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ReplaceFstMatcher : public MatcherBase<A> {
1235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
1236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
1237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
1238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
1239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef MultiEpsMatcher<Matcher<Fst<A> > > LocalMatcher;
1240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstMatcher(const ReplaceFst<A, T> &fst, fst::MatchType match_type)
1242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : fst_(fst),
1243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        impl_(fst_.GetImpl()),
1244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        s_(fst::kNoStateId),
1245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        match_type_(match_type),
1246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        current_loop_(false),
1247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        final_arc_(false),
1248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) {
1249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (match_type_ == fst::MATCH_OUTPUT)
1250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      swap(loop_.ilabel, loop_.olabel);
1251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    InitMatchers();
1252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstMatcher(const ReplaceFstMatcher<A, T> &matcher, bool safe = false)
1255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : fst_(matcher.fst_),
1256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        impl_(fst_.GetImpl()),
1257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        s_(fst::kNoStateId),
1258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        match_type_(matcher.match_type_),
1259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        current_loop_(false),
1260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) {
1261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (match_type_ == fst::MATCH_OUTPUT)
1262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      swap(loop_.ilabel, loop_.olabel);
1263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    InitMatchers();
1264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Create a local matcher for each component Fst of replace.
1267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // LocalMatcher is a multi epsilon wrapper matcher. MultiEpsilonMatcher
1268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // is used to match each non-terminal arc, since these non-terminal
1269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // turn into epsilons on recursion.
1270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitMatchers() {
1271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const vector<const Fst<A>*>& fst_array = impl_->fst_array_;
1272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    matcher_.resize(fst_array.size(), 0);
1273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < fst_array.size(); ++i) {
1274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (fst_array[i]) {
1275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        matcher_[i] =
1276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            new LocalMatcher(*fst_array[i], match_type_, kMultiEpsList);
1277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        typename set<Label>::iterator it = impl_->nonterminal_set_.begin();
1279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        for (; it != impl_->nonterminal_set_.end(); ++it) {
1280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          matcher_[i]->AddMultiEpsLabel(*it);
1281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
1282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
1283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ReplaceFstMatcher<A, T> *Copy(bool safe = false) const {
1287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new ReplaceFstMatcher<A, T>(*this, safe);
1288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual ~ReplaceFstMatcher() {
1291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < matcher_.size(); ++i)
1292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete matcher_[i];
1293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual MatchType Type(bool test) const {
1296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (match_type_ == MATCH_NONE)
1297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return match_type_;
1298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    uint64 true_prop =  match_type_ == MATCH_INPUT ?
1300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        kILabelSorted : kOLabelSorted;
1301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    uint64 false_prop = match_type_ == MATCH_INPUT ?
1302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        kNotILabelSorted : kNotOLabelSorted;
1303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    uint64 props = fst_.Properties(true_prop | false_prop, test);
1304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (props & true_prop)
1306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return match_type_;
1307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else if (props & false_prop)
1308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return MATCH_NONE;
1309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
1310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return MATCH_UNKNOWN;
1311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const Fst<A> &GetFst() const {
1314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return fst_;
1315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual uint64 Properties(uint64 props) const {
1318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return props;
1319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
1322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Set the sate from which our matching happens.
1323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void SetState_(StateId s) {
1324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (s_ == s) return;
1325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    s_ = s;
1327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    tuple_ = impl_->GetStateTable()->Tuple(s_);
1328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (tuple_.fst_state == kNoStateId) {
1329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      done_ = true;
1330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
1331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Get current matcher. Used for non epsilon matching
1333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    current_matcher_ = matcher_[tuple_.fst_id];
1334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    current_matcher_->SetState(tuple_.fst_state);
1335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    loop_.nextstate = s_;
1336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    final_arc_ = false;
1338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Search for label, from previous set state. If label == 0, first
1341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // hallucinate and epsilon loop, else use the underlying matcher to
1342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // search for the label or epsilons.
1343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // - Note since the ReplaceFST recursion on non-terminal arcs causes
1344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //   epsilon transitions to be created we use the MultiEpsilonMatcher
1345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //   to search for possible matches of non terminals.
1346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // - If the component Fst reaches a final state we also need to add
1347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //   the exiting final arc.
1348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual bool Find_(Label label) {
1349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool found = false;
1350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    label_ = label;
1351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (label_ == 0 || label_ == kNoLabel) {
1352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Compute loop directly, saving Replace::ComputeArc
1353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (label_ == 0) {
1354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        current_loop_ = true;
1355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        found = true;
1356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
1357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Search for matching multi epsilons
1358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      final_arc_ = impl_->ComputeFinalArc(tuple_, 0);
1359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      found = current_matcher_->Find(kNoLabel) || final_arc_ || found;
1360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
1361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Search on sub machine directly using sub machine matcher.
1362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      found = current_matcher_->Find(label_);
1363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return found;
1365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual bool Done_() const {
1368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return !current_loop_ && !final_arc_ && current_matcher_->Done();
1369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual const Arc& Value_() const {
1372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (current_loop_) {
1373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return loop_;
1374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (final_arc_) {
1376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      impl_->ComputeFinalArc(tuple_, &arc_);
1377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return arc_;
1378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const Arc& component_arc = current_matcher_->Value();
1380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    impl_->ComputeArc(tuple_, component_arc, &arc_);
1381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return arc_;
1382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void Next_() {
1385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (current_loop_) {
1386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      current_loop_ = false;
1387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
1388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (final_arc_) {
1390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      final_arc_ = false;
1391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
1392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
1393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    current_matcher_->Next();
1394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
1395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const ReplaceFst<A, T>& fst_;
1397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstImpl<A, T> *impl_;
1398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  LocalMatcher* current_matcher_;
1399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<LocalMatcher*> matcher_;
1400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId s_;                        // Current state
1402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Label label_;                      // Current label
1403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  MatchType match_type_;             // Supplied by caller
1405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable bool done_;
1406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable bool current_loop_;        // Current arc is the implicit loop
1407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable bool final_arc_;           // Current arc for exiting recursion
1408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable typename T::StateTuple tuple_;  // Tuple corresponding to state_
1409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable Arc arc_;
1410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Arc loop_;
1411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
1412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class T> inline
1414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ReplaceFst<A, T>::InitStateIterator(StateIteratorData<A> *data) const {
1415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  data->base = new StateIterator< ReplaceFst<A, T> >(*this);
1416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
1417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef ReplaceFst<StdArc> StdReplaceFst;
1419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Recursivively replaces arcs in the root Fst with other Fsts.
1422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This version writes the result of replacement to an output MutableFst.
1423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
1424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Replace supports replacement of arcs in one Fst with another
1425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst. This replacement is recursive.  Replace takes an array of
1426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst(s). One Fst represents the root (or topology) machine. The root
1427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst refers to other Fsts by recursively replacing arcs labeled as
1428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// non-terminals with the matching non-terminal Fst. Currently Replace
1429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// uses the output symbols of the arcs to determine whether the arc is
1430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// a non-terminal arc or not. A non-terminal can be any label that is
1431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// not a non-zero terminal label in the output alphabet.  Note that
1432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// input argument is a vector of pair<>. These correspond to the tuple
1433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// of non-terminal Label and corresponding Fst.
1434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
1435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Replace(const vector<pair<typename Arc::Label,
1436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const Fst<Arc>* > >& ifst_array,
1437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             MutableFst<Arc> *ofst, typename Arc::Label root,
1438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             bool epsilon_on_replace) {
1439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReplaceFstOptions<Arc> opts(root, epsilon_on_replace);
1440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  opts.gc_limit = 0;  // Cache only the last state for fastest copy.
1441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  *ofst = ReplaceFst<Arc>(ifst_array, opts);
1442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
1443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
1445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Replace(const vector<pair<typename Arc::Label,
1446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const Fst<Arc>* > >& ifst_array,
1447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             MutableFst<Arc> *ofst, typename Arc::Label root) {
1448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Replace(ifst_array, ofst, root, false);
1449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
1450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
1452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_REPLACE_H__
1454