1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// label_reachable.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to determine if a non-epsilon label can be read as the
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// first non-epsilon symbol along some path from a given state.
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_LABEL_REACHABLE_H__
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_LABEL_REACHABLE_H__
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
263da1eb108d36da35333b2d655202791af854996bPrzemyslaw Szczepaniak#include <tr1/unordered_map>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map;
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap;
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/accumulator.h>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/arcsort.h>
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/interval-set.h>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/state-reachable.h>
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/vector-fst.h>
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Stores shareable data for label reachable class copies.
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename L>
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass LabelReachableData {
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef L Label;
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename IntervalSet<L>::Interval Interval;
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit LabelReachableData(bool reach_input, bool keep_relabel_data = true)
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : reach_input_(reach_input),
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        keep_relabel_data_(keep_relabel_data),
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        have_relabel_data_(true),
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        final_label_(kNoLabel) {}
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~LabelReachableData() {}
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ReachInput() const { return reach_input_; }
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector< IntervalSet<L> > *IntervalSets() { return &isets_; }
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_map<L, L> *Label2Index() {
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!have_relabel_data_)
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "LabelReachableData: no relabeling data";
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return &label2index_;
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Label FinalLabel() {
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (final_label_ == kNoLabel)
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      final_label_ = label2index_[kNoLabel];
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return final_label_;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static LabelReachableData<L> *Read(istream &istrm) {
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LabelReachableData<L> *data = new LabelReachableData<L>();
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ReadType(istrm, &data->reach_input_);
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ReadType(istrm, &data->keep_relabel_data_);
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data->have_relabel_data_ = data->keep_relabel_data_;
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data->keep_relabel_data_)
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ReadType(istrm, &data->label2index_);
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ReadType(istrm, &data->final_label_);
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ReadType(istrm, &data->isets_);
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return data;
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Write(ostream &ostrm) {
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(ostrm, reach_input_);
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(ostrm, keep_relabel_data_);
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (keep_relabel_data_)
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      WriteType(ostrm, label2index_);
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(ostrm, FinalLabel());
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(ostrm, isets_);
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int RefCount() const { return ref_count_.count(); }
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int IncrRefCount() { return ref_count_.Incr(); }
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int DecrRefCount() { return ref_count_.Decr(); }
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  LabelReachableData() {}
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool reach_input_;                  // Input or output labels considered?
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool keep_relabel_data_;            // Save label2index_ to file?
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool have_relabel_data_;            // Using label2index_?
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Label final_label_;                 // Final label
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  RefCounter ref_count_;              // Reference count.
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_map<L, L> label2index_;        // Finds index for a label.
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<IntervalSet <L> > isets_;    // Interval sets per state.
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(LabelReachableData);
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Tests reachability of labels from a given state. If reach_input =
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// true, then input labels are considered, o.w. output labels are
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// considered. To test for reachability from a state s, first do
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// SetState(s). Then a label l can be reached from state s of FST f
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// iff Reach(r) is true where r = Relabel(l). The relabeling is
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// required to ensure a compact representation of the reachable
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// labels.
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The whole FST can be relabeled instead with Relabel(&f,
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reach_input) so that the test Reach(r) applies directly to the
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// labels of the transformed FST f. The relabeled FST will also be
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// sorted appropriately for composition.
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Reachablity of a final state from state s (via an epsilon path)
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// can be tested with ReachFinal();
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Reachability can also be tested on the set of labels specified by
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an arc iterator, useful for FST composition.  In particular,
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Reach(aiter, ...) is true if labels on the input (output) side of
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the transitions of the arc iterator, when iter_input is true
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (false), can be reached from the state s. The iterator labels must
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// have already been relabeled.
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// With the arc iterator test of reachability, the begin position, end
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// position and accumulated arc weight of the matches can be
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// returned. The optional template argument controls how reachable arc
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// weights are accumulated.  The default uses the semiring
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Plus(). Alternative ones can be used to distribute the weights in
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// composition in various ways.
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class S = DefaultAccumulator<A> >
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass LabelReachable {
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename IntervalSet<Label>::Interval Interval;
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  LabelReachable(const Fst<A> &fst, bool reach_input, S *s = 0,
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 bool keep_relabel_data = true)
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : fst_(new VectorFst<Arc>(fst)),
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        s_(kNoStateId),
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_(new LabelReachableData<Label>(reach_input, keep_relabel_data)),
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        accumulator_(s ? s : new S()),
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ncalls_(0),
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nintervals_(0),
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        error_(false) {
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId ins = fst_->NumStates();
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    TransformFst();
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FindIntervals(ins);
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete fst_;
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit LabelReachable(LabelReachableData<Label> *data, S *s = 0)
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : fst_(0),
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      s_(kNoStateId),
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_(data),
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      accumulator_(s ? s : new S()),
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ncalls_(0),
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      nintervals_(0),
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_(false) {
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data_->IncrRefCount();
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  LabelReachable(const LabelReachable<A, S> &reachable) :
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst_(0),
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      s_(kNoStateId),
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_(reachable.data_),
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      accumulator_(new S(*reachable.accumulator_)),
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ncalls_(0),
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      nintervals_(0),
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_(reachable.error_) {
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data_->IncrRefCount();
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~LabelReachable() {
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!data_->DecrRefCount())
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete data_;
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete accumulator_;
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ncalls_ > 0) {
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      VLOG(2) << "# of calls: " << ncalls_;
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      VLOG(2) << "# of intervals/call: " << (nintervals_ / ncalls_);
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Relabels w.r.t labels that give compact label sets.
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Label Relabel(Label label) {
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (label == 0 || error_)
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return label;
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    unordered_map<Label, Label> &label2index = *data_->Label2Index();
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Label &relabel = label2index[label];
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!relabel)  // Add new label
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      relabel = label2index.size() + 1;
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return relabel;
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Relabels Fst w.r.t to labels that give compact label sets.
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Relabel(MutableFst<Arc> *fst, bool relabel_input) {
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (StateIterator< MutableFst<Arc> > siter(*fst);
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !siter.Done(); siter.Next()) {
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId s = siter.Value();
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (MutableArcIterator< MutableFst<Arc> > aiter(fst, s);
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           !aiter.Done();
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           aiter.Next()) {
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Arc arc = aiter.Value();
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (relabel_input)
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          arc.ilabel = Relabel(arc.ilabel);
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        else
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          arc.olabel = Relabel(arc.olabel);
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        aiter.SetValue(arc);
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (relabel_input) {
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ArcSort(fst, ILabelCompare<Arc>());
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst->SetInputSymbols(0);
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ArcSort(fst, OLabelCompare<Arc>());
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst->SetOutputSymbols(0);
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Returns relabeling pairs (cf. relabel.h::Relabel()).
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If 'avoid_collisions' is true, extra pairs are added to
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // ensure no collisions when relabeling automata that have
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // labels unseen here.
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void RelabelPairs(vector<pair<Label, Label> > *pairs,
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                    bool avoid_collisions = false) {
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    pairs->clear();
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    unordered_map<Label, Label> &label2index = *data_->Label2Index();
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Maps labels to their new values in [1, label2index().size()]
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (typename unordered_map<Label, Label>::const_iterator
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             it = label2index.begin(); it != label2index.end(); ++it)
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (it->second != data_->FinalLabel())
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        pairs->push_back(pair<Label, Label>(it->first, it->second));
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (avoid_collisions) {
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Ensures any label in [1, label2index().size()] is mapped either
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // by the above step or to label2index() + 1 (to avoid collisions).
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (int i = 1; i <= label2index.size(); ++i) {
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        typename unordered_map<Label, Label>::const_iterator
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            it = label2index.find(i);
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (it == label2index.end() || it->second == data_->FinalLabel())
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          pairs->push_back(pair<Label, Label>(i, label2index.size() + 1));
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Set current state. Optionally set state associated
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // with arc iterator to be passed to Reach.
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetState(StateId s, StateId aiter_s = kNoStateId) {
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    s_ = s;
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (aiter_s != kNoStateId) {
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      accumulator_->SetState(aiter_s);
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (accumulator_->Error()) error_ = true;
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Can reach this label from current state?
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Original labels must be transformed by the Relabel methods above.
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Reach(Label label) {
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (label == 0 || error_)
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return isets[s_].Member(label);
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Can reach final state (via epsilon transitions) from this state?
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ReachFinal() {
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (error_) return false;
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return isets[s_].Member(data_->FinalLabel());
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Initialize with secondary FST to be used with Reach(Iterator,...).
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // If copy is true, then 'fst' is a copy of the FST used in the
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // previous call to this method (useful to avoid unnecessary updates).
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class F>
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void ReachInit(const F &fst, bool copy = false) {
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    accumulator_->Init(fst, copy);
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (accumulator_->Error()) error_ = true;
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Can reach any arc iterator label between iterator positions
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // aiter_begin and aiter_end?  If aiter_input = true, then iterator
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // input labels are considered, o.w. output labels are considered.
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Arc iterator labels must be transformed by the Relabel methods
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // above. If compute_weight is true, user may call ReachWeight().
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Reach(Iterator *aiter, ssize_t aiter_begin,
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             ssize_t aiter_end, bool aiter_input, bool compute_weight) {
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (error_) return false;
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const vector<Interval> *intervals = isets[s_].Intervals();
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ++ncalls_;
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nintervals_ += intervals->size();
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    reach_begin_ = -1;
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    reach_end_ = -1;
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    reach_weight_ = Weight::Zero();
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    uint32 flags = aiter->Flags();  // save flags to restore them on exit
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    aiter->SetFlags(kArcNoCache, kArcNoCache);  // make caching optional
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    aiter->Seek(aiter_begin);
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (2 * (aiter_end - aiter_begin) < intervals->size()) {
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Check each arc against intervals.
320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Set arc iterator flags to only compute the ilabel or olabel values,
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // since they are the only values required for most of the arcs processed.
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      kArcValueFlags);
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label reach_label = kNoLabel;
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (ssize_t aiter_pos = aiter_begin;
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           aiter_pos < aiter_end; aiter->Next(), ++aiter_pos) {
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        const A &arc = aiter->Value();
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Label label = aiter_input ? arc.ilabel : arc.olabel;
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (label == reach_label || Reach(label)) {
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          reach_label = label;
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (reach_begin_ < 0)
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            reach_begin_ = aiter_pos;
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          reach_end_ = aiter_pos + 1;
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (compute_weight) {
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            if (!(aiter->Flags() & kArcWeightValue)) {
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              // If the 'arc.weight' wasn't computed by the call
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              // to 'aiter->Value()' above, we need to call
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              // 'aiter->Value()' again after having set the arc iterator
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              // flags to compute the arc weight value.
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              aiter->SetFlags(kArcWeightValue, kArcValueFlags);
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              const A &arcb = aiter->Value();
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              // Call the accumulator.
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              reach_weight_ = accumulator_->Sum(reach_weight_, arcb.weight);
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              // Only ilabel or olabel required to process the following
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              // arcs.
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                              kArcValueFlags);
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            } else {
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              // Call the accumulator.
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              reach_weight_ = accumulator_->Sum(reach_weight_, arc.weight);
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            }
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Check each interval against arcs
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ssize_t begin_low, end_low = aiter_begin;
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (typename vector<Interval>::const_iterator
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               iiter = intervals->begin();
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           iiter != intervals->end(); ++iiter) {
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        begin_low = LowerBound(aiter, end_low, aiter_end,
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               aiter_input, iiter->begin);
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        end_low = LowerBound(aiter, begin_low, aiter_end,
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                             aiter_input, iiter->end);
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (end_low - begin_low > 0) {
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (reach_begin_ < 0)
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            reach_begin_ = begin_low;
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          reach_end_ = end_low;
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (compute_weight) {
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            aiter->SetFlags(kArcWeightValue, kArcValueFlags);
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            reach_weight_ = accumulator_->Sum(reach_weight_, aiter,
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                              begin_low, end_low);
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    aiter->SetFlags(flags, kArcFlags);  // restore original flag values
379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return reach_begin_ >= 0;
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Returns iterator position of first matching arc.
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t ReachBegin() const { return reach_begin_;  }
384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Returns iterator position one past last matching arc.
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t ReachEnd() const { return reach_end_; }
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Return the sum of the weights for matching arcs.
389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Valid only if compute_weight was true in Reach() call.
390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight ReachWeight() const { return reach_weight_; }
391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Access to the relabeling map. Excludes epsilon (0) label but
393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // includes kNoLabel that is used internally for super-final
394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // transitons.
395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const unordered_map<Label, Label>& Label2Index() const {
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return *data_->Label2Index();
397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  LabelReachableData<Label> *GetData() const { return data_; }
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Error() const { return error_ || accumulator_->Error(); }
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Redirects labeled arcs (input or output labels determined by
405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // ReachInput()) to new label-specific final states.  Each original
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // final state is redirected via a transition labeled with kNoLabel
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // to a new kNoLabel-specific final state.  Creates super-initial
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // state for all states with zero in-degree.
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void TransformFst() {
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId ins = fst_->NumStates();
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId ons = ins;
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<ssize_t> indeg(ins, 0);
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Redirects labeled arcs to new final states.
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (StateId s = 0; s < ins; ++s) {
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (MutableArcIterator< VectorFst<Arc> > aiter(fst_, s);
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           !aiter.Done();
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           aiter.Next()) {
420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Arc arc = aiter.Value();
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Label label = data_->ReachInput() ? arc.ilabel : arc.olabel;
422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (label) {
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (label2state_.find(label) == label2state_.end()) {
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            label2state_[label] = ons;
425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            indeg.push_back(0);
426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            ++ons;
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          arc.nextstate = label2state_[label];
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          aiter.SetValue(arc);
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++indeg[arc.nextstate];      // Finds in-degrees for next step.
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Redirects final weights to new final state.
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Weight final = fst_->Final(s);
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (final != Weight::Zero()) {
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (label2state_.find(kNoLabel) == label2state_.end()) {
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          label2state_[kNoLabel] = ons;
439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          indeg.push_back(0);
440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ++ons;
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Arc arc(kNoLabel, kNoLabel, final, label2state_[kNoLabel]);
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst_->AddArc(s, arc);
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++indeg[arc.nextstate];      // Finds in-degrees for next step.
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst_->SetFinal(s, Weight::Zero());
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Add new final states to Fst.
451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    while (fst_->NumStates() < ons) {
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId s = fst_->AddState();
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst_->SetFinal(s, Weight::One());
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Creates a super-initial state for all states with zero in-degree.
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId start = fst_->AddState();
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst_->SetStart(start);
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (StateId s = 0; s < start; ++s) {
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (indeg[s] == 0) {
461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Arc arc(0, 0, Weight::One(), s);
462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst_->AddArc(start, arc);
463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FindIntervals(StateId ins) {
468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateReachable<A, Label> state_reachable(*fst_);
469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (state_reachable.Error()) {
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<Label> &state2index = state_reachable.State2Index();
475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    isets = state_reachable.IntervalSets();
477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    isets.resize(ins);
478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    unordered_map<Label, Label> &label2index = *data_->Label2Index();
480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (typename unordered_map<Label, StateId>::const_iterator
481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             it = label2state_.begin();
482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         it != label2state_.end();
483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         ++it) {
484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label l = it->first;
485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId s = it->second;
486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label i = state2index[s];
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      label2index[l] = i;
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    label2state_.clear();
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    double nintervals = 0;
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ssize_t non_intervals = 0;
493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ssize_t s = 0; s < ins; ++s) {
494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      nintervals += isets[s].Size();
495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (isets[s].Size() > 1) {
496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++non_intervals;
497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        VLOG(3) << "state: " << s << " # of intervals: " << isets[s].Size();
498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    VLOG(2) << "# of states: " << ins;
501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    VLOG(2) << "# of intervals: " << nintervals;
502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    VLOG(2) << "# of intervals/state: " << nintervals/ins;
503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    VLOG(2) << "# of non-interval states: " << non_intervals;
504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t LowerBound(Iterator *aiter, ssize_t aiter_begin,
508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     ssize_t aiter_end, bool aiter_input,
509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     Label match_label) const {
510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Only need to compute the ilabel or olabel of arcs when
511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // performing the binary search.
512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    aiter->SetFlags(aiter_input ?  kArcILabelValue : kArcOLabelValue,
513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                    kArcValueFlags);
514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ssize_t low = aiter_begin;
515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ssize_t high = aiter_end;
516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    while (low < high) {
517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ssize_t mid = (low + high) / 2;
518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      aiter->Seek(mid);
519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label label = aiter_input ?
520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          aiter->Value().ilabel : aiter->Value().olabel;
521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (label > match_label) {
522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        high = mid;
523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else if (label < match_label) {
524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        low = mid + 1;
525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        // Find first matching label (when non-deterministic)
527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        for (ssize_t i = mid; i > low; --i) {
528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          aiter->Seek(i - 1);
529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          label = aiter_input ? aiter->Value().ilabel : aiter->Value().olabel;
530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (label != match_label) {
531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            aiter->Seek(i);
532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            aiter->SetFlags(kArcValueFlags, kArcValueFlags);
533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            return i;
534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        aiter->SetFlags(kArcValueFlags, kArcValueFlags);
537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return low;
538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    aiter->Seek(low);
541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    aiter->SetFlags(kArcValueFlags, kArcValueFlags);
542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return low;
543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  VectorFst<Arc> *fst_;
546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId s_;                             // Current state
547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_map<Label, StateId> label2state_;  // Finds final state for a label
548f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
549f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t reach_begin_;                   // Iterator pos of first match
550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t reach_end_;                     // Iterator pos after last match
551f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight reach_weight_;                   // Gives weight sum of arc iterator
552f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                          // arcs with reachable labels.
553f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  LabelReachableData<Label> *data_;       // Shareable data between copies
554f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  S *accumulator_;                        // Sums arc weights
555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
556f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  double ncalls_;
557f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  double nintervals_;
558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool error_;
559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const LabelReachable<A, S> &);   // Disallow
561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_LABEL_REACHABLE_H__
566