1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// label_reachable.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Class to determine if a non-epsilon label can be read as the 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// first non-epsilon symbol along some path from a given state. 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_LABEL_REACHABLE_H__ 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_LABEL_REACHABLE_H__ 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 263da1eb108d36da35333b2d655202791af854996bPrzemyslaw Szczepaniak#include <tr1/unordered_map> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map; 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap; 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/accumulator.h> 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/arcsort.h> 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/interval-set.h> 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/state-reachable.h> 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/vector-fst.h> 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Stores shareable data for label reachable class copies. 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <typename L> 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass LabelReachableData { 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef L Label; 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename IntervalSet<L>::Interval Interval; 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit LabelReachableData(bool reach_input, bool keep_relabel_data = true) 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : reach_input_(reach_input), 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson keep_relabel_data_(keep_relabel_data), 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson have_relabel_data_(true), 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson final_label_(kNoLabel) {} 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~LabelReachableData() {} 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ReachInput() const { return reach_input_; } 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector< IntervalSet<L> > *IntervalSets() { return &isets_; } 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<L, L> *Label2Index() { 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!have_relabel_data_) 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "LabelReachableData: no relabeling data"; 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return &label2index_; 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label FinalLabel() { 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (final_label_ == kNoLabel) 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson final_label_ = label2index_[kNoLabel]; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return final_label_; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static LabelReachableData<L> *Read(istream &istrm) { 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LabelReachableData<L> *data = new LabelReachableData<L>(); 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(istrm, &data->reach_input_); 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(istrm, &data->keep_relabel_data_); 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data->have_relabel_data_ = data->keep_relabel_data_; 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (data->keep_relabel_data_) 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(istrm, &data->label2index_); 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(istrm, &data->final_label_); 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(istrm, &data->isets_); 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return data; 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Write(ostream &ostrm) { 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(ostrm, reach_input_); 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(ostrm, keep_relabel_data_); 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (keep_relabel_data_) 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(ostrm, label2index_); 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(ostrm, FinalLabel()); 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(ostrm, isets_); 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int RefCount() const { return ref_count_.count(); } 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int IncrRefCount() { return ref_count_.Incr(); } 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int DecrRefCount() { return ref_count_.Decr(); } 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LabelReachableData() {} 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool reach_input_; // Input or output labels considered? 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool keep_relabel_data_; // Save label2index_ to file? 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool have_relabel_data_; // Using label2index_? 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label final_label_; // Final label 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson RefCounter ref_count_; // Reference count. 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<L, L> label2index_; // Finds index for a label. 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<IntervalSet <L> > isets_; // Interval sets per state. 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(LabelReachableData); 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Tests reachability of labels from a given state. If reach_input = 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// true, then input labels are considered, o.w. output labels are 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// considered. To test for reachability from a state s, first do 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// SetState(s). Then a label l can be reached from state s of FST f 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// iff Reach(r) is true where r = Relabel(l). The relabeling is 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// required to ensure a compact representation of the reachable 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// labels. 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The whole FST can be relabeled instead with Relabel(&f, 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reach_input) so that the test Reach(r) applies directly to the 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// labels of the transformed FST f. The relabeled FST will also be 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// sorted appropriately for composition. 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Reachablity of a final state from state s (via an epsilon path) 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// can be tested with ReachFinal(); 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Reachability can also be tested on the set of labels specified by 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// an arc iterator, useful for FST composition. In particular, 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Reach(aiter, ...) is true if labels on the input (output) side of 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the transitions of the arc iterator, when iter_input is true 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (false), can be reached from the state s. The iterator labels must 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// have already been relabeled. 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// With the arc iterator test of reachability, the begin position, end 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// position and accumulated arc weight of the matches can be 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// returned. The optional template argument controls how reachable arc 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// weights are accumulated. The default uses the semiring 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Plus(). Alternative ones can be used to distribute the weights in 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// composition in various ways. 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class S = DefaultAccumulator<A> > 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass LabelReachable { 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename IntervalSet<Label>::Interval Interval; 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LabelReachable(const Fst<A> &fst, bool reach_input, S *s = 0, 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool keep_relabel_data = true) 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : fst_(new VectorFst<Arc>(fst)), 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_(kNoStateId), 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_(new LabelReachableData<Label>(reach_input, keep_relabel_data)), 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson accumulator_(s ? s : new S()), 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ncalls_(0), 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nintervals_(0), 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_(false) { 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId ins = fst_->NumStates(); 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson TransformFst(); 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FindIntervals(ins); 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete fst_; 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit LabelReachable(LabelReachableData<Label> *data, S *s = 0) 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : fst_(0), 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_(kNoStateId), 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_(data), 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson accumulator_(s ? s : new S()), 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ncalls_(0), 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nintervals_(0), 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_(false) { 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_->IncrRefCount(); 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LabelReachable(const LabelReachable<A, S> &reachable) : 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_(0), 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_(kNoStateId), 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_(reachable.data_), 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson accumulator_(new S(*reachable.accumulator_)), 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ncalls_(0), 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nintervals_(0), 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_(reachable.error_) { 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data_->IncrRefCount(); 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~LabelReachable() { 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!data_->DecrRefCount()) 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete data_; 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete accumulator_; 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ncalls_ > 0) { 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "# of calls: " << ncalls_; 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "# of intervals/call: " << (nintervals_ / ncalls_); 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Relabels w.r.t labels that give compact label sets. 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label Relabel(Label label) { 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label == 0 || error_) 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return label; 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<Label, Label> &label2index = *data_->Label2Index(); 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label &relabel = label2index[label]; 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!relabel) // Add new label 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson relabel = label2index.size() + 1; 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return relabel; 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Relabels Fst w.r.t to labels that give compact label sets. 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Relabel(MutableFst<Arc> *fst, bool relabel_input) { 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateIterator< MutableFst<Arc> > siter(*fst); 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !siter.Done(); siter.Next()) { 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = siter.Value(); 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (MutableArcIterator< MutableFst<Arc> > aiter(fst, s); 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) { 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc = aiter.Value(); 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (relabel_input) 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc.ilabel = Relabel(arc.ilabel); 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc.olabel = Relabel(arc.olabel); 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.SetValue(arc); 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (relabel_input) { 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcSort(fst, ILabelCompare<Arc>()); 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetInputSymbols(0); 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcSort(fst, OLabelCompare<Arc>()); 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetOutputSymbols(0); 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Returns relabeling pairs (cf. relabel.h::Relabel()). 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If 'avoid_collisions' is true, extra pairs are added to 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // ensure no collisions when relabeling automata that have 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // labels unseen here. 239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void RelabelPairs(vector<pair<Label, Label> > *pairs, 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool avoid_collisions = false) { 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pairs->clear(); 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<Label, Label> &label2index = *data_->Label2Index(); 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Maps labels to their new values in [1, label2index().size()] 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename unordered_map<Label, Label>::const_iterator 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson it = label2index.begin(); it != label2index.end(); ++it) 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it->second != data_->FinalLabel()) 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pairs->push_back(pair<Label, Label>(it->first, it->second)); 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (avoid_collisions) { 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Ensures any label in [1, label2index().size()] is mapped either 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // by the above step or to label2index() + 1 (to avoid collisions). 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (int i = 1; i <= label2index.size(); ++i) { 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename unordered_map<Label, Label>::const_iterator 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson it = label2index.find(i); 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it == label2index.end() || it->second == data_->FinalLabel()) 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pairs->push_back(pair<Label, Label>(i, label2index.size() + 1)); 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set current state. Optionally set state associated 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // with arc iterator to be passed to Reach. 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetState(StateId s, StateId aiter_s = kNoStateId) { 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_ = s; 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (aiter_s != kNoStateId) { 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson accumulator_->SetState(aiter_s); 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (accumulator_->Error()) error_ = true; 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Can reach this label from current state? 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Original labels must be transformed by the Relabel methods above. 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Reach(Label label) { 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label == 0 || error_) 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return isets[s_].Member(label); 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Can reach final state (via epsilon transitions) from this state? 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ReachFinal() { 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (error_) return false; 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return isets[s_].Member(data_->FinalLabel()); 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Initialize with secondary FST to be used with Reach(Iterator,...). 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If copy is true, then 'fst' is a copy of the FST used in the 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // previous call to this method (useful to avoid unnecessary updates). 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class F> 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void ReachInit(const F &fst, bool copy = false) { 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson accumulator_->Init(fst, copy); 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (accumulator_->Error()) error_ = true; 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Can reach any arc iterator label between iterator positions 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // aiter_begin and aiter_end? If aiter_input = true, then iterator 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // input labels are considered, o.w. output labels are considered. 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Arc iterator labels must be transformed by the Relabel methods 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // above. If compute_weight is true, user may call ReachWeight(). 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class Iterator> 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Reach(Iterator *aiter, ssize_t aiter_begin, 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t aiter_end, bool aiter_input, bool compute_weight) { 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (error_) return false; 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Interval> *intervals = isets[s_].Intervals(); 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++ncalls_; 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nintervals_ += intervals->size(); 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_begin_ = -1; 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_end_ = -1; 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_weight_ = Weight::Zero(); 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint32 flags = aiter->Flags(); // save flags to restore them on exit 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(kArcNoCache, kArcNoCache); // make caching optional 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->Seek(aiter_begin); 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (2 * (aiter_end - aiter_begin) < intervals->size()) { 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Check each arc against intervals. 320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set arc iterator flags to only compute the ilabel or olabel values, 321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // since they are the only values required for most of the arcs processed. 322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, 323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kArcValueFlags); 324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label reach_label = kNoLabel; 325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ssize_t aiter_pos = aiter_begin; 326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter_pos < aiter_end; aiter->Next(), ++aiter_pos) { 327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A &arc = aiter->Value(); 328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label label = aiter_input ? arc.ilabel : arc.olabel; 329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label == reach_label || Reach(label)) { 330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_label = label; 331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (reach_begin_ < 0) 332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_begin_ = aiter_pos; 333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_end_ = aiter_pos + 1; 334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (compute_weight) { 335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!(aiter->Flags() & kArcWeightValue)) { 336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If the 'arc.weight' wasn't computed by the call 337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // to 'aiter->Value()' above, we need to call 338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'aiter->Value()' again after having set the arc iterator 339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // flags to compute the arc weight value. 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(kArcWeightValue, kArcValueFlags); 341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A &arcb = aiter->Value(); 342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Call the accumulator. 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_weight_ = accumulator_->Sum(reach_weight_, arcb.weight); 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Only ilabel or olabel required to process the following 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // arcs. 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kArcValueFlags); 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Call the accumulator. 350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_weight_ = accumulator_->Sum(reach_weight_, arc.weight); 351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Check each interval against arcs 357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t begin_low, end_low = aiter_begin; 358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename vector<Interval>::const_iterator 359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iiter = intervals->begin(); 360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iiter != intervals->end(); ++iiter) { 361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson begin_low = LowerBound(aiter, end_low, aiter_end, 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter_input, iiter->begin); 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson end_low = LowerBound(aiter, begin_low, aiter_end, 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter_input, iiter->end); 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (end_low - begin_low > 0) { 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (reach_begin_ < 0) 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_begin_ = begin_low; 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_end_ = end_low; 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (compute_weight) { 370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(kArcWeightValue, kArcValueFlags); 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reach_weight_ = accumulator_->Sum(reach_weight_, aiter, 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson begin_low, end_low); 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(flags, kArcFlags); // restore original flag values 379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return reach_begin_ >= 0; 380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Returns iterator position of first matching arc. 383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t ReachBegin() const { return reach_begin_; } 384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Returns iterator position one past last matching arc. 386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t ReachEnd() const { return reach_end_; } 387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Return the sum of the weights for matching arcs. 389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Valid only if compute_weight was true in Reach() call. 390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight ReachWeight() const { return reach_weight_; } 391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Access to the relabeling map. Excludes epsilon (0) label but 393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // includes kNoLabel that is used internally for super-final 394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // transitons. 395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const unordered_map<Label, Label>& Label2Index() const { 396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *data_->Label2Index(); 397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LabelReachableData<Label> *GetData() const { return data_; } 400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Error() const { return error_ || accumulator_->Error(); } 402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Redirects labeled arcs (input or output labels determined by 405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // ReachInput()) to new label-specific final states. Each original 406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // final state is redirected via a transition labeled with kNoLabel 407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // to a new kNoLabel-specific final state. Creates super-initial 408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // state for all states with zero in-degree. 409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void TransformFst() { 410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId ins = fst_->NumStates(); 411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId ons = ins; 412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<ssize_t> indeg(ins, 0); 414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Redirects labeled arcs to new final states. 416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateId s = 0; s < ins; ++s) { 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (MutableArcIterator< VectorFst<Arc> > aiter(fst_, s); 418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) { 420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc = aiter.Value(); 421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label label = data_->ReachInput() ? arc.ilabel : arc.olabel; 422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label) { 423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label2state_.find(label) == label2state_.end()) { 424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson label2state_[label] = ons; 425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson indeg.push_back(0); 426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++ons; 427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc.nextstate = label2state_[label]; 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.SetValue(arc); 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++indeg[arc.nextstate]; // Finds in-degrees for next step. 432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Redirects final weights to new final state. 435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight final = fst_->Final(s); 436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (final != Weight::Zero()) { 437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label2state_.find(kNoLabel) == label2state_.end()) { 438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson label2state_[kNoLabel] = ons; 439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson indeg.push_back(0); 440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++ons; 441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc(kNoLabel, kNoLabel, final, label2state_[kNoLabel]); 443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_->AddArc(s, arc); 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++indeg[arc.nextstate]; // Finds in-degrees for next step. 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_->SetFinal(s, Weight::Zero()); 447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Add new final states to Fst. 451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (fst_->NumStates() < ons) { 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = fst_->AddState(); 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_->SetFinal(s, Weight::One()); 454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Creates a super-initial state for all states with zero in-degree. 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId start = fst_->AddState(); 458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_->SetStart(start); 459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateId s = 0; s < start; ++s) { 460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (indeg[s] == 0) { 461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc(0, 0, Weight::One(), s); 462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_->AddArc(start, arc); 463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void FindIntervals(StateId ins) { 468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateReachable<A, Label> state_reachable(*fst_); 469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (state_reachable.Error()) { 470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Label> &state2index = state_reachable.State2Index(); 475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); 476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson isets = state_reachable.IntervalSets(); 477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson isets.resize(ins); 478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<Label, Label> &label2index = *data_->Label2Index(); 480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename unordered_map<Label, StateId>::const_iterator 481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson it = label2state_.begin(); 482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson it != label2state_.end(); 483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++it) { 484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label l = it->first; 485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = it->second; 486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label i = state2index[s]; 487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson label2index[l] = i; 488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson label2state_.clear(); 490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson double nintervals = 0; 492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t non_intervals = 0; 493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ssize_t s = 0; s < ins; ++s) { 494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nintervals += isets[s].Size(); 495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (isets[s].Size() > 1) { 496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++non_intervals; 497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(3) << "state: " << s << " # of intervals: " << isets[s].Size(); 498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "# of states: " << ins; 501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "# of intervals: " << nintervals; 502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "# of intervals/state: " << nintervals/ins; 503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "# of non-interval states: " << non_intervals; 504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class Iterator> 507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t LowerBound(Iterator *aiter, ssize_t aiter_begin, 508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t aiter_end, bool aiter_input, 509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label match_label) const { 510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Only need to compute the ilabel or olabel of arcs when 511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // performing the binary search. 512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, 513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kArcValueFlags); 514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t low = aiter_begin; 515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t high = aiter_end; 516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (low < high) { 517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t mid = (low + high) / 2; 518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->Seek(mid); 519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label label = aiter_input ? 520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->Value().ilabel : aiter->Value().olabel; 521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label > match_label) { 522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson high = mid; 523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (label < match_label) { 524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson low = mid + 1; 525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Find first matching label (when non-deterministic) 527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ssize_t i = mid; i > low; --i) { 528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->Seek(i - 1); 529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson label = aiter_input ? aiter->Value().ilabel : aiter->Value().olabel; 530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (label != match_label) { 531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->Seek(i); 532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(kArcValueFlags, kArcValueFlags); 533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return i; 534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(kArcValueFlags, kArcValueFlags); 537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return low; 538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->Seek(low); 541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter->SetFlags(kArcValueFlags, kArcValueFlags); 542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return low; 543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VectorFst<Arc> *fst_; 546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s_; // Current state 547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<Label, StateId> label2state_; // Finds final state for a label 548f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 549f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t reach_begin_; // Iterator pos of first match 550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t reach_end_; // Iterator pos after last match 551f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight reach_weight_; // Gives weight sum of arc iterator 552f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // arcs with reachable labels. 553f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LabelReachableData<Label> *data_; // Shareable data between copies 554f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson S *accumulator_; // Sums arc weights 555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 556f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson double ncalls_; 557f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson double nintervals_; 558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool error_; 559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const LabelReachable<A, S> &); // Disallow 561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_LABEL_REACHABLE_H__ 566