1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// synchronize.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Synchronize an FST with bounded delay. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_SYNCHRONIZE_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_SYNCHRONIZE_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_map> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap; 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_set> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_set; 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multiset; 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility> 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair; 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h> 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h> 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef CacheOptions SynchronizeFstOptions; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementation class for SynchronizeFst 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass SynchronizeFstImpl 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheImpl<A> { 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetType; 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetProperties; 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetInputSymbols; 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetOutputSymbols; 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::PushArc; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasArcs; 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasFinal; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasStart; 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::SetArcs; 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::SetFinal; 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::SetStart; 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef basic_string<Label> String; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson struct Element { 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Element() {} 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Element(StateId s, const String *i, const String *o) 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : state(s), istring(i), ostring(o) {} 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId state; // Input state Id 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *istring; // Residual input labels 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *ostring; // Residual output labels 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Residual strings are represented by const pointers to 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // basic_string<Label> and are stored in a hash_set. The pointed 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // memory is owned by the hash_set string_set_. 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SynchronizeFstImpl(const Fst<A> &fst, const SynchronizeFstOptions &opts) 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheImpl<A>(opts), fst_(fst.Copy()) { 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetType("synchronize"); 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 props = fst.Properties(kFstProperties, false); 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(SynchronizeProperties(props), kCopyProperties); 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetInputSymbols(fst.InputSymbols()); 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetOutputSymbols(fst.OutputSymbols()); 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SynchronizeFstImpl(const SynchronizeFstImpl &impl) 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheImpl<A>(impl), 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_(impl.fst_->Copy(true)) { 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetType("synchronize"); 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(impl.Properties(), kCopyProperties); 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetInputSymbols(impl.InputSymbols()); 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetOutputSymbols(impl.OutputSymbols()); 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~SynchronizeFstImpl() { 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete fst_; 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Extract pointers from the hash set 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<const String*> strings; 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename StringSet::iterator it = string_set_.begin(); 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; it != string_set_.end(); ++it) 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strings.push_back(*it); 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Free the extracted pointers 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < strings.size(); ++i) 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete strings[i]; 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId Start() { 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasStart()) { 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = fst_->Start(); 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s == kNoStateId) 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return kNoStateId; 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *empty = FindString(new String()); 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId start = FindState(Element(fst_->Start(), empty, empty)); 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetStart(start); 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::Start(); 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight Final(StateId s) { 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasFinal(s)) { 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Element &e = elements_[s]; 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state); 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((w != Weight::Zero()) && (e.istring)->empty() && (e.ostring)->empty()) 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinal(s, w); 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinal(s, Weight::Zero()); 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::Final(s); 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumArcs(StateId s) { 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumArcs(s); 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumInputEpsilons(StateId s) { 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumInputEpsilons(s); 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumOutputEpsilons(StateId s) { 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumOutputEpsilons(s); 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 Properties() const { return Properties(kFstProperties); } 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Set error if found; return FST impl properties. 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 Properties(uint64 mask) const { 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((mask & kError) && fst_->Properties(kError, false)) 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(kError, kError); 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return FstImpl<Arc>::Properties(mask); 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void InitArcIterator(StateId s, ArcIteratorData<A> *data) { 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(s); 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CacheImpl<A>::InitArcIterator(s, data); 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Returns the first character of the string obtained by 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // concatenating s and l. 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label Car(const String *s, Label l = 0) const { 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!s->empty()) 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return (*s)[0]; 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return l; 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Computes the residual string obtained by removing the first 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // character in the concatenation of s and l. 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *Cdr(const String *s, Label l = 0) { 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson String *r = new String(); 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (int i = 1; i < s->size(); ++i) 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson r->push_back((*s)[i]); 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (l && !(s->empty())) r->push_back(l); 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return FindString(r); 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Computes the concatenation of s and l. 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *Concat(const String *s, Label l = 0) { 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson String *r = new String(); 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (int i = 0; i < s->size(); ++i) 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson r->push_back((*s)[i]); 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (l) r->push_back(l); 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return FindString(r); 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Tests if the concatenation of s and l is empty 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Empty(const String *s, Label l = 0) const { 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s->empty()) 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return l == 0; 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Finds the string pointed by s in the hash set. Transfers the 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // pointer ownership to the hash set. 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *FindString(const String *s) { 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename StringSet::iterator it = string_set_.find(s); 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it != string_set_.end()) { 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete s; 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return (*it); 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string_set_.insert(s); 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return s; 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Finds state corresponding to an element. Creates new state 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // if element not found. 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId FindState(const Element &e) { 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename ElementMap::iterator eit = element_map_.find(e); 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (eit != element_map_.end()) { 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return (*eit).second; 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = elements_.size(); 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson elements_.push_back(e); 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson element_map_.insert(pair<const Element, StateId>(e, s)); 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return s; 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Computes the outgoing transitions from a state, creating new destination 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // states as needed. 239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Expand(StateId s) { 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Element e = elements_[s]; 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (e.state != kNoStateId) 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator< Fst<A> > ait(*fst_, e.state); 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !ait.Done(); 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ait.Next()) { 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A &arc = ait.Value(); 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!Empty(e.istring, arc.ilabel) && !Empty(e.ostring, arc.olabel)) { 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *istring = Cdr(e.istring, arc.ilabel); 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *ostring = Cdr(e.ostring, arc.olabel); 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId d = FindState(Element(arc.nextstate, istring, ostring)); 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PushArc(s, Arc(Car(e.istring, arc.ilabel), 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Car(e.ostring, arc.olabel), arc.weight, d)); 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *istring = Concat(e.istring, arc.ilabel); 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *ostring = Concat(e.ostring, arc.olabel); 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId d = FindState(Element(arc.nextstate, istring, ostring)); 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PushArc(s, Arc(0 , 0, arc.weight, d)); 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state); 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((w != Weight::Zero()) && 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ((e.istring)->size() + (e.ostring)->size() > 0)) { 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *istring = Cdr(e.istring); 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const String *ostring = Cdr(e.ostring); 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId d = FindState(Element(kNoStateId, istring, ostring)); 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PushArc(s, Arc(Car(e.istring), Car(e.ostring), w, d)); 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetArcs(s); 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Equality function for Elements, assume strings have been hashed. 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class ElementEqual { 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(const Element &x, const Element &y) const { 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return x.state == y.state && 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson x.istring == y.istring && 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson x.ostring == y.ostring; 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Hash function for Elements to Fst states. 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class ElementKey { 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t operator()(const Element &x) const { 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t key = x.state; 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson key = (key << 1) ^ (x.istring)->size(); 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < (x.istring)->size(); ++i) 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson key = (key << 1) ^ (*x.istring)[i]; 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson key = (key << 1) ^ (x.ostring)->size(); 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < (x.ostring)->size(); ++i) 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson key = (key << 1) ^ (*x.ostring)[i]; 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return key; 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Equality function for strings 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class StringEqual { 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(const String * const &x, const String * const &y) const { 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (x->size() != y->size()) return false; 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < x->size(); ++i) 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*x)[i] != (*y)[i]) return false; 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Hash function for set of strings 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class StringKey{ 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t operator()(const String * const & x) const { 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t key = x->size(); 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < x->size(); ++i) 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson key = (key << 1) ^ (*x)[i]; 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return key; 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap; 322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef unordered_set<const String*, StringKey, StringEqual> StringSet; 323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A> *fst_; 325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Element> elements_; // mapping Fst state to Elements 326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ElementMap element_map_; // mapping Elements to Fst state 327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StringSet string_set_; 328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const SynchronizeFstImpl<A> &); // disallow 330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Synchronizes a transducer. This version is a delayed Fst. The 334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// result will be an equivalent FST that has the property that during 335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the traversal of a path, the delay is either zero or strictly 336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// increasing, where the delay is the difference between the number of 337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// non-epsilon output labels and input labels along the path. 338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For the algorithm to terminate, the input transducer must have 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// bounded delay, i.e., the delay of every cycle must be zero. 341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - A has bounded delay: exponential 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - A does not have bounded delay: does not terminate 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// References: 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Mehryar Mohri. Edit-Distance of Weighted Automata: General 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Definitions and Algorithms, International Journal of Computer 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Science, 14(6): 957-982 (2003). 350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This class attaches interface to implementation and handles 352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reference counting, delegating most methods to ImplToFst. 353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass SynchronizeFst : public ImplToFst< SynchronizeFstImpl<A> > { 355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class ArcIterator< SynchronizeFst<A> >; 357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class StateIterator< SynchronizeFst<A> >; 358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CacheState<A> State; 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef SynchronizeFstImpl<A> Impl; 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SynchronizeFst(const Fst<A> &fst) 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(new Impl(fst, SynchronizeFstOptions())) {} 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SynchronizeFst(const Fst<A> &fst, const SynchronizeFstOptions &opts) 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(new Impl(fst, opts)) {} 370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // See Fst<>::Copy() for doc. 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SynchronizeFst(const SynchronizeFst<A> &fst, bool safe = false) 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(fst, safe) {} 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Get a copy of this SynchronizeFst. See Fst<>::Copy() for further doc. 376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual SynchronizeFst<A> *Copy(bool safe = false) const { 377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new SynchronizeFst<A>(*this, safe); 378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual inline void InitStateIterator(StateIteratorData<A> *data) const; 381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->InitArcIterator(s, data); 384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Makes visible to friends. 388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } 389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const SynchronizeFst<A> &fst); // Disallow 391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for SynchronizeFst. 395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class A> 396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< SynchronizeFst<A> > 397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheStateIterator< SynchronizeFst<A> > { 398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit StateIterator(const SynchronizeFst<A> &fst) 400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheStateIterator< SynchronizeFst<A> >(fst, fst.GetImpl()) {} 401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for SynchronizeFst. 405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< SynchronizeFst<A> > 407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheArcIterator< SynchronizeFst<A> > { 408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator(const SynchronizeFst<A> &fst, StateId s) 412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheArcIterator< SynchronizeFst<A> >(fst.GetImpl(), s) { 413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst.GetImpl()->HasArcs(s)) 414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst.GetImpl()->Expand(s); 415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(ArcIterator); 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid SynchronizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const 424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson{ 425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data->base = new StateIterator< SynchronizeFst<A> >(*this); 426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Synchronizes a transducer. This version writes the synchronized 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// result to a MutableFst. The result will be an equivalent FST that 432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// has the property that during the traversal of a path, the delay is 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// either zero or strictly increasing, where the delay is the 434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// difference between the number of non-epsilon output labels and 435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// input labels along the path. 436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For the algorithm to terminate, the input transducer must have 438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// bounded delay, i.e., the delay of every cycle must be zero. 439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity: 441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - A has bounded delay: exponential 442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - A does not have bounded delay: does not terminate 443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// References: 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Mehryar Mohri. Edit-Distance of Weighted Automata: General 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Definitions and Algorithms, International Journal of Computer 447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Science, 14(6): 957-982 (2003). 448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) { 450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SynchronizeFstOptions opts; 451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson opts.gc_limit = 0; // Cache only the last state for fastest copy. 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = SynchronizeFst<Arc>(ifst, opts); 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_SYNCHRONIZE_H__ 458