1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// synchronize.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Synchronize an FST with bounded delay.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_SYNCHRONIZE_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_SYNCHRONIZE_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_map>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap;
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_set>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_set;
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multiset;
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair;
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h>
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h>
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypedef CacheOptions SynchronizeFstOptions;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementation class for SynchronizeFst
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass SynchronizeFstImpl
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public CacheImpl<A> {
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetType;
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetProperties;
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetInputSymbols;
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetOutputSymbols;
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::PushArc;
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::HasArcs;
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::HasFinal;
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::HasStart;
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::SetArcs;
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::SetFinal;
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheBaseImpl< CacheState<A> >::SetStart;
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef basic_string<Label> String;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  struct Element {
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Element() {}
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Element(StateId s, const String *i, const String *o)
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        : state(s), istring(i), ostring(o) {}
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId state;     // Input state Id
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const String *istring;     // Residual input labels
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const String *ostring;     // Residual output labels
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Residual strings are represented by const pointers to
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // basic_string<Label> and are stored in a hash_set. The pointed
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // memory is owned by the hash_set string_set_.
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SynchronizeFstImpl(const Fst<A> &fst, const SynchronizeFstOptions &opts)
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(opts), fst_(fst.Copy()) {
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType("synchronize");
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    uint64 props = fst.Properties(kFstProperties, false);
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(SynchronizeProperties(props), kCopyProperties);
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetInputSymbols(fst.InputSymbols());
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetOutputSymbols(fst.OutputSymbols());
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SynchronizeFstImpl(const SynchronizeFstImpl &impl)
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(impl),
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fst_(impl.fst_->Copy(true)) {
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType("synchronize");
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(impl.Properties(), kCopyProperties);
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetInputSymbols(impl.InputSymbols());
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetOutputSymbols(impl.OutputSymbols());
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~SynchronizeFstImpl() {
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete fst_;
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Extract pointers from the hash set
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<const String*> strings;
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename StringSet::iterator it = string_set_.begin();
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (; it != string_set_.end(); ++it)
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      strings.push_back(*it);
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Free the extracted pointers
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < strings.size(); ++i)
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete strings[i];
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId Start() {
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasStart()) {
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId s = fst_->Start();
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (s == kNoStateId)
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return kNoStateId;
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const String *empty = FindString(new String());
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId start = FindState(Element(fst_->Start(), empty, empty));
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetStart(start);
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::Start();
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight Final(StateId s) {
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasFinal(s)) {
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Element &e = elements_[s];
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state);
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if ((w != Weight::Zero()) && (e.istring)->empty() && (e.ostring)->empty())
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        SetFinal(s, w);
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      else
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        SetFinal(s, Weight::Zero());
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::Final(s);
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumArcs(StateId s) {
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::NumArcs(s);
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumInputEpsilons(StateId s) {
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::NumInputEpsilons(s);
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumOutputEpsilons(StateId s) {
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::NumOutputEpsilons(s);
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 Properties() const { return Properties(kFstProperties); }
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Set error if found; return FST impl properties.
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 Properties(uint64 mask) const {
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((mask & kError) && fst_->Properties(kError, false))
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kError, kError);
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FstImpl<Arc>::Properties(mask);
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    CacheImpl<A>::InitArcIterator(s, data);
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Returns the first character of the string obtained by
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // concatenating s and l.
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Label Car(const String *s, Label l = 0) const {
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!s->empty())
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return (*s)[0];
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return l;
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Computes the residual string obtained by removing the first
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // character in the concatenation of s and l.
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const String *Cdr(const String *s, Label l = 0) {
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    String *r = new String();
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (int i = 1; i < s->size(); ++i)
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      r->push_back((*s)[i]);
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (l && !(s->empty())) r->push_back(l);
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FindString(r);
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Computes the concatenation of s and l.
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const String *Concat(const String *s, Label l = 0) {
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    String *r = new String();
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (int i = 0; i < s->size(); ++i)
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      r->push_back((*s)[i]);
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (l) r->push_back(l);
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return FindString(r);
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Tests if the concatenation of s and l is empty
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Empty(const String *s, Label l = 0) const {
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (s->empty())
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return l == 0;
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Finds the string pointed by s in the hash set. Transfers the
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // pointer ownership to the hash set.
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const String *FindString(const String *s) {
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename StringSet::iterator it = string_set_.find(s);
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (it != string_set_.end()) {
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete s;
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return (*it);
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string_set_.insert(s);
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return s;
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Finds state corresponding to an element. Creates new state
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // if element not found.
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId FindState(const Element &e) {
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename ElementMap::iterator eit = element_map_.find(e);
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (eit != element_map_.end()) {
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return (*eit).second;
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId s = elements_.size();
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      elements_.push_back(e);
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      element_map_.insert(pair<const Element, StateId>(e, s));
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return s;
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Computes the outgoing transitions from a state, creating new destination
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // states as needed.
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Expand(StateId s) {
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Element e = elements_[s];
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (e.state != kNoStateId)
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (ArcIterator< Fst<A> > ait(*fst_, e.state);
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           !ait.Done();
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           ait.Next()) {
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        const A &arc = ait.Value();
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (!Empty(e.istring, arc.ilabel)  && !Empty(e.ostring, arc.olabel)) {
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          const String *istring = Cdr(e.istring, arc.ilabel);
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          const String *ostring = Cdr(e.ostring, arc.olabel);
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          StateId d = FindState(Element(arc.nextstate, istring, ostring));
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          PushArc(s, Arc(Car(e.istring, arc.ilabel),
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                        Car(e.ostring, arc.olabel), arc.weight, d));
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        } else {
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          const String *istring = Concat(e.istring, arc.ilabel);
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          const String *ostring = Concat(e.ostring, arc.olabel);
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          StateId d = FindState(Element(arc.nextstate, istring, ostring));
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          PushArc(s, Arc(0 , 0, arc.weight, d));
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state);
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((w != Weight::Zero()) &&
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ((e.istring)->size() + (e.ostring)->size() > 0)) {
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const String *istring = Cdr(e.istring);
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const String *ostring = Cdr(e.ostring);
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId d = FindState(Element(kNoStateId, istring, ostring));
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      PushArc(s, Arc(Car(e.istring), Car(e.ostring), w, d));
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetArcs(s);
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Equality function for Elements, assume strings have been hashed.
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class ElementEqual {
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator()(const Element &x, const Element &y) const {
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return x.state == y.state &&
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              x.istring == y.istring &&
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              x.ostring == y.ostring;
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Hash function for Elements to Fst states.
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class ElementKey {
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t operator()(const Element &x) const {
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      size_t key = x.state;
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      key = (key << 1) ^ (x.istring)->size();
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < (x.istring)->size(); ++i)
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        key = (key << 1) ^ (*x.istring)[i];
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      key = (key << 1) ^ (x.ostring)->size();
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < (x.ostring)->size(); ++i)
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        key = (key << 1) ^ (*x.ostring)[i];
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return key;
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Equality function for strings
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class StringEqual {
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator()(const String * const &x, const String * const &y) const {
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (x->size() != y->size()) return false;
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < x->size(); ++i)
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if ((*x)[i] != (*y)[i]) return false;
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return true;
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Hash function for set of strings
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class StringKey{
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t operator()(const String * const & x) const {
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      size_t key = x->size();
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < x->size(); ++i)
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        key = (key << 1) ^ (*x)[i];
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return key;
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap;
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_set<const String*, StringKey, StringEqual> StringSet;
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Fst<A> *fst_;
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Element> elements_;  // mapping Fst state to Elements
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ElementMap element_map_;    // mapping Elements to Fst state
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StringSet string_set_;
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const SynchronizeFstImpl<A> &);  // disallow
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Synchronizes a transducer. This version is a delayed Fst.  The
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// result will be an equivalent FST that has the property that during
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the traversal of a path, the delay is either zero or strictly
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// increasing, where the delay is the difference between the number of
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// non-epsilon output labels and input labels along the path.
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For the algorithm to terminate, the input transducer must have
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// bounded delay, i.e., the delay of every cycle must be zero.
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity:
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - A has bounded delay: exponential
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - A does not have bounded delay: does not terminate
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// References:
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Mehryar Mohri. Edit-Distance of Weighted Automata: General
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Definitions and Algorithms, International Journal of Computer
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Science, 14(6): 957-982 (2003).
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This class attaches interface to implementation and handles
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reference counting, delegating most methods to ImplToFst.
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass SynchronizeFst : public ImplToFst< SynchronizeFstImpl<A> > {
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class ArcIterator< SynchronizeFst<A> >;
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class StateIterator< SynchronizeFst<A> >;
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CacheState<A> State;
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef SynchronizeFstImpl<A> Impl;
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SynchronizeFst(const Fst<A> &fst)
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(new Impl(fst, SynchronizeFstOptions())) {}
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SynchronizeFst(const Fst<A> &fst,  const SynchronizeFstOptions &opts)
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(new Impl(fst, opts)) {}
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // See Fst<>::Copy() for doc.
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SynchronizeFst(const SynchronizeFst<A> &fst, bool safe = false)
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToFst<Impl>(fst, safe) {}
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Get a copy of this SynchronizeFst. See Fst<>::Copy() for further doc.
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual SynchronizeFst<A> *Copy(bool safe = false) const {
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new SynchronizeFst<A>(*this, safe);
378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GetImpl()->InitArcIterator(s, data);
384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Makes visible to friends.
388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const SynchronizeFst<A> &fst);  // Disallow
391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for SynchronizeFst.
395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class A>
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< SynchronizeFst<A> >
397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public CacheStateIterator< SynchronizeFst<A> > {
398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit StateIterator(const SynchronizeFst<A> &fst)
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheStateIterator< SynchronizeFst<A> >(fst, fst.GetImpl()) {}
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for SynchronizeFst.
405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< SynchronizeFst<A> >
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public CacheArcIterator< SynchronizeFst<A> > {
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcIterator(const SynchronizeFst<A> &fst, StateId s)
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheArcIterator< SynchronizeFst<A> >(fst.GetImpl(), s) {
413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!fst.GetImpl()->HasArcs(s))
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst.GetImpl()->Expand(s);
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid SynchronizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson{
425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  data->base = new StateIterator< SynchronizeFst<A> >(*this);
426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Synchronizes a transducer. This version writes the synchronized
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// result to a MutableFst.  The result will be an equivalent FST that
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// has the property that during the traversal of a path, the delay is
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// either zero or strictly increasing, where the delay is the
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// difference between the number of non-epsilon output labels and
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// input labels along the path.
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For the algorithm to terminate, the input transducer must have
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// bounded delay, i.e., the delay of every cycle must be zero.
439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Complexity:
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - A has bounded delay: exponential
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - A does not have bounded delay: does not terminate
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// References:
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Mehryar Mohri. Edit-Distance of Weighted Automata: General
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Definitions and Algorithms, International Journal of Computer
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Science, 14(6): 957-982 (2003).
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) {
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SynchronizeFstOptions opts;
451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  opts.gc_limit = 0;  // Cache only the last state for fastest copy.
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  *ofst = SynchronizeFst<Arc>(ifst, opts);
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_SYNCHRONIZE_H__
458