compact-fst.h revision 5b6dc79427b8f7eeb6a7ff68034ab8548ce670ea
1a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// compact-fst.h
2a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
3a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
4a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Licensed under the Apache License, Version 2.0 (the "License");
55f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)// you may not use this file except in compliance with the License.
6a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// You may obtain a copy of the License at
7a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
8a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//     http://www.apache.org/licenses/LICENSE-2.0
9a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
10a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Unless required by applicable law or agreed to in writing, software
11a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// distributed under the License is distributed on an "AS IS" BASIS,
12a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// See the License for the specific language governing permissions and
14a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// limitations under the License.
15a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
16a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Copyright 2005-2010 Google, Inc.
17a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Author: allauzen@google.com (Cyril Allauzen)
18a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
19a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// \file
20a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// FST Class for memory-efficient representation of common types of
21a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// FSTs: linear automata, acceptors, unweighted FSTs, ...
22a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
23a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#ifndef FST_LIB_COMPACT_FST_H__
24a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#define FST_LIB_COMPACT_FST_H__
25a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
26a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include <iterator>
27a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include <utility>
28a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)using std::pair; using std::make_pair;
29a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include <vector>
30a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)using std::vector;
31a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
32a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include <fst/cache.h>
33a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include <fst/expanded-fst.h>
34a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include <fst/fst-decl.h>  // For optional argument declarations
35c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch#include <fst/mapped-file.h>
36c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch#include <fst/matcher.h>
37c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch#include <fst/test-properties.h>
38c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch#include <fst/util.h>
39c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch
40c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch
41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace fst {
42a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
43a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)struct CompactFstOptions : public CacheOptions {
44a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // CompactFst default caching behaviour is to do no caching. Most
45a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // compactors are cheap and therefore we save memory by not doing
46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // caching.
47a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  CompactFstOptions() : CacheOptions(true, 0) {}
48a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
49a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)};
50a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
51a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Compactor Interface - class determinies how arcs and final weights
52a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// are compacted and expanded.
53a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
54a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Final weights are treated as transitions to the superfinal state,
55a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
56a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
57a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// There are two types of compactors:
58a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
59a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// * Fixed out-degree compactors: 'compactor.Size()' returns a
60a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// positive integer 's'. An FST can be compacted by this compactor
61a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// only if each state has exactly 's' outgoing transitions (counting a
62a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// non-Zero() final weight as a transition). A typical example is a
63a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// compactor for string FSTs, i.e. 's == 1'.
64a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
65a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// * Variable out-degree compactors: 'compactor.Size() == -1'. There
66a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// are no out-degree restrictions for these compactors.
67a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
68a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
69c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch// class Compactor {
70c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//  public:
71c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   // Element is the type of the compacted transitions.
72c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   typedef ... Element;
73c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   // Return the compacted representation of a transition 'arc'
74c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   // at a state 's'.
75a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//   Element Compact(StateId s, const Arc &arc);
76a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//   // Return the transition at state 's' represented by the compacted
77a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//   // transition 'e'.
785f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//   Arc Expand(StateId s, const Element &e);
795f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//   // Return -1 for variable out-degree compactors, and the mandatory
805f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//   // out-degree otherwise.
815f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//   ssize_t Size();
825f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//   // Test whether 'fst' can be compacted by this compactor.
835f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//   bool Compatible(const Fst<A> &fst);
845f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//   // Return the properties that are always true for an fst
855f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)//   // compacted using this compactor
86c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   uint64 Properties();
87c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   // Return a string identifying the type of compactor.
88c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   static const string &Type();
89a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//   // Write a compactor to a file.
90a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//   bool Write(ostream &strm);
91a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//   // Read a compactor from a file.
92c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   static Compactor *Read(istream &strm);
93c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   // Default constructor (optional, see comment below).
94c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//   Compactor();
95a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// };
96c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch//
97a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// The default constructor is only required for FST_REGISTER to work
98c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch// (i.e. enabling Convert() and the command-line utilities to work
99c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch// with this new compactor). However, a default constructor always
100c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdoch// needs to be specify for this code to compile, but one can have it
101a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// simply raised an error when called:
102a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)//
103a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Compactor::Compactor() {
104//   FSTERROR() << "Compactor: no default constructor";
105// }
106
107
108// Implementation data for Compact Fst, which can shared between otherwise
109// independent copies.
110//
111// The implementation contains two arrays: 'states_' and 'compacts_'.
112//
113// For fixed out-degree compactors, the 'states_' array is unallocated.
114// The 'compacts_' contains the compacted transitions. Its size is
115// 'ncompacts_'. The outgoing transitions at a given state are stored
116// consecutively. For a given state 's', its 'compactor.Size()' outgoing
117// transitions (including superfinal transition when 's' is final), are
118// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
119//
120// For variable out-degree compactors, the states_ array has size
121// 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
122// For a given state 's', the compacted transitions of 's' are
123// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
124// By convention, 'states_[nstates_] == ncompacts_'.
125//
126// In both cases, the superfinal transitons (when 's' is final, i.e.
127// 'Final(s) != Weight::Zero()') is stored first.
128//
129// The unsigned type U is used to represent indices into the compacts_
130// array.
131template <class E, class U>
132class CompactFstData {
133  public:
134  typedef E CompactElement;
135  typedef U Unsigned;
136
137  CompactFstData()
138      : states_region_(0),
139        compacts_region_(0),
140        states_(0),
141        compacts_(0),
142        nstates_(0),
143        ncompacts_(0),
144        narcs_(0),
145        start_(kNoStateId),
146        error_(false) {}
147
148  template <class A, class Compactor>
149  CompactFstData(const Fst<A> &fst, const Compactor &compactor);
150
151  template <class Iterator, class Compactor>
152  CompactFstData(const Iterator &begin, const Iterator &end,
153                 const Compactor &compactor);
154
155  ~CompactFstData() {
156    if (states_region_ == NULL) {
157      delete [] states_;
158    }
159    delete states_region_;
160    if (compacts_region_ == NULL) {
161      delete [] compacts_;
162    }
163    delete compacts_region_;
164  }
165
166  template <class Compactor>
167  static CompactFstData<E, U> *Read(istream &strm,
168                                       const FstReadOptions &opts,
169                                       const FstHeader &hdr,
170                                       const Compactor &compactor);
171
172  bool Write(ostream &strm, const FstWriteOptions &opts) const;
173
174  Unsigned States(ssize_t i) const { return states_[i]; }
175  const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
176  size_t NumStates() const { return nstates_; }
177  size_t NumCompacts() const { return ncompacts_; }
178  size_t NumArcs() const { return narcs_; }
179  ssize_t Start() const { return start_; }
180
181  int RefCount() const { return ref_count_.count(); }
182  int IncrRefCount() { return ref_count_.Incr(); }
183  int DecrRefCount() { return ref_count_.Decr(); }
184
185  bool Error() const { return error_; }
186
187 private:
188  MappedFile *states_region_;
189  MappedFile *compacts_region_;
190  Unsigned *states_;
191  CompactElement *compacts_;
192  size_t nstates_;
193  size_t ncompacts_;
194  size_t narcs_;
195  ssize_t start_;
196  RefCounter ref_count_;
197  bool error_;
198};
199
200template <class E, class U>
201template <class A, class C>
202CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
203    : states_region_(0),
204      compacts_region_(0),
205      states_(0),
206      compacts_(0),
207      nstates_(0),
208      ncompacts_(0),
209      narcs_(0),
210      start_(kNoStateId),
211      error_(false) {
212  typedef typename A::StateId StateId;
213  typedef typename A::Weight Weight;
214  start_ = fst.Start();
215  // Count # of states and arcs.
216  StateId nfinals = 0;
217  for (StateIterator< Fst<A> > siter(fst);
218       !siter.Done();
219       siter.Next()) {
220    ++nstates_;
221    StateId s = siter.Value();
222    for (ArcIterator< Fst<A> > aiter(fst, s);
223         !aiter.Done();
224         aiter.Next())
225      ++narcs_;
226    if (fst.Final(s) != Weight::Zero()) ++nfinals;
227  }
228  if (compactor.Size() == -1) {
229    states_ = new Unsigned[nstates_ + 1];
230    ncompacts_ = narcs_ + nfinals;
231    compacts_ = new CompactElement[ncompacts_];
232    states_[nstates_] = ncompacts_;
233  } else {
234    states_ = 0;
235    ncompacts_ = nstates_ * compactor.Size();
236    if ((narcs_ + nfinals) != ncompacts_) {
237      FSTERROR() << "CompactFstData: compactor incompatible with fst";
238      error_ = true;
239      return;
240    }
241    compacts_ = new CompactElement[ncompacts_];
242  }
243  size_t pos = 0, fpos = 0;
244  for (StateId s = 0; s < nstates_; ++s) {
245    fpos = pos;
246    if (compactor.Size() == -1)
247      states_[s] = pos;
248    if (fst.Final(s) != Weight::Zero())
249      compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
250                                                fst.Final(s), kNoStateId));
251    for (ArcIterator< Fst<A> > aiter(fst, s);
252         !aiter.Done();
253         aiter.Next()) {
254      compacts_[pos++] = compactor.Compact(s, aiter.Value());
255    }
256    if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {
257      FSTERROR() << "CompactFstData: compactor incompatible with fst";
258      error_ = true;
259      return;
260    }
261  }
262  if (pos != ncompacts_) {
263    FSTERROR() << "CompactFstData: compactor incompatible with fst";
264    error_ = true;
265    return;
266  }
267}
268
269template <class E, class U>
270template <class Iterator, class C>
271CompactFstData<E, U>::CompactFstData(const Iterator &begin,
272                                     const Iterator &end,
273                                     const C &compactor)
274    : states_region_(0),
275      compacts_region_(0),
276      states_(0),
277      compacts_(0),
278      nstates_(0),
279      ncompacts_(0),
280      narcs_(0),
281      start_(kNoStateId),
282      error_(false) {
283  typedef typename C::Arc Arc;
284  typedef typename Arc::Weight Weight;
285  if (compactor.Size() != -1) {
286    ncompacts_ = distance(begin, end);
287    if (compactor.Size() == 1) {
288      // For strings, allow implicit final weight.
289      // Empty input is the empty string.
290      if (ncompacts_ == 0) {
291        ++ncompacts_;
292      } else {
293        Arc arc = compactor.Expand(ncompacts_ - 1,
294                                      *(begin + (ncompacts_ - 1)));
295        if (arc.ilabel != kNoLabel)
296          ++ncompacts_;
297      }
298    }
299    if (ncompacts_ % compactor.Size()) {
300      FSTERROR() << "CompactFstData: size of input container incompatible"
301                 << " with compactor";
302      error_ = true;
303      return;
304    }
305    if (ncompacts_ == 0)
306      return;
307    start_ = 0;
308    nstates_ = ncompacts_ / compactor.Size();
309    compacts_ = new CompactElement[ncompacts_];
310    size_t i = 0;
311    Iterator it = begin;
312    for(; it != end; ++it, ++i){
313      compacts_[i] = *it;
314      if (compactor.Expand(i, *it).ilabel != kNoLabel)
315        ++narcs_;
316    }
317    if (i < ncompacts_)
318      compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
319                                              Weight::One(), kNoStateId));
320  } else {
321    if (distance(begin, end) == 0)
322      return;
323    // Count # of states, arcs and compacts.
324    Iterator it = begin;
325    for(size_t i = 0; it != end; ++it, ++i) {
326      Arc arc = compactor.Expand(i, *it);
327      if (arc.ilabel != kNoLabel) {
328        ++narcs_;
329        ++ncompacts_;
330      } else {
331        ++nstates_;
332        if (arc.weight != Weight::Zero())
333          ++ncompacts_;
334      }
335    }
336    start_ = 0;
337    compacts_ = new CompactElement[ncompacts_];
338    states_ = new Unsigned[nstates_ + 1];
339    states_[nstates_] = ncompacts_;
340    size_t i = 0, s = 0;
341    for(it = begin; it != end; ++it) {
342      Arc arc = compactor.Expand(i, *it);
343      if (arc.ilabel != kNoLabel) {
344        compacts_[i++] = *it;
345      } else {
346        states_[s++] = i;
347        if (arc.weight != Weight::Zero())
348          compacts_[i++] = *it;
349      }
350    }
351    if ((s != nstates_) || (i != ncompacts_)) {
352      FSTERROR() << "CompactFstData: ill-formed input container";
353      error_ = true;
354      return;
355    }
356  }
357}
358
359template <class E, class U>
360template <class C>
361CompactFstData<E, U> *CompactFstData<E, U>::Read(
362    istream &strm,
363    const FstReadOptions &opts,
364    const FstHeader &hdr,
365    const C &compactor) {
366  CompactFstData<E, U> *data = new CompactFstData<E, U>();
367  data->start_ = hdr.Start();
368  data->nstates_ = hdr.NumStates();
369  data->narcs_ = hdr.NumArcs();
370
371  if (compactor.Size() == -1) {
372    if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
373      LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
374      delete data;
375      return 0;
376    }
377    size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
378    data->states_region_ = MappedFile::Map(&strm, opts, b);
379    if (!strm || data->states_region_ == NULL) {
380      LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
381      delete data;
382      return 0;
383    }
384    data->states_ = static_cast<Unsigned *>(
385        data->states_region_->mutable_data());
386  } else {
387    data->states_ = 0;
388  }
389  data->ncompacts_ = compactor.Size() == -1
390      ? data->states_[data->nstates_]
391      : data->nstates_ * compactor.Size();
392  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
393    LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
394    delete data;
395    return 0;
396  }
397  size_t b = data->ncompacts_ * sizeof(CompactElement);
398  data->compacts_region_ = MappedFile::Map(&strm, opts, b);
399  if (!strm || data->compacts_region_ == NULL) {
400    LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
401    delete data;
402    return 0;
403  }
404  data->compacts_ = static_cast<CompactElement *>(
405      data->compacts_region_->mutable_data());
406  return data;
407}
408
409template<class E, class U>
410bool CompactFstData<E, U>::Write(ostream &strm,
411                                 const FstWriteOptions &opts) const {
412  if (states_) {
413    if (opts.align && !AlignOutput(strm)) {
414      LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
415      return false;
416    }
417    strm.write(reinterpret_cast<char *>(states_),
418               (nstates_ + 1) * sizeof(Unsigned));
419  }
420  if (opts.align && !AlignOutput(strm)) {
421    LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
422    return false;
423  }
424  strm.write(reinterpret_cast<char *>(compacts_),
425             ncompacts_ * sizeof(CompactElement));
426
427  strm.flush();
428  if (!strm) {
429    LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
430    return false;
431  }
432  return true;
433}
434
435template <class A, class C, class U> class CompactFst;
436template <class F, class G> void Cast(const F &, G *);
437
438// Implementation class for CompactFst, which contains CompactFstData
439// and Fst cache.
440template <class A, class C, class U>
441class CompactFstImpl : public CacheImpl<A> {
442 public:
443  using FstImpl<A>::SetType;
444  using FstImpl<A>::SetProperties;
445  using FstImpl<A>::Properties;
446  using FstImpl<A>::SetInputSymbols;
447  using FstImpl<A>::SetOutputSymbols;
448  using FstImpl<A>::WriteHeader;
449
450  using CacheImpl<A>::PushArc;
451  using CacheImpl<A>::HasArcs;
452  using CacheImpl<A>::HasFinal;
453  using CacheImpl<A>::HasStart;
454  using CacheImpl<A>::SetArcs;
455  using CacheImpl<A>::SetFinal;
456  using CacheImpl<A>::SetStart;
457
458  typedef A Arc;
459  typedef typename A::Weight Weight;
460  typedef typename A::StateId StateId;
461  typedef C Compactor;
462  typedef typename C::Element CompactElement;
463  typedef U Unsigned;
464
465  CompactFstImpl()
466      :  CacheImpl<A>(CompactFstOptions()),
467         compactor_(0),
468         own_compactor_(false),
469         data_(0) {
470    string type = "compact";
471    if (sizeof(U) != sizeof(uint32)) {
472      string size;
473      Int64ToStr(8 * sizeof(U), &size);
474      type += size;
475    }
476    type += "_";
477    type += C::Type();
478    SetType(type);
479    SetProperties(kNullProperties | kStaticProperties);
480  }
481
482  CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
483                 const CompactFstOptions &opts)
484      : CacheImpl<A>(opts),
485        compactor_(new C(compactor)),
486        own_compactor_(true),
487        data_(0) {
488    Init(fst);
489  }
490
491  CompactFstImpl(const Fst<Arc> &fst, C *compactor,
492                 const CompactFstOptions &opts)
493      : CacheImpl<A>(opts),
494        compactor_(compactor),
495        own_compactor_(false),
496        data_(0) {
497    Init(fst);
498  }
499
500  template <class Iterator>
501  CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
502                 const CompactFstOptions &opts)
503      : CacheImpl<A>(opts),
504        compactor_(new C(compactor)),
505        own_compactor_(true),
506        data_(0) {
507    Init(b, e);
508  }
509
510  template <class Iterator>
511  CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
512                 const CompactFstOptions &opts)
513      : CacheImpl<A>(opts),
514        compactor_(compactor),
515        own_compactor_(false),
516        data_(0) {
517    Init(b, e);
518  }
519
520  CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
521      : CacheImpl<A>(impl),
522        compactor_(new C(*impl.compactor_)),
523        own_compactor_(true),
524        data_(impl.data_) {
525    if (data_)
526      data_->IncrRefCount();
527    SetType(impl.Type());
528    SetProperties(impl.Properties());
529    SetInputSymbols(impl.InputSymbols());
530    SetOutputSymbols(impl.OutputSymbols());
531  }
532
533  ~CompactFstImpl(){
534    if (own_compactor_)
535      delete compactor_;
536    if (data_ && !data_->DecrRefCount())
537      delete data_;
538  }
539
540  StateId Start() {
541    if (!HasStart()) {
542      SetStart(data_->Start());
543    }
544    return CacheImpl<A>::Start();
545  }
546
547  Weight Final(StateId s) {
548    if (HasFinal(s))
549      return CacheImpl<A>::Final(s);
550    Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
551    if ((compactor_->Size() != -1) ||
552        (data_->States(s) != data_->States(s + 1)))
553      arc = ComputeArc(s,
554                       compactor_->Size() == -1
555                       ? data_->States(s)
556                       : s * compactor_->Size());
557    return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero();
558  }
559
560  StateId NumStates() const {
561    if (Properties(kError)) return 0;
562    return data_->NumStates();
563  }
564
565  size_t NumArcs(StateId s) {
566    if (HasArcs(s))
567      return CacheImpl<A>::NumArcs(s);
568    Unsigned i, num_arcs;
569    if (compactor_->Size() == -1) {
570      i = data_->States(s);
571      num_arcs = data_->States(s + 1) - i;
572    } else {
573      i =  s * compactor_->Size();
574      num_arcs = compactor_->Size();
575    }
576    if (num_arcs > 0) {
577      const A &arc = ComputeArc(s, i, kArcILabelValue);
578      if (arc.ilabel == kNoStateId) {
579        --num_arcs;
580      }
581    }
582    return num_arcs;
583  }
584
585  size_t NumInputEpsilons(StateId s) {
586    if (!HasArcs(s) && !Properties(kILabelSorted))
587      Expand(s);
588    if (HasArcs(s))
589      return CacheImpl<A>::NumInputEpsilons(s);
590    return CountEpsilons(s, false);
591  }
592
593  size_t NumOutputEpsilons(StateId s) {
594    if (!HasArcs(s) && !Properties(kOLabelSorted))
595      Expand(s);
596    if (HasArcs(s))
597      return CacheImpl<A>::NumOutputEpsilons(s);
598    return CountEpsilons(s, true);
599  }
600
601  size_t CountEpsilons(StateId s, bool output_epsilons) {
602    size_t begin =  compactor_->Size() == -1 ?
603        data_->States(s) : s * compactor_->Size();
604    size_t end = compactor_->Size() == -1 ?
605        data_->States(s + 1) : (s + 1) * compactor_->Size();
606    size_t num_eps = 0;
607    for (size_t i = begin; i < end; ++i) {
608      const A &arc = ComputeArc(
609          s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
610      const typename A::Label &label =
611          (output_epsilons ? arc.olabel : arc.ilabel);
612      if (label == kNoLabel)
613        continue;
614      else if (label > 0)
615        break;
616      ++num_eps;
617    }
618    return num_eps;
619  }
620
621  static CompactFstImpl<A, C, U> *Read(istream &strm,
622                                       const FstReadOptions &opts) {
623    CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
624    FstHeader hdr;
625    if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
626      delete impl;
627      return 0;
628    }
629
630    // Ensures compatibility
631    if (hdr.Version() == kAlignedFileVersion)
632      hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
633
634    impl->compactor_ = C::Read(strm);
635    if (!impl->compactor_) {
636      delete impl;
637      return 0;
638    }
639    impl->own_compactor_ = true;
640    impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
641                                                          *impl->compactor_);
642    if (!impl->data_) {
643      delete impl;
644      return 0;
645    }
646    return impl;
647  }
648
649  bool Write(ostream &strm, const FstWriteOptions &opts) const {
650    FstHeader hdr;
651    hdr.SetStart(data_->Start());
652    hdr.SetNumStates(data_->NumStates());
653    hdr.SetNumArcs(data_->NumArcs());
654
655    // Ensures compatibility
656    int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
657    WriteHeader(strm, opts, file_version, &hdr);
658    compactor_->Write(strm);
659    return data_->Write(strm, opts);
660  }
661
662  // Provide information needed for generic state iterator
663  void InitStateIterator(StateIteratorData<A> *data) const {
664    data->base = 0;
665    data->nstates = data_->NumStates();
666  }
667
668  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
669    if (!HasArcs(s))
670      Expand(s);
671    CacheImpl<A>::InitArcIterator(s, data);
672  }
673
674  Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
675    return compactor_->Expand(s, data_->Compacts(i), f);
676  }
677
678  void Expand(StateId s) {
679    size_t begin =  compactor_->Size() == -1 ?
680        data_->States(s) : s * compactor_->Size();
681    size_t end = compactor_->Size() == -1 ?
682        data_->States(s + 1) : (s + 1) * compactor_->Size();
683    for (size_t i = begin; i < end; ++i) {
684      const Arc &arc = ComputeArc(s, i);
685      if (arc.ilabel == kNoLabel)
686        SetFinal(s, arc.weight);
687      else
688        PushArc(s, arc);
689    }
690    if (!HasFinal(s))
691      SetFinal(s, Weight::Zero());
692    SetArcs(s);
693  }
694
695  template <class Iterator>
696  void SetCompactElements(const Iterator &b, const Iterator &e) {
697    if (data_ && !data_->DecrRefCount())
698      delete data_;
699    data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
700  }
701
702  C *GetCompactor() const { return compactor_; }
703  CompactFstData<CompactElement, U> *Data() const { return data_; }
704
705  // Properties always true of this Fst class
706  static const uint64 kStaticProperties = kExpanded;
707
708 protected:
709  template <class B, class D>
710  explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
711      : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
712        compactor_(new C(*impl.GetCompactor())),
713        own_compactor_(true),
714        data_(impl.Data()) {
715    if (data_)
716      data_->IncrRefCount();
717    SetType(impl.Type());
718    SetProperties(impl.Properties());
719    SetInputSymbols(impl.InputSymbols());
720    SetOutputSymbols(impl.OutputSymbols());
721  }
722
723 private:
724  friend class CompactFst<A, C, U>;  // allow access during write.
725
726  void Init(const Fst<Arc> &fst) {
727    string type = "compact";
728    if (sizeof(U) != sizeof(uint32)) {
729      string size;
730      Int64ToStr(8 * sizeof(U), &size);
731      type += size;
732    }
733    type += "_";
734    type += compactor_->Type();
735    SetType(type);
736    SetInputSymbols(fst.InputSymbols());
737    SetOutputSymbols(fst.OutputSymbols());
738    data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
739    if (data_->Error())
740      SetProperties(kError, kError);
741    uint64 copy_properties = fst.Properties(kCopyProperties, true);
742    if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
743      FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
744      SetProperties(kError, kError);
745      return;
746    }
747    SetProperties(copy_properties | kStaticProperties);
748  }
749
750  template <class Iterator>
751  void Init(const Iterator &b, const Iterator &e) {
752    string type = "compact";
753    if (sizeof(U) != sizeof(uint32)) {
754      string size;
755      Int64ToStr(8 * sizeof(U), &size);
756      type += size;
757    }
758    type += "_";
759    type += compactor_->Type();
760    SetType(type);
761    SetProperties(kStaticProperties | compactor_->Properties());
762    data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
763    if (data_->Error())
764      SetProperties(kError, kError);
765  }
766
767  // Current unaligned file format version
768  static const int kFileVersion = 2;
769  // Current aligned file format version
770  static const int kAlignedFileVersion = 1;
771  // Minimum file format version supported
772  static const int kMinFileVersion = 1;
773
774  C *compactor_;
775  bool own_compactor_;
776  CompactFstData<CompactElement, U> *data_;
777};
778
779template <class A, class C, class U>
780const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
781template <class A, class C, class U>
782const int CompactFstImpl<A, C, U>::kFileVersion;
783template <class A, class C, class U>
784const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
785template <class A, class C, class U>
786const int CompactFstImpl<A, C, U>::kMinFileVersion;
787
788
789// CompactFst.  This class attaches interface to implementation and
790// handles reference counting, delegating most methods to
791// ImplToExpandedFst. The unsigned type U is used to represent indices
792// into the compact arc array (uint32 by default, declared in
793// fst-decl.h).
794template <class A, class C, class U>
795class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
796 public:
797  friend class StateIterator< CompactFst<A, C, U> >;
798  friend class ArcIterator< CompactFst<A, C, U> >;
799  template <class F, class G> void friend Cast(const F &, G *);
800
801  typedef A Arc;
802  typedef typename A::StateId StateId;
803  typedef CompactFstImpl<A, C, U> Impl;
804  typedef CacheState<A> State;
805  typedef U Unsigned;
806
807  CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
808
809  explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
810                      const CompactFstOptions &opts = CompactFstOptions())
811      : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
812
813  CompactFst(const Fst<A> &fst, C *compactor,
814             const CompactFstOptions &opts = CompactFstOptions())
815      : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
816
817  // The following 2 constructors take as input two iterators delimiting
818  // a set of (already) compacted transitions, starting with the
819  // transitions out of the initial state. The format of the input
820  // differs for fixed out-degree and variable out-degree compactors.
821  //
822  // - For fixed out-degree compactors, the final weight (encoded as a
823  // compacted transition) needs to be given only for final
824  // states. All strings (compactor of size 1) will be assume to be
825  // terminated by a final state even when the final state is not
826  // implicitely given.
827  //
828  // - For variable out-degree compactors, the final weight (encoded
829  // as a compacted transition) needs to be given for all states and
830  // must appeared first in the list (for state s, final weight of s,
831  // followed by outgoing transitons in s).
832  //
833  // These 2 constructors allows the direct construction of a CompactFst
834  // without first creating a more memory hungry 'regular' FST. This
835  // is useful when memory usage is severely constrained.
836  template <class Iterator>
837  explicit CompactFst(const Iterator &begin, const Iterator &end,
838                      const C &compactor = C(),
839                      const CompactFstOptions &opts = CompactFstOptions())
840      : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
841
842  template <class Iterator>
843  CompactFst(const Iterator &begin, const Iterator &end,
844             C *compactor, const CompactFstOptions &opts = CompactFstOptions())
845      : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
846
847  // See Fst<>::Copy() for doc.
848  CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
849      : ImplToExpandedFst<Impl>(fst, safe) {}
850
851  // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
852  virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
853    return new CompactFst<A, C, U>(*this, safe);
854  }
855
856  // Read a CompactFst from an input stream; return NULL on error
857  static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
858    Impl* impl = Impl::Read(strm, opts);
859    return impl ? new CompactFst<A, C, U>(impl) : 0;
860  }
861
862  // Read a CompactFst from a file; return NULL on error
863  // Empty filename reads from standard input
864  static CompactFst<A, C, U> *Read(const string &filename) {
865    Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
866    return impl ? new CompactFst<A, C, U>(impl) : 0;
867  }
868
869  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
870    return GetImpl()->Write(strm, opts);
871  }
872
873  virtual bool Write(const string &filename) const {
874    return Fst<A>::WriteFile(filename);
875  }
876
877  template <class F>
878  static bool WriteFst(const F &fst, const C &compactor, ostream &strm,
879                       const FstWriteOptions &opts);
880
881  virtual void InitStateIterator(StateIteratorData<A> *data) const {
882    GetImpl()->InitStateIterator(data);
883  }
884
885  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
886    GetImpl()->InitArcIterator(s, data);
887  }
888
889  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
890    return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
891  }
892
893  template <class Iterator>
894  void SetCompactElements(const Iterator &b, const Iterator &e) {
895    GetImpl()->SetCompactElements(b, e);
896  }
897
898 private:
899  CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
900
901  // Makes visible to friends.
902  Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
903
904  void SetImpl(Impl *impl, bool own_impl = false) {
905    ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
906  }
907
908  // Use overloading to extract the type of the argument.
909  static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) {
910    return compact_fst.GetImpl();
911  }
912
913  // This does not give privileged treatment to subclasses of CompactFst.
914  template<typename NonCompactFst>
915  static Impl* GetImplIfCompactFst(const NonCompactFst& fst) {
916    return NULL;
917  }
918
919  void operator=(const CompactFst<A, C, U> &fst);  // disallow
920};
921
922// Writes Fst in Compact format, potentially with a pass over the machine
923// before writing to compute the number of states and arcs.
924//
925template <class A, class C, class U>
926template <class F>
927bool CompactFst<A, C, U>::WriteFst(const F &fst,
928                                   const C &compactor,
929                                   ostream &strm,
930                                   const FstWriteOptions &opts) {
931  typedef U Unsigned;
932  typedef typename C::Element CompactElement;
933  typedef typename A::Weight Weight;
934  int file_version = opts.align ?
935      CompactFstImpl<A, C, U>::kAlignedFileVersion :
936      CompactFstImpl<A, C, U>::kFileVersion;
937  size_t num_arcs = -1, num_states = -1, num_compacts = -1;
938  C first_pass_compactor = compactor;
939  if (Impl* impl = GetImplIfCompactFst(fst)) {
940    num_arcs = impl->Data()->NumArcs();
941    num_states = impl->Data()->NumStates();
942    num_compacts = impl->Data()->NumCompacts();
943    first_pass_compactor = *impl->GetCompactor();
944  } else {
945    // A first pass is needed to compute the state of the compactor, which
946    // is saved ahead of the rest of the data structures.  This unfortunately
947    // means forcing a complete double compaction when writing in this format.
948    // TODO(allauzen): eliminate mutable state from compactors.
949    num_arcs = 0;
950    num_states = 0;
951    for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
952      const StateId s = siter.Value();
953      ++num_states;
954      if (fst.Final(s) != Weight::Zero()) {
955        first_pass_compactor.Compact(
956            s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
957      }
958      for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
959        ++num_arcs;
960        first_pass_compactor.Compact(s, aiter.Value());
961      }
962    }
963  }
964  FstHeader hdr;
965  hdr.SetStart(fst.Start());
966  hdr.SetNumStates(num_states);
967  hdr.SetNumArcs(num_arcs);
968  string type = "compact";
969  if (sizeof(U) != sizeof(uint32)) {
970    string size;
971    Int64ToStr(8 * sizeof(U), &size);
972    type += size;
973  }
974  type += "_";
975  type += C::Type();
976  uint64 copy_properties = fst.Properties(kCopyProperties, true);
977  if ((copy_properties & kError) || !compactor.Compatible(fst)) {
978    LOG(ERROR) << "fst incompatible with compactor";
979    return false;
980  }
981  uint64 properties = copy_properties |
982      CompactFstImpl<A, C, U>::kStaticProperties;
983  FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
984                             &hdr);
985  first_pass_compactor.Write(strm);
986  if (first_pass_compactor.Size() == -1)  {
987    if (opts.align && !AlignOutput(strm)) {
988      LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
989      return false;
990    }
991    Unsigned compacts = 0;
992    for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
993      const StateId s = siter.Value();
994      strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
995      if (fst.Final(s) != Weight::Zero()) {
996        ++compacts;
997      }
998      compacts += fst.NumArcs(s);
999    }
1000    strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
1001  }
1002  if (opts.align && !AlignOutput(strm)) {
1003    LOG(ERROR) << "Could not align file during write after writing states";
1004  }
1005  C second_pass_compactor = compactor;
1006  CompactElement element;
1007  for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
1008    const StateId s = siter.Value();
1009    if (fst.Final(s) != Weight::Zero()) {
1010      element = second_pass_compactor.Compact(
1011          s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
1012      strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1013    }
1014    for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
1015      element = second_pass_compactor.Compact(s, aiter.Value());
1016      strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1017    }
1018  }
1019  strm.flush();
1020  if (!strm) {
1021    LOG(ERROR) << "CompactFst write failed: " << opts.source;
1022    return false;
1023  }
1024  return true;
1025}
1026
1027
1028// Specialization for CompactFst; see generic version in fst.h
1029// for sample usage (but use the CompactFst type!). This version
1030// should inline.
1031template <class A, class C, class U>
1032class StateIterator< CompactFst<A, C, U> > {
1033 public:
1034  typedef typename A::StateId StateId;
1035
1036  explicit StateIterator(const CompactFst<A, C, U> &fst)
1037      : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
1038
1039  bool Done() const { return s_ >= nstates_; }
1040
1041  StateId Value() const { return s_; }
1042
1043  void Next() { ++s_; }
1044
1045  void Reset() { s_ = 0; }
1046
1047 private:
1048  StateId nstates_;
1049  StateId s_;
1050
1051  DISALLOW_COPY_AND_ASSIGN(StateIterator);
1052};
1053
1054// Specialization for CompactFst.
1055// Never caches, always iterates over the underlying compact elements.
1056template <class A, class C, class U>
1057class ArcIterator< CompactFst<A, C, U> > {
1058 public:
1059  typedef typename A::StateId StateId;
1060  typedef typename C::Element CompactElement;
1061
1062  ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
1063      : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0),
1064        pos_(0), flags_(kArcValueFlags) {
1065
1066    const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data();
1067    size_t offset;
1068    if (compactor_->Size() == -1) {  // Variable out-degree compactor
1069      offset = data->States(s);
1070      num_arcs_ = data->States(s + 1) - offset;
1071    } else {  // Fixed out-degree compactor
1072      offset =  s * compactor_->Size();
1073      num_arcs_ = compactor_->Size();
1074    }
1075    if (num_arcs_ > 0) {
1076      compacts_ = &(data->Compacts(offset));
1077      arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue);
1078      if (arc_.ilabel == kNoStateId) {
1079        ++compacts_;
1080        --num_arcs_;
1081      }
1082    }
1083  }
1084
1085  ~ArcIterator() {}
1086
1087  bool Done() const { return pos_ >= num_arcs_; }
1088
1089  const A& Value() const {
1090    arc_ = compactor_->Expand(state_, compacts_[pos_], flags_);
1091    return arc_;
1092  }
1093
1094  void Next() { ++pos_; }
1095
1096  size_t Position() const { return pos_; }
1097
1098  void Reset() { pos_ = 0;  }
1099
1100  void Seek(size_t pos) { pos_ = pos; }
1101
1102  uint32 Flags() const { return flags_; }
1103
1104  void SetFlags(uint32 f, uint32 m) {
1105    flags_ &= ~m;
1106    flags_ |= (f & kArcValueFlags);
1107  }
1108
1109 private:
1110  C *compactor_;
1111  StateId state_;
1112  const CompactElement *compacts_;
1113  size_t pos_;
1114  size_t num_arcs_;
1115  mutable A arc_;
1116  uint32 flags_;
1117
1118  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
1119};
1120
1121// // Specialization for CompactFst.
1122// // This is an optionally caching arc iterator.
1123// // TODO(allauzen): implements the kArcValueFlags, the current
1124// // implementation only implements the kArcNoCache flag.
1125// template <class A, class C, class U>
1126// class ArcIterator< CompactFst<A, C, U> > {
1127//  public:
1128//   typedef typename A::StateId StateId;
1129
1130//   ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
1131//       : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0),
1132//         flags_(kArcValueFlags) {
1133//     cache_data_.ref_count = 0;
1134
1135//     if (fst_.GetImpl()->HasArcs(state_)) {
1136//       fst_.GetImpl()->InitArcIterator(s, &cache_data_);
1137//       num_arcs_ = cache_data_.narcs;
1138//       return;
1139//     }
1140
1141//     const C *compactor = fst_.GetImpl()->GetCompactor();
1142//     const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data();
1143//     if (compactor->Size() == -1) {  // Variable out-degree compactor
1144//       offset_ = data->States(s);
1145//       num_arcs_ = data->States(s + 1) - offset_;
1146//     } else {  // Fixed out-degree compactor
1147//       offset_ =  s * compactor->Size();
1148//       num_arcs_ = compactor->Size();
1149//     }
1150//     if (num_arcs_ > 0) {
1151//       const A &arc = fst_.GetImpl()->ComputeArc(s, offset_);
1152//       if (arc.ilabel == kNoStateId) {
1153//         ++offset_;
1154//         --num_arcs_;
1155//       }
1156//     }
1157//   }
1158
1159
1160//   ~ArcIterator() {
1161//     if (cache_data_.ref_count)
1162//       --(*cache_data_.ref_count);
1163//   }
1164
1165//   bool Done() const { return pos_ >= num_arcs_; }
1166
1167//   const A& Value() const {
1168//     if (cache_data_.ref_count == 0) {
1169//       if (flags_ & kArcNoCache) {
1170//         arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_);
1171//         return arc_;
1172//       } else {
1173//         fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
1174//       }
1175//     }
1176//     return cache_data_.arcs[pos_];
1177//   }
1178
1179//   void Next() { ++pos_; }
1180
1181//   size_t Position() const { return pos_; }
1182
1183//   void Reset() { pos_ = 0;  }
1184
1185//   void Seek(size_t pos) { pos_ = pos; }
1186
1187//   uint32 Flags() const { return flags_; }
1188
1189//   void SetFlags(uint32 f, uint32 m) {
1190//     flags_ &= ~m;
1191//     flags_ |= f;
1192
1193//     if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0)
1194//       fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
1195//   }
1196
1197//  private:
1198//   mutable const CompactFst<A, C, U> &fst_;
1199//   StateId state_;
1200//   size_t pos_;
1201//   size_t num_arcs_;
1202//   size_t offset_;
1203//   uint32 flags_;
1204//   mutable A arc_;
1205//   mutable ArcIteratorData<A> cache_data_;
1206
1207//   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
1208// };
1209
1210
1211//
1212// Utility Compactors
1213//
1214
1215// Compactor for unweighted string FSTs
1216template <class A>
1217class StringCompactor {
1218 public:
1219  typedef A Arc;
1220  typedef typename A::Label Element;
1221  typedef typename A::Label Label;
1222  typedef typename A::StateId StateId;
1223  typedef typename A::Weight Weight;
1224
1225  Element Compact(StateId s, const A &arc) const { return arc.ilabel; }
1226
1227  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1228    return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
1229  }
1230
1231  ssize_t Size() const { return 1; }
1232
1233  uint64 Properties() const {
1234    return kString | kAcceptor | kUnweighted;
1235  }
1236
1237  bool Compatible(const Fst<A> &fst) const {
1238    uint64 props = Properties();
1239    return fst.Properties(props, true) == props;
1240  }
1241
1242  static const string &Type() {
1243    static const string type = "string";
1244    return type;
1245  }
1246
1247  bool Write(ostream &strm) const { return true; }
1248
1249  static StringCompactor *Read(istream &strm) {
1250    return new StringCompactor;
1251  }
1252};
1253
1254
1255// Compactor for weighted string FSTs
1256template <class A>
1257class WeightedStringCompactor {
1258 public:
1259  typedef A Arc;
1260  typedef typename A::Label Label;
1261  typedef typename A::StateId StateId;
1262  typedef typename A::Weight Weight;
1263  typedef pair<Label, Weight> Element;
1264
1265  Element Compact(StateId s, const A &arc) const {
1266    return make_pair(arc.ilabel, arc.weight);
1267  }
1268
1269  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1270    return Arc(p.first, p.first, p.second,
1271               p.first != kNoLabel ? s + 1 : kNoStateId);
1272  }
1273
1274  ssize_t Size() const { return 1;}
1275
1276  uint64 Properties() const {
1277    return kString | kAcceptor;
1278  }
1279
1280  bool Compatible(const Fst<A> &fst) const {
1281    uint64 props = Properties();
1282    return fst.Properties(props, true) == props;
1283  }
1284
1285  static const string &Type() {
1286    static const string type = "weighted_string";
1287    return type;
1288  }
1289
1290  bool Write(ostream &strm) const { return true; }
1291
1292  static WeightedStringCompactor *Read(istream &strm) {
1293    return new WeightedStringCompactor;
1294  }
1295};
1296
1297
1298// Compactor for unweighted acceptor FSTs
1299template <class A>
1300class UnweightedAcceptorCompactor {
1301 public:
1302  typedef A Arc;
1303  typedef typename A::Label Label;
1304  typedef typename A::StateId StateId;
1305  typedef typename A::Weight Weight;
1306  typedef pair<Label, StateId> Element;
1307
1308  Element Compact(StateId s, const A &arc) const {
1309    return make_pair(arc.ilabel, arc.nextstate);
1310  }
1311
1312  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1313    return Arc(p.first, p.first, Weight::One(), p.second);
1314  }
1315
1316  ssize_t Size() const { return -1;}
1317
1318  uint64 Properties() const {
1319    return kAcceptor | kUnweighted;
1320  }
1321
1322  bool Compatible(const Fst<A> &fst) const {
1323    uint64 props = Properties();
1324    return fst.Properties(props, true) == props;
1325  }
1326
1327  static const string &Type() {
1328    static const string type = "unweighted_acceptor";
1329    return type;
1330  }
1331
1332  bool Write(ostream &strm) const { return true; }
1333
1334  static UnweightedAcceptorCompactor *Read(istream &istrm) {
1335    return new UnweightedAcceptorCompactor;
1336  }
1337};
1338
1339
1340// Compactor for weighted acceptor FSTs
1341template <class A>
1342class AcceptorCompactor {
1343 public:
1344  typedef A Arc;
1345  typedef typename A::Label Label;
1346  typedef typename A::StateId StateId;
1347  typedef typename A::Weight Weight;
1348  typedef pair< pair<Label, Weight>, StateId > Element;
1349
1350  Element Compact(StateId s, const A &arc) const {
1351    return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate);
1352  }
1353
1354  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1355    return Arc(p.first.first, p.first.first, p.first.second, p.second);
1356  }
1357
1358  ssize_t Size() const { return -1;}
1359
1360  uint64 Properties() const {
1361    return kAcceptor;
1362  }
1363
1364  bool Compatible(const Fst<A> &fst) const {
1365    uint64 props = Properties();
1366    return fst.Properties(props, true) == props;
1367  }
1368
1369  static const string &Type() {
1370    static const string type = "acceptor";
1371    return type;
1372  }
1373
1374  bool Write(ostream &strm) const { return true; }
1375
1376  static AcceptorCompactor *Read(istream &strm) {
1377    return new AcceptorCompactor;
1378  }
1379};
1380
1381
1382// Compactor for unweighted FSTs
1383template <class A>
1384class UnweightedCompactor {
1385 public:
1386  typedef A Arc;
1387  typedef typename A::Label Label;
1388  typedef typename A::StateId StateId;
1389  typedef typename A::Weight Weight;
1390  typedef pair< pair<Label, Label>, StateId > Element;
1391
1392  Element Compact(StateId s, const A &arc) const {
1393    return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate);
1394  }
1395
1396  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1397    return Arc(p.first.first, p.first.second, Weight::One(), p.second);
1398  }
1399
1400  ssize_t Size() const { return -1; }
1401
1402  uint64 Properties() const {
1403    return kUnweighted;
1404  }
1405
1406  bool Compatible(const Fst<A> &fst) const {
1407    uint64 props = Properties();
1408    return fst.Properties(props, true) == props;
1409  }
1410
1411  static const string &Type() {
1412    static const string type = "unweighted";
1413    return type;
1414  }
1415
1416  bool Write(ostream &strm) const { return true; }
1417
1418  static UnweightedCompactor *Read(istream &strm) {
1419    return new UnweightedCompactor;
1420  }
1421};
1422
1423
1424// Uselful aliases when using StdArc
1425typedef CompactFst< StdArc, StringCompactor<StdArc> >
1426StdCompactStringFst;
1427typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
1428StdCompactWeightedStringFst;
1429typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
1430StdCompactAcceptorFst;
1431typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
1432StdCompactUnweightedFst;
1433typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
1434StdCompactUnweightedAcceptorFst;
1435
1436}  // namespace fst
1437
1438#endif // FST_LIB_COMPACT_FST_H__
1439