1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// compact-fst.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen)
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// FST Class for memory-efficient representation of common types of
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// FSTs: linear automata, acceptors, unweighted FSTs, ...
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_COMPACT_FST_H__
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_COMPACT_FST_H__
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iterator>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair;
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/expanded-fst.h>
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst-decl.h>  // For optional argument declarations
355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <fst/mapped-file.h>
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/matcher.h>
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h>
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h>
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct CompactFstOptions : public CacheOptions {
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // CompactFst default caching behaviour is to do no caching. Most
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // compactors are cheap and therefore we save memory by not doing
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // caching.
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstOptions() : CacheOptions(true, 0) {}
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Compactor Interface - class determinies how arcs and final weights
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are compacted and expanded.
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Final weights are treated as transitions to the superfinal state,
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// There are two types of compactors:
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// * Fixed out-degree compactors: 'compactor.Size()' returns a
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// positive integer 's'. An FST can be compacted by this compactor
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// only if each state has exactly 's' outgoing transitions (counting a
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// non-Zero() final weight as a transition). A typical example is a
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// compactor for string FSTs, i.e. 's == 1'.
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// * Variable out-degree compactors: 'compactor.Size() == -1'. There
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// are no out-degree restrictions for these compactors.
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// class Compactor {
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//  public:
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Element is the type of the compacted transitions.
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   typedef ... Element;
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Return the compacted representation of a transition 'arc'
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // at a state 's'.
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Element Compact(StateId s, const Arc &arc);
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Return the transition at state 's' represented by the compacted
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // transition 'e'.
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Arc Expand(StateId s, const Element &e);
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Return -1 for variable out-degree compactors, and the mandatory
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // out-degree otherwise.
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   ssize_t Size();
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Test whether 'fst' can be compacted by this compactor.
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   bool Compatible(const Fst<A> &fst);
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Return the properties that are always true for an fst
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // compacted using this compactor
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   uint64 Properties();
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Return a string identifying the type of compactor.
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   static const string &Type();
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Write a compactor to a file.
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   bool Write(ostream &strm);
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Read a compactor from a file.
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   static Compactor *Read(istream &strm);
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Default constructor (optional, see comment below).
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Compactor();
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// };
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The default constructor is only required for FST_REGISTER to work
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (i.e. enabling Convert() and the command-line utilities to work
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// with this new compactor). However, a default constructor always
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// needs to be specify for this code to compile, but one can have it
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// simply raised an error when called:
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Compactor::Compactor() {
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   FSTERROR() << "Compactor: no default constructor";
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// }
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementation data for Compact Fst, which can shared between otherwise
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// independent copies.
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The implementation contains two arrays: 'states_' and 'compacts_'.
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For fixed out-degree compactors, the 'states_' array is unallocated.
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The 'compacts_' contains the compacted transitions. Its size is
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ncompacts_'. The outgoing transitions at a given state are stored
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// consecutively. For a given state 's', its 'compactor.Size()' outgoing
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transitions (including superfinal transition when 's' is final), are
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For variable out-degree compactors, the states_ array has size
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For a given state 's', the compacted transitions of 's' are
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// By convention, 'states_[nstates_] == ncompacts_'.
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In both cases, the superfinal transitons (when 's' is final, i.e.
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'Final(s) != Weight::Zero()') is stored first.
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The unsigned type U is used to represent indices into the compacts_
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// array.
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class E, class U>
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass CompactFstData {
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  public:
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef E CompactElement;
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef U Unsigned;
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstData()
1385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : states_region_(0),
1395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        compacts_region_(0),
1405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        states_(0),
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        compacts_(0),
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nstates_(0),
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ncompacts_(0),
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        narcs_(0),
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        start_(kNoStateId),
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        error_(false) {}
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class A, class Compactor>
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstData(const Fst<A> &fst, const Compactor &compactor);
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator, class Compactor>
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstData(const Iterator &begin, const Iterator &end,
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 const Compactor &compactor);
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~CompactFstData() {
1565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (states_region_ == NULL) {
1575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      delete [] states_;
1585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
1595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    delete states_region_;
1605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (compacts_region_ == NULL) {
1615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      delete [] compacts_;
1625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
1635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    delete compacts_region_;
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Compactor>
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static CompactFstData<E, U> *Read(istream &strm,
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                       const FstReadOptions &opts,
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                       const FstHeader &hdr,
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                       const Compactor &compactor);
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Write(ostream &strm, const FstWriteOptions &opts) const;
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Unsigned States(ssize_t i) const { return states_[i]; }
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumStates() const { return nstates_; }
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumCompacts() const { return ncompacts_; }
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumArcs() const { return narcs_; }
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t Start() const { return start_; }
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int RefCount() const { return ref_count_.count(); }
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int IncrRefCount() { return ref_count_.Incr(); }
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int DecrRefCount() { return ref_count_.Decr(); }
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Error() const { return error_; }
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
187dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin private:
1885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  MappedFile *states_region_;
1895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  MappedFile *compacts_region_;
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Unsigned *states_;
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactElement *compacts_;
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t nstates_;
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t ncompacts_;
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t narcs_;
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ssize_t start_;
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  RefCounter ref_count_;
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool error_;
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class E, class U>
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C>
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonCompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
2035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    : states_region_(0),
2045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      compacts_region_(0),
2055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      states_(0),
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      compacts_(0),
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      nstates_(0),
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ncompacts_(0),
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      narcs_(0),
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      start_(kNoStateId),
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_(false) {
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  start_ = fst.Start();
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Count # of states and arcs.
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId nfinals = 0;
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (StateIterator< Fst<A> > siter(fst);
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       !siter.Done();
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       siter.Next()) {
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ++nstates_;
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s = siter.Value();
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ArcIterator< Fst<A> > aiter(fst, s);
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !aiter.Done();
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         aiter.Next())
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++narcs_;
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst.Final(s) != Weight::Zero()) ++nfinals;
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (compactor.Size() == -1) {
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    states_ = new Unsigned[nstates_ + 1];
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ncompacts_ = narcs_ + nfinals;
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    compacts_ = new CompactElement[ncompacts_];
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    states_[nstates_] = ncompacts_;
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    states_ = 0;
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ncompacts_ = nstates_ * compactor.Size();
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((narcs_ + nfinals) != ncompacts_) {
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "CompactFstData: compactor incompatible with fst";
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    compacts_ = new CompactElement[ncompacts_];
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t pos = 0, fpos = 0;
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (StateId s = 0; s < nstates_; ++s) {
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fpos = pos;
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (compactor.Size() == -1)
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      states_[s] = pos;
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fst.Final(s) != Weight::Zero())
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                                fst.Final(s), kNoStateId));
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ArcIterator< Fst<A> > aiter(fst, s);
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !aiter.Done();
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         aiter.Next()) {
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      compacts_[pos++] = compactor.Compact(s, aiter.Value());
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "CompactFstData: compactor incompatible with fst";
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (pos != ncompacts_) {
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "CompactFstData: compactor incompatible with fst";
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    error_ = true;
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class E, class U>
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Iterator, class C>
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonCompactFstData<E, U>::CompactFstData(const Iterator &begin,
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                     const Iterator &end,
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                     const C &compactor)
2745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    : states_region_(0),
2755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      compacts_region_(0),
2765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      states_(0),
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      compacts_(0),
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      nstates_(0),
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ncompacts_(0),
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      narcs_(0),
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      start_(kNoStateId),
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_(false) {
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename C::Arc Arc;
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (compactor.Size() != -1) {
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ncompacts_ = distance(begin, end);
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (compactor.Size() == 1) {
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // For strings, allow implicit final weight.
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      // Empty input is the empty string.
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (ncompacts_ == 0) {
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++ncompacts_;
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Arc arc = compactor.Expand(ncompacts_ - 1,
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                      *(begin + (ncompacts_ - 1)));
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (arc.ilabel != kNoLabel)
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ++ncompacts_;
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ncompacts_ % compactor.Size()) {
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "CompactFstData: size of input container incompatible"
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << " with compactor";
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ncompacts_ == 0)
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    start_ = 0;
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nstates_ = ncompacts_ / compactor.Size();
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    compacts_ = new CompactElement[ncompacts_];
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t i = 0;
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Iterator it = begin;
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for(; it != end; ++it, ++i){
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      compacts_[i] = *it;
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (compactor.Expand(i, *it).ilabel != kNoLabel)
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++narcs_;
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (i < ncompacts_)
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                              Weight::One(), kNoStateId));
320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (distance(begin, end) == 0)
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Count # of states, arcs and compacts.
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Iterator it = begin;
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for(size_t i = 0; it != end; ++it, ++i) {
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Arc arc = compactor.Expand(i, *it);
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (arc.ilabel != kNoLabel) {
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++narcs_;
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++ncompacts_;
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ++nstates_;
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (arc.weight != Weight::Zero())
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ++ncompacts_;
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    start_ = 0;
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    compacts_ = new CompactElement[ncompacts_];
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    states_ = new Unsigned[nstates_ + 1];
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    states_[nstates_] = ncompacts_;
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t i = 0, s = 0;
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for(it = begin; it != end; ++it) {
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Arc arc = compactor.Expand(i, *it);
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (arc.ilabel != kNoLabel) {
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        compacts_[i++] = *it;
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        states_[s++] = i;
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (arc.weight != Weight::Zero())
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          compacts_[i++] = *it;
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((s != nstates_) || (i != ncompacts_)) {
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "CompactFstData: ill-formed input container";
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class E, class U>
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class C>
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonCompactFstData<E, U> *CompactFstData<E, U>::Read(
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    istream &strm,
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const FstReadOptions &opts,
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const FstHeader &hdr,
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const C &compactor) {
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstData<E, U> *data = new CompactFstData<E, U>();
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  data->start_ = hdr.Start();
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  data->nstates_ = hdr.NumStates();
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  data->narcs_ = hdr.NumArcs();
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (compactor.Size() == -1) {
3725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete data;
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
3785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    data->states_region_ = MappedFile::Map(&strm, opts, b);
3795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (!strm || data->states_region_ == NULL) {
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete data;
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
3845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    data->states_ = static_cast<Unsigned *>(
3855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        data->states_region_->mutable_data());
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data->states_ = 0;
388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  data->ncompacts_ = compactor.Size() == -1
390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ? data->states_[data->nstates_]
391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : data->nstates_ * compactor.Size();
3925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete data;
395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 0;
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
3975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  size_t b = data->ncompacts_ * sizeof(CompactElement);
3985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  data->compacts_region_ = MappedFile::Map(&strm, opts, b);
3995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (!strm || data->compacts_region_ == NULL) {
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete data;
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 0;
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
4045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  data->compacts_ = static_cast<CompactElement *>(
4055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      data->compacts_region_->mutable_data());
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return data;
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class E, class U>
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool CompactFstData<E, U>::Write(ostream &strm,
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                 const FstWriteOptions &opts) const {
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (states_) {
4135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (opts.align && !AlignOutput(strm)) {
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    strm.write(reinterpret_cast<char *>(states_),
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               (nstates_ + 1) * sizeof(Unsigned));
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
4205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (opts.align && !AlignOutput(strm)) {
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  strm.write(reinterpret_cast<char *>(compacts_),
425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             ncompacts_ * sizeof(CompactElement));
426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  strm.flush();
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!strm) {
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return true;
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C, class U> class CompactFst;
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class F, class G> void Cast(const F &, G *);
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementation class for CompactFst, which contains CompactFstData
439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// and Fst cache.
440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C, class U>
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass CompactFstImpl : public CacheImpl<A> {
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetType;
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetProperties;
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::Properties;
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetInputSymbols;
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::SetOutputSymbols;
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using FstImpl<A>::WriteHeader;
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::PushArc;
451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::HasArcs;
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::HasFinal;
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::HasStart;
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::SetArcs;
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::SetFinal;
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  using CacheImpl<A>::SetStart;
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef C Compactor;
462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename C::Element CompactElement;
463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef U Unsigned;
464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstImpl()
466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      :  CacheImpl<A>(CompactFstOptions()),
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         compactor_(0),
468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         own_compactor_(false),
469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         data_(0) {
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    string type = "compact";
471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (sizeof(U) != sizeof(uint32)) {
472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string size;
473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Int64ToStr(8 * sizeof(U), &size);
474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      type += size;
475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type += "_";
477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type += C::Type();
478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType(type);
479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(kNullProperties | kStaticProperties);
480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 const CompactFstOptions &opts)
484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(opts),
485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        compactor_(new C(compactor)),
486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        own_compactor_(true),
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_(0) {
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Init(fst);
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstImpl(const Fst<Arc> &fst, C *compactor,
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 const CompactFstOptions &opts)
493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(opts),
494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        compactor_(compactor),
495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        own_compactor_(false),
496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_(0) {
497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Init(fst);
498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 const CompactFstOptions &opts)
503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(opts),
504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        compactor_(new C(compactor)),
505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        own_compactor_(true),
506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_(0) {
507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Init(b, e);
508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 const CompactFstOptions &opts)
513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(opts),
514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        compactor_(compactor),
515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        own_compactor_(false),
516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_(0) {
517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Init(b, e);
518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(impl),
522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        compactor_(new C(*impl.compactor_)),
523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        own_compactor_(true),
524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_(impl.data_) {
525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_)
526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_->IncrRefCount();
527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType(impl.Type());
528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(impl.Properties());
529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetInputSymbols(impl.InputSymbols());
530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetOutputSymbols(impl.OutputSymbols());
531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~CompactFstImpl(){
534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (own_compactor_)
535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete compactor_;
536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_ && !data_->DecrRefCount())
537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete data_;
538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId Start() {
541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasStart()) {
542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetStart(data_->Start());
543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CacheImpl<A>::Start();
545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight Final(StateId s) {
548dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (HasFinal(s))
549dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return CacheImpl<A>::Final(s);
550dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
551dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if ((compactor_->Size() != -1) ||
552dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        (data_->States(s) != data_->States(s + 1)))
553dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      arc = ComputeArc(s,
554dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                       compactor_->Size() == -1
555dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                       ? data_->States(s)
556dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                       : s * compactor_->Size());
557dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero();
558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId NumStates() const {
561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (Properties(kError)) return 0;
562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return data_->NumStates();
563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumArcs(StateId s) {
566f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (HasArcs(s))
567f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumArcs(s);
568f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Unsigned i, num_arcs;
569f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (compactor_->Size() == -1) {
570f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      i = data_->States(s);
571f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      num_arcs = data_->States(s + 1) - i;
572f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
573f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      i =  s * compactor_->Size();
574f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      num_arcs = compactor_->Size();
575f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
576f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (num_arcs > 0) {
577f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const A &arc = ComputeArc(s, i, kArcILabelValue);
578f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (arc.ilabel == kNoStateId) {
579f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        --num_arcs;
580f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
581f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
582f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return num_arcs;
583f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
584f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
585f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumInputEpsilons(StateId s) {
586f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s) && !Properties(kILabelSorted))
587f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
588f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (HasArcs(s))
589f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumInputEpsilons(s);
590f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CountEpsilons(s, false);
591f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
592f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
593f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t NumOutputEpsilons(StateId s) {
594f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s) && !Properties(kOLabelSorted))
595f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
596f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (HasArcs(s))
597f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return CacheImpl<A>::NumOutputEpsilons(s);
598f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return CountEpsilons(s, true);
599f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
600f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
601f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t CountEpsilons(StateId s, bool output_epsilons) {
602f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t begin =  compactor_->Size() == -1 ?
603f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_->States(s) : s * compactor_->Size();
604f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t end = compactor_->Size() == -1 ?
605f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_->States(s + 1) : (s + 1) * compactor_->Size();
606f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t num_eps = 0;
607f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = begin; i < end; ++i) {
608f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const A &arc = ComputeArc(
609f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
610f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const typename A::Label &label =
611f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          (output_epsilons ? arc.olabel : arc.ilabel);
612f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (label == kNoLabel)
613f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        continue;
614f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      else if (label > 0)
615f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        break;
616f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++num_eps;
617f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
618f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return num_eps;
619f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
620f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
621f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static CompactFstImpl<A, C, U> *Read(istream &strm,
622f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                       const FstReadOptions &opts) {
623f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
624f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FstHeader hdr;
625f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
626f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete impl;
627f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
628f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
629f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
630f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Ensures compatibility
631f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (hdr.Version() == kAlignedFileVersion)
632f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
633f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
634f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    impl->compactor_ = C::Read(strm);
635f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!impl->compactor_) {
636f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete impl;
637f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
638f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
639f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    impl->own_compactor_ = true;
640f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
641f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                                          *impl->compactor_);
642f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!impl->data_) {
643f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete impl;
644f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return 0;
645f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
646f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return impl;
647f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
648f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
649f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Write(ostream &strm, const FstWriteOptions &opts) const {
650f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FstHeader hdr;
651f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    hdr.SetStart(data_->Start());
652f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    hdr.SetNumStates(data_->NumStates());
653f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    hdr.SetNumArcs(data_->NumArcs());
654f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
655f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Ensures compatibility
656f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
657f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteHeader(strm, opts, file_version, &hdr);
658f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    compactor_->Write(strm);
659f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return data_->Write(strm, opts);
660f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
661f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
662f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Provide information needed for generic state iterator
663f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitStateIterator(StateIteratorData<A> *data) const {
664f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data->base = 0;
665f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data->nstates = data_->NumStates();
666f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
667f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
668f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
669f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!HasArcs(s))
670f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Expand(s);
671f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    CacheImpl<A>::InitArcIterator(s, data);
672f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
673f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
674f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
675f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return compactor_->Expand(s, data_->Compacts(i), f);
676f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
677f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
678f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Expand(StateId s) {
679f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t begin =  compactor_->Size() == -1 ?
680f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_->States(s) : s * compactor_->Size();
681f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t end = compactor_->Size() == -1 ?
682f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_->States(s + 1) : (s + 1) * compactor_->Size();
683f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = begin; i < end; ++i) {
684f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Arc &arc = ComputeArc(s, i);
685dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (arc.ilabel == kNoLabel)
686dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        SetFinal(s, arc.weight);
687dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      else
688dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        PushArc(s, arc);
689f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
690dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (!HasFinal(s))
691dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      SetFinal(s, Weight::Zero());
692f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetArcs(s);
693f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
694f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
695f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
696f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetCompactElements(const Iterator &b, const Iterator &e) {
697f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_ && !data_->DecrRefCount())
698f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete data_;
699f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
700f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
701f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
702f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  C *GetCompactor() const { return compactor_; }
703f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstData<CompactElement, U> *Data() const { return data_; }
704f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
705dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Properties always true of this Fst class
706dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static const uint64 kStaticProperties = kExpanded;
707dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
708f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson protected:
709f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class B, class D>
710f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
711f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
712f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        compactor_(new C(*impl.GetCompactor())),
713f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        own_compactor_(true),
714f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        data_(impl.Data()) {
715f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_)
716f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      data_->IncrRefCount();
717f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType(impl.Type());
718f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(impl.Properties());
719f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetInputSymbols(impl.InputSymbols());
720f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetOutputSymbols(impl.OutputSymbols());
721f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
722f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
723f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
724dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  friend class CompactFst<A, C, U>;  // allow access during write.
725dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
726f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Init(const Fst<Arc> &fst) {
727f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    string type = "compact";
728f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (sizeof(U) != sizeof(uint32)) {
729f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string size;
730f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Int64ToStr(8 * sizeof(U), &size);
731f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      type += size;
732f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
733f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type += "_";
734f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type += compactor_->Type();
735f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType(type);
736f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetInputSymbols(fst.InputSymbols());
737f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetOutputSymbols(fst.OutputSymbols());
738f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
739f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_->Error())
740f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kError, kError);
741f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    uint64 copy_properties = fst.Properties(kCopyProperties, true);
742f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
743f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
744f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kError, kError);
745f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
746f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
747f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(copy_properties | kStaticProperties);
748f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
749f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
750f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
751f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Init(const Iterator &b, const Iterator &e) {
752f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    string type = "compact";
753f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (sizeof(U) != sizeof(uint32)) {
754f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string size;
755f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Int64ToStr(8 * sizeof(U), &size);
756f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      type += size;
757f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
758f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type += "_";
759f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    type += compactor_->Type();
760f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetType(type);
761f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SetProperties(kStaticProperties | compactor_->Properties());
762f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
763f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (data_->Error())
764f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SetProperties(kError, kError);
765f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
766f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
767f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Current unaligned file format version
768f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const int kFileVersion = 2;
769f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Current aligned file format version
770f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const int kAlignedFileVersion = 1;
771f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Minimum file format version supported
772f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const int kMinFileVersion = 1;
773f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
774f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  C *compactor_;
775f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool own_compactor_;
776f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFstData<CompactElement, U> *data_;
777f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
778f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
779f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C, class U>
780f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst uint64 CompactFstImpl<A, C, U>::kStaticProperties;
781f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C, class U>
782f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst int CompactFstImpl<A, C, U>::kFileVersion;
783f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C, class U>
784f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst int CompactFstImpl<A, C, U>::kAlignedFileVersion;
785f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C, class U>
786f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst int CompactFstImpl<A, C, U>::kMinFileVersion;
787f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
788f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
789f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// CompactFst.  This class attaches interface to implementation and
790f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// handles reference counting, delegating most methods to
791f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ImplToExpandedFst. The unsigned type U is used to represent indices
792f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// into the compact arc array (uint32 by default, declared in
793f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// fst-decl.h).
794f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C, class U>
795f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
796f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
797f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class StateIterator< CompactFst<A, C, U> >;
798f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class ArcIterator< CompactFst<A, C, U> >;
799f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class F, class G> void friend Cast(const F &, G *);
800f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
801f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
802f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
803f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CompactFstImpl<A, C, U> Impl;
804f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef CacheState<A> State;
805f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef U Unsigned;
806f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
807f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
808f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
809f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
810f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      const CompactFstOptions &opts = CompactFstOptions())
811f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
812f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
813f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFst(const Fst<A> &fst, C *compactor,
814f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const CompactFstOptions &opts = CompactFstOptions())
815f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
816f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
817f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // The following 2 constructors take as input two iterators delimiting
818f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // a set of (already) compacted transitions, starting with the
819f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // transitions out of the initial state. The format of the input
820f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // differs for fixed out-degree and variable out-degree compactors.
821f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //
822f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // - For fixed out-degree compactors, the final weight (encoded as a
823f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // compacted transition) needs to be given only for final
824f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // states. All strings (compactor of size 1) will be assume to be
825f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // terminated by a final state even when the final state is not
826f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // implicitely given.
827f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //
828f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // - For variable out-degree compactors, the final weight (encoded
829f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // as a compacted transition) needs to be given for all states and
830f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // must appeared first in the list (for state s, final weight of s,
831f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // followed by outgoing transitons in s).
832f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  //
833f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // These 2 constructors allows the direct construction of a CompactFst
834f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // without first creating a more memory hungry 'regular' FST. This
835f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // is useful when memory usage is severely constrained.
836f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
837f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit CompactFst(const Iterator &begin, const Iterator &end,
838f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      const C &compactor = C(),
839f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      const CompactFstOptions &opts = CompactFstOptions())
840f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
841f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
842f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
843f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFst(const Iterator &begin, const Iterator &end,
844f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             C *compactor, const CompactFstOptions &opts = CompactFstOptions())
845f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
846f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
847f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // See Fst<>::Copy() for doc.
848f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
849f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ImplToExpandedFst<Impl>(fst, safe) {}
850f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
851f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
852f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
853f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new CompactFst<A, C, U>(*this, safe);
854f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
855f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
856f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Read a CompactFst from an input stream; return NULL on error
857f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
858f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Impl* impl = Impl::Read(strm, opts);
859f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return impl ? new CompactFst<A, C, U>(impl) : 0;
860f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
861f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
862f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Read a CompactFst from a file; return NULL on error
863f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Empty filename reads from standard input
864f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static CompactFst<A, C, U> *Read(const string &filename) {
865f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
866f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return impl ? new CompactFst<A, C, U>(impl) : 0;
867f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
868f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
869f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
870f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return GetImpl()->Write(strm, opts);
871f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
872f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
873f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual bool Write(const string &filename) const {
874f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return Fst<A>::WriteFile(filename);
875f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
876f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
877dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  template <class F>
878dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static bool WriteFst(const F &fst, const C &compactor, ostream &strm,
879dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                       const FstWriteOptions &opts);
880dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
881f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void InitStateIterator(StateIteratorData<A> *data) const {
882f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GetImpl()->InitStateIterator(data);
883f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
884f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
885f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
886f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GetImpl()->InitArcIterator(s, data);
887f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
888f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
889f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
890f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
891f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
892f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
893f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Iterator>
894f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetCompactElements(const Iterator &b, const Iterator &e) {
895f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GetImpl()->SetCompactElements(b, e);
896f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
897f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
898f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
899f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
900f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
901f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Makes visible to friends.
902f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
903f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
904f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void SetImpl(Impl *impl, bool own_impl = false) {
905f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
906f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
907f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
9085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Use overloading to extract the type of the argument.
9095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) {
9105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return compact_fst.GetImpl();
9115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
9125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
9135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // This does not give privileged treatment to subclasses of CompactFst.
9145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  template<typename NonCompactFst>
9155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  static Impl* GetImplIfCompactFst(const NonCompactFst& fst) {
9165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return NULL;
9175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
9185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
919f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void operator=(const CompactFst<A, C, U> &fst);  // disallow
920f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
921f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
922dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Writes Fst in Compact format, potentially with a pass over the machine
923dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// before writing to compute the number of states and arcs.
924dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//
925dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class A, class C, class U>
926dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class F>
927dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinbool CompactFst<A, C, U>::WriteFst(const F &fst,
928dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                                   const C &compactor,
929dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                                   ostream &strm,
930dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                                   const FstWriteOptions &opts) {
931dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef U Unsigned;
932dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename C::Element CompactElement;
933dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Weight Weight;
934dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  int file_version = opts.align ?
935dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      CompactFstImpl<A, C, U>::kAlignedFileVersion :
936dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      CompactFstImpl<A, C, U>::kFileVersion;
937dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t num_arcs = -1, num_states = -1, num_compacts = -1;
938dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  C first_pass_compactor = compactor;
9395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (Impl* impl = GetImplIfCompactFst(fst)) {
9405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    num_arcs = impl->Data()->NumArcs();
9415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    num_states = impl->Data()->NumStates();
9425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    num_compacts = impl->Data()->NumCompacts();
9435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    first_pass_compactor = *impl->GetCompactor();
944dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  } else {
945dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // A first pass is needed to compute the state of the compactor, which
946dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // is saved ahead of the rest of the data structures.  This unfortunately
947dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // means forcing a complete double compaction when writing in this format.
948dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // TODO(allauzen): eliminate mutable state from compactors.
949dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    num_arcs = 0;
950dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    num_states = 0;
951dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
952dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const StateId s = siter.Value();
953dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      ++num_states;
954dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (fst.Final(s) != Weight::Zero()) {
955dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        first_pass_compactor.Compact(
956dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
957dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
958dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
959dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        ++num_arcs;
960dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        first_pass_compactor.Compact(s, aiter.Value());
961dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
962dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
963dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
964dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  FstHeader hdr;
965dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  hdr.SetStart(fst.Start());
966dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  hdr.SetNumStates(num_states);
967dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  hdr.SetNumArcs(num_arcs);
968dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  string type = "compact";
969dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (sizeof(U) != sizeof(uint32)) {
970dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    string size;
971dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    Int64ToStr(8 * sizeof(U), &size);
972dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    type += size;
973dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
974dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  type += "_";
975dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  type += C::Type();
976dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64 copy_properties = fst.Properties(kCopyProperties, true);
977dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if ((copy_properties & kError) || !compactor.Compatible(fst)) {
978dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    LOG(ERROR) << "fst incompatible with compactor";
979dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return false;
980dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
981dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64 properties = copy_properties |
982dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      CompactFstImpl<A, C, U>::kStaticProperties;
983dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
984dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                             &hdr);
985dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  first_pass_compactor.Write(strm);
986dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (first_pass_compactor.Size() == -1)  {
9875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (opts.align && !AlignOutput(strm)) {
988dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
989dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return false;
990dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
991dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    Unsigned compacts = 0;
992dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
993dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const StateId s = siter.Value();
994dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
995dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (fst.Final(s) != Weight::Zero()) {
996dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        ++compacts;
997dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
998dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      compacts += fst.NumArcs(s);
999dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
1000dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
1001dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
10025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (opts.align && !AlignOutput(strm)) {
1003dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    LOG(ERROR) << "Could not align file during write after writing states";
1004dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
1005dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  C second_pass_compactor = compactor;
1006dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  CompactElement element;
1007dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
1008dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    const StateId s = siter.Value();
1009dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (fst.Final(s) != Weight::Zero()) {
1010dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      element = second_pass_compactor.Compact(
1011dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
1012dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1013dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
1014dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
1015dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      element = second_pass_compactor.Compact(s, aiter.Value());
1016dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1017dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
1018dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
1019dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  strm.flush();
1020dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (!strm) {
1021dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    LOG(ERROR) << "CompactFst write failed: " << opts.source;
1022dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return false;
1023dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
1024dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  return true;
1025dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin}
1026dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
1027f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1028f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for CompactFst; see generic version in fst.h
1029f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// for sample usage (but use the CompactFst type!). This version
1030f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// should inline.
1031f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A, class C, class U>
1032f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< CompactFst<A, C, U> > {
1033f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
1034f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
1035f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1036f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit StateIterator(const CompactFst<A, C, U> &fst)
1037f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
1038f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1039f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Done() const { return s_ >= nstates_; }
1040