1dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
2dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Licensed under the Apache License, Version 2.0 (the "License");
3dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// you may not use this file except in compliance with the License.
4dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// You may obtain a copy of the License at
5dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//
6dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//     http://www.apache.org/licenses/LICENSE-2.0
7dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//
8dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Unless required by applicable law or agreed to in writing, software
9dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// distributed under the License is distributed on an "AS IS" BASIS,
10dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// See the License for the specific language governing permissions and
12dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// limitations under the License.
13dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//
14dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Copyright 2005-2010 Google, Inc.
15dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Author: sorenj@google.com (Jeffrey Sorensen)
16dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//
17dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#ifndef FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
18dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#define FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
19dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
20dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <stddef.h>
21dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <string.h>
22dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <algorithm>
23dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <string>
24dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <vector>
25dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinusing std::vector;
26dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
27dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <fst/compat.h>
28dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <fst/fstlib.h>
295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <fst/mapped-file.h>
30dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#include <fst/extensions/ngram/bitmap-index.h>
31dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
32dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// NgramFst implements a n-gram language model based upon the LOUDS data
33dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// structure.  Please refer to "Unary Data Strucutres for Language Models"
34dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// http://research.google.com/pubs/archive/37218.pdf
35dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
36dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinnamespace fst {
37dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class A> class NGramFst;
38dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class A> class NGramFstMatcher;
39dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
40dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Instance data containing mutable state for bookkeeping repeated access to
41dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// the same state.
42dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class A>
43dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinstruct NGramFstInst {
44dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Label Label;
45dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::StateId StateId;
46dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Weight Weight;
47dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId state_;
48dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t num_futures_;
49dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t offset_;
50dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t node_;
51dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId node_state_;
52dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  vector<Label> context_;
53dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId context_state_;
54dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFstInst()
55dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      : state_(kNoStateId), node_state_(kNoStateId),
56dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        context_state_(kNoStateId) { }
57dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin};
58dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
59dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Implementation class for LOUDS based NgramFst interface
60dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class A>
61dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinclass NGramFstImpl : public FstImpl<A> {
62dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  using FstImpl<A>::SetInputSymbols;
63dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  using FstImpl<A>::SetOutputSymbols;
64dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  using FstImpl<A>::SetType;
65dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  using FstImpl<A>::WriteHeader;
66dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
67dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  friend class ArcIterator<NGramFst<A> >;
68dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  friend class NGramFstMatcher<A>;
69dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
70dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin public:
71dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  using FstImpl<A>::InputSymbols;
72dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  using FstImpl<A>::SetProperties;
73dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  using FstImpl<A>::Properties;
74dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
75dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef A Arc;
76dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Label Label;
77dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::StateId StateId;
78dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Weight Weight;
79dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  NGramFstImpl() : data_region_(0), data_(0), owned_(false) {
81dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetType("ngram");
82dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetInputSymbols(NULL);
83dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetOutputSymbols(NULL);
84dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetProperties(kStaticProperties);
85dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
86dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
87dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFstImpl(const Fst<A> &fst, vector<StateId>* order_out);
88dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
89dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  ~NGramFstImpl() {
90dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (owned_) {
91dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      delete [] data_;
92dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    delete data_region_;
94dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
95dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
96dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static NGramFstImpl<A>* Read(istream &strm,  // NOLINT
97dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                               const FstReadOptions &opts) {
98dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    NGramFstImpl<A>* impl = new NGramFstImpl();
99dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    FstHeader hdr;
100dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return 0;
101dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    uint64 num_states, num_futures, num_final;
102dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    const size_t offset = sizeof(num_states) + sizeof(num_futures) +
103dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        sizeof(num_final);
104dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // Peek at num_states and num_futures to see how much more needs to be read.
105dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    strm.read(reinterpret_cast<char *>(&num_states), sizeof(num_states));
106dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    strm.read(reinterpret_cast<char *>(&num_futures), sizeof(num_futures));
107dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    strm.read(reinterpret_cast<char *>(&num_final), sizeof(num_final));
108dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    size_t size = Storage(num_states, num_futures, num_final);
1095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    MappedFile *data_region = MappedFile::Allocate(size);
1105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    char *data = reinterpret_cast<char *>(data_region->mutable_data());
111dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // Copy num_states, num_futures and num_final back into data.
112dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    memcpy(data, reinterpret_cast<char *>(&num_states), sizeof(num_states));
113dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    memcpy(data + sizeof(num_states), reinterpret_cast<char *>(&num_futures),
114dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin           sizeof(num_futures));
115dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    memcpy(data + sizeof(num_states) + sizeof(num_futures),
116dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin           reinterpret_cast<char *>(&num_final), sizeof(num_final));
117dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    strm.read(data + offset, size - offset);
118dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (!strm) {
119dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      delete impl;
120dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return NULL;
121dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
1225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    impl->Init(data, false, data_region);
123dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return impl;
124dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
125dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
126dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool Write(ostream &strm,   // NOLINT
127dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin             const FstWriteOptions &opts) const {
128dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    FstHeader hdr;
129dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    hdr.SetStart(Start());
130dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    hdr.SetNumStates(num_states_);
131dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    WriteHeader(strm, opts, kFileVersion, &hdr);
1325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    strm.write(data_, StorageSize());
133dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return strm;
134dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
135dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
136dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId Start() const {
137dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return 1;
138dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
139dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
140dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Weight Final(StateId state) const {
141dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (final_index_.Get(state)) {
142dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return final_probs_[final_index_.Rank1(state)];
143dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    } else {
144dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return Weight::Zero();
145dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
146dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
147dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
148dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t NumArcs(StateId state, NGramFstInst<A> *inst = NULL) const {
149dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (inst == NULL) {
150dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const size_t next_zero = future_index_.Select0(state + 1);
151dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const size_t this_zero = future_index_.Select0(state);
152dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return next_zero - this_zero - 1;
153dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
154dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetInstFuture(state, inst);
155dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return inst->num_futures_ + ((state == 0) ? 0 : 1);
156dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
157dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
158dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t NumInputEpsilons(StateId state) const {
159dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // State 0 has no parent, thus no backoff.
160dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (state == 0) return 0;
161dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return 1;
162dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
163dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
164dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t NumOutputEpsilons(StateId state) const {
165dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return NumInputEpsilons(state);
166dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
167dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
168dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId NumStates() const {
169dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return num_states_;
170dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
171dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
172dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void InitStateIterator(StateIteratorData<A>* data) const {
173dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    data->base = 0;
174dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    data->nstates = num_states_;
175dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
176dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
177dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static size_t Storage(uint64 num_states, uint64 num_futures,
178dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                        uint64 num_final) {
179dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    uint64 b64;
180dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    Weight weight;
181dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    Label label;
182dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    size_t offset = sizeof(num_states) + sizeof(num_futures) +
183dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        sizeof(num_final);
184dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    offset += sizeof(b64) * (
185dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        BitmapIndex::StorageSize(num_states * 2 + 1) +
186dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        BitmapIndex::StorageSize(num_futures + num_states + 1) +
187dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        BitmapIndex::StorageSize(num_states));
188dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    offset += (num_states + 1) * sizeof(label) + num_futures * sizeof(label);
189dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // Pad for alignemnt, see
190dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    // http://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
191dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1);
192dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    offset += (num_states + 1) * sizeof(weight) + num_final * sizeof(weight) +
193dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        (num_futures + 1) * sizeof(weight);
194dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return offset;
195dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
196dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
197dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void SetInstFuture(StateId state, NGramFstInst<A> *inst) const {
198dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (inst->state_ != state) {
199dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      inst->state_ = state;
200dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const size_t next_zero = future_index_.Select0(state + 1);
201dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const size_t this_zero = future_index_.Select0(state);
202dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      inst->num_futures_ = next_zero - this_zero - 1;
203dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      inst->offset_ = future_index_.Rank1(future_index_.Select0(state) + 1);
204dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
205dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
206dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
207dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void SetInstNode(NGramFstInst<A> *inst) const {
208dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (inst->node_state_ != inst->state_) {
209dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      inst->node_state_ = inst->state_;
210dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      inst->node_ = context_index_.Select1(inst->state_);
211dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
212dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
213dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
214dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void SetInstContext(NGramFstInst<A> *inst) const {
215dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetInstNode(inst);
216dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (inst->context_state_ != inst->state_) {
217dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      inst->context_state_ = inst->state_;
218dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      inst->context_.clear();
219dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      size_t node = inst->node_;
220dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      while (node != 0) {
221dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        inst->context_.push_back(context_words_[context_index_.Rank1(node)]);
222dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        node = context_index_.Select1(context_index_.Rank0(node) - 1);
223dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
224dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
225dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
226dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
227dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Access to the underlying representation
228dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const char* GetData(size_t* data_size) const {
2295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    *data_size = StorageSize();
230dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return data_;
231dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
232dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
2335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void Init(const char* data, bool owned, MappedFile *file = 0);
2345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const vector<Label> &GetContext(StateId s, NGramFstInst<A> *inst) const {
2365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    SetInstFuture(s, inst);
2375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    SetInstContext(inst);
2385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return inst->context_;
2395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  size_t StorageSize() const {
2425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return Storage(num_states_, num_futures_, num_final_);
2435b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void GetStates(const vector<Label>& context, vector<StateId> *states) const;
246dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
247dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin private:
248dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId Transition(const vector<Label> &context, Label future) const;
249dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
250dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Properties always true for this Fst class.
251dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static const uint64 kStaticProperties = kAcceptor | kIDeterministic |
252dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      kODeterministic | kEpsilons | kIEpsilons | kOEpsilons | kILabelSorted |
253dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      kOLabelSorted | kWeighted | kCyclic | kInitialAcyclic | kNotTopSorted |
254dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      kAccessible | kCoAccessible | kNotString | kExpanded;
255dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Current file format version.
256dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static const int kFileVersion = 4;
257dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Minimum file format version supported.
258dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static const int kMinFileVersion = 4;
259dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
2605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  MappedFile *data_region_;
261dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const char* data_;
262dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool owned_;  // True if we own data_
263dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64 num_states_, num_futures_, num_final_;
264dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t root_num_children_;
265dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const Label *root_children_;
266dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t root_first_child_;
267dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // borrowed references
268dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const uint64 *context_, *future_, *final_;
269dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const Label *context_words_, *future_words_;
270dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const Weight *backoff_, *final_probs_, *future_probs_;
271dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  BitmapIndex context_index_;
272dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  BitmapIndex future_index_;
273dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  BitmapIndex final_index_;
274dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
275dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void operator=(const NGramFstImpl<A> &);  // Disallow
276dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin};
277dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
278dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate<typename A>
279dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander GutkinNGramFstImpl<A>::NGramFstImpl(const Fst<A> &fst, vector<StateId>* order_out)
2805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    : data_region_(0), data_(0), owned_(false) {
281dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef A Arc;
282dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename Arc::Label Label;
283dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename Arc::Weight Weight;
284dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename Arc::StateId StateId;
285dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SetType("ngram");
286dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SetInputSymbols(fst.InputSymbols());
287dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SetOutputSymbols(fst.OutputSymbols());
288dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  SetProperties(kStaticProperties);
289dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
290dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Check basic requirements for an OpenGRM language model Fst.
291dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  int64 props = kAcceptor | kIDeterministic | kIEpsilons | kILabelSorted;
292dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (fst.Properties(props, true) != props) {
293dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    FSTERROR() << "NGramFst only accepts OpenGRM langauge models as input";
294dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetProperties(kError, kError);
295dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return;
296dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
297dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
298dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  int64 num_states = CountStates(fst);
299dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Label* context = new Label[num_states];
300dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
301dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Find the unigram state by starting from the start state, following
302dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // epsilons.
303dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId unigram = fst.Start();
304dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  while (1) {
3055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (unigram == kNoStateId) {
3065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      FSTERROR() << "Could not identify unigram state.";
307dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      SetProperties(kError, kError);
308dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return;
309dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
3105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    ArcIterator<Fst<A> > aiter(fst, unigram);
3115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (aiter.Done()) {
3125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      LOG(WARNING) << "Unigram state " << unigram << " has no arcs.";
3135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      break;
3145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
315dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (aiter.Value().ilabel != 0) break;
316dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    unigram = aiter.Value().nextstate;
317dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
318dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
319dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Each state's context is determined by the subtree it is under from the
320dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // unigram state.
321dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  queue<pair<StateId, Label> > label_queue;
322dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  vector<bool> visited(num_states);
323dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Force an epsilon link to the start state.
324dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  label_queue.push(make_pair(fst.Start(), 0));
325dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  for (ArcIterator<Fst<A> > aiter(fst, unigram);
326dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin       !aiter.Done(); aiter.Next()) {
327dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    label_queue.push(make_pair(aiter.Value().nextstate, aiter.Value().ilabel));
328dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
329dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // investigate states in breadth first fashion to assign context words.
330dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  while (!label_queue.empty()) {
331dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    pair<StateId, Label> &now = label_queue.front();
332dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (!visited[now.first]) {
333dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      context[now.first] = now.second;
334dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      visited[now.first] = true;
335dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      for (ArcIterator<Fst<A> > aiter(fst, now.first);
336dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin           !aiter.Done(); aiter.Next()) {
337dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        const Arc &arc = aiter.Value();
338dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        if (arc.ilabel != 0) {
339dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          label_queue.push(make_pair(arc.nextstate, now.second));
340dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        }
341dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
342dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
343dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    label_queue.pop();
344dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
345dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  visited.clear();
346dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
347dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // The arc from the start state should be assigned an epsilon to put it
348dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // in front of the all other labels (which makes Start state 1 after
349dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // unigram which is state 0).
350dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context[fst.Start()] = 0;
351dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
352dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Build the tree of contexts fst by reversing the epsilon arcs from fst.
353dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  VectorFst<Arc> context_fst;
354dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64 num_final = 0;
355dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  for (int i = 0; i < num_states; ++i) {
356dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (fst.Final(i) != Weight::Zero()) {
357dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      ++num_final;
358dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
359dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    context_fst.SetFinal(context_fst.AddState(), fst.Final(i));
360dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
361dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_fst.SetStart(unigram);
362dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_fst.SetInputSymbols(fst.InputSymbols());
363dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_fst.SetOutputSymbols(fst.OutputSymbols());
364dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  int64 num_context_arcs = 0;
365dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  int64 num_futures = 0;
366dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  for (StateIterator<Fst<A> > siter(fst); !siter.Done(); siter.Next()) {
367dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    const StateId &state = siter.Value();
368dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    num_futures += fst.NumArcs(state) - fst.NumInputEpsilons(state);
369dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    ArcIterator<Fst<A> > aiter(fst, state);
370dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (!aiter.Done()) {
371dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const Arc &arc = aiter.Value();
372dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      // this arc goes from state to arc.nextstate, so create an arc from
373dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      // arc.nextstate to state to reverse it.
374dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (arc.ilabel == 0) {
375dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        context_fst.AddArc(arc.nextstate, Arc(context[state], context[state],
376dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                                              arc.weight, state));
377dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        num_context_arcs++;
378dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
379dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
380dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
381dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (num_context_arcs != context_fst.NumStates() - 1) {
382dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    FSTERROR() << "Number of contexts arcs != number of states - 1";
383dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetProperties(kError, kError);
384dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return;
385dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
386dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (context_fst.NumStates() != num_states) {
387dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    FSTERROR() << "Number of contexts != number of states";
388dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetProperties(kError, kError);
389dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return;
390dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
391dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  int64 context_props = context_fst.Properties(kIDeterministic |
392dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                                               kILabelSorted, true);
393dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (!(context_props & kIDeterministic)) {
394dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    FSTERROR() << "Input fst is not structured properly";
395dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetProperties(kError, kError);
396dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return;
397dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
398dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (!(context_props & kILabelSorted)) {
399dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin     ArcSort(&context_fst, ILabelCompare<Arc>());
400dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
401dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
402dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  delete [] context;
403dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
404dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64 b64;
405dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Weight weight;
406dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Label label = kNoLabel;
407dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const size_t storage = Storage(num_states, num_futures, num_final);
4085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  MappedFile *data_region = MappedFile::Allocate(storage);
4095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  char *data = reinterpret_cast<char *>(data_region->mutable_data());
410dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  memset(data, 0, storage);
411dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t offset = 0;
412dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  memcpy(data + offset, reinterpret_cast<char *>(&num_states),
413dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin         sizeof(num_states));
414dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += sizeof(num_states);
415dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  memcpy(data + offset, reinterpret_cast<char *>(&num_futures),
416dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin         sizeof(num_futures));
417dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += sizeof(num_futures);
418dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  memcpy(data + offset, reinterpret_cast<char *>(&num_final),
419dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin         sizeof(num_final));
420dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += sizeof(num_final);
421dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64* context_bits = reinterpret_cast<uint64*>(data + offset);
422dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += BitmapIndex::StorageSize(num_states * 2 + 1) * sizeof(b64);
423dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64* future_bits = reinterpret_cast<uint64*>(data + offset);
424dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset +=
425dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      BitmapIndex::StorageSize(num_futures + num_states + 1) * sizeof(b64);
426dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64* final_bits = reinterpret_cast<uint64*>(data + offset);
427dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += BitmapIndex::StorageSize(num_states) * sizeof(b64);
428dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Label* context_words = reinterpret_cast<Label*>(data + offset);
429dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += (num_states + 1) * sizeof(label);
430dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Label* future_words = reinterpret_cast<Label*>(data + offset);
431dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += num_futures * sizeof(label);
432dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1);
433dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Weight* backoff = reinterpret_cast<Weight*>(data + offset);
434dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += (num_states + 1) * sizeof(weight);
435dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Weight* final_probs = reinterpret_cast<Weight*>(data + offset);
436dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += num_final * sizeof(weight);
437dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Weight* future_probs = reinterpret_cast<Weight*>(data + offset);
438dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  int64 context_arc = 0, future_arc = 0, context_bit = 0, future_bit = 0,
439dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        final_bit = 0;
440dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
441dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // pseudo-root bits
442dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  BitmapIndex::Set(context_bits, context_bit++);
443dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  ++context_bit;
444dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_words[context_arc] = label;
445dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  backoff[context_arc] = Weight::Zero();
446dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_arc++;
447dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
448dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  ++future_bit;
449dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (order_out) {
450dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    order_out->clear();
451dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    order_out->resize(num_states);
452dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
453dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
454dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  queue<StateId> context_q;
455dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_q.push(context_fst.Start());
456dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId state_number = 0;
457dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  while (!context_q.empty()) {
458dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    const StateId &state = context_q.front();
459dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (order_out) {
460dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      (*order_out)[state] = state_number;
461dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
462dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
463dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    const Weight &final = context_fst.Final(state);
464dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (final != Weight::Zero()) {
465dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      BitmapIndex::Set(final_bits, state_number);
466dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      final_probs[final_bit] = final;
467dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      ++final_bit;
468dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
469dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
470dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    for (ArcIterator<VectorFst<A> > aiter(context_fst, state);
471dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin         !aiter.Done(); aiter.Next()) {
472dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const Arc &arc = aiter.Value();
473dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      context_words[context_arc] = arc.ilabel;
474dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      backoff[context_arc] = arc.weight;
475dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      ++context_arc;
476dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      BitmapIndex::Set(context_bits, context_bit++);
477dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      context_q.push(arc.nextstate);
478dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
479dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    ++context_bit;
480dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
481dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    for (ArcIterator<Fst<A> > aiter(fst, state); !aiter.Done(); aiter.Next()) {
482dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const Arc &arc = aiter.Value();
483dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (arc.ilabel != 0) {
484dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        future_words[future_arc] = arc.ilabel;
485dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        future_probs[future_arc] = arc.weight;
486dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        ++future_arc;
487dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        BitmapIndex::Set(future_bits, future_bit++);
488dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
489dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
490dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    ++future_bit;
491dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    ++state_number;
492dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    context_q.pop();
493dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
494dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
495dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if ((state_number !=  num_states) ||
496dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      (context_bit != num_states * 2 + 1) ||
497dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      (context_arc != num_states) ||
498dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      (future_arc != num_futures) ||
499dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      (future_bit != num_futures + num_states + 1) ||
500dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      (final_bit != num_final)) {
501dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    FSTERROR() << "Structure problems detected during construction";
502dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetProperties(kError, kError);
503dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return;
504dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
505dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
5065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  Init(data, false, data_region);
507dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin}
508dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
509dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate<typename A>
5105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkininline void NGramFstImpl<A>::Init(const char* data, bool owned,
5115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                                  MappedFile *data_region) {
512dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (owned_) {
513dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    delete [] data_;
514dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
5155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  delete data_region_;
5165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  data_region_ = data_region;
517dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  owned_ = owned;
518dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  data_ = data;
519dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t offset = 0;
520dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  num_states_ = *(reinterpret_cast<const uint64*>(data_ + offset));
521dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += sizeof(num_states_);
522dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  num_futures_ = *(reinterpret_cast<const uint64*>(data_ + offset));
523dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += sizeof(num_futures_);
524dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  num_final_ = *(reinterpret_cast<const uint64*>(data_ + offset));
525dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += sizeof(num_final_);
526dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint64 bits;
527dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t context_bits = num_states_ * 2 + 1;
528dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t future_bits = num_futures_ + num_states_ + 1;
529dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_ = reinterpret_cast<const uint64*>(data_ + offset);
530dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += BitmapIndex::StorageSize(context_bits) * sizeof(bits);
531dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  future_ = reinterpret_cast<const uint64*>(data_ + offset);
532dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += BitmapIndex::StorageSize(future_bits) * sizeof(bits);
533dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  final_ = reinterpret_cast<const uint64*>(data_ + offset);
5345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  offset += BitmapIndex::StorageSize(num_states_) * sizeof(bits);
535dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_words_ = reinterpret_cast<const Label*>(data_ + offset);
536dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += (num_states_ + 1) * sizeof(*context_words_);
537dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  future_words_ = reinterpret_cast<const Label*>(data_ + offset);
538dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += num_futures_ * sizeof(*future_words_);
539dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset = (offset + sizeof(*backoff_) - 1) & ~(sizeof(*backoff_) - 1);
540dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  backoff_ = reinterpret_cast<const Weight*>(data_ + offset);
541dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += (num_states_ + 1) * sizeof(*backoff_);
542dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  final_probs_ = reinterpret_cast<const Weight*>(data_ + offset);
543dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  offset += num_final_ * sizeof(*final_probs_);
544dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  future_probs_ = reinterpret_cast<const Weight*>(data_ + offset);
545dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
546dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  context_index_.BuildIndex(context_, context_bits);
547dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  future_index_.BuildIndex(future_, future_bits);
548dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  final_index_.BuildIndex(final_, num_states_);
549dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
550dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const size_t node_rank = context_index_.Rank1(0);
551dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  root_first_child_ = context_index_.Select0(node_rank) + 1;
552dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (context_index_.Get(root_first_child_) == false) {
553dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    FSTERROR() << "Missing unigrams";
554dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    SetProperties(kError, kError);
555dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return;
556dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
557dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const size_t last_child = context_index_.Select0(node_rank + 1) - 1;
558dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  root_num_children_ = last_child - root_first_child_ + 1;
559dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  root_children_ = context_words_ + context_index_.Rank1(root_first_child_);
560dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin}
561dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
562dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate<typename A>
563dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkininline typename A::StateId NGramFstImpl<A>::Transition(
564dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        const vector<Label> &context, Label future) const {
565dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const Label *children = root_children_;
5665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const Label *loc = lower_bound(children, children + root_num_children_,
5675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                                 future);
5685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (loc == children + root_num_children_ || *loc != future) {
569dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return context_index_.Rank1(0);
570dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
571dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t node = root_first_child_ + loc - children;
572dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t node_rank = context_index_.Rank1(node);
573dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t first_child = context_index_.Select0(node_rank) + 1;
574dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  if (context_index_.Get(first_child) == false) {
575dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return context_index_.Rank1(node);
576dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
577dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t last_child = context_index_.Select0(node_rank + 1) - 1;
578dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  for (int word = context.size() - 1; word >= 0; --word) {
579dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    children = context_words_ + context_index_.Rank1(first_child);
580dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    loc = lower_bound(children, children + last_child - first_child + 1,
581dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                      context[word]);
582dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (loc == children + last_child - first_child + 1 ||
583dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        *loc != context[word]) {
584dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      break;
585dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
586dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    node = first_child + loc - children;
587dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    node_rank = context_index_.Rank1(node);
588dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    first_child = context_index_.Select0(node_rank) + 1;
589dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (context_index_.Get(first_child) == false) break;
590dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    last_child = context_index_.Select0(node_rank + 1) - 1;
591dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
592dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  return context_index_.Rank1(node);
593dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin}
594dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
5955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate<typename A>
5965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkininline void NGramFstImpl<A>::GetStates(
5975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    const vector<Label> &context,
5985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    vector<typename A::StateId>* states) const {
5995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  states->clear();
6005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  states->push_back(0);
6015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typename vector<Label>::const_reverse_iterator cit = context.rbegin();
6025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const Label *children = root_children_;
6035b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const Label *loc = lower_bound(children, children + root_num_children_, *cit);
6045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (loc == children + root_num_children_ || *loc != *cit) return;
6055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  size_t node = root_first_child_ + loc - children;
6065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  states->push_back(context_index_.Rank1(node));
6075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (context.size() == 1) return;
6085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  size_t node_rank = context_index_.Rank1(node);
6095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  size_t first_child = context_index_.Select0(node_rank) + 1;
6105b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ++cit;
6115b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  if (context_index_.Get(first_child) != false) {
6125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    size_t last_child = context_index_.Select0(node_rank + 1) - 1;
6135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    while (cit != context.rend()) {
6145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      children = context_words_ + context_index_.Rank1(first_child);
6155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      loc = lower_bound(children, children + last_child - first_child + 1,
6165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                        *cit);
6175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (loc == children + last_child - first_child + 1 || *loc != *cit) {
6185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        break;
6195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
6205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      ++cit;
6215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      node = first_child + loc - children;
6225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      states->push_back(context_index_.Rank1(node));
6235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      node_rank = context_index_.Rank1(node);
6245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      first_child = context_index_.Select0(node_rank) + 1;
6255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (context_index_.Get(first_child) == false) break;
6265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      last_child = context_index_.Select0(node_rank + 1) - 1;
6275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
6285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
6295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}
6305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
631dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin/*****************************************************************************/
632dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate<class A>
633dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinclass NGramFst : public ImplToExpandedFst<NGramFstImpl<A> > {
634dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  friend class ArcIterator<NGramFst<A> >;
635dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  friend class NGramFstMatcher<A>;
636dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
637dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin public:
638dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef A Arc;
639dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::StateId StateId;
640dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Label Label;
641dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Weight Weight;
642dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef NGramFstImpl<A> Impl;
643dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
644dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  explicit NGramFst(const Fst<A> &dst)
645dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      : ImplToExpandedFst<Impl>(new Impl(dst, NULL)) {}
646dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
647dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFst(const Fst<A> &fst, vector<StateId>* order_out)
648dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      : ImplToExpandedFst<Impl>(new Impl(fst, order_out)) {}
649dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
650dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Because the NGramFstImpl is a const stateless data structure, there
651dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // is never a need to do anything beside copy the reference.
652dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFst(const NGramFst<A> &fst, bool safe = false)
653dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      : ImplToExpandedFst<Impl>(fst, false) {}
654dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
655dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFst() : ImplToExpandedFst<Impl>(new Impl()) {}
656dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
657dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Non-standard constructor to initialize NGramFst directly from data.
658dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFst(const char* data, bool owned) : ImplToExpandedFst<Impl>(new Impl()) {
6595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    GetImpl()->Init(data, owned, NULL);
660dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
661dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
662dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  // Get method that gets the data associated with Init().
663dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const char* GetData(size_t* data_size) const {
664dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return GetImpl()->GetData(data_size);
665dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
666dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
6675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const vector<Label> GetContext(StateId s) const {
6685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return GetImpl()->GetContext(s, &inst_);
6695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
6705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
6715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Consumes as much as possible of context from right to left, returns the
6725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // the states corresponding to the increasingly conditioned input sequence.
6735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void GetStates(const vector<Label>& context, vector<StateId> *state) const {
6745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return GetImpl()->GetStates(context, state);
6755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
6765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
677dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual size_t NumArcs(StateId s) const {
678dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return GetImpl()->NumArcs(s, &inst_);
679dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
680dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
681dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual NGramFst<A>* Copy(bool safe = false) const {
682dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return new NGramFst(*this, safe);
683dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
684dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
685dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static NGramFst<A>* Read(istream &strm, const FstReadOptions &opts) {
686dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    Impl* impl = Impl::Read(strm, opts);
687dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return impl ? new NGramFst<A>(impl) : 0;
688dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
689dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
690dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  static NGramFst<A>* Read(const string &filename) {
691dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (!filename.empty()) {
692dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
693dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (!strm) {
694dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        LOG(ERROR) << "NGramFst::Read: Can't open file: " << filename;
695dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        return 0;
696dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
697dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return Read(strm, FstReadOptions(filename));
698dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    } else {
699dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      return Read(cin, FstReadOptions("standard input"));
700dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
701dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
702dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
703dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
704dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return GetImpl()->Write(strm, opts);
705dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
706dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
707dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual bool Write(const string &filename) const {
708dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return Fst<A>::WriteFile(filename);
709dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
710dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
711dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual inline void InitStateIterator(StateIteratorData<A>* data) const {
712dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    GetImpl()->InitStateIterator(data);
713dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
714dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
715dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual inline void InitArcIterator(
716dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      StateId s, ArcIteratorData<A>* data) const;
717dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
718dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual MatcherBase<A>* InitMatcher(MatchType match_type) const {
719dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return new NGramFstMatcher<A>(*this, match_type);
720dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
721dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
7225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  size_t StorageSize() const {
7235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    return GetImpl()->StorageSize();
7245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
7255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
726dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin private:
727dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  explicit NGramFst(Impl* impl) : ImplToExpandedFst<Impl>(impl) {}
728dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
729dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Impl* GetImpl() const {
730dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return
731dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        ImplToExpandedFst<Impl, ExpandedFst<A> >::GetImpl();
732dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
733dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
734dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void SetImpl(Impl* impl, bool own_impl = true) {
735dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    ImplToExpandedFst<Impl, Fst<A> >::SetImpl(impl, own_impl);
736dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
737dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
738dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  mutable NGramFstInst<A> inst_;
739dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin};
740dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
741dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class A> inline void
742dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander GutkinNGramFst<A>::InitArcIterator(StateId s, ArcIteratorData<A>* data) const {
743dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  GetImpl()->SetInstFuture(s, &inst_);
744dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  GetImpl()->SetInstNode(&inst_);
745dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  data->base = new ArcIterator<NGramFst<A> >(*this, s);
746dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin}
747dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
748dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin/*****************************************************************************/
749dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class A>
750dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinclass NGramFstMatcher : public MatcherBase<A> {
751dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin public:
752dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef A Arc;
753dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Label Label;
754dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::StateId StateId;
755dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Weight Weight;
756dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
757dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFstMatcher(const NGramFst<A> &fst, MatchType match_type)
758dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      : fst_(fst), inst_(fst.inst_), match_type_(match_type),
759dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        current_loop_(false),
760dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) {
761dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (match_type_ == MATCH_OUTPUT) {
762dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      swap(loop_.ilabel, loop_.olabel);
763dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
764dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
765dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
766dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFstMatcher(const NGramFstMatcher<A> &matcher, bool safe = false)
767dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      : fst_(matcher.fst_), inst_(matcher.inst_),
768dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        match_type_(matcher.match_type_), current_loop_(false),
769dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) {
770dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (match_type_ == MATCH_OUTPUT) {
771dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      swap(loop_.ilabel, loop_.olabel);
772dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
773dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
774dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
775dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual NGramFstMatcher<A>* Copy(bool safe = false) const {
776dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return new NGramFstMatcher<A>(*this, safe);
777dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
778dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
779dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual MatchType Type(bool test) const {
780dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return match_type_;
781dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
782dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
783dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual const Fst<A> &GetFst() const {
784dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return fst_;
785dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
786dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
787dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual uint64 Properties(uint64 props) const {
788dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return props;
789dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
790dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
791dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin private:
792dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual void SetState_(StateId s) {
793dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    fst_.GetImpl()->SetInstFuture(s, &inst_);
794dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    current_loop_ = false;
795dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
796dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
797dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual bool Find_(Label label) {
798dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    const Label nolabel = kNoLabel;
799dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    done_ = true;
800dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (label == 0 || label == nolabel) {
801dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (label == 0) {
802dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        current_loop_ = true;
803dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        loop_.nextstate = inst_.state_;
804dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
805dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      // The unigram state has no epsilon arc.
806dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (inst_.state_ != 0) {
807dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        arc_.ilabel = arc_.olabel = 0;
808dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        fst_.GetImpl()->SetInstNode(&inst_);
809dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        arc_.nextstate = fst_.GetImpl()->context_index_.Rank1(
810dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            fst_.GetImpl()->context_index_.Select1(
811dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                fst_.GetImpl()->context_index_.Rank0(inst_.node_) - 1));
812dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        arc_.weight = fst_.GetImpl()->backoff_[inst_.state_];
813dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        done_ = false;
814dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
815dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    } else {
816dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const Label *start = fst_.GetImpl()->future_words_ + inst_.offset_;
817dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const Label *end = start + inst_.num_futures_;
818dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      const Label* search = lower_bound(start, end, label);
819dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (search != end && *search == label) {
820dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        size_t state = search - start;
821dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        arc_.ilabel = arc_.olabel = label;
822dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        arc_.weight = fst_.GetImpl()->future_probs_[inst_.offset_ + state];
823dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        fst_.GetImpl()->SetInstContext(&inst_);
824dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        arc_.nextstate = fst_.GetImpl()->Transition(inst_.context_, label);
825dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        done_ = false;
826dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
827dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
828dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return !Done_();
829dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
830dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
831dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual bool Done_() const {
832dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return !current_loop_ && done_;
833dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
834dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
835dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual const Arc& Value_() const {
836dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return (current_loop_) ? loop_ : arc_;
837dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
838dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
839dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual void Next_() {
840dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (current_loop_) {
841dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      current_loop_ = false;
842dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    } else {
843dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      done_ = true;
844dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
845dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
846dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
847dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const NGramFst<A>& fst_;
848dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  NGramFstInst<A> inst_;
849dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  MatchType match_type_;             // Supplied by caller
850dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool done_;
851dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Arc arc_;
852dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool current_loop_;                // Current arc is the implicit loop
853dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  Arc loop_;
854dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin};
855dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
856dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin/*****************************************************************************/
857dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate<class A>
858dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinclass ArcIterator<NGramFst<A> > : public ArcIteratorBase<A> {
859dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin public:
860dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef A Arc;
861dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Label Label;
862dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::StateId StateId;
863dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::Weight Weight;
864dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
865dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  ArcIterator(const NGramFst<A> &fst, StateId state)
866dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      : lazy_(~0), impl_(fst.GetImpl()), i_(0), flags_(kArcValueFlags) {
867dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    inst_ = fst.inst_;
868dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    impl_->SetInstFuture(state, &inst_);
869dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    impl_->SetInstNode(&inst_);
870dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
871dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
872dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool Done() const {
873dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return i_ >= ((inst_.node_ == 0) ? inst_.num_futures_ :
874dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                  inst_.num_futures_ + 1);
875dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
876dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
877dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const Arc &Value() const {
878dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    bool eps = (inst_.node_ != 0 && i_ == 0);
879dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    StateId state = (inst_.node_ == 0) ? i_ : i_ - 1;
880dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (flags_ & lazy_ & (kArcILabelValue | kArcOLabelValue)) {
881dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      arc_.ilabel =
882dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          arc_.olabel = eps ? 0 : impl_->future_words_[inst_.offset_ + state];
883dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      lazy_ &= ~(kArcILabelValue | kArcOLabelValue);
884dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
885dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (flags_ & lazy_ & kArcNextStateValue) {
886dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (eps) {
887dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        arc_.nextstate = impl_->context_index_.Rank1(
888dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            impl_->context_index_.Select1(
889dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                impl_->context_index_.Rank0(inst_.node_) - 1));
890dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      } else {
891dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        if (lazy_ & kArcNextStateValue) {
892dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          impl_->SetInstContext(&inst_);  // first time only.
893dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        }
894dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        arc_.nextstate =
895dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            impl_->Transition(inst_.context_,
896dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin                              impl_->future_words_[inst_.offset_ + state]);
897dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
898dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      lazy_ &= ~kArcNextStateValue;
899dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
900dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (flags_ & lazy_ & kArcWeightValue) {
901dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      arc_.weight = eps ?  impl_->backoff_[inst_.state_] :
902dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          impl_->future_probs_[inst_.offset_ + state];
903dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      lazy_ &= ~kArcWeightValue;
904dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
905dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return arc_;
906dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
907dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
908dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void Next() {
909dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    ++i_;
910dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    lazy_ = ~0;
911dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
912dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
913dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t Position() const { return i_; }
914dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
915dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void Reset() {
916dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    i_ = 0;
917dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    lazy_ = ~0;
918dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
919dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
920dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void Seek(size_t a) {
921dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (i_ != a) {
922dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      i_ = a;
923dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      lazy_ = ~0;
924dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    }
925dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
926dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
927dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint32 Flags() const {
928dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    return flags_;
929dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
930dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
931dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void SetFlags(uint32 f, uint32 m) {
932dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    flags_ &= ~m;
933dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    flags_ |= (f & kArcValueFlags);
934dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  }
935dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
936dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin private:
937dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual bool Done_() const { return Done(); }
938dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual const Arc& Value_() const { return Value(); }
939dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual void Next_() { Next(); }
940dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual size_t Position_() const { return Position(); }
941dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual void Reset_() { Reset(); }
942dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual void Seek_(size_t a) { Seek(a); }
943dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint32 Flags_() const { return Flags(); }
944dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }
945dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
946dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  mutable Arc arc_;
947dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  mutable uint32 lazy_;
948dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  const NGramFstImpl<A> *impl_;
949dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  mutable NGramFstInst<A> inst_;
950dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
951dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  size_t i_;
952dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  uint32 flags_;
953dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
954dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
955dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin};
956dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
957dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin/*****************************************************************************/
958dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// Specialization for NGramFst; see generic version in fst.h
959dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// for sample usage (but use the ProdLmFst type!). This version
960dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// should inline.
961dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkintemplate <class A>
962dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinclass StateIterator<NGramFst<A> > : public StateIteratorBase<A> {
963dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  public:
964dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  typedef typename A::StateId StateId;
965dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
966dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  explicit StateIterator(const NGramFst<A> &fst)
967dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    : s_(0), num_states_(fst.NumStates()) { }
968dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
969dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool Done() const { return s_ >= num_states_; }
970dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId Value() const { return s_; }
971dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void Next() { ++s_; }
972dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  void Reset() { s_ = 0; }
973dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
974dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin private:
975dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual bool Done_() const { return Done(); }
976dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual StateId Value_() const { return Value(); }
977dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual void Next_() { Next(); }
978dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  virtual void Reset_() { Reset(); }
979dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
980dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  StateId s_, num_states_;
981dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
982dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  DISALLOW_COPY_AND_ASSIGN(StateIterator);
983dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin};
984dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin}  // namespace fst
985dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin#endif  // FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
986