1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen)
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A generic (string,type) list file format.
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This is a stripped-down version of STTable that does
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// not support the Find() operation but that does support
// reading/writing from standard in/out.
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_FAR_STLIST_H_
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_FAR_STLIST_H_
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fstream>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm>
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <functional>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <queue>
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility>
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair;
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Magic number written as the first value of every STList stream; readers
// compare against it to reject files of another type.
static const int32 kSTListMagicNumber = 5656924;
// Format version written right after the magic number; readers reject
// streams carrying any other version.
static const int32 kSTListFileVersion = 1;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// String-type list writing class for object of type 'T' using functor 'W'
// to write an object of type 'T' to a stream. 'W' must conform to the
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// following interface:
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   struct Writer {
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     void operator()(ostream &, const T &) const;
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   };
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class T, class W>
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass STListWriter {
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef T EntryType;
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef W EntryWriter;
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit STListWriter(const string filename)
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : stream_(
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          filename.empty() ? &std::cout :
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          new ofstream(filename.c_str(), ofstream::out | ofstream::binary)),
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        error_(false) {
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(*stream_, kSTListMagicNumber);
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(*stream_, kSTListFileVersion);
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!stream_) {
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "STListWriter::STListWriter: error writing to file: "
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << filename;
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static STListWriter<T, W> *Create(const string &filename) {
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new STListWriter<T, W>(filename);
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Add(const string &key, const T &t) {
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (key == "") {
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "STListWriter::Add: key empty: " << key;
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (key < last_key_) {
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "STListWriter::Add: key disorder: " << key;
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (error_) return;
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    last_key_ = key;
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(*stream_, key);
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    entry_writer_(*stream_, t);
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Error() const { return error_; }
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~STListWriter() {
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    WriteType(*stream_, string());
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (stream_ != &std::cout)
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete stream_;
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  EntryWriter entry_writer_;  // Write functor for 'EntryType'
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ostream *stream_;           // Output stream
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string last_key_;           // Last key
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool error_;
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(STListWriter);
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// String-type list reading class for object of type 'T' using functor 'R'
// to read an object of type 'T' from a stream. 'R' must conform to the
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// following interface:
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   struct Reader {
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     T *operator()(istream &) const;
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   };
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class T, class R>
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass STListReader {
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef T EntryType;
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef R EntryReader;
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  explicit STListReader(const vector<string> &filenames)
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : sources_(filenames), entry_(0), error_(false) {
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    streams_.resize(filenames.size(), 0);
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool has_stdin = false;
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < filenames.size(); ++i) {
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (filenames[i].empty()) {
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (!has_stdin) {
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          streams_[i] = &std::cin;
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          sources_[i] = "stdin";
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          has_stdin = true;
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        } else {
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          FSTERROR() << "STListReader::STListReader: stdin should only "
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                     << "appear once in the input file list.";
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          error_ = true;
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          return;
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        streams_[i] = new ifstream(
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            filenames[i].c_str(), ifstream::in | ifstream::binary);
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      int32 magic_number = 0, file_version = 0;
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ReadType(*streams_[i], &magic_number);
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ReadType(*streams_[i], &file_version);
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (magic_number != kSTListMagicNumber) {
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        FSTERROR() << "STListReader::STTableReader: wrong file type: "
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << filenames[i];
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        error_ = true;
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return;
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (file_version != kSTListFileVersion) {
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        FSTERROR() << "STListReader::STTableReader: wrong file version: "
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << filenames[i];
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        error_ = true;
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return;
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string key;
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ReadType(*streams_[i], &key);
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!key.empty())
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        heap_.push(make_pair(key, i));
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!*streams_[i]) {
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        FSTERROR() << "STTableReader: error reading file: " << sources_[i];
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        error_ = true;
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return;
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (heap_.empty()) return;
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t current = heap_.top().second;
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    entry_ = entry_reader_(*streams_[current]);
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!entry_ || !*streams_[current]) {
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "STTableReader: error reading entry for key: "
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << heap_.top().first << ", file: " << sources_[current];
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~STListReader() {
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < streams_.size(); ++i) {
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (streams_[i] != &std::cin)
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        delete streams_[i];
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (entry_)
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete entry_;
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static STListReader<T, R> *Open(const string &filename) {
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<string> filenames;
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    filenames.push_back(filename);
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new STListReader<T, R>(filenames);
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static STListReader<T, R> *Open(const vector<string> &filenames) {
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return new STListReader<T, R>(filenames);
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Reset() {
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR()
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        << "STListReader::Reset: stlist does not support reset operation";
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    error_ = true;
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Find(const string &key) {
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR()
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        << "STListReader::Find: stlist does not support find operation";
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    error_ = true;
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Done() const {
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return error_ || heap_.empty();
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Next() {
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (error_) return;
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t current = heap_.top().second;
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    string key;
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    heap_.pop();
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ReadType(*(streams_[current]), &key);
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!*streams_[current]) {
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "STTableReader: error reading file: "
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << sources_[current];
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      error_ = true;
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!key.empty())
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      heap_.push(make_pair(key, current));
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if(!heap_.empty()) {
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      current = heap_.top().second;
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (entry_)
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        delete entry_;
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      entry_ = entry_reader_(*streams_[current]);
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!entry_ || !*streams_[current]) {
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        FSTERROR() << "STTableReader: error reading entry for key: "
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << heap_.top().first << ", file: " << sources_[current];
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        error_ = true;
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const string &GetKey() const {
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return heap_.top().first;
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const EntryType &GetEntry() const {
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return *entry_;
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Error() const { return error_; }
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  EntryReader entry_reader_;   // Read functor for 'EntryType'
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<istream*> streams_;   // Input streams
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<string> sources_;     // and corresponding file names
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  priority_queue<
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    pair<string, size_t>, vector<pair<string, size_t> >,
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    greater<pair<string, size_t> > > heap_;  // (Key, stream id) heap
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable EntryType *entry_;   // Pointer to the currently read entry
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool error_;
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(STListReader);
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// String-type list header reading function template on the entry header
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// type 'H' having a member function:
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Read(istream &strm, const string &filename);
// Checks that 'filename' is an STList and calls H::Read() on the first
// entry in the STList.
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Does not support reading from stdin.
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class H>
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool ReadSTListHeader(const string &filename, H *header) {
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (filename.empty()) {
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin";
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int32 magic_number = 0, file_version = 0;
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReadType(strm, &magic_number);
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReadType(strm, &file_version);
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (magic_number != kSTListMagicNumber) {
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename;
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (file_version != kSTListFileVersion) {
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename;
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string key;
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ReadType(strm, &key);
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  header->Read(strm, filename + ":" + key);
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!strm) {
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename;
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return true;
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably reports whether 'filename' holds an STList
// (matching magic number) -- confirm against the definition.
bool IsSTList(const string &filename);
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_EXTENSIONS_FAR_STLIST_H_
305