// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: allauzen@google.com (Cyril Allauzen)
//
// \file
// A generic (string,type) list file format.
//
// This is a stripped-down version of STTable that does
// not support the Find() operation but that does support
// reading/writing from standard in/out.

#ifndef FST_EXTENSIONS_FAR_STLIST_H_
#define FST_EXTENSIONS_FAR_STLIST_H_

#include <iostream>
#include <fstream>
#include <fst/util.h>

#include <algorithm>
#include <functional>
#include <queue>
#include <string>
#include <utility>
using std::pair; using std::make_pair;
#include <vector>
using std::vector;

namespace fst {

// Magic number written first in every STList file by STListWriter; the
// readers in this file reject any input whose leading int32 differs from it.
static const int32 kSTListMagicNumber = 5656924;
// File format version, written right after the magic number and checked
// by the readers in the same way.
static const int32 kSTListFileVersion = 1;

// String-type list writing class for object of type 'T' using functor 'W'
// to write an object of type 'T' to a stream.
'W' must conform to the 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// following interface: 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// struct Writer { 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// void operator()(ostream &, const T &) const; 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// }; 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class T, class W> 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass STListWriter { 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef T EntryType; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef W EntryWriter; 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit STListWriter(const string filename) 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : stream_( 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filename.empty() ? 
&std::cout : 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new ofstream(filename.c_str(), ofstream::out | ofstream::binary)), 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_(false) { 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(*stream_, kSTListMagicNumber); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(*stream_, kSTListFileVersion); 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!stream_) { 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STListWriter::STListWriter: error writing to file: " 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << filename; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static STListWriter<T, W> *Create(const string &filename) { 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new STListWriter<T, W>(filename); 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Add(const string &key, const T &t) { 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (key == "") { 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STListWriter::Add: key empty: " << key; 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (key < last_key_) { 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STListWriter::Add: key disorder: " << key; 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (error_) return; 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson last_key_ = key; 
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(*stream_, key); 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson entry_writer_(*stream_, t); 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Error() const { return error_; } 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~STListWriter() { 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson WriteType(*stream_, string()); 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (stream_ != &std::cout) 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete stream_; 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson EntryWriter entry_writer_; // Write functor for 'EntryType' 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostream *stream_; // Output stream 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string last_key_; // Last key 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool error_; 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(STListWriter); 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// String-type list reading class for object of type 'T' using functor 'R' 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to read an object of type 'T' form a stream. 
'R' must conform to the 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// following interface: 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// struct Reader { 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// T *operator()(istream &) const; 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// }; 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class T, class R> 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass STListReader { 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef T EntryType; 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef R EntryReader; 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit STListReader(const vector<string> &filenames) 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : sources_(filenames), entry_(0), error_(false) { 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson streams_.resize(filenames.size(), 0); 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool has_stdin = false; 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < filenames.size(); ++i) { 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (filenames[i].empty()) { 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!has_stdin) { 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson streams_[i] = &std::cin; 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson sources_[i] = "stdin"; 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson has_stdin = true; 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STListReader::STListReader: stdin should only " 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "appear once in the input file list."; 
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson streams_[i] = new ifstream( 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filenames[i].c_str(), ifstream::in | ifstream::binary); 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int32 magic_number = 0, file_version = 0; 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(*streams_[i], &magic_number); 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(*streams_[i], &file_version); 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (magic_number != kSTListMagicNumber) { 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STListReader::STTableReader: wrong file type: " 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << filenames[i]; 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (file_version != kSTListFileVersion) { 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STListReader::STTableReader: wrong file version: " 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << filenames[i]; 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string key; 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(*streams_[i], &key); 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!key.empty()) 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian 
Hodson heap_.push(make_pair(key, i)); 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!*streams_[i]) { 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STTableReader: error reading file: " << sources_[i]; 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (heap_.empty()) return; 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t current = heap_.top().second; 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson entry_ = entry_reader_(*streams_[current]); 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!entry_ || !*streams_[current]) { 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STTableReader: error reading entry for key: " 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << heap_.top().first << ", file: " << sources_[current]; 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~STListReader() { 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < streams_.size(); ++i) { 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (streams_[i] != &std::cin) 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete streams_[i]; 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (entry_) 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete entry_; 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static 
STListReader<T, R> *Open(const string &filename) { 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<string> filenames; 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson filenames.push_back(filename); 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new STListReader<T, R>(filenames); 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static STListReader<T, R> *Open(const vector<string> &filenames) { 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new STListReader<T, R>(filenames); 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Reset() { 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "STListReader::Reset: stlist does not support reset operation"; 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Find(const string &key) { 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "STListReader::Find: stlist does not support find operation"; 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Done() const { 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return error_ || heap_.empty(); 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Next() { 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (error_) return; 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t current = heap_.top().second; 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string key; 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson heap_.pop(); 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(*(streams_[current]), &key); 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!*streams_[current]) { 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STTableReader: error reading file: " 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << sources_[current]; 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!key.empty()) 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson heap_.push(make_pair(key, current)); 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if(!heap_.empty()) { 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current = heap_.top().second; 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (entry_) 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete entry_; 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson entry_ = entry_reader_(*streams_[current]); 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!entry_ || !*streams_[current]) { 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "STTableReader: error reading entry for key: " 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << heap_.top().first << ", file: " << sources_[current]; 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson error_ = true; 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string &GetKey() const { 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return heap_.top().first; 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const EntryType &GetEntry() const { 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *entry_; 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool Error() const { return error_; } 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson EntryReader entry_reader_; // Read functor for 'EntryType' 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<istream*> streams_; // Input streams 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<string> sources_; // and corresponding file names 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson priority_queue< 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pair<string, size_t>, vector<pair<string, size_t> >, 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson greater<pair<string, size_t> > > heap_; // (Key, stream id) heap 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson mutable EntryType *entry_; // Pointer to the currently read entry 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool error_; 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(STListReader); 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// String-type list header reading function template on the entry header 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// type 'H' having a member function: 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Read(istream &strm, const string &filename); 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Checks that 'filename' is an STTable and call the H::Read() on the last 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// entry in the STTable. 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Does not support reading from stdin. 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class H> 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool ReadSTListHeader(const string &filename, H *header) { 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (filename.empty()) { 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin"; 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int32 magic_number = 0, file_version = 0; 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(strm, &magic_number); 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(strm, &file_version); 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (magic_number != kSTListMagicNumber) { 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename; 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (file_version != 
kSTListFileVersion) { 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename; 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string key; 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ReadType(strm, &key); 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson header->Read(strm, filename + ":" + key); 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!strm) { 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename; 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool IsSTList(const string &filename); 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_EXTENSIONS_FAR_STLIST_H_ 305