1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// create-main.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Modified: jpr@google.com (Jake Ratkiewicz) to use new dispatch 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Creates a finite-state archive from component FSTs. Includes 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// helper function for farcreate.cc that templates the main on the arc 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// type to support multiple and extensible arc types. 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_FAR_CREATE_H__ 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_FAR_CREATE_H__ 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <libgen.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/far/far.h> 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc> 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid FarCreate(const vector<string> &in_fnames, 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string &out_fname, 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const int32 generate_keys, 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const bool file_list_input, 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FarType &far_type, 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string &key_prefix, 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string &key_suffix) { 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FarWriter<Arc> *far_writer = 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FarWriter<Arc>::Create(out_fname, far_type); 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!far_writer) return; 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<string> inputs; 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (file_list_input) { 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (int i = 1; i < in_fnames.size(); ++i) { 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ifstream istrm(in_fnames[i].c_str()); 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string str; 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (getline(istrm, str)) 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson inputs.push_back(str); 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson inputs = in_fnames; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (int i = 0; i < inputs.size(); ++i) { 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Fst<Arc> *ifst = Fst<Arc>::Read(inputs[i]); 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!ifst) return; 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string key; 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (generate_keys > 0) { 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostringstream keybuf; 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson keybuf.width(generate_keys); 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson keybuf.fill('0'); 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson keybuf << i + 1; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson key = keybuf.str(); 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson char* filename = new char[inputs[i].size() + 1]; 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strcpy(filename, inputs[i].c_str()); 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson key = basename(filename); 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete[] filename; 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson far_writer->Add(key_prefix + key + key_suffix, *ifst); 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete ifst; 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete far_writer; 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_EXTENSIONS_FAR_CREATE_H__ 88