1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// create-main.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Modified: jpr@google.com (Jake Ratkiewicz) to use new dispatch
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Creates a finite-state archive from component FSTs.  Includes
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// helper function for farcreate.cc that templates the main on the arc
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// type to support multiple and extensible arc types.
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_FAR_CREATE_H__
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_FAR_CREATE_H__
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <libgen.h>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/far/far.h>
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc>
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid FarCreate(const vector<string> &in_fnames,
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const string &out_fname,
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const int32 generate_keys,
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const bool file_list_input,
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const FarType &far_type,
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const string &key_prefix,
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const string &key_suffix) {
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FarWriter<Arc> *far_writer =
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FarWriter<Arc>::Create(out_fname, far_type);
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!far_writer) return;
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<string> inputs;
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (file_list_input) {
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (int i = 1; i < in_fnames.size(); ++i) {
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ifstream istrm(in_fnames[i].c_str());
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string str;
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      while (getline(istrm, str))
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        inputs.push_back(str);
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    inputs = in_fnames;
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (int i = 0; i < inputs.size(); ++i) {
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Fst<Arc> *ifst = Fst<Arc>::Read(inputs[i]);
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!ifst) return;
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    string key;
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (generate_keys > 0) {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ostringstream keybuf;
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      keybuf.width(generate_keys);
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      keybuf.fill('0');
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      keybuf << i + 1;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      key = keybuf.str();
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      char* filename = new char[inputs[i].size() + 1];
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      strcpy(filename, inputs[i].c_str());
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      key = basename(filename);
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete[] filename;
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    far_writer->Add(key_prefix + key + key_suffix, *ifst);
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete ifst;
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  delete far_writer;
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_EXTENSIONS_FAR_CREATE_H__
88