1// printstrings-main.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: allauzen@google.com (Cyril Allauzen)
17// Modified by: jpr@google.com (Jake Ratkiewicz)
18//
19// \file
20// Output as strings the string FSTs in a finite-state archive.
21
22#ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
23#define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
24
25#include <string>
26#include <vector>
27using std::vector;
28
29#include <fst/extensions/far/far.h>
30#include <fst/string.h>
31
32namespace fst {
33
34template <class Arc>
35void FarPrintStrings(
36    const vector<string> &ifilenames, const FarEntryType entry_type,
37    const FarTokenType far_token_type, const string &begin_key,
38    const string &end_key, const bool print_key, const string &symbols_fname,
39    const int32 generate_filenames, const string &filename_prefix,
40    const string &filename_suffix) {
41
42  typename StringPrinter<Arc>::TokenType token_type;
43  if (far_token_type == FTT_SYMBOL) {
44    token_type = StringPrinter<Arc>::SYMBOL;
45  } else if (far_token_type == FTT_BYTE) {
46    token_type = StringPrinter<Arc>::BYTE;
47  } else if (far_token_type == FTT_UTF8) {
48    token_type = StringPrinter<Arc>::UTF8;
49  } else {
50    FSTERROR() << "FarPrintStrings: unknown token type";
51    return;
52  }
53
54  const SymbolTable *syms = 0;
55  if (!symbols_fname.empty()) {
56    // allow negative flag?
57    syms = SymbolTable::ReadText(symbols_fname, true);
58    if (!syms) {
59      FSTERROR() << "FarPrintStrings: error reading symbol table: "
60                 << symbols_fname;
61      return;
62    }
63  }
64
65  StringPrinter<Arc> string_printer(token_type, syms);
66
67  FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
68  if (!far_reader) return;
69
70  if (!begin_key.empty())
71    far_reader->Find(begin_key);
72
73  string okey;
74  int nrep = 0;
75  for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
76    string key = far_reader->GetKey();
77    if (!end_key.empty() && end_key < key)
78      break;
79    if (okey == key)
80      ++nrep;
81    else
82      nrep = 0;
83    okey = key;
84
85    const Fst<Arc> &fst = far_reader->GetFst();
86    string str;
87    VLOG(2) << "Handling key: " << key;
88    string_printer(fst, &str);
89
90    if (entry_type == FET_LINE) {
91      if (print_key)
92        cout << key << "\t";
93      cout << str << endl;
94    } else if (entry_type == FET_FILE) {
95      stringstream sstrm;
96      if (generate_filenames) {
97        sstrm.fill('0');
98        sstrm << std::right << setw(generate_filenames) << i;
99      } else {
100        sstrm << key;
101        if (nrep > 0)
102          sstrm << "." << nrep;
103      }
104
105      string filename;
106      filename = filename_prefix +  sstrm.str() + filename_suffix;
107
108      ofstream ostrm(filename.c_str());
109      if (!ostrm) {
110        FSTERROR() << "FarPrintStrings: Can't open file:" << filename;
111        delete syms;
112        delete far_reader;
113        return;
114      }
115      ostrm << str;
116      if (token_type == StringPrinter<Arc>::SYMBOL)
117        ostrm << "\n";
118    }
119  }
120}
121
122
123
124}  // namespace fst
125
126#endif  // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
127