1// printstrings-main.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: allauzen@google.com (Cyril Allauzen)
17// Modified by: jpr@google.com (Jake Ratkiewicz)
18//
19// \file
20// Output as strings the string FSTs in a finite-state archive.
21
22#ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
23#define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
24
25#include <string>
26#include <vector>
27using std::vector;
28
29#include <fst/extensions/far/far.h>
30#include <fst/shortest-distance.h>
31#include <fst/string.h>
32
33DECLARE_string(far_field_separator);
34
35namespace fst {
36
37template <class Arc>
38void FarPrintStrings(
39    const vector<string> &ifilenames, const FarEntryType entry_type,
40    const FarTokenType far_token_type, const string &begin_key,
41    const string &end_key, const bool print_key, const bool print_weight,
42    const string &symbols_fname, const bool initial_symbols,
43    const int32 generate_filenames,
44    const string &filename_prefix, const string &filename_suffix) {
45
46  typename StringPrinter<Arc>::TokenType token_type;
47  if (far_token_type == FTT_SYMBOL) {
48    token_type = StringPrinter<Arc>::SYMBOL;
49  } else if (far_token_type == FTT_BYTE) {
50    token_type = StringPrinter<Arc>::BYTE;
51  } else if (far_token_type == FTT_UTF8) {
52    token_type = StringPrinter<Arc>::UTF8;
53  } else {
54    FSTERROR() << "FarPrintStrings: unknown token type";
55    return;
56  }
57
58  const SymbolTable *syms = 0;
59  if (!symbols_fname.empty()) {
60    // allow negative flag?
61    SymbolTableTextOptions opts;
62    opts.allow_negative = true;
63    syms = SymbolTable::ReadText(symbols_fname, opts);
64    if (!syms) {
65      FSTERROR() << "FarPrintStrings: error reading symbol table: "
66                 << symbols_fname;
67      return;
68    }
69  }
70
71  FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
72  if (!far_reader) return;
73
74  if (!begin_key.empty())
75    far_reader->Find(begin_key);
76
77  string okey;
78  int nrep = 0;
79  for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
80    string key = far_reader->GetKey();
81    if (!end_key.empty() && end_key < key)
82      break;
83    if (okey == key)
84      ++nrep;
85    else
86      nrep = 0;
87    okey = key;
88
89    const Fst<Arc> &fst = far_reader->GetFst();
90    if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0)
91      syms = fst.InputSymbols()->Copy();
92    string str;
93    VLOG(2) << "Handling key: " << key;
94    StringPrinter<Arc> string_printer(
95        token_type, syms ? syms : fst.InputSymbols());
96    string_printer(fst, &str);
97
98    if (entry_type == FET_LINE) {
99      if (print_key)
100        cout << key << FLAGS_far_field_separator[0];
101      cout << str;
102      if (print_weight)
103        cout << FLAGS_far_field_separator[0] << ShortestDistance(fst);
104      cout << endl;
105    } else if (entry_type == FET_FILE) {
106      stringstream sstrm;
107      if (generate_filenames) {
108        sstrm.fill('0');
109        sstrm << std::right << setw(generate_filenames) << i;
110      } else {
111        sstrm << key;
112        if (nrep > 0)
113          sstrm << "." << nrep;
114      }
115
116      string filename;
117      filename = filename_prefix +  sstrm.str() + filename_suffix;
118
119      ofstream ostrm(filename.c_str());
120      if (!ostrm) {
121        FSTERROR() << "FarPrintStrings: Can't open file:" << filename;
122        delete syms;
123        delete far_reader;
124        return;
125      }
126      ostrm << str;
127      if (token_type == StringPrinter<Arc>::SYMBOL)
128        ostrm << "\n";
129    }
130  }
131  delete syms;
132}
133
134
135
136}  // namespace fst
137
138#endif  // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
139