1// printstrings-main.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: allauzen@google.com (Cyril Allauzen) 17// Modified by: jpr@google.com (Jake Ratkiewicz) 18// 19// \file 20// Output as strings the string FSTs in a finite-state archive. 21 22#ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ 23#define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ 24 25#include <string> 26#include <vector> 27using std::vector; 28 29#include <fst/extensions/far/far.h> 30#include <fst/shortest-distance.h> 31#include <fst/string.h> 32 33DECLARE_string(far_field_separator); 34 35namespace fst { 36 37template <class Arc> 38void FarPrintStrings( 39 const vector<string> &ifilenames, const FarEntryType entry_type, 40 const FarTokenType far_token_type, const string &begin_key, 41 const string &end_key, const bool print_key, const bool print_weight, 42 const string &symbols_fname, const bool initial_symbols, 43 const int32 generate_filenames, 44 const string &filename_prefix, const string &filename_suffix) { 45 46 typename StringPrinter<Arc>::TokenType token_type; 47 if (far_token_type == FTT_SYMBOL) { 48 token_type = StringPrinter<Arc>::SYMBOL; 49 } else if (far_token_type == FTT_BYTE) { 50 token_type = StringPrinter<Arc>::BYTE; 51 } else if (far_token_type == FTT_UTF8) { 52 token_type = StringPrinter<Arc>::UTF8; 53 } else { 54 FSTERROR() << "FarPrintStrings: unknown token type"; 55 return; 56 } 57 58 const SymbolTable *syms = 0; 59 if (!symbols_fname.empty()) { 60 // allow negative flag? 61 SymbolTableTextOptions opts; 62 opts.allow_negative = true; 63 syms = SymbolTable::ReadText(symbols_fname, opts); 64 if (!syms) { 65 FSTERROR() << "FarPrintStrings: error reading symbol table: " 66 << symbols_fname; 67 return; 68 } 69 } 70 71 FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames); 72 if (!far_reader) return; 73 74 if (!begin_key.empty()) 75 far_reader->Find(begin_key); 76 77 string okey; 78 int nrep = 0; 79 for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) { 80 string key = far_reader->GetKey(); 81 if (!end_key.empty() && end_key < key) 82 break; 83 if (okey == key) 84 ++nrep; 85 else 86 nrep = 0; 87 okey = key; 88 89 const Fst<Arc> &fst = far_reader->GetFst(); 90 if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0) 91 syms = fst.InputSymbols()->Copy(); 92 string str; 93 VLOG(2) << "Handling key: " << key; 94 StringPrinter<Arc> string_printer( 95 token_type, syms ? syms : fst.InputSymbols()); 96 string_printer(fst, &str); 97 98 if (entry_type == FET_LINE) { 99 if (print_key) 100 cout << key << FLAGS_far_field_separator[0]; 101 cout << str; 102 if (print_weight) 103 cout << FLAGS_far_field_separator[0] << ShortestDistance(fst); 104 cout << endl; 105 } else if (entry_type == FET_FILE) { 106 stringstream sstrm; 107 if (generate_filenames) { 108 sstrm.fill('0'); 109 sstrm << std::right << setw(generate_filenames) << i; 110 } else { 111 sstrm << key; 112 if (nrep > 0) 113 sstrm << "." << nrep; 114 } 115 116 string filename; 117 filename = filename_prefix + sstrm.str() + filename_suffix; 118 119 ofstream ostrm(filename.c_str()); 120 if (!ostrm) { 121 FSTERROR() << "FarPrintStrings: Can't open file:" << filename; 122 delete syms; 123 delete far_reader; 124 return; 125 } 126 ostrm << str; 127 if (token_type == StringPrinter<Arc>::SYMBOL) 128 ostrm << "\n"; 129 } 130 } 131 delete syms; 132} 133 134 135 136} // namespace fst 137 138#endif // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ 139