1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// print.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Stand-alone class to print out binary FSTs in the AT&T format, 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// helper class for fstprint.cc 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_SCRIPT_PRINT_IMPL_H_ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_SCRIPT_PRINT_IMPL_H_ 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <sstream> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDECLARE_string(fst_field_separator); 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Print a binary Fst in textual format, helper class for fstprint.cc 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WARNING: Stand-alone use of this class not recommended, most code should 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// read/write using the binary format which is much more efficient. 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class FstPrinter { 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FstPrinter(const Fst<A> &fst, 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *isyms, 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *osyms, 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *ssyms, 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool accep, 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool show_weight_one) 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms), 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0), 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson show_weight_one_(show_weight_one) {} 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Print Fst to an output stream 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Print(ostream *ostrm, const string &dest) { 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostrm_ = ostrm; 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dest_ = dest; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId start = fst_.Start(); 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (start == kNoStateId) 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // initial state first 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintState(start); 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateIterator< Fst<A> > siter(fst_); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !siter.Done(); 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson siter.Next()) { 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = siter.Value(); 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s != start) 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintState(s); 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Maximum line length in text file. 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const int kLineLen = 8096; 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void PrintId(int64 id, const SymbolTable *syms, 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const char *name) const { 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (syms) { 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string symbol = syms->Find(id); 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (symbol == "") { 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "FstPrinter: Integer " << id 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " is not mapped to any textual symbol" 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", symbol table = " << syms->Name() 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", destination = " << dest_; 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson symbol = "?"; 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << symbol; 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << id; 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void PrintStateId(StateId s) const { 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintId(s, ssyms_, "state ID"); 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void PrintILabel(Label l) const { 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintId(l, isyms_, "arc input label"); 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void PrintOLabel(Label l) const { 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintId(l, osyms_, "arc output label"); 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void PrintState(StateId s) const { 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool output = false; 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator< Fst<A> > aiter(fst_, s); 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) { 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc = aiter.Value(); 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintStateId(s); 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << FLAGS_fst_field_separator[0]; 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintStateId(arc.nextstate); 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << FLAGS_fst_field_separator[0]; 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintILabel(arc.ilabel); 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!accep_) { 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << FLAGS_fst_field_separator[0]; 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintOLabel(arc.olabel); 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (show_weight_one_ || arc.weight != Weight::One()) 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << FLAGS_fst_field_separator[0] << arc.weight; 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << "\n"; 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson output = true; 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight final = fst_.Final(s); 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (final != Weight::Zero() || !output) { 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrintStateId(s); 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (show_weight_one_ || final != Weight::One()) { 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << FLAGS_fst_field_separator[0] << final; 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ostrm_ << "\n"; 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A> &fst_; 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *isyms_; // ilabel symbol table 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *osyms_; // olabel symbol table 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *ssyms_; // slabel symbol table 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool accep_; // print as acceptor when possible 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostream *ostrm_; // text FST destination 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string dest_; // text FST destination name 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool show_weight_one_; // print weights equal to Weight::One() 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(FstPrinter); 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_SCRIPT_PRINT_IMPL_H_ 150