// print.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// Stand-alone class to print out binary FSTs in the AT&T format,
// helper class for fstprint.cc
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_SCRIPT_PRINT_IMPL_H_
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_SCRIPT_PRINT_IMPL_H_
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <sstream>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDECLARE_string(fst_field_separator);
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Print a binary Fst in textual format, helper class for fstprint.cc
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WARNING: Stand-alone use of this class not recommended, most code should
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// read/write using the binary format which is much more efficient.
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class FstPrinter {
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FstPrinter(const Fst<A> &fst,
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const SymbolTable *isyms,
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const SymbolTable *osyms,
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             const SymbolTable *ssyms,
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             bool accep,
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             bool show_weight_one)
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms),
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0),
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        show_weight_one_(show_weight_one) {}
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Print Fst to an output stream
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Print(ostream *ostrm, const string &dest) {
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ostrm_ = ostrm;
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    dest_ = dest;
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId start = fst_.Start();
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (start == kNoStateId)
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // initial state first
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    PrintState(start);
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (StateIterator< Fst<A> > siter(fst_);
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !siter.Done();
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         siter.Next()) {
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId s = siter.Value();
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (s != start)
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        PrintState(s);
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Maximum line length in text file.
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const int kLineLen = 8096;
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void PrintId(int64 id, const SymbolTable *syms,
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               const char *name) const {
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (syms) {
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string symbol = syms->Find(id);
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (symbol == "") {
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        FSTERROR() << "FstPrinter: Integer " << id
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << " is not mapped to any textual symbol"
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << ", symbol table = " << syms->Name()
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << ", destination = " << dest_;
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        symbol = "?";
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *ostrm_ << symbol;
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *ostrm_ << id;
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void PrintStateId(StateId s) const {
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson     PrintId(s, ssyms_, "state ID");
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void PrintILabel(Label l) const {
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson     PrintId(l, isyms_, "arc input label");
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void PrintOLabel(Label l) const {
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson     PrintId(l, osyms_, "arc output label");
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void PrintState(StateId s) const {
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool output = false;
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ArcIterator< Fst<A> > aiter(fst_, s);
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !aiter.Done();
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         aiter.Next()) {
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Arc arc = aiter.Value();
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      PrintStateId(s);
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *ostrm_ << FLAGS_fst_field_separator[0];
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      PrintStateId(arc.nextstate);
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *ostrm_ << FLAGS_fst_field_separator[0];
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      PrintILabel(arc.ilabel);
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!accep_) {
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        *ostrm_ << FLAGS_fst_field_separator[0];
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        PrintOLabel(arc.olabel);
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (show_weight_one_ || arc.weight != Weight::One())
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        *ostrm_ << FLAGS_fst_field_separator[0] << arc.weight;
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *ostrm_ << "\n";
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      output = true;
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight final = fst_.Final(s);
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (final != Weight::Zero() || !output) {
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      PrintStateId(s);
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (show_weight_one_ || final != Weight::One()) {
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        *ostrm_ << FLAGS_fst_field_separator[0] << final;
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *ostrm_ << "\n";
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Fst<A> &fst_;
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable *isyms_;     // ilabel symbol table
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable *osyms_;     // olabel symbol table
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable *ssyms_;     // slabel symbol table
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool accep_;                   // print as acceptor when possible
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ostream *ostrm_;               // text FST destination
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string dest_;                  // text FST destination name
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool show_weight_one_;         // print weights equal to Weight::One()
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(FstPrinter);
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_SCRIPT_PRINT_IMPL_H_
150