1// info.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Class to compute various information about FSTs, helper class for fstinfo.cc
20
21#ifndef FST_SCRIPT_INFO_IMPL_H_
22#define FST_SCRIPT_INFO_IMPL_H_
23
24#include <string>
25#include <vector>
26using std::vector;
27
28#include <fst/connect.h>
29#include <fst/dfs-visit.h>
30#include <fst/fst.h>
31#include <fst/lookahead-matcher.h>
32#include <fst/matcher.h>
33#include <fst/queue.h>
34#include <fst/test-properties.h>
35#include <fst/verify.h>
36#include <fst/visit.h>
37
38namespace fst {
39
40// Compute various information about FSTs, helper class for fstinfo.cc.
41// WARNING: Stand-alone use of this class is not recommended, most code
42// should call directly the relevant library functions: Fst<A>::NumStates,
43// Fst<A>::NumArcs, TestProperties, ...
44template <class A> class FstInfo {
45 public:
46  typedef A Arc;
47  typedef typename A::StateId StateId;
48  typedef typename A::Label Label;
49  typedef typename A::Weight Weight;
50
51  // When info_type is "short" (or "auto" and not an ExpandedFst)
52  // then only minimal info is computed and can be requested.
53  FstInfo(const Fst<A> &fst, bool test_properties,
54          const string &arc_filter_type = "any",
55          string info_type = "auto", bool verify = true)
56      : fst_type_(fst.Type()),
57        input_symbols_(fst.InputSymbols() ?
58                       fst.InputSymbols()->Name() : "none"),
59        output_symbols_(fst.OutputSymbols() ?
60                        fst.OutputSymbols()->Name() : "none"),
61        nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0),
62        nepsilons_(0), niepsilons_(0), noepsilons_(0),
63        naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0),
64        input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE),
65        input_lookahead_(false), output_lookahead_(false),
66        properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) {
67    if (info_type == "long") {
68      long_info_ = true;
69    } else if (info_type == "short") {
70      long_info_ = false;
71    } else if (info_type == "auto") {
72      long_info_ = fst.Properties(kExpanded, false);
73    } else {
74      FSTERROR() << "Bad info type: " << info_type;
75      return;
76    }
77
78    if (!long_info_)
79      return;
80
81    // If the FST is not sane, we return.
82    if (verify && !Verify(fst)) {
83      FSTERROR() << "FstInfo: Verify: FST not well-formed.";
84      return;
85    }
86
87    start_ = fst.Start();
88    properties_ = fst.Properties(kFstProperties, test_properties);
89
90    for (StateIterator< Fst<A> > siter(fst);
91         !siter.Done();
92         siter.Next()) {
93      ++nstates_;
94      StateId s = siter.Value();
95      if (fst.Final(s) != Weight::Zero())
96        ++nfinal_;
97      for (ArcIterator< Fst<A> > aiter(fst, s);
98           !aiter.Done();
99           aiter.Next()) {
100        const A &arc = aiter.Value();
101        ++narcs_;
102        if (arc.ilabel == 0 && arc.olabel == 0)
103          ++nepsilons_;
104        if (arc.ilabel == 0)
105          ++niepsilons_;
106        if (arc.olabel == 0)
107          ++noepsilons_;
108      }
109    }
110
111    {
112      vector<StateId> cc;
113      CcVisitor<Arc> cc_visitor(&cc);
114      FifoQueue<StateId> fifo_queue;
115      if (arc_filter_type == "any") {
116        Visit(fst, &cc_visitor, &fifo_queue);
117      } else if (arc_filter_type == "epsilon") {
118        Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>());
119      } else if (arc_filter_type == "iepsilon") {
120        Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>());
121      } else if (arc_filter_type == "oepsilon") {
122        Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>());
123      } else {
124        FSTERROR() << "Bad arc filter type: " << arc_filter_type;
125        return;
126      }
127
128      for (StateId s = 0; s < cc.size(); ++s) {
129        if (cc[s] >= ncc_)
130          ncc_ = cc[s] + 1;
131      }
132    }
133
134    {
135      vector<StateId> scc;
136      vector<bool> access, coaccess;
137      uint64 props = 0;
138      SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props);
139      if (arc_filter_type == "any") {
140        DfsVisit(fst, &scc_visitor);
141      } else if (arc_filter_type == "epsilon") {
142        DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>());
143      } else if (arc_filter_type == "iepsilon") {
144        DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>());
145      } else if (arc_filter_type == "oepsilon") {
146        DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>());
147      } else {
148        FSTERROR() << "Bad arc filter type: " << arc_filter_type;
149        return;
150      }
151
152      for (StateId s = 0; s < scc.size(); ++s) {
153        if (access[s])
154          ++naccess_;
155        if (coaccess[s])
156          ++ncoaccess_;
157        if (access[s] && coaccess[s])
158          ++nconnect_;
159        if (scc[s] >= nscc_)
160          nscc_ = scc[s] + 1;
161      }
162    }
163
164    LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT);
165    input_match_type_ =  imatcher.Type(test_properties);
166    input_lookahead_ =  imatcher.Flags() & kInputLookAheadMatcher;
167
168    LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT);
169    output_match_type_ =  omatcher.Type(test_properties);
170    output_lookahead_ =  omatcher.Flags() & kOutputLookAheadMatcher;
171  }
172
173  // Short info
174  const string& FstType() const { return fst_type_; }
175  const string& ArcType() const { return A::Type(); }
176  const string& InputSymbols() const { return input_symbols_; }
177  const string& OutputSymbols() const { return output_symbols_; }
178  const bool LongInfo() const { return long_info_; }
179  const string& ArcFilterType() const { return arc_filter_type_; }
180
181  // Long info
182  MatchType InputMatchType() const { CheckLong(); return input_match_type_; }
183  MatchType OutputMatchType() const { CheckLong(); return output_match_type_; }
184  bool InputLookAhead() const { CheckLong(); return input_lookahead_; }
185  bool OutputLookAhead() const { CheckLong();  return output_lookahead_; }
186  int64 NumStates() const { CheckLong();  return nstates_; }
187  int64 NumArcs() const { CheckLong();  return narcs_; }
188  int64 Start() const { CheckLong();  return start_; }
189  int64 NumFinal() const { CheckLong();  return nfinal_; }
190  int64 NumEpsilons() const { CheckLong();  return nepsilons_; }
191  int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; }
192  int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; }
193  int64 NumAccessible() const { CheckLong(); return naccess_; }
194  int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; }
195  int64 NumConnected() const { CheckLong(); return nconnect_; }
196  int64 NumCc() const { CheckLong(); return ncc_; }
197  int64 NumScc() const { CheckLong(); return nscc_; }
198  uint64 Properties() const { CheckLong(); return properties_; }
199
200 private:
201  void CheckLong() const {
202    if (!long_info_)
203      FSTERROR() << "FstInfo: method only available with long info version";
204  }
205
206  string fst_type_;
207  string input_symbols_;
208  string output_symbols_;
209  int64 nstates_;
210  int64 narcs_;
211  int64 start_;
212  int64 nfinal_;
213  int64 nepsilons_;
214  int64 niepsilons_;
215  int64 noepsilons_;
216  int64 naccess_;
217  int64 ncoaccess_;
218  int64 nconnect_;
219  int64 ncc_;
220  int64 nscc_;
221  MatchType input_match_type_;
222  MatchType output_match_type_;
223  bool input_lookahead_;
224  bool output_lookahead_;
225  uint64 properties_;
226  string arc_filter_type_;
227  bool long_info_;
228  DISALLOW_COPY_AND_ASSIGN(FstInfo);
229};
230
231template <class A>
232void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) {
233  ostream &os = pipe ? cerr : cout;
234
235  ios_base::fmtflags old = os.setf(ios::left);
236  os.width(50);
237  os << "fst type" <<  fstinfo.FstType() << endl;
238  os.width(50);
239  os << "arc type" << fstinfo.ArcType() << endl;
240  os.width(50);
241  os << "input symbol table" << fstinfo.InputSymbols() << endl;
242  os.width(50);
243  os << "output symbol table" << fstinfo.OutputSymbols() << endl;
244
245  if (!fstinfo.LongInfo()) {
246    os.setf(old);
247    return;
248  }
249
250  os.width(50);
251  os << "# of states" << fstinfo.NumStates() << endl;
252  os.width(50);
253  os << "# of arcs" << fstinfo.NumArcs() << endl;
254  os.width(50);
255  os << "initial state" << fstinfo.Start() << endl;
256  os.width(50);
257  os << "# of final states" << fstinfo.NumFinal() << endl;
258  os.width(50);
259  os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl;
260  os.width(50);
261  os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl;
262  os.width(50);
263  os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl;
264  os.width(50);
265
266  string arc_type = "";
267  if (fstinfo.ArcFilterType() == "epsilon")
268    arc_type = "epsilon ";
269  else if (fstinfo.ArcFilterType() == "iepsilon")
270    arc_type = "input-epsilon ";
271  else if (fstinfo.ArcFilterType() == "oepsilon")
272    arc_type = "output-epsilon ";
273
274  string accessible_label = "# of " +  arc_type + "accessible states";
275  os.width(50);
276  os << accessible_label << fstinfo.NumAccessible() << endl;
277  string coaccessible_label = "# of " +  arc_type + "coaccessible states";
278  os.width(50);
279  os << coaccessible_label << fstinfo.NumCoAccessible() << endl;
280  string connected_label = "# of " +  arc_type + "connected states";
281  os.width(50);
282  os << connected_label << fstinfo.NumConnected() << endl;
283  string numcc_label = "# of " +  arc_type + "connected components";
284  os.width(50);
285  os << numcc_label << fstinfo.NumCc() << endl;
286  string numscc_label = "# of " +  arc_type + "strongly conn components";
287  os.width(50);
288  os << numscc_label << fstinfo.NumScc() << endl;
289
290  os.width(50);
291  os << "input matcher"
292     << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' :
293         fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
294  os.width(50);
295  os << "output matcher"
296     << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' :
297         fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
298  os.width(50);
299  os << "input lookahead"
300     << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl;
301  os.width(50);
302  os << "output lookahead"
303     << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl;
304
305  uint64 prop = 1;
306  for (int i = 0; i < 64; ++i, prop <<= 1) {
307    if (prop & kBinaryProperties) {
308      char value = 'n';
309      if (fstinfo.Properties() & prop) value = 'y';
310      os.width(50);
311      os << PropertyNames[i] << value << endl;
312    } else if (prop & kPosTrinaryProperties) {
313      char value = '?';
314      if (fstinfo.Properties() & prop) value = 'y';
315      else if (fstinfo.Properties() & prop << 1) value = 'n';
316      os.width(50);
317      os << PropertyNames[i] << value << endl;
318    }
319  }
320  os.setf(old);
321}
322
323}  // namespace fst
324
325#endif  // FST_SCRIPT_INFO_IMPL_H_
326